Add API debug output and optimize comment fetching for completed tasks

--- a/README.md
+++ b/README.md
@@ -9,6 +9,7 @@ Todoist is a SaaS task manager. Todoist provides backups of current tasks, but d
 - Downloads attachments to `output/attachments/` and references them in the JSON and HTML output
 - JSON and HTML files are named with the current date when the script is run
 - Maintains `Todoist-Completed-History.json` so completed tasks older than Todoist's 90-day API window stay in future exports
+- Reuses archived comments for completed tasks to avoid unnecessary API calls (assumes no new comments after completion)
 
 ## Setup
 - Ensure you have Python 3.8 or newer installed. Check with `python --version` on the command line.
@@ -26,8 +27,8 @@ Todoist is a SaaS task manager. Todoist provides backups of current tasks, but d
 ```bash
 python export_todoist.py export
 ```
 This will create `output/Todoist-Actual-Backup-YYYY-MM-DD.json` and `output/Todoist-Actual-Backup-YYYY-MM-DD.html`, and it will update `output/attachments/` with any downloaded files while leaving `Todoist-Completed-History.json` in the project root.
 Keep `Todoist-Completed-History.json` somewhere safe (e.g., in source control or a backup location); it is the only way the exporter can retain completions older than Todoist's 90-day API retention window.
 4. To see usage instructions, run the script with no arguments or any argument other than `export`.
 
 ## Requirements
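
As an aside, the date-stamped names described above are straightforward to reproduce. This sketch is illustrative only; the exact format string the script uses is not shown in this diff:

```python
from datetime import datetime

# Illustrative: derive the dated export filenames the README describes.
stamp = datetime.now().strftime("%Y-%m-%d")
print(f"Todoist-Actual-Backup-{stamp}.json")
print(f"Todoist-Actual-Backup-{stamp}.html")
```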

--- a/export_todoist.py
+++ b/export_todoist.py
@@ -4,7 +4,9 @@ import json
 import time
 import getpass
 import shutil
+import copy
 from collections import defaultdict
+from urllib.parse import quote_plus
 import requests
 from datetime import datetime, timedelta
 from todoist_api_python.api import TodoistAPI
@@ -15,6 +17,12 @@ ATTACHMENTS_DIR = os.path.join(OUTPUT_DIR, "attachments")
 LEGACY_ATTACHMENTS_DIR = "attachments"
 TODOIST_API_TOKEN: str | None = None
 COMPLETED_HISTORY_FILE = "Todoist-Completed-History.json"
+COMMENT_REQUEST_MIN_INTERVAL = 0.5 # seconds
+COMMENT_MAX_ATTEMPTS = 8
+PROJECTS_URL = "https://api.todoist.com/rest/v2/projects"
+TASKS_URL = "https://api.todoist.com/rest/v2/tasks"
+COMPLETED_TASKS_URL = "https://api.todoist.com/api/v1/tasks/completed/by_completion_date"
+COMMENTS_URL = "https://api.todoist.com/api/v1/comments"
 
 
 def json_serial(obj):
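
As far as this diff shows, the new `*_URL` constants only feed the `request_desc` debug labels; the requests themselves still go through `todoist_api_python`. A sketch of the kind of log line they produce (the task id below is made up):

```python
from urllib.parse import quote_plus

COMMENTS_URL = "https://api.todoist.com/api/v1/comments"
task_id = "8485093748"  # made-up id, for illustration only
print(f"  Calling GET {COMMENTS_URL}?task_id={quote_plus(task_id)}")
# Prints:   Calling GET https://api.todoist.com/api/v1/comments?task_id=8485093748
```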
@@ -119,28 +127,63 @@ def normalize_timestamp(value):
     return str(value)
 
 
+def make_completed_task_key_from_dict(task):
+    task_id = str(task.get('id', '')) if isinstance(task, dict) else ""
+    if not task_id:
+        return None
+    completed_at = normalize_timestamp(task.get('completed_at'))
+    if not completed_at:
+        completed_at = normalize_timestamp(task.get('updated_at'))
+    return (task_id, completed_at)
+
+
+def make_completed_task_key_from_api(task):
+    task_id = getattr(task, "id", None)
+    if not task_id:
+        return None
+    completed_at = normalize_timestamp(getattr(task, "completed_at", None))
+    if not completed_at:
+        completed_at = normalize_timestamp(getattr(task, "updated_at", None))
+    return (str(task_id), completed_at)
+
+
 def merge_completed_lists(history_tasks, new_tasks):
     merged = []
-    seen = set()
+    index_by_key = {}
 
-    def make_key(task):
-        task_id = str(task.get('id', ''))
-        completed_at = normalize_timestamp(task.get('completed_at'))
-        if not completed_at:
-            completed_at = normalize_timestamp(task.get('updated_at'))
-        return (task_id, completed_at)
+    def merge_task_dicts(primary, secondary, prefer_primary=True):
+        for key, value in secondary.items():
+            if key == 'comments':
+                if (not primary.get('comments')) and value:
+                    primary['comments'] = value
+                continue
+            if key == 'attachments':
+                if (not primary.get('attachments')) and value:
+                    primary['attachments'] = value
+                continue
+            if key not in primary or primary[key] in (None, "", [], {}):
+                primary[key] = value
+                continue
+            if not prefer_primary:
+                primary[key] = value
+        return primary
 
-    def add_task(task):
-        key = make_key(task)
-        if key in seen:
+    def add_or_merge(task, prefer_existing=True):
+        key = make_completed_task_key_from_dict(task)
+        if key is None:
+            merged.append(task)
             return
-        seen.add(key)
-        merged.append(task)
+        if key in index_by_key:
+            idx = index_by_key[key]
+            merge_task_dicts(merged[idx], task, prefer_primary=prefer_existing)
+        else:
+            merged.append(task)
+            index_by_key[key] = len(merged) - 1
 
     for item in new_tasks:
-        add_task(item)
+        add_or_merge(item, prefer_existing=True)
     for item in history_tasks:
-        add_task(item)
+        add_or_merge(item, prefer_existing=True)
 
     def sort_key(task):
         completed_at = normalize_timestamp(task.get('completed_at'))
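
The merge policy above is easiest to see in isolation. The following is an illustrative reimplementation of the `comments`/`attachments` backfill for the `prefer_primary=True` case (not the committed code; `backfill`, `fresh`, and `archived` are names invented for this example): when the same `(id, completed_at)` key shows up in both the fresh export and the archive, existing non-empty fields win and empty ones are filled from the other side.

```python
# Sketch of the backfill rule in merge_task_dicts (prefer_primary=True case).
def backfill(primary, secondary):
    for key, value in secondary.items():
        if key in ('comments', 'attachments'):
            if (not primary.get(key)) and value:
                primary[key] = value
        elif key not in primary or primary[key] in (None, "", [], {}):
            primary[key] = value
    return primary

fresh = {'id': '42', 'completed_at': '2024-05-01T10:00:00Z', 'comments': []}
archived = {'id': '42', 'completed_at': '2024-05-01T10:00:00Z',
            'comments': [{'content': 'done!'}]}
print(backfill(fresh, archived)['comments'])  # [{'content': 'done!'}]
```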
@@ -232,11 +275,12 @@ def _get_retry_delay(response, attempt, base_delay=5, max_delay=120):
     return min(max_delay, base_delay * (2 ** attempt))
 
 
-def execute_with_rate_limit(func, *args, **kwargs):
+def execute_with_rate_limit(func, *args, max_attempts=5, request_desc=None, **kwargs):
     attempts = 0
-    max_attempts = 5
+    desc = request_desc or getattr(func, "__name__", "call")
     while True:
         try:
+            print(f"  Calling {desc}")
             return func(*args, **kwargs)
         except Exception as error: # pylint: disable=broad-except
             status_code = getattr(error, "status_code", None)
@@ -246,7 +290,9 @@ def execute_with_rate_limit(func, *args, **kwargs):
         if status_code == 429 and attempts < max_attempts:
             delay = _get_retry_delay(response, attempts)
             attempts += 1
-            print(f"Rate limit hit for {func.__name__}. Waiting {delay} seconds before retry {attempts}/{max_attempts}...")
+            print(f"  Rate limit hit for {desc}. Waiting {delay} seconds before retry {attempts}/{max_attempts}...")
+            if delay > 1:
+                print(f"  Waiting {delay} seconds due to rate limiting")
             time.sleep(delay)
             continue
         raise
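
For context, the hunk above shows `_get_retry_delay` returning `min(max_delay, base_delay * (2 ** attempt))`, so with the defaults `base_delay=5` and `max_delay=120` the successive waits are easy to tabulate:

```python
# Backoff schedule implied by min(max_delay, base_delay * (2 ** attempt)).
for attempt in range(6):
    print(attempt, min(120, 5 * 2 ** attempt))
# 0 5, 1 10, 2 20, 3 40, 4 80, 5 120 (capped from here on)
```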
@@ -255,7 +301,10 @@
 def fetch_all_projects(api):
     projects_by_id = {}
     try:
-        projects_iter = execute_with_rate_limit(api.get_projects)
+        projects_iter = execute_with_rate_limit(
+            api.get_projects,
+            request_desc=f"GET {PROJECTS_URL}"
+        )
         for batch in projects_iter:
             for project in batch:
                 projects_by_id[str(getattr(project, "id", ""))] = project
@@ -267,7 +316,10 @@
 def fetch_active_tasks_by_project(api):
     tasks_by_project = defaultdict(list)
     try:
-        tasks_iter = execute_with_rate_limit(api.get_tasks)
+        tasks_iter = execute_with_rate_limit(
+            api.get_tasks,
+            request_desc=f"GET {TASKS_URL}"
+        )
         for batch in tasks_iter:
             for task in batch:
                 tasks_by_project[str(getattr(task, "project_id", ""))].append(task)
@@ -280,8 +332,10 @@
 def fetch_completed_tasks_by_project(api, since, until):
     tasks_by_project = defaultdict(list)
     try:
+        query = f"?since={since.isoformat()}&until={until.isoformat()}"
         completed_iter = execute_with_rate_limit(
             api.get_completed_tasks_by_completion_date,
+            request_desc=f"GET {COMPLETED_TASKS_URL}{query}",
             since=since,
             until=until,
         )
@@ -297,29 +351,78 @@
 def fetch_comments_by_task(api, project_ids, task_ids):
     comments_by_task = defaultdict(list)
     total_comments = 0
+    last_comment_call = 0.0
+
+    def throttled_get_comments(**kwargs):
+        nonlocal last_comment_call
+        elapsed = time.time() - last_comment_call
+        if elapsed < COMMENT_REQUEST_MIN_INTERVAL:
+            time.sleep(COMMENT_REQUEST_MIN_INTERVAL - elapsed)
+        params = []
+        for key, value in kwargs.items():
+            if value is None:
+                continue
+            params.append(f"{key}={quote_plus(str(value))}")
+        query = "&".join(params)
+        desc = f"GET {COMMENTS_URL}{('?' + query) if query else ''}"
+        result = execute_with_rate_limit(
+            api.get_comments,
+            max_attempts=COMMENT_MAX_ATTEMPTS,
+            request_desc=desc,
+            **kwargs,
+        )
+        last_comment_call = time.time()
+        return result
+
+    def handle_comment_error(scope, identifier, error):
+        status_code = getattr(error, "status_code", None)
+        response = getattr(error, "response", None)
+        if status_code is None and response is not None:
+            status_code = getattr(response, "status_code", None)
+        if status_code == 404:
+            print(f"  Comments not found for {scope} {identifier} (404). Skipping.")
+            return False
+        if status_code == 429:
+            delay = _get_retry_delay(response, COMMENT_MAX_ATTEMPTS)
+            print(
+                f"  Rate limit while fetching comments for {scope} {identifier} after retries; waiting {delay} seconds before continuing."
+            )
+            if delay > 1:
+                print(f"  Waiting {delay} seconds due to rate limiting")
+            time.sleep(delay)
+            return True
+        print(f"  Error fetching comments for {scope} {identifier}: {error}")
+        return False
+
     for project_id in project_ids:
-        try:
-            comments_iter = execute_with_rate_limit(api.get_comments, project_id=project_id)
-            for batch in comments_iter:
-                for comment in batch:
-                    task_id = str(getattr(comment, "task_id", ""))
-                    if task_id:
-                        comments_by_task[task_id].append(comment)
-                        total_comments += 1
-        except Exception as error: # pylint: disable=broad-except
-            print(f"Error fetching comments for project {project_id}: {error}")
+        while True:
+            try:
+                comments_iter = throttled_get_comments(project_id=project_id)
+                for batch in comments_iter:
+                    for comment in batch:
+                        task_id = str(getattr(comment, "task_id", ""))
+                        if task_id:
+                            comments_by_task[task_id].append(comment)
+                            total_comments += 1
+                break
+            except Exception as error: # pylint: disable=broad-except
+                if not handle_comment_error("project", project_id, error):
+                    break
     missing_task_ids = [task_id for task_id in task_ids if task_id not in comments_by_task]
     for task_id in missing_task_ids:
-        try:
-            comments_iter = execute_with_rate_limit(api.get_comments, task_id=task_id)
-            for batch in comments_iter:
-                for comment in batch:
-                    key = str(getattr(comment, "task_id", ""))
-                    if key:
-                        comments_by_task[key].append(comment)
-                        total_comments += 1
-        except Exception as error: # pylint: disable=broad-except
-            print(f"Error fetching comments for task {task_id}: {error}")
+        while True:
+            try:
+                comments_iter = throttled_get_comments(task_id=task_id)
+                for batch in comments_iter:
+                    for comment in batch:
+                        key = str(getattr(comment, "task_id", ""))
+                        if key:
+                            comments_by_task[key].append(comment)
+                            total_comments += 1
+                break
+            except Exception as error: # pylint: disable=broad-except
+                if not handle_comment_error("task", task_id, error):
+                    break
     print(
         f"Fetched {total_comments} comments mapped to {len(comments_by_task)} tasks"
     )
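
The throttle inside `throttled_get_comments` is a generic minimum-interval gate and reads well in isolation. A self-contained sketch of the same pattern follows; the commit inlines this logic rather than using a wrapper, and `throttled` and `MIN_INTERVAL` are names invented for this example:

```python
import time

MIN_INTERVAL = 0.5  # seconds, mirroring COMMENT_REQUEST_MIN_INTERVAL

def throttled(func, min_interval=MIN_INTERVAL):
    """Wrap func so consecutive calls start at least min_interval apart."""
    last_call = 0.0
    def wrapper(*args, **kwargs):
        nonlocal last_call
        elapsed = time.time() - last_call
        if elapsed < min_interval:
            time.sleep(min_interval - elapsed)
        result = func(*args, **kwargs)
        last_call = time.time()
        return result
    return wrapper

slow_print = throttled(print)
for i in range(3):
    slow_print("call", i)  # at most one call every 0.5 s
```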
@@ -429,26 +532,37 @@ def main():
     until = datetime.now()
     active_tasks_by_project = fetch_active_tasks_by_project(api)
     completed_tasks_by_project = fetch_completed_tasks_by_project(api, since=since, until=until)
-    comment_project_ids = sorted(
+    completed_history = load_completed_history()
+    history_by_key = {}
+    for task_list in completed_history.values():
+        for stored_task in task_list:
+            key = make_completed_task_key_from_dict(stored_task)
+            if key:
+                history_by_key[key] = stored_task
+
+    active_comment_project_ids = sorted(
         pid
-        for pid in (set(active_tasks_by_project.keys()) | set(completed_tasks_by_project.keys()))
-        if pid
+        for pid, tasks in active_tasks_by_project.items()
+        if pid and tasks
     )
-    task_ids_for_comments: set[str] = set()
-    for task_list in active_tasks_by_project.values():
-        for task in task_list:
-            task_id = getattr(task, "id", None)
-            if task_id:
-                task_ids_for_comments.add(str(task_id))
+    completed_task_ids_for_comments: set[str] = set()
+    skipped_completed_history = {}
     for task_list in completed_tasks_by_project.values():
         for task in task_list:
-            task_id = getattr(task, "id", None)
-            if task_id:
-                task_ids_for_comments.add(str(task_id))
+            key = make_completed_task_key_from_api(task)
+            if key is None:
+                continue
+            history_entry = history_by_key.get(key)
+            if history_entry:
+                skipped_completed_history[key] = history_entry
+            else:
+                completed_task_ids_for_comments.add(key[0])
+
     comments_by_task = fetch_comments_by_task(
-        api, comment_project_ids, sorted(task_ids_for_comments)
+        api,
+        active_comment_project_ids,
+        sorted(completed_task_ids_for_comments),
     )
-    completed_history = load_completed_history()
     updated_history = {}
     data = []
     for project in projects:
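
The net effect of the selection above: a completed task whose `(id, completed_at)` key already exists in the archive never triggers a comments request, and only genuinely new completions do. In set terms (the keys below are hypothetical):

```python
# Hypothetical keys, illustrating the skip rule.
history_keys = {('42', '2024-05-01T10:00:00Z')}
fetched_keys = {('42', '2024-05-01T10:00:00Z'),
                ('43', '2024-05-02T09:30:00Z')}
needs_comment_fetch = {tid for tid, _ in fetched_keys - history_keys}
print(needs_comment_fetch)  # {'43'} -- only the completion not yet archived
```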
@@ -460,6 +574,15 @@ def main():
         processed_active = [process_task(t, comments_by_task) for t in active_tasks]
         processed_completed = [process_task(t, comments_by_task) for t in completed_tasks]
 
+        for task in processed_completed:
+            key = make_completed_task_key_from_dict(task)
+            history_entry = skipped_completed_history.get(key) if key else None
+            if history_entry:
+                if (not task.get('comments')) and history_entry.get('comments'):
+                    task['comments'] = copy.deepcopy(history_entry['comments'])
+                if (not task.get('attachments')) and history_entry.get('attachments'):
+                    task['attachments'] = copy.deepcopy(history_entry['attachments'])
+
         # Build hierarchy for active tasks
         project_dict['tasks'] = build_task_hierarchy(processed_active)
 
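
A closing note on the `copy.deepcopy` calls in the last hunk: they keep the exported task and the archived history entry from sharing one mutable comments list, so a later mutation of either side cannot leak into the other. A minimal demonstration:

```python
import copy

archived = {'comments': [{'content': 'done!'}]}
shared = archived['comments']                      # aliases the archive's list
independent = copy.deepcopy(archived['comments'])  # its own copy

archived['comments'].append({'content': 'late edit'})
print(len(shared))       # 2 -- the alias sees the archive's mutation
print(len(independent))  # 1 -- the deep copy does not
```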