Add API debug output and optimize comment fetching for completed tasks

2025-10-18 13:14:35 -04:00
parent c0cbbb00ba
commit e1f41307f2
2 changed files with 179 additions and 55 deletions

View File: README

@ -9,6 +9,7 @@ Todoist is a SaaS task manager. Todoist provides backups of current tasks, but d
- Downloads attachments to `output/attachments/` and references them in the JSON and HTML output
- JSON and HTML files are named with the current date when the script is run
- Maintains `Todoist-Completed-History.json` so completed tasks older than Todoist's 90-day API window stay in future exports
- Reuses archived comments for completed tasks to avoid unnecessary API calls (assumes no new comments after completion)
## Setup
- Ensure you have Python 3.8 or newer installed. Check with `python --version` on the command line.
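The "reuses archived comments" bullet above corresponds to the key-based history lookup added in the script below. A minimal sketch of the idea, assuming a history entry shaped like the task dicts the merge code reads; only the `id`, `completed_at`, `comments`, and `attachments` fields are confirmed by this diff, everything else is illustrative:

```python
# Hypothetical shapes for illustration; the real file is written by the exporter.
history_entry = {
    "id": "1234567890",
    "completed_at": "2025-07-01T12:00:00+00:00",
    "comments": [{"content": "Done, see attachment"}],
    "attachments": [{"file_name": "report.pdf"}],
}
history_by_key = {("1234567890", "2025-07-01T12:00:00+00:00"): history_entry}

def needs_comment_fetch(task: dict) -> bool:
    """Skip the comments API when an archived copy already holds them."""
    key = (str(task["id"]), task["completed_at"])
    return key not in history_by_key

# An already-archived completion triggers no comment request:
print(needs_comment_fetch({"id": "1234567890",
                           "completed_at": "2025-07-01T12:00:00+00:00"}))  # False
```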
@ -26,8 +27,8 @@ Todoist is a SaaS task manager. Todoist provides backups of current tasks, but d
```bash
python export_todoist.py export
```
This will create `output/Todoist-Actual-Backup-YYYY-MM-DD.json` and `output/Todoist-Actual-Backup-YYYY-MM-DD.html`, and it will update `output/attachments/` with any downloaded files while leaving `Todoist-Completed-History.json` in the project root.
Keep `Todoist-Completed-History.json` somewhere safe (e.g., in source control or a backup location); it is the only way the exporter can retain completions older than Todoist's 90-day API retention window.
4. To see usage instructions, run the script with no arguments or any argument other than `export`.
## Requirements

View File: export_todoist.py

@ -4,7 +4,9 @@ import json
import time
import getpass
import shutil
import copy
from collections import defaultdict
from urllib.parse import quote_plus
import requests
from datetime import datetime, timedelta
from todoist_api_python.api import TodoistAPI
@ -15,6 +17,12 @@ ATTACHMENTS_DIR = os.path.join(OUTPUT_DIR, "attachments")
LEGACY_ATTACHMENTS_DIR = "attachments"
TODOIST_API_TOKEN: str | None = None
COMPLETED_HISTORY_FILE = "Todoist-Completed-History.json"
COMMENT_REQUEST_MIN_INTERVAL = 0.5 # seconds
COMMENT_MAX_ATTEMPTS = 8
PROJECTS_URL = "https://api.todoist.com/rest/v2/projects"
TASKS_URL = "https://api.todoist.com/rest/v2/tasks"
COMPLETED_TASKS_URL = "https://api.todoist.com/api/v1/tasks/completed/by_completion_date"
COMMENTS_URL = "https://api.todoist.com/api/v1/comments"
def json_serial(obj):
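The two new comment constants drive a simple client-side throttle: at most one comments request per half second, with up to eight retry attempts on 429s. A minimal sketch of the pacing logic (the real version lives in `fetch_comments_by_task` below and also routes through `execute_with_rate_limit`):

```python
import time

COMMENT_REQUEST_MIN_INTERVAL = 0.5  # seconds between comment requests

_last_call = 0.0

def throttle() -> None:
    """Sleep just long enough to keep at least 0.5 s between consecutive calls."""
    global _last_call
    elapsed = time.time() - _last_call
    if elapsed < COMMENT_REQUEST_MIN_INTERVAL:
        time.sleep(COMMENT_REQUEST_MIN_INTERVAL - elapsed)
    _last_call = time.time()
```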
@ -119,28 +127,63 @@ def normalize_timestamp(value):
return str(value)
def make_completed_task_key_from_dict(task):
task_id = str(task.get('id', '')) if isinstance(task, dict) else ""
if not task_id:
return None
completed_at = normalize_timestamp(task.get('completed_at'))
if not completed_at:
completed_at = normalize_timestamp(task.get('updated_at'))
return (task_id, completed_at)
def make_completed_task_key_from_api(task):
task_id = getattr(task, "id", None)
if not task_id:
return None
completed_at = normalize_timestamp(getattr(task, "completed_at", None))
if not completed_at:
completed_at = normalize_timestamp(getattr(task, "updated_at", None))
return (str(task_id), completed_at)
def merge_completed_lists(history_tasks, new_tasks):
merged = []
seen = set()
index_by_key = {}
def make_key(task):
task_id = str(task.get('id', ''))
completed_at = normalize_timestamp(task.get('completed_at'))
if not completed_at:
completed_at = normalize_timestamp(task.get('updated_at'))
return (task_id, completed_at)
def merge_task_dicts(primary, secondary, prefer_primary=True):
for key, value in secondary.items():
if key == 'comments':
if (not primary.get('comments')) and value:
primary['comments'] = value
continue
if key == 'attachments':
if (not primary.get('attachments')) and value:
primary['attachments'] = value
continue
if key not in primary or primary[key] in (None, "", [], {}):
primary[key] = value
continue
if not prefer_primary:
primary[key] = value
return primary
def add_task(task):
key = make_key(task)
if key in seen:
def add_or_merge(task, prefer_existing=True):
key = make_completed_task_key_from_dict(task)
if key is None:
merged.append(task)
return
seen.add(key)
merged.append(task)
if key in index_by_key:
idx = index_by_key[key]
merge_task_dicts(merged[idx], task, prefer_primary=prefer_existing)
else:
merged.append(task)
index_by_key[key] = len(merged) - 1
for item in new_tasks:
add_task(item)
add_or_merge(item, prefer_existing=True)
for item in history_tasks:
add_task(item)
add_or_merge(item, prefer_existing=True)
def sort_key(task):
completed_at = normalize_timestamp(task.get('completed_at'))
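A quick worked example of the new merge semantics, assuming `merge_task_dicts` from the hunk above is in scope: a fresh API record with no comments and an archived record with comments collapse into one entry that keeps the archived comments, while fresh values otherwise win.

```python
fresh = {"id": "42", "completed_at": "2025-10-01T09:00:00+00:00", "comments": []}
archived = {"id": "42", "completed_at": "2025-10-01T09:00:00+00:00",
            "comments": [{"content": "carried over"}],
            "content": "old title"}

merged = merge_task_dicts(dict(fresh), archived, prefer_primary=True)
assert merged["comments"] == [{"content": "carried over"}]  # empty list backfilled
assert merged["content"] == "old title"                     # missing key backfilled
assert merged["completed_at"] == fresh["completed_at"]      # primary value kept
```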
@ -232,11 +275,12 @@ def _get_retry_delay(response, attempt, base_delay=5, max_delay=120):
return min(max_delay, base_delay * (2 ** attempt))
def execute_with_rate_limit(func, *args, **kwargs):
def execute_with_rate_limit(func, *args, max_attempts=5, request_desc=None, **kwargs):
attempts = 0
max_attempts = 5
desc = request_desc or getattr(func, "__name__", "call")
while True:
try:
print(f" Calling {desc}")
return func(*args, **kwargs)
except Exception as error: # pylint: disable=broad-except
status_code = getattr(error, "status_code", None)
@ -246,7 +290,9 @@ def execute_with_rate_limit(func, *args, **kwargs):
if status_code == 429 and attempts < max_attempts:
delay = _get_retry_delay(response, attempts)
attempts += 1
print(f"Rate limit hit for {func.__name__}. Waiting {delay} seconds before retry {attempts}/{max_attempts}...")
print(f" Rate limit hit for {desc}. Waiting {delay} seconds before retry {attempts}/{max_attempts}...")
if delay > 1:
print(f" Waiting {delay} seconds due to rate limiting")
time.sleep(delay)
continue
raise
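With the defaults shown in the `_get_retry_delay` hunk header (`base_delay=5`, `max_delay=120`), the fallback backoff used when the 429 response carries no usable retry hint doubles per attempt and caps at two minutes:

```python
def fallback_delay(attempt: int, base_delay: int = 5, max_delay: int = 120) -> int:
    # Mirrors the min(max_delay, base_delay * (2 ** attempt)) line above.
    return min(max_delay, base_delay * (2 ** attempt))

print([fallback_delay(a) for a in range(6)])  # [5, 10, 20, 40, 80, 120]
```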
@ -255,7 +301,10 @@ def execute_with_rate_limit(func, *args, **kwargs):
def fetch_all_projects(api):
projects_by_id = {}
try:
projects_iter = execute_with_rate_limit(api.get_projects)
projects_iter = execute_with_rate_limit(
api.get_projects,
request_desc=f"GET {PROJECTS_URL}"
)
for batch in projects_iter:
for project in batch:
projects_by_id[str(getattr(project, "id", ""))] = project
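The same wrapping pattern repeats for active and completed tasks below: each SDK call gets a `request_desc` naming the REST endpoint it corresponds to, purely for the new debug output. A hypothetical helper that captures the pattern (the diff inlines it at each call site instead):

```python
def call_with_desc(api_method, url, **kwargs):
    # Hypothetical convenience wrapper, not part of the commit.
    return execute_with_rate_limit(api_method, request_desc=f"GET {url}", **kwargs)

# e.g. call_with_desc(api.get_projects, PROJECTS_URL)
```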
@ -267,7 +316,10 @@ def fetch_all_projects(api):
def fetch_active_tasks_by_project(api):
tasks_by_project = defaultdict(list)
try:
tasks_iter = execute_with_rate_limit(api.get_tasks)
tasks_iter = execute_with_rate_limit(
api.get_tasks,
request_desc=f"GET {TASKS_URL}"
)
for batch in tasks_iter:
for task in batch:
tasks_by_project[str(getattr(task, "project_id", ""))].append(task)
@ -280,8 +332,10 @@ def fetch_active_tasks_by_project(api):
def fetch_completed_tasks_by_project(api, since, until):
tasks_by_project = defaultdict(list)
try:
query = f"?since={since.isoformat()}&until={until.isoformat()}"
completed_iter = execute_with_rate_limit(
api.get_completed_tasks_by_completion_date,
request_desc=f"GET {COMPLETED_TASKS_URL}{query}",
since=since,
until=until,
)
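The query string assembled here feeds only the debug label; the window itself is passed to the SDK as `since`/`until`. A sketch of how the 90-day window mentioned in the README might be derived (the actual computation of `since` is outside this diff, so the `timedelta` below is an assumption):

```python
from datetime import datetime, timedelta

COMPLETED_TASKS_URL = "https://api.todoist.com/api/v1/tasks/completed/by_completion_date"

until = datetime.now()              # as set in main() below
since = until - timedelta(days=90)  # assumed: Todoist's completed-task retention window
query = f"?since={since.isoformat()}&until={until.isoformat()}"
print(f"GET {COMPLETED_TASKS_URL}{query}")
```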
@ -297,29 +351,78 @@ def fetch_completed_tasks_by_project(api, since, until):
def fetch_comments_by_task(api, project_ids, task_ids):
comments_by_task = defaultdict(list)
total_comments = 0
last_comment_call = 0.0
def throttled_get_comments(**kwargs):
nonlocal last_comment_call
elapsed = time.time() - last_comment_call
if elapsed < COMMENT_REQUEST_MIN_INTERVAL:
time.sleep(COMMENT_REQUEST_MIN_INTERVAL - elapsed)
params = []
for key, value in kwargs.items():
if value is None:
continue
params.append(f"{key}={quote_plus(str(value))}")
query = "&".join(params)
desc = f"GET {COMMENTS_URL}{('?' + query) if query else ''}"
result = execute_with_rate_limit(
api.get_comments,
max_attempts=COMMENT_MAX_ATTEMPTS,
request_desc=desc,
**kwargs,
)
last_comment_call = time.time()
return result
def handle_comment_error(scope, identifier, error):
status_code = getattr(error, "status_code", None)
response = getattr(error, "response", None)
if status_code is None and response is not None:
status_code = getattr(response, "status_code", None)
if status_code == 404:
print(f" Comments not found for {scope} {identifier} (404). Skipping.")
return False
if status_code == 429:
delay = _get_retry_delay(response, COMMENT_MAX_ATTEMPTS)
print(
f" Rate limit while fetching comments for {scope} {identifier} after retries; waiting {delay} seconds before continuing."
)
if delay > 1:
print(f" Waiting {delay} seconds due to rate limiting")
time.sleep(delay)
return True
print(f" Error fetching comments for {scope} {identifier}: {error}")
return False
for project_id in project_ids:
try:
comments_iter = execute_with_rate_limit(api.get_comments, project_id=project_id)
for batch in comments_iter:
for comment in batch:
task_id = str(getattr(comment, "task_id", ""))
if task_id:
comments_by_task[task_id].append(comment)
total_comments += 1
except Exception as error: # pylint: disable=broad-except
print(f"Error fetching comments for project {project_id}: {error}")
while True:
try:
comments_iter = throttled_get_comments(project_id=project_id)
for batch in comments_iter:
for comment in batch:
task_id = str(getattr(comment, "task_id", ""))
if task_id:
comments_by_task[task_id].append(comment)
total_comments += 1
break
except Exception as error: # pylint: disable=broad-except
if not handle_comment_error("project", project_id, error):
break
missing_task_ids = [task_id for task_id in task_ids if task_id not in comments_by_task]
for task_id in missing_task_ids:
try:
comments_iter = execute_with_rate_limit(api.get_comments, task_id=task_id)
for batch in comments_iter:
for comment in batch:
key = str(getattr(comment, "task_id", ""))
if key:
comments_by_task[key].append(comment)
total_comments += 1
except Exception as error: # pylint: disable=broad-except
print(f"Error fetching comments for task {task_id}: {error}")
while True:
try:
comments_iter = throttled_get_comments(task_id=task_id)
for batch in comments_iter:
for comment in batch:
key = str(getattr(comment, "task_id", ""))
if key:
comments_by_task[key].append(comment)
total_comments += 1
break
except Exception as error: # pylint: disable=broad-except
if not handle_comment_error("task", task_id, error):
break
print(
f"Fetched {total_comments} comments mapped to {len(comments_by_task)} tasks"
)
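Both retry loops above delegate failures to `handle_comment_error`, whose boolean return encodes the policy: `True` (only for a 429) means wait and retry the same project or task; `False` (a 404 or anything else) means log and move on. The control flow, restated compactly:

```python
def fetch_or_skip(fetch, scope, identifier, handle_error):
    """Retry the same call while the handler says so; otherwise give up quietly."""
    while True:
        try:
            return fetch()
        except Exception as error:  # matches the broad except in the diff
            if not handle_error(scope, identifier, error):
                return None
```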
@ -429,26 +532,37 @@ def main():
until = datetime.now()
active_tasks_by_project = fetch_active_tasks_by_project(api)
completed_tasks_by_project = fetch_completed_tasks_by_project(api, since=since, until=until)
comment_project_ids = sorted(
completed_history = load_completed_history()
history_by_key = {}
for task_list in completed_history.values():
for stored_task in task_list:
key = make_completed_task_key_from_dict(stored_task)
if key:
history_by_key[key] = stored_task
active_comment_project_ids = sorted(
pid
for pid in (set(active_tasks_by_project.keys()) | set(completed_tasks_by_project.keys()))
if pid
for pid, tasks in active_tasks_by_project.items()
if pid and tasks
)
task_ids_for_comments: set[str] = set()
for task_list in active_tasks_by_project.values():
for task in task_list:
task_id = getattr(task, "id", None)
if task_id:
task_ids_for_comments.add(str(task_id))
completed_task_ids_for_comments: set[str] = set()
skipped_completed_history = {}
for task_list in completed_tasks_by_project.values():
for task in task_list:
task_id = getattr(task, "id", None)
if task_id:
task_ids_for_comments.add(str(task_id))
key = make_completed_task_key_from_api(task)
if key is None:
continue
history_entry = history_by_key.get(key)
if history_entry:
skipped_completed_history[key] = history_entry
else:
completed_task_ids_for_comments.add(key[0])
comments_by_task = fetch_comments_by_task(
api, comment_project_ids, sorted(task_ids_for_comments)
api,
active_comment_project_ids,
sorted(completed_task_ids_for_comments),
)
completed_history = load_completed_history()
updated_history = {}
data = []
for project in projects:
@ -460,6 +574,15 @@ def main():
processed_active = [process_task(t, comments_by_task) for t in active_tasks]
processed_completed = [process_task(t, comments_by_task) for t in completed_tasks]
for task in processed_completed:
key = make_completed_task_key_from_dict(task)
history_entry = skipped_completed_history.get(key) if key else None
if history_entry:
if (not task.get('comments')) and history_entry.get('comments'):
task['comments'] = copy.deepcopy(history_entry['comments'])
if (not task.get('attachments')) and history_entry.get('attachments'):
task['attachments'] = copy.deepcopy(history_entry['attachments'])
# Build hierarchy for active tasks
project_dict['tasks'] = build_task_hierarchy(processed_active)
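The backfill loop at the end copies archived comments and attachments onto freshly processed completed tasks. `copy.deepcopy` keeps the export and the history entry from sharing mutable lists, so a later edit to one cannot leak into the other; a minimal illustration:

```python
import copy

history_entry = {"comments": [{"content": "archived note"}]}
task = {"comments": []}

if (not task.get("comments")) and history_entry.get("comments"):
    task["comments"] = copy.deepcopy(history_entry["comments"])

task["comments"][0]["content"] = "edited during export"
print(history_entry["comments"][0]["content"])  # still "archived note"
```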