Add API debug output and optimize comment fetching for completed tasks
This commit is contained in:
		| @ -9,6 +9,7 @@ Todoist is a SaaS task manager. Todoist provides backups of current tasks, but d | ||||
| - Downloads attachments to `output/attachments/` and references them in the JSON and HTML output | ||||
| - JSON and HTML files are named with the current date when the script is run | ||||
| - Maintains `Todoist-Completed-History.json` so completed tasks older than Todoist's 90-day API window stay in future exports | ||||
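| - Reuses the comments and attachments already stored in `Todoist-Completed-History.json` for completed tasks instead of re-fetching them, to avoid unnecessary API calls (assumes no new comments are added after completion) | ||||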
|  | ||||
| ## Setup | ||||
| - Ensure you have Python 3.8 or newer installed. Check with `python --version` on the command line. | ||||
| @ -26,8 +27,8 @@ Todoist is a SaaS task manager. Todoist provides backups of current tasks, but d | ||||
|    ```bash | ||||
|    python export_todoist.py export | ||||
|    ``` | ||||
|   This will create `output/Todoist-Actual-Backup-YYYY-MM-DD.json` and `output/Todoist-Actual-Backup-YYYY-MM-DD.html`, and it will update `output/attachments/` with any downloaded files while leaving `Todoist-Completed-History.json` in the project root. | ||||
|   Keep `Todoist-Completed-History.json` somewhere safe (e.g., in source control or a backup location); it is the only way the exporter can retain completions older than Todoist's 90-day API retention window. | ||||
|     This will create `output/Todoist-Actual-Backup-YYYY-MM-DD.json` and `output/Todoist-Actual-Backup-YYYY-MM-DD.html`, and it will update `output/attachments/` with any downloaded files while leaving `Todoist-Completed-History.json` in the project root. | ||||
|     Keep `Todoist-Completed-History.json` somewhere safe (e.g., in source control or a backup location); it is the only way the exporter can retain completions older than Todoist's 90-day API retention window. | ||||
| 4. To see usage instructions, run the script with no arguments or any argument other than `export`. | ||||
|  | ||||
| ## Requirements | ||||
|  | ||||
| @ -4,7 +4,9 @@ import json | ||||
| import time | ||||
| import getpass | ||||
| import shutil | ||||
| import copy | ||||
| from collections import defaultdict | ||||
| from urllib.parse import quote_plus | ||||
| import requests | ||||
| from datetime import datetime, timedelta | ||||
| from todoist_api_python.api import TodoistAPI | ||||
| @ -15,6 +17,12 @@ ATTACHMENTS_DIR = os.path.join(OUTPUT_DIR, "attachments") | ||||
| LEGACY_ATTACHMENTS_DIR = "attachments" | ||||
| TODOIST_API_TOKEN: str | None = None | ||||
| COMPLETED_HISTORY_FILE = "Todoist-Completed-History.json" | ||||
| COMMENT_REQUEST_MIN_INTERVAL = 0.5  # seconds | ||||
| COMMENT_MAX_ATTEMPTS = 8 | ||||
| PROJECTS_URL = "https://api.todoist.com/rest/v2/projects" | ||||
| TASKS_URL = "https://api.todoist.com/rest/v2/tasks" | ||||
| COMPLETED_TASKS_URL = "https://api.todoist.com/api/v1/tasks/completed/by_completion_date" | ||||
| COMMENTS_URL = "https://api.todoist.com/api/v1/comments" | ||||
|  | ||||
|  | ||||
| def json_serial(obj): | ||||
| @ -119,28 +127,63 @@ def normalize_timestamp(value): | ||||
|     return str(value) | ||||
|  | ||||
|  | ||||
def make_completed_task_key_from_dict(task):
    """Build the de-duplication key for a completed task stored as a dict.

    Args:
        task: A task record as a dict (for example one loaded from the
            completed-history file). Any non-dict value is treated as
            having no usable id.

    Returns:
        A ``(task_id, completed_at)`` tuple where ``task_id`` is the
        stringified ``'id'`` entry and ``completed_at`` is the normalized
        ``'completed_at'`` value, falling back to the normalized
        ``'updated_at'`` value when the former is falsy. Returns ``None``
        when ``task`` is not a dict or its id is missing, ``None`` or
        empty, matching ``make_completed_task_key_from_api``.
    """
    if not isinstance(task, dict):
        return None
    raw_id = task.get('id')
    # Check the raw value before stringifying: str(None) is the truthy
    # string "None", which would give every id-less record the same key
    # and cause unrelated tasks to be merged together.
    if raw_id is None or raw_id == "":
        return None
    task_id = str(raw_id)
    completed_at = normalize_timestamp(task.get('completed_at'))
    if not completed_at:
        completed_at = normalize_timestamp(task.get('updated_at'))
    return (task_id, completed_at)
|  | ||||
|  | ||||
def make_completed_task_key_from_api(task):
    """Build the de-duplication key for a completed task object.

    Reads ``id``, ``completed_at`` and ``updated_at`` as attributes of
    ``task`` (each defaulting to ``None`` when absent).

    Returns:
        ``None`` when the id attribute is missing or falsy; otherwise a
        ``(str(id), timestamp)`` tuple where ``timestamp`` is the
        normalized ``completed_at`` value, or the normalized
        ``updated_at`` value when the former is falsy.
    """
    identifier = getattr(task, "id", None)
    if not identifier:
        return None
    # `or` short-circuits, so updated_at is only consulted when the
    # normalized completed_at is falsy.
    stamp = (
        normalize_timestamp(getattr(task, "completed_at", None))
        or normalize_timestamp(getattr(task, "updated_at", None))
    )
    return (str(identifier), stamp)
|  | ||||
|  | ||||
| def merge_completed_lists(history_tasks, new_tasks): | ||||
|     merged = [] | ||||
|     seen = set() | ||||
|     index_by_key = {} | ||||
|  | ||||
|     def make_key(task): | ||||
|         task_id = str(task.get('id', '')) | ||||
|         completed_at = normalize_timestamp(task.get('completed_at')) | ||||
|         if not completed_at: | ||||
|             completed_at = normalize_timestamp(task.get('updated_at')) | ||||
|         return (task_id, completed_at) | ||||
|     def merge_task_dicts(primary, secondary, prefer_primary=True): | ||||
|         for key, value in secondary.items(): | ||||
|             if key == 'comments': | ||||
|                 if (not primary.get('comments')) and value: | ||||
|                     primary['comments'] = value | ||||
|                 continue | ||||
|             if key == 'attachments': | ||||
|                 if (not primary.get('attachments')) and value: | ||||
|                     primary['attachments'] = value | ||||
|                 continue | ||||
|             if key not in primary or primary[key] in (None, "", [], {}): | ||||
|                 primary[key] = value | ||||
|                 continue | ||||
|             if not prefer_primary: | ||||
|                 primary[key] = value | ||||
|         return primary | ||||
|  | ||||
|     def add_task(task): | ||||
|         key = make_key(task) | ||||
|         if key in seen: | ||||
|     def add_or_merge(task, prefer_existing=True): | ||||
|         key = make_completed_task_key_from_dict(task) | ||||
|         if key is None: | ||||
|             merged.append(task) | ||||
|             return | ||||
|         seen.add(key) | ||||
|         merged.append(task) | ||||
|         if key in index_by_key: | ||||
|             idx = index_by_key[key] | ||||
|             merge_task_dicts(merged[idx], task, prefer_primary=prefer_existing) | ||||
|         else: | ||||
|             merged.append(task) | ||||
|             index_by_key[key] = len(merged) - 1 | ||||
|  | ||||
|     for item in new_tasks: | ||||
|         add_task(item) | ||||
|         add_or_merge(item, prefer_existing=True) | ||||
|     for item in history_tasks: | ||||
|         add_task(item) | ||||
|         add_or_merge(item, prefer_existing=True) | ||||
|  | ||||
|     def sort_key(task): | ||||
|         completed_at = normalize_timestamp(task.get('completed_at')) | ||||
| @ -232,11 +275,12 @@ def _get_retry_delay(response, attempt, base_delay=5, max_delay=120): | ||||
|     return min(max_delay, base_delay * (2 ** attempt)) | ||||
|  | ||||
|  | ||||
| def execute_with_rate_limit(func, *args, **kwargs): | ||||
| def execute_with_rate_limit(func, *args, max_attempts=5, request_desc=None, **kwargs): | ||||
|     attempts = 0 | ||||
|     max_attempts = 5 | ||||
|     desc = request_desc or getattr(func, "__name__", "call") | ||||
|     while True: | ||||
|         try: | ||||
|             print(f"  Calling {desc}") | ||||
|             return func(*args, **kwargs) | ||||
|         except Exception as error:  # pylint: disable=broad-except | ||||
|             status_code = getattr(error, "status_code", None) | ||||
| @ -246,7 +290,9 @@ def execute_with_rate_limit(func, *args, **kwargs): | ||||
|             if status_code == 429 and attempts < max_attempts: | ||||
|                 delay = _get_retry_delay(response, attempts) | ||||
|                 attempts += 1 | ||||
|                 print(f"Rate limit hit for {func.__name__}. Waiting {delay} seconds before retry {attempts}/{max_attempts}...") | ||||
|                 print(f"  Rate limit hit for {desc}. Waiting {delay} seconds before retry {attempts}/{max_attempts}...") | ||||
|                 if delay > 1: | ||||
|                     print(f"  Waiting {delay} seconds due to rate limiting") | ||||
|                 time.sleep(delay) | ||||
|                 continue | ||||
|             raise | ||||
| @ -255,7 +301,10 @@ def execute_with_rate_limit(func, *args, **kwargs): | ||||
| def fetch_all_projects(api): | ||||
|     projects_by_id = {} | ||||
|     try: | ||||
|         projects_iter = execute_with_rate_limit(api.get_projects) | ||||
|         projects_iter = execute_with_rate_limit( | ||||
|             api.get_projects, | ||||
|             request_desc=f"GET {PROJECTS_URL}" | ||||
|         ) | ||||
|         for batch in projects_iter: | ||||
|             for project in batch: | ||||
|                 projects_by_id[str(getattr(project, "id", ""))] = project | ||||
| @ -267,7 +316,10 @@ def fetch_all_projects(api): | ||||
| def fetch_active_tasks_by_project(api): | ||||
|     tasks_by_project = defaultdict(list) | ||||
|     try: | ||||
|         tasks_iter = execute_with_rate_limit(api.get_tasks) | ||||
|         tasks_iter = execute_with_rate_limit( | ||||
|             api.get_tasks, | ||||
|             request_desc=f"GET {TASKS_URL}" | ||||
|         ) | ||||
|         for batch in tasks_iter: | ||||
|             for task in batch: | ||||
|                 tasks_by_project[str(getattr(task, "project_id", ""))].append(task) | ||||
| @ -280,8 +332,10 @@ def fetch_active_tasks_by_project(api): | ||||
| def fetch_completed_tasks_by_project(api, since, until): | ||||
|     tasks_by_project = defaultdict(list) | ||||
|     try: | ||||
|         query = f"?since={since.isoformat()}&until={until.isoformat()}" | ||||
|         completed_iter = execute_with_rate_limit( | ||||
|             api.get_completed_tasks_by_completion_date, | ||||
|             request_desc=f"GET {COMPLETED_TASKS_URL}{query}", | ||||
|             since=since, | ||||
|             until=until, | ||||
|         ) | ||||
| @ -297,29 +351,78 @@ def fetch_completed_tasks_by_project(api, since, until): | ||||
| def fetch_comments_by_task(api, project_ids, task_ids): | ||||
|     comments_by_task = defaultdict(list) | ||||
|     total_comments = 0 | ||||
|     last_comment_call = 0.0 | ||||
|  | ||||
|     def throttled_get_comments(**kwargs): | ||||
|         nonlocal last_comment_call | ||||
|         elapsed = time.time() - last_comment_call | ||||
|         if elapsed < COMMENT_REQUEST_MIN_INTERVAL: | ||||
|             time.sleep(COMMENT_REQUEST_MIN_INTERVAL - elapsed) | ||||
|         params = [] | ||||
|         for key, value in kwargs.items(): | ||||
|             if value is None: | ||||
|                 continue | ||||
|             params.append(f"{key}={quote_plus(str(value))}") | ||||
|         query = "&".join(params) | ||||
|         desc = f"GET {COMMENTS_URL}{('?' + query) if query else ''}" | ||||
|         result = execute_with_rate_limit( | ||||
|             api.get_comments, | ||||
|             max_attempts=COMMENT_MAX_ATTEMPTS, | ||||
|             request_desc=desc, | ||||
|             **kwargs, | ||||
|         ) | ||||
|         last_comment_call = time.time() | ||||
|         return result | ||||
|  | ||||
|     def handle_comment_error(scope, identifier, error): | ||||
|         status_code = getattr(error, "status_code", None) | ||||
|         response = getattr(error, "response", None) | ||||
|         if status_code is None and response is not None: | ||||
|             status_code = getattr(response, "status_code", None) | ||||
|         if status_code == 404: | ||||
|             print(f"  Comments not found for {scope} {identifier} (404). Skipping.") | ||||
|             return False | ||||
|         if status_code == 429: | ||||
|             delay = _get_retry_delay(response, COMMENT_MAX_ATTEMPTS) | ||||
|             print( | ||||
|                 f"  Rate limit while fetching comments for {scope} {identifier} after retries; waiting {delay} seconds before continuing." | ||||
|             ) | ||||
|             if delay > 1: | ||||
|                 print(f"  Waiting {delay} seconds due to rate limiting") | ||||
|             time.sleep(delay) | ||||
|             return True | ||||
|         print(f"  Error fetching comments for {scope} {identifier}: {error}") | ||||
|         return False | ||||
|  | ||||
|     for project_id in project_ids: | ||||
|         try: | ||||
|             comments_iter = execute_with_rate_limit(api.get_comments, project_id=project_id) | ||||
|             for batch in comments_iter: | ||||
|                 for comment in batch: | ||||
|                     task_id = str(getattr(comment, "task_id", "")) | ||||
|                     if task_id: | ||||
|                         comments_by_task[task_id].append(comment) | ||||
|                         total_comments += 1 | ||||
|         except Exception as error:  # pylint: disable=broad-except | ||||
|             print(f"Error fetching comments for project {project_id}: {error}") | ||||
|         while True: | ||||
|             try: | ||||
|                 comments_iter = throttled_get_comments(project_id=project_id) | ||||
|                 for batch in comments_iter: | ||||
|                     for comment in batch: | ||||
|                         task_id = str(getattr(comment, "task_id", "")) | ||||
|                         if task_id: | ||||
|                             comments_by_task[task_id].append(comment) | ||||
|                             total_comments += 1 | ||||
|                 break | ||||
|             except Exception as error:  # pylint: disable=broad-except | ||||
|                 if not handle_comment_error("project", project_id, error): | ||||
|                     break | ||||
|     missing_task_ids = [task_id for task_id in task_ids if task_id not in comments_by_task] | ||||
|     for task_id in missing_task_ids: | ||||
|         try: | ||||
|             comments_iter = execute_with_rate_limit(api.get_comments, task_id=task_id) | ||||
|             for batch in comments_iter: | ||||
|                 for comment in batch: | ||||
|                     key = str(getattr(comment, "task_id", "")) | ||||
|                     if key: | ||||
|                         comments_by_task[key].append(comment) | ||||
|                         total_comments += 1 | ||||
|         except Exception as error:  # pylint: disable=broad-except | ||||
|             print(f"Error fetching comments for task {task_id}: {error}") | ||||
|         while True: | ||||
|             try: | ||||
|                 comments_iter = throttled_get_comments(task_id=task_id) | ||||
|                 for batch in comments_iter: | ||||
|                     for comment in batch: | ||||
|                         key = str(getattr(comment, "task_id", "")) | ||||
|                         if key: | ||||
|                             comments_by_task[key].append(comment) | ||||
|                             total_comments += 1 | ||||
|                 break | ||||
|             except Exception as error:  # pylint: disable=broad-except | ||||
|                 if not handle_comment_error("task", task_id, error): | ||||
|                     break | ||||
|     print( | ||||
|         f"Fetched {total_comments} comments mapped to {len(comments_by_task)} tasks" | ||||
|     ) | ||||
| @ -429,26 +532,37 @@ def main(): | ||||
|     until = datetime.now() | ||||
|     active_tasks_by_project = fetch_active_tasks_by_project(api) | ||||
|     completed_tasks_by_project = fetch_completed_tasks_by_project(api, since=since, until=until) | ||||
|     comment_project_ids = sorted( | ||||
|     completed_history = load_completed_history() | ||||
|     history_by_key = {} | ||||
|     for task_list in completed_history.values(): | ||||
|         for stored_task in task_list: | ||||
|             key = make_completed_task_key_from_dict(stored_task) | ||||
|             if key: | ||||
|                 history_by_key[key] = stored_task | ||||
|  | ||||
|     active_comment_project_ids = sorted( | ||||
|         pid | ||||
|         for pid in (set(active_tasks_by_project.keys()) | set(completed_tasks_by_project.keys())) | ||||
|         if pid | ||||
|         for pid, tasks in active_tasks_by_project.items() | ||||
|         if pid and tasks | ||||
|     ) | ||||
|     task_ids_for_comments: set[str] = set() | ||||
|     for task_list in active_tasks_by_project.values(): | ||||
|         for task in task_list: | ||||
|             task_id = getattr(task, "id", None) | ||||
|             if task_id: | ||||
|                 task_ids_for_comments.add(str(task_id)) | ||||
|     completed_task_ids_for_comments: set[str] = set() | ||||
|     skipped_completed_history = {} | ||||
|     for task_list in completed_tasks_by_project.values(): | ||||
|         for task in task_list: | ||||
|             task_id = getattr(task, "id", None) | ||||
|             if task_id: | ||||
|                 task_ids_for_comments.add(str(task_id)) | ||||
|             key = make_completed_task_key_from_api(task) | ||||
|             if key is None: | ||||
|                 continue | ||||
|             history_entry = history_by_key.get(key) | ||||
|             if history_entry: | ||||
|                 skipped_completed_history[key] = history_entry | ||||
|             else: | ||||
|                 completed_task_ids_for_comments.add(key[0]) | ||||
|  | ||||
|     comments_by_task = fetch_comments_by_task( | ||||
|         api, comment_project_ids, sorted(task_ids_for_comments) | ||||
|         api, | ||||
|         active_comment_project_ids, | ||||
|         sorted(completed_task_ids_for_comments), | ||||
|     ) | ||||
|     completed_history = load_completed_history() | ||||
|     updated_history = {} | ||||
|     data = [] | ||||
|     for project in projects: | ||||
| @ -460,6 +574,15 @@ def main(): | ||||
|         processed_active = [process_task(t, comments_by_task) for t in active_tasks] | ||||
|         processed_completed = [process_task(t, comments_by_task) for t in completed_tasks] | ||||
|  | ||||
|         for task in processed_completed: | ||||
|             key = make_completed_task_key_from_dict(task) | ||||
|             history_entry = skipped_completed_history.get(key) if key else None | ||||
|             if history_entry: | ||||
|                 if (not task.get('comments')) and history_entry.get('comments'): | ||||
|                     task['comments'] = copy.deepcopy(history_entry['comments']) | ||||
|                 if (not task.get('attachments')) and history_entry.get('attachments'): | ||||
|                     task['attachments'] = copy.deepcopy(history_entry['attachments']) | ||||
|  | ||||
|         # Build hierarchy for active tasks | ||||
|         project_dict['tasks'] = build_task_hierarchy(processed_active) | ||||
|  | ||||
|  | ||||
		Reference in New Issue
	
	Block a user