Add API debug output and optimize comment fetching for completed tasks
@@ -9,6 +9,7 @@ Todoist is a SaaS task manager. Todoist provides backups of current tasks, but d
 - Downloads attachments to `output/attachments/` and references them in the JSON and HTML output
 - JSON and HTML files are named with the current date when the script is run
 - Maintains `Todoist-Completed-History.json` so completed tasks older than Todoist's 90-day API window stay in future exports
+- Reuses archived comments for completed tasks to avoid unnecessary API calls (assumes no new comments after completion)
 
 ## Setup
 
 - Ensure you have Python 3.10 or newer installed (the script uses `str | None` annotations). Check with `python --version` on the command line.
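Note: the README hunk above describes the `Todoist-Completed-History.json` archive. The file's round-trip is not part of this diff; a minimal sketch of the idea, assuming the file maps project ids to lists of completed-task dicts (the script's real `load_completed_history` may differ):

```python
import json
import os

COMPLETED_HISTORY_FILE = "Todoist-Completed-History.json"

def load_completed_history():
    # Return {} when no history exists yet (first run).
    if not os.path.exists(COMPLETED_HISTORY_FILE):
        return {}
    with open(COMPLETED_HISTORY_FILE, "r", encoding="utf-8") as fh:
        return json.load(fh)

def save_completed_history(history):
    # Persist merged completed tasks so items older than the
    # 90-day API window survive into future exports.
    with open(COMPLETED_HISTORY_FILE, "w", encoding="utf-8") as fh:
        json.dump(history, fh, indent=2, default=str)
```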
@@ -4,7 +4,9 @@ import json
 import time
 import getpass
 import shutil
+import copy
 from collections import defaultdict
+from urllib.parse import quote_plus
 import requests
 from datetime import datetime, timedelta
 from todoist_api_python.api import TodoistAPI
@@ -15,6 +17,12 @@ ATTACHMENTS_DIR = os.path.join(OUTPUT_DIR, "attachments")
 LEGACY_ATTACHMENTS_DIR = "attachments"
 TODOIST_API_TOKEN: str | None = None
 COMPLETED_HISTORY_FILE = "Todoist-Completed-History.json"
+COMMENT_REQUEST_MIN_INTERVAL = 0.5  # seconds
+COMMENT_MAX_ATTEMPTS = 8
+PROJECTS_URL = "https://api.todoist.com/rest/v2/projects"
+TASKS_URL = "https://api.todoist.com/rest/v2/tasks"
+COMPLETED_TASKS_URL = "https://api.todoist.com/api/v1/tasks/completed/by_completion_date"
+COMMENTS_URL = "https://api.todoist.com/api/v1/comments"
 
 
 def json_serial(obj):
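Note: the new constants pace the comment endpoint: at most one request per `COMMENT_REQUEST_MIN_INTERVAL` seconds, with up to `COMMENT_MAX_ATTEMPTS` retries on 429. They feed the backoff helper `_get_retry_delay`, which is unchanged in this commit; a sketch of its math, where the Retry-After handling is an assumption (only the exponential fallback line appears in this diff):

```python
def get_retry_delay(response, attempt, base_delay=5, max_delay=120):
    # Honor the server's Retry-After header when present (assumed behavior);
    # otherwise fall back to capped exponential backoff: 5, 10, 20, ... up to 120s.
    if response is not None:
        retry_after = getattr(response, "headers", {}).get("Retry-After")
        if retry_after and str(retry_after).isdigit():
            return min(max_delay, int(retry_after))
    return min(max_delay, base_delay * (2 ** attempt))
```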
@@ -119,28 +127,63 @@ def normalize_timestamp(value):
     return str(value)
 
 
-def merge_completed_lists(history_tasks, new_tasks):
-    merged = []
-    seen = set()
-
-    def make_key(task):
-        task_id = str(task.get('id', ''))
-
-    def add_task(task):
-        key = make_key(task)
-        if key in seen:
-            return
-        seen.add(key)
+def make_completed_task_key_from_dict(task):
+    task_id = str(task.get('id', '')) if isinstance(task, dict) else ""
+    if not task_id:
+        return None
+    completed_at = normalize_timestamp(task.get('completed_at'))
+    if not completed_at:
+        completed_at = normalize_timestamp(task.get('updated_at'))
+    return (task_id, completed_at)
+
+
+def make_completed_task_key_from_api(task):
+    task_id = getattr(task, "id", None)
+    if not task_id:
+        return None
+    completed_at = normalize_timestamp(getattr(task, "completed_at", None))
+    if not completed_at:
+        completed_at = normalize_timestamp(getattr(task, "updated_at", None))
+    return (str(task_id), completed_at)
+
+
+def merge_completed_lists(history_tasks, new_tasks):
+    merged = []
+    index_by_key = {}
+
+    def merge_task_dicts(primary, secondary, prefer_primary=True):
+        for key, value in secondary.items():
+            if key == 'comments':
+                if (not primary.get('comments')) and value:
+                    primary['comments'] = value
+                continue
+            if key == 'attachments':
+                if (not primary.get('attachments')) and value:
+                    primary['attachments'] = value
+                continue
+            if key not in primary or primary[key] in (None, "", [], {}):
+                primary[key] = value
+                continue
+            if not prefer_primary:
+                primary[key] = value
+        return primary
+
+    def add_or_merge(task, prefer_existing=True):
+        key = make_completed_task_key_from_dict(task)
+        if key is None:
+            merged.append(task)
+            return
+        if key in index_by_key:
+            idx = index_by_key[key]
+            merge_task_dicts(merged[idx], task, prefer_primary=prefer_existing)
+        else:
+            merged.append(task)
+            index_by_key[key] = len(merged) - 1
 
     for item in new_tasks:
-        add_task(item)
+        add_or_merge(item, prefer_existing=True)
     for item in history_tasks:
-        add_task(item)
+        add_or_merge(item, prefer_existing=True)
 
     def sort_key(task):
         completed_at = normalize_timestamp(task.get('completed_at'))
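Note: keying on `(task id, completed_at)` means re-running the export never duplicates a completed task, and a fresh copy with empty `comments`/`attachments` gets backfilled from the archive. A quick usage sketch, assuming `merge_completed_lists` returns the merged list (its return statement is outside this hunk):

```python
history = [{"id": "1", "completed_at": "2024-01-02T03:04:05Z",
            "content": "Write report", "comments": [{"content": "done early"}]}]
fresh = [{"id": "1", "completed_at": "2024-01-02T03:04:05Z",
          "content": "Write report", "comments": []}]

merged = merge_completed_lists(history, fresh)
# One task, not two: both copies share the key ("1", <normalized timestamp>).
# The fresh copy wins on conflicts, but its empty comments list is
# backfilled from the archived copy.
assert len(merged) == 1
assert merged[0]["comments"] == [{"content": "done early"}]
```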
@@ -232,11 +275,12 @@ def _get_retry_delay(response, attempt, base_delay=5, max_delay=120):
     return min(max_delay, base_delay * (2 ** attempt))
 
 
-def execute_with_rate_limit(func, *args, **kwargs):
+def execute_with_rate_limit(func, *args, max_attempts=5, request_desc=None, **kwargs):
     attempts = 0
-    max_attempts = 5
+    desc = request_desc or getattr(func, "__name__", "call")
     while True:
         try:
+            print(f" Calling {desc}")
             return func(*args, **kwargs)
         except Exception as error:  # pylint: disable=broad-except
             status_code = getattr(error, "status_code", None)
@@ -246,7 +290,9 @@ def execute_with_rate_limit(func, *args, **kwargs):
             if status_code == 429 and attempts < max_attempts:
                 delay = _get_retry_delay(response, attempts)
                 attempts += 1
-                print(f"Rate limit hit for {func.__name__}. Waiting {delay} seconds before retry {attempts}/{max_attempts}...")
+                print(f" Rate limit hit for {desc}. Waiting {delay} seconds before retry {attempts}/{max_attempts}...")
+                if delay > 1:
+                    print(f" Waiting {delay} seconds due to rate limiting")
                 time.sleep(delay)
                 continue
             raise
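Note: `max_attempts` and `request_desc` are keyword-only (they follow `*args`), so existing positional callers keep working unchanged. A usage sketch mirroring the comment fetches below; the token and task id are placeholders:

```python
from todoist_api_python.api import TodoistAPI

api = TodoistAPI("<your-token>")  # placeholder token
# request_desc only affects debug output; task_id "12345" is illustrative.
comments_iter = execute_with_rate_limit(
    api.get_comments,
    max_attempts=8,
    request_desc="GET https://api.todoist.com/api/v1/comments?task_id=12345",
    task_id="12345",
)
```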
@@ -255,7 +301,10 @@ def execute_with_rate_limit(func, *args, **kwargs):
 def fetch_all_projects(api):
     projects_by_id = {}
     try:
-        projects_iter = execute_with_rate_limit(api.get_projects)
+        projects_iter = execute_with_rate_limit(
+            api.get_projects,
+            request_desc=f"GET {PROJECTS_URL}"
+        )
         for batch in projects_iter:
             for project in batch:
                 projects_by_id[str(getattr(project, "id", ""))] = project
@@ -267,7 +316,10 @@ def fetch_all_projects(api):
 def fetch_active_tasks_by_project(api):
     tasks_by_project = defaultdict(list)
     try:
-        tasks_iter = execute_with_rate_limit(api.get_tasks)
+        tasks_iter = execute_with_rate_limit(
+            api.get_tasks,
+            request_desc=f"GET {TASKS_URL}"
+        )
         for batch in tasks_iter:
             for task in batch:
                 tasks_by_project[str(getattr(task, "project_id", ""))].append(task)
@@ -280,8 +332,10 @@ def fetch_active_tasks_by_project(api):
 def fetch_completed_tasks_by_project(api, since, until):
     tasks_by_project = defaultdict(list)
     try:
+        query = f"?since={since.isoformat()}&until={until.isoformat()}"
         completed_iter = execute_with_rate_limit(
             api.get_completed_tasks_by_completion_date,
+            request_desc=f"GET {COMPLETED_TASKS_URL}{query}",
             since=since,
             until=until,
         )
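Note: building `query` by hand mirrors the SDK call purely for the debug line, so the log shows the exact window requested. The window itself is derived in `main()`; only `until = datetime.now()` appears in this diff, so the `since` computation below is an assumption based on the README's 90-day limit:

```python
from datetime import datetime, timedelta

until = datetime.now()
# Todoist's completed-tasks API only reaches back ~90 days (per the README),
# so request the maximum window; older tasks come from the history file.
since = until - timedelta(days=90)
query = f"?since={since.isoformat()}&until={until.isoformat()}"
```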
@@ -297,29 +351,78 @@ def fetch_completed_tasks_by_project(api, since, until):
 def fetch_comments_by_task(api, project_ids, task_ids):
     comments_by_task = defaultdict(list)
     total_comments = 0
+    last_comment_call = 0.0
+
+    def throttled_get_comments(**kwargs):
+        nonlocal last_comment_call
+        elapsed = time.time() - last_comment_call
+        if elapsed < COMMENT_REQUEST_MIN_INTERVAL:
+            time.sleep(COMMENT_REQUEST_MIN_INTERVAL - elapsed)
+        params = []
+        for key, value in kwargs.items():
+            if value is None:
+                continue
+            params.append(f"{key}={quote_plus(str(value))}")
+        query = "&".join(params)
+        desc = f"GET {COMMENTS_URL}{('?' + query) if query else ''}"
+        result = execute_with_rate_limit(
+            api.get_comments,
+            max_attempts=COMMENT_MAX_ATTEMPTS,
+            request_desc=desc,
+            **kwargs,
+        )
+        last_comment_call = time.time()
+        return result
+
+    def handle_comment_error(scope, identifier, error):
+        status_code = getattr(error, "status_code", None)
+        response = getattr(error, "response", None)
+        if status_code is None and response is not None:
+            status_code = getattr(response, "status_code", None)
+        if status_code == 404:
+            print(f" Comments not found for {scope} {identifier} (404). Skipping.")
+            return False
+        if status_code == 429:
+            delay = _get_retry_delay(response, COMMENT_MAX_ATTEMPTS)
+            print(
+                f" Rate limit while fetching comments for {scope} {identifier} after retries; waiting {delay} seconds before continuing."
+            )
+            if delay > 1:
+                print(f" Waiting {delay} seconds due to rate limiting")
+            time.sleep(delay)
+            return True
+        print(f" Error fetching comments for {scope} {identifier}: {error}")
+        return False
 
     for project_id in project_ids:
+        while True:
             try:
-                comments_iter = execute_with_rate_limit(api.get_comments, project_id=project_id)
+                comments_iter = throttled_get_comments(project_id=project_id)
                 for batch in comments_iter:
                     for comment in batch:
                         task_id = str(getattr(comment, "task_id", ""))
                         if task_id:
                             comments_by_task[task_id].append(comment)
                             total_comments += 1
+                break
             except Exception as error:  # pylint: disable=broad-except
-                print(f"Error fetching comments for project {project_id}: {error}")
+                if not handle_comment_error("project", project_id, error):
+                    break
     missing_task_ids = [task_id for task_id in task_ids if task_id not in comments_by_task]
     for task_id in missing_task_ids:
+        while True:
             try:
-                comments_iter = execute_with_rate_limit(api.get_comments, task_id=task_id)
+                comments_iter = throttled_get_comments(task_id=task_id)
                 for batch in comments_iter:
                     for comment in batch:
                         key = str(getattr(comment, "task_id", ""))
                         if key:
                             comments_by_task[key].append(comment)
                             total_comments += 1
+                break
             except Exception as error:  # pylint: disable=broad-except
-                print(f"Error fetching comments for task {task_id}: {error}")
+                if not handle_comment_error("task", task_id, error):
+                    break
     print(
         f"Fetched {total_comments} comments mapped to {len(comments_by_task)} tasks"
     )
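Note: `throttled_get_comments` layers two protections: client-side spacing so bursts never start, plus the server-driven 429 backoff in `execute_with_rate_limit` for when they happen anyway. The spacing pattern in isolation, as a self-contained sketch:

```python
import time

def min_interval(seconds):
    # Decorator enforcing a minimum gap between successive calls.
    def wrap(func):
        last_call = 0.0
        def inner(*args, **kwargs):
            nonlocal last_call
            elapsed = time.time() - last_call
            if elapsed < seconds:
                time.sleep(seconds - elapsed)
            try:
                return func(*args, **kwargs)
            finally:
                # Stamp after the call so the gap covers the request itself.
                last_call = time.time()
        return inner
    return wrap

@min_interval(0.5)
def fetch_comments(task_id):
    ...  # stand-in for the real API call
```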
@@ -429,26 +532,37 @@ def main():
     until = datetime.now()
     active_tasks_by_project = fetch_active_tasks_by_project(api)
     completed_tasks_by_project = fetch_completed_tasks_by_project(api, since=since, until=until)
-    comment_project_ids = sorted(
+    completed_history = load_completed_history()
+    history_by_key = {}
+    for task_list in completed_history.values():
+        for stored_task in task_list:
+            key = make_completed_task_key_from_dict(stored_task)
+            if key:
+                history_by_key[key] = stored_task
+
+    active_comment_project_ids = sorted(
         pid
-        for pid in (set(active_tasks_by_project.keys()) | set(completed_tasks_by_project.keys()))
-        if pid
+        for pid, tasks in active_tasks_by_project.items()
+        if pid and tasks
     )
-    task_ids_for_comments: set[str] = set()
-    for task_list in active_tasks_by_project.values():
-        for task in task_list:
-            task_id = getattr(task, "id", None)
-            if task_id:
-                task_ids_for_comments.add(str(task_id))
+    completed_task_ids_for_comments: set[str] = set()
+    skipped_completed_history = {}
     for task_list in completed_tasks_by_project.values():
         for task in task_list:
-            task_id = getattr(task, "id", None)
-            if task_id:
-                task_ids_for_comments.add(str(task_id))
+            key = make_completed_task_key_from_api(task)
+            if key is None:
+                continue
+            history_entry = history_by_key.get(key)
+            if history_entry:
+                skipped_completed_history[key] = history_entry
+            else:
+                completed_task_ids_for_comments.add(key[0])
 
     comments_by_task = fetch_comments_by_task(
-        api, comment_project_ids, sorted(task_ids_for_comments)
+        api,
+        active_comment_project_ids,
+        sorted(completed_task_ids_for_comments),
     )
-    completed_history = load_completed_history()
     updated_history = {}
     data = []
     for project in projects:
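Note: the skip-or-fetch decision hinges on the API-object key and the stored-dict key agreeing for the same task; if they ever diverged, archived tasks would be re-fetched every run. A quick check of that invariant (values are illustrative):

```python
from types import SimpleNamespace

# A completed task as the SDK returns it (attributes)...
api_task = SimpleNamespace(id=987, completed_at="2024-05-01T12:00:00Z", updated_at=None)
# ...and the same task as the history file stores it (a dict).
stored = {"id": "987", "completed_at": "2024-05-01T12:00:00Z"}

# Both key builders must produce the same (task_id, completed_at) tuple,
# otherwise archived tasks would lose their "already fetched" status.
assert make_completed_task_key_from_api(api_task) == make_completed_task_key_from_dict(stored)
```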
@@ -460,6 +574,15 @@ def main():
         processed_active = [process_task(t, comments_by_task) for t in active_tasks]
         processed_completed = [process_task(t, comments_by_task) for t in completed_tasks]
 
+        for task in processed_completed:
+            key = make_completed_task_key_from_dict(task)
+            history_entry = skipped_completed_history.get(key) if key else None
+            if history_entry:
+                if (not task.get('comments')) and history_entry.get('comments'):
+                    task['comments'] = copy.deepcopy(history_entry['comments'])
+                if (not task.get('attachments')) and history_entry.get('attachments'):
+                    task['attachments'] = copy.deepcopy(history_entry['attachments'])
+
         # Build hierarchy for active tasks
         project_dict['tasks'] = build_task_hierarchy(processed_active)
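Note: `copy.deepcopy` is doing real work here: the backfilled task dict and the archived history entry must not share mutable lists, or later edits to one would silently corrupt the other before the history is rewritten. A minimal illustration of the aliasing bug it avoids:

```python
import copy

archived = {"comments": [{"content": "old note"}]}

task = {"comments": archived["comments"]}           # aliasing: same list object
task["comments"].append({"content": "edited"})
assert len(archived["comments"]) == 2               # archive mutated by accident

safe = {"comments": copy.deepcopy(archived["comments"])}
safe["comments"].append({"content": "edited"})
assert len(archived["comments"]) == 2               # unchanged by the copy's edits
```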