Nest subtasks, Fix attachment handling

This commit is contained in:
2025-10-18 11:53:42 -04:00
parent 43ad7ff17e
commit 406f8cef0b
2 changed files with 328 additions and 95 deletions

View File

@ -9,6 +9,7 @@ from todoist_api_python.api import TodoistAPI
from jinja2 import Environment, FileSystemLoader, select_autoescape
ATTACHMENTS_DIR = "attachments"
TODOIST_API_TOKEN: str | None = None
def usage():
@ -38,19 +39,66 @@ def ensure_attachments_dir():
os.makedirs(ATTACHMENTS_DIR)
def _file_looks_like_html(path):
    """Return True when the file at *path* starts like an HTML document.

    Only the first 256 bytes are inspected. After dropping an optional UTF-8
    byte-order mark and leading whitespace, the prefix is compared
    case-insensitively against ``<!doctype`` and ``<html``.

    Args:
        path: Location of the file to inspect.

    Returns:
        True for an empty file or one whose prefix looks like HTML; False
        otherwise, including when the file cannot be opened or read.
    """
    try:
        with open(path, 'rb') as handle:
            prefix = handle.read(256)
    except OSError:
        return False
    if not prefix:
        # An empty file is reported as HTML-like as well.
        # NOTE(review): presumably so callers discard empty downloads - confirm.
        return True
    # bytes.lstrip() only removes ASCII whitespace, so a UTF-8 BOM in front of
    # the markup would otherwise hide the doctype/html tag.
    if prefix.startswith(b"\xef\xbb\xbf"):
        prefix = prefix[3:]
    snippet = prefix.lstrip().lower()
    return snippet.startswith((b"<!doctype", b"<html"))
def _discard_file(path):
    """Delete *path*, ignoring the error if it is missing or cannot be removed."""
    try:
        os.remove(path)
    except OSError:
        pass


def download_attachment(url, filename):
    """Download *url* into ATTACHMENTS_DIR and return the local file path.

    A file that already exists under the target name is reused, unless it
    looks like HTML while the name does not end in ``.htm``/``.html``; such a
    file is deleted and the download is attempted again. The same check is
    applied to a fresh download, so an HTML response stored under a non-HTML
    name is discarded.

    Args:
        url: Address of the attachment.
        filename: Name to store the attachment under; only its final path
            component is used.

    Returns:
        The path of the local file, or None when the name is unusable, the
        request fails, the status is not 200, the file cannot be written, or
        an HTML body was received for a non-HTML name.
    """
    # The name originates from remote data; keep only the last path component
    # so it cannot point outside ATTACHMENTS_DIR.
    filename = os.path.basename(filename or "")
    if not filename:
        print(f"Failed to download attachment {url}: no usable file name")
        return None

    local_path = os.path.join(ATTACHMENTS_DIR, filename)
    html_expected = filename.lower().endswith(('.htm', '.html'))

    if os.path.exists(local_path):
        if html_expected or not _file_looks_like_html(local_path):
            return local_path
        # Cached file holds HTML under a non-HTML name: drop it and retry.
        _discard_file(local_path)

    print(f"Downloading attachment {url}")
    headers = {}
    if TODOIST_API_TOKEN:
        # NOTE(review): the token is sent to whatever host `url` names -
        # confirm attachment URLs always point at a trusted host.
        headers["Authorization"] = f"Bearer {TODOIST_API_TOKEN}"

    try:
        response = requests.get(url, stream=True, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to download attachment {url}: {exc}")
        return None

    try:
        if response.status_code != 200:
            print(f"Failed to download attachment {url}: HTTP {response.status_code}")
            return None

        content_type = (response.headers.get("Content-Type") or "").lower()
        received_data = False
        try:
            with open(local_path, 'wb') as handle:
                for chunk in response.iter_content(chunk_size=8192):
                    if not chunk:
                        continue
                    received_data = True
                    handle.write(chunk)
        except (OSError, requests.RequestException) as exc:
            # Remove the partial file; otherwise the next run would find it
            # on disk and reuse the truncated content.
            _discard_file(local_path)
            print(f"Failed to save attachment {filename}: {exc}")
            return None
    finally:
        # With stream=True the connection stays open until it is closed.
        response.close()

    looks_like_html = (
        "text/html" in content_type
        or (received_data and _file_looks_like_html(local_path))
    )
    if looks_like_html and not html_expected:
        _discard_file(local_path)
        print(f"Skipped attachment {url}: received HTML response instead of file")
        return None

    print(f"Downloaded attachment {url}")
    return local_path
def _get_retry_delay(response, attempt, base_delay=5, max_delay=120):
@ -134,8 +182,9 @@ def fetch_completed_tasks_by_project(api, since, until):
return tasks_by_project
def fetch_comments_by_task(api, project_ids):
def fetch_comments_by_task(api, project_ids, task_ids):
comments_by_task = defaultdict(list)
total_comments = 0
for project_id in project_ids:
try:
comments_iter = execute_with_rate_limit(api.get_comments, project_id=project_id)
@ -144,9 +193,24 @@ def fetch_comments_by_task(api, project_ids):
task_id = str(getattr(comment, "task_id", ""))
if task_id:
comments_by_task[task_id].append(comment)
total_comments += 1
except Exception as error: # pylint: disable=broad-except
print(f"Error fetching comments for project {project_id}: {error}")
print(f"Fetched comments for {len(comments_by_task)} tasks")
missing_task_ids = [task_id for task_id in task_ids if task_id not in comments_by_task]
for task_id in missing_task_ids:
try:
comments_iter = execute_with_rate_limit(api.get_comments, task_id=task_id)
for batch in comments_iter:
for comment in batch:
key = str(getattr(comment, "task_id", ""))
if key:
comments_by_task[key].append(comment)
total_comments += 1
except Exception as error: # pylint: disable=broad-except
print(f"Error fetching comments for task {task_id}: {error}")
print(
f"Fetched {total_comments} comments mapped to {len(comments_by_task)} tasks"
)
return comments_by_task
@ -171,33 +235,138 @@ def process_task(task, comments_lookup):
# Comments
comment_key = str(task_id) if task_id is not None else None
if comment_key and comment_key in comments_lookup:
task_dict['comments'] = [c.__dict__ for c in comments_lookup[comment_key]]
serialized_comments = []
for comment in comments_lookup[comment_key]:
comment_dict = comment.__dict__.copy()
attachment = getattr(comment, "attachment", None)
if attachment:
attachment_dict = attachment.__dict__.copy()
file_url = attachment_dict.get("file_url")
if file_url:
filename = attachment_dict.get("file_name") or os.path.basename(file_url)
local_path = download_attachment(file_url, filename)
if local_path:
attachment_dict['local_file'] = os.path.relpath(local_path)
comment_dict['attachment'] = attachment_dict
serialized_comments.append(comment_dict)
task_dict['comments'] = serialized_comments
return task_dict
def build_task_hierarchy(task_dicts):
    """Nest serialized tasks under their parents and return the top-level ones.

    Each task dict that has an ``id`` is indexed by ``str(id)``. A task whose
    ``parent_id`` matches an indexed task is appended to that parent's
    ``'subtasks'`` list; every other task becomes a root. Roots and children
    keep the relative order they had in *task_dicts*. Empty ``'subtasks'``
    lists are removed before returning.

    Tasks that are their own parent, or that sit on a loop of parent links,
    are returned as roots rather than being attached, so they never drop out
    of the result.

    Args:
        task_dicts: Flat list of task dictionaries. The dictionaries are
            modified in place (``'subtasks'`` is added or removed).

    Returns:
        List of root task dictionaries with descendants nested under
        ``'subtasks'``.
    """
    task_lookup = {}
    # Keyed by object identity so tasks without an id (or with a duplicated
    # id) still sort by where they appeared in the input.
    position = {}
    for index, task in enumerate(task_dicts):
        position[id(task)] = index
        task_id = task.get('id')
        if task_id is None:
            continue
        task_lookup[str(task_id)] = task
        task.setdefault('subtasks', [])

    def parent_of(task):
        parent_id = task.get('parent_id')
        return task_lookup.get(str(parent_id)) if parent_id else None

    def is_on_parent_loop(task):
        # Walk up the parent links; True only if the walk returns to `task`.
        visited = set()
        ancestor = parent_of(task)
        while ancestor is not None and id(ancestor) not in visited:
            if ancestor is task:
                return True
            visited.add(id(ancestor))
            ancestor = parent_of(ancestor)
        return False

    roots = []
    for task in task_dicts:
        parent = None if task.get('id') is None else parent_of(task)
        if parent is None or is_on_parent_loop(task):
            roots.append(task)
        else:
            parent.setdefault('subtasks', []).append(task)

    def sort_children(children):
        # Entries not taken from task_dicts (pre-existing subtasks) fall back
        # to key 0.
        children.sort(key=lambda item: position.get(id(item), 0))
        for child in children:
            grandchildren = child.get('subtasks') or []
            if grandchildren:
                sort_children(grandchildren)

    sort_children(roots)

    def prune(task):
        # Remove empty subtasks lists for cleanliness.
        subtasks = task.get('subtasks')
        if subtasks:
            for sub in subtasks:
                prune(sub)
        else:
            task.pop('subtasks', None)

    for root in roots:
        prune(root)

    return roots
def main():
if len(sys.argv) != 2 or sys.argv[1] != "export":
usage()
return
ensure_attachments_dir()
api = TodoistAPI(get_api_key())
token = get_api_key()
global TODOIST_API_TOKEN # pylint: disable=global-statement
TODOIST_API_TOKEN = token
api = TodoistAPI(token)
projects = fetch_all_projects(api)
since = (datetime.now() - timedelta(days=90)).replace(hour=0, minute=0, second=0, microsecond=0)
until = datetime.now()
active_tasks_by_project = fetch_active_tasks_by_project(api)
completed_tasks_by_project = fetch_completed_tasks_by_project(api, since=since, until=until)
comment_project_ids = sorted(
pid for pid in (set(active_tasks_by_project.keys()) | set(completed_tasks_by_project.keys())) if pid
pid
for pid in (set(active_tasks_by_project.keys()) | set(completed_tasks_by_project.keys()))
if pid
)
task_ids_for_comments: set[str] = set()
for task_list in active_tasks_by_project.values():
for task in task_list:
task_id = getattr(task, "id", None)
if task_id:
task_ids_for_comments.add(str(task_id))
for task_list in completed_tasks_by_project.values():
for task in task_list:
task_id = getattr(task, "id", None)
if task_id:
task_ids_for_comments.add(str(task_id))
comments_by_task = fetch_comments_by_task(
api, comment_project_ids, sorted(task_ids_for_comments)
)
comments_by_task = fetch_comments_by_task(api, comment_project_ids)
data = []
for project in projects:
project_dict = project.__dict__.copy()
project_id = str(getattr(project, "id", ""))
active_tasks = active_tasks_by_project.get(project_id, [])
completed_tasks = completed_tasks_by_project.get(project_id, [])
project_dict['tasks'] = [process_task(t, comments_by_task) for t in active_tasks]
project_dict['completed_tasks'] = [process_task(t, comments_by_task) for t in completed_tasks]
processed_active = [process_task(t, comments_by_task) for t in active_tasks]
processed_completed = [process_task(t, comments_by_task) for t in completed_tasks]
# Build hierarchy for active tasks
project_dict['tasks'] = build_task_hierarchy(processed_active)
# Map task IDs to names for parent lookups
name_lookup = {}
for task in active_tasks + completed_tasks:
task_id = getattr(task, "id", None)
if task_id:
name_lookup[str(task_id)] = getattr(task, "content", "")
for task in processed_completed:
parent_id = task.get('parent_id')
if parent_id:
parent_name = name_lookup.get(str(parent_id))
if parent_name:
task['parent_task'] = {
"id": str(parent_id),
"content": parent_name,
}
project_dict['completed_tasks'] = processed_completed
data.append(project_dict)
# Write JSON
today = datetime.now().strftime("%Y-%m-%d")

View File

@ -5,24 +5,118 @@
<title>Todoist Backup - {{ date }}</title>
<style>
body { font-family: Arial, sans-serif; background: #f8f9fa; color: #222; margin: 0; padding: 0; }
.container { max-width: 900px; margin: 2em auto; background: #fff; padding: 2em; border-radius: 8px; box-shadow: 0 2px 8px #0001; }
.container { max-width: 960px; margin: 2em auto; background: #fff; padding: 2em; border-radius: 8px; box-shadow: 0 2px 8px #0001; }
h1, h2, h3 { color: #2d72d9; }
.project { margin-bottom: 2em; }
.task-list { margin: 0 0 1em 1em; }
.task { border-bottom: 1px solid #eee; padding: 0.5em 0; }
.completed { color: #888; }
.attachments { margin: 0.5em 0 0.5em 1em; }
.comments { margin: 0.5em 0 0.5em 1em; font-size: 0.95em; color: #444; }
.task-name { font-weight: bold; }
.field-label { font-style: italic; }
a.attachment-link { color: #2d72d9; text-decoration: underline; }
.meta { color: #666; font-size: 0.95em; }
nav ul { list-style: none; padding: 0; margin: 0; }
nav li { margin: 0.25em 0; }
nav a { text-decoration: none; color: #2d72d9; }
nav a:hover { text-decoration: underline; }
.project { margin-bottom: 3em; }
.task-list { margin: 0 0 1em 0; }
.task { border-bottom: 1px solid #eee; padding: 0.75em 0; }
.task:last-child { border-bottom: none; }
.task.level-0 { margin-left: 0; }
.task.level-1 { margin-left: 1.5em; }
.task.level-2 { margin-left: 3em; }
.task.level-3 { margin-left: 4.5em; }
.taskname { font-weight: 600; }
.taskdesc { margin: 0.35em 0; color: #555; }
.meta { color: #777; font-size: 0.9em; display: inline-block; margin-top: 0.25em; }
.field-name { font-weight: 600; }
.attachments ul,
.comments ul { margin: 0.5em 0 0 1.2em; }
.attachments li,
.comments li { margin-bottom: 0.35em; }
.attachment-link { color: #2d72d9; }
.attachment-link:hover { text-decoration: underline; }
.comments { margin-top: 0.5em; }
.comment-attachment { margin-top: 0.25em; }
.task.completed { background: #f3f6ff; padding: 0.75em; border-radius: 6px; border: 1px solid #d6e2ff; }
</style>
</head>
<body>
<div class="container">
<h1>Todoist Backup ({{ date }})</h1>
<!-- Table of Contents -->
{#
  render_task(task, level=0): render one task and, recursively, its subtasks.

  task  - task mapping; the fields read here are content, description, id,
          due (date, is_recurring, string), priority, attachments, comments
          and subtasks.
  level - nesting depth, emitted as the CSS class "level-<level>"; each
          subtask is rendered with level + 1. The stylesheet in this template
          only defines indentation for level-0 through level-3.

  Output, in order: the task content and optional description, a meta line
  joining "ID", "Due" (YYYY-MM-DD), "Recurring" and "Priority" with " | ",
  the attachment list, the comment list (each comment links to its
  attachment, using local_file when present and file_url otherwise), and the
  nested subtasks.

  NOTE(review): content, description and comment text go through
  `markdown | safe`, so autoescaping is bypassed for them; the `markdown`
  filter is presumably registered by the rendering script - confirm.
#}
{% macro render_task(task, level=0) %}
<div class="task level-{{ level }}">
<span class="taskname">{{ task.content | markdown | safe }}</span><br>
{% if task.description %}
<div class="taskdesc">{{ task.description | markdown | safe }}</div>
{% endif %}
<span class="meta">
{% set meta_fields = [] %}
{% if task.id is not none %}
{% set _ = meta_fields.append('ID: ' ~ task.id) %}
{% endif %}
{% if task.due and task.due.date %}
{% set due_dt = task.due.date %}
{% if due_dt.__class__.__name__ == 'datetime' or due_dt.__class__.__name__ == 'date' %}
{% set due_fmt = due_dt.strftime('%Y-%m-%d') %}
{% else %}
{% set due_str = due_dt|string %}
{% if 'T' in due_str %}
{% set due_fmt = due_str[:10] %}
{% else %}
{% set due_fmt = due_str %}
{% endif %}
{% endif %}
{% set _ = meta_fields.append('Due: ' ~ due_fmt) %}
{% endif %}
{% if task.due and task.due.is_recurring %}
{% if task.due.string %}
{% set _ = meta_fields.append('Recurring: ' ~ task.due.string) %}
{% endif %}
{% endif %}
{% if task.priority is not none %}
{% set _ = meta_fields.append('Priority: ' ~ task.priority) %}
{% endif %}
{{ meta_fields|join(' | ') }}
</span><br>
{% if task.attachments %}
<div class="attachments">
<span class="field-name">Attachments:</span>
<ul>
{% for att in task.attachments %}
<li><a class="attachment-link" href="{{ att.local_file }}" download>{{ att.file_name or att.local_file }}</a></li>
{% endfor %}
</ul>
</div>
{% endif %}
{% if task.comments %}
<div class="comments">
<span class="field-name">Comments:</span>
<ul>
{% for comment in task.comments %}
<li>
{{ comment.content | markdown | safe }}
<span class="meta">({{ comment.posted_at }})</span>
{% set attachment = comment.attachment %}
{% if attachment and (attachment.local_file or attachment.file_url) %}
<div class="comment-attachment">
Attachment:
{% if attachment.local_file %}
<a class="attachment-link" href="{{ attachment.local_file }}" download>{{ attachment.file_name or attachment.local_file }}</a>
{% elif attachment.file_url %}
<a class="attachment-link" href="{{ attachment.file_url }}" target="_blank">{{ attachment.file_name or attachment.file_url }}</a>
{% endif %}
</div>
{% endif %}
</li>
{% endfor %}
</ul>
</div>
{% endif %}
{% if task.subtasks %}
<div class="subtasks">
{% for child in task.subtasks %}
{{ render_task(child, level + 1) }}
{% endfor %}
</div>
{% endif %}
</div>
{% endmacro %}
<nav style="margin-bottom:2em;">
<h2 style="font-size:1.2em;">Projects</h2>
<ul>
@ -31,21 +125,32 @@
{% endfor %}
</ul>
</nav>
{% for project in projects %}
<div class="project" id="project-{{ project.id }}">
<h2>{{ project.name }} {% if project.is_archived %}<span class="meta">[Archived]</span>{% endif %}</h2>
<div class="meta">
<span>ID: {{ project.id }}</span> | <span>Color: {{ project.color }}</span> | <span>Created: {{ project.created_at }}</span>
</div>
<h3>Active Tasks</h3>
<div class="task-list">
{% for task in project.tasks %}
<div class="task">
<div class="task-name">{{ task.content | markdown | safe }}</div>
{{ render_task(task, 0) }}
{% else %}
<p class="meta">No active tasks.</p>
{% endfor %}
</div>
<h3>Completed Tasks</h3>
<div class="task-list">
{% for task in project.completed_tasks %}
<div class="task completed">
<span class="task-name">{{ task.content | markdown | safe }}</span><br>
{% if task.description %}
<div class="task-desc">{{ task.description | markdown | safe }}</div>
<div class="taskdesc">{{ task.description | markdown | safe }}</div>
{% endif %}
<div class="meta">
<span class="meta">
{% set meta_fields = [] %}
{% if task.id is not none %}
{% set _ = meta_fields.append('ID: ' ~ task.id) %}
@ -72,66 +177,6 @@
{% if task.priority is not none %}
{% set _ = meta_fields.append('Priority: ' ~ task.priority) %}
{% endif %}
{{ meta_fields|join(' | ') }}
</div>
{% if task.attachments %}
<div class="attachments">
<span class="field-name">Attachments:</span>
<ul>
{% for att in task.attachments %}
<li><a class="attachment-link" href="{{ att.local_file }}" download>{{ att.file_name or att.local_file }}</a></li>
{% endfor %}
</ul>
</div>
{% endif %}
{% if task.comments %}
<div class="comments">
<span class="field-name">Comments:</span>
<ul>
{% for comment in task.comments %}
<li>{{ comment.content }} <span class="meta">({{ comment.posted_at }})</span></li>
{% endfor %}
</ul>
</div>
{% endif %}
</div>
{% endfor %}
</div>
<h3>Completed Tasks</h3>
<div class="task-list">
{% for task in project.completed_tasks %}
<div class="task completed">
<div class="task-name">{{ task.content | markdown | safe }}</div>
{% if task.description %}
<div class="task-desc">{{ task.description | markdown | safe }}</div>
{% endif %}
<div class="meta">
{% set meta_fields = [] %}
{% if task.id is not none %}
{% set _ = meta_fields.append('ID: ' ~ task.id) %}
{% endif %}
{% if task.due and task.due.date %}
{% set due_dt = task.due.date %}
{% if due_dt.__class__.__name__ == 'datetime' or due_dt.__class__.__name__ == 'date' %}
{% set due_fmt = due_dt.strftime('%Y-%m-%d %H:%M') if due_dt.__class__.__name__ == 'datetime' else due_dt.strftime('%Y-%m-%d 00:00') %}
{% else %}
{% set due_str = due_dt|string %}
{% if 'T' in due_str %}
{% set due_fmt = due_str[:16].replace('T', ' ') %}
{% else %}
{% set due_fmt = due_str %}
{% endif %}
{% endif %}
{% set _ = meta_fields.append('Due: ' ~ due_fmt) %}
{% endif %}
{% if task.due and task.due.is_recurring %}
{% if task.due.string %}
{% set _ = meta_fields.append('Recurring: ' ~ task.due.string) %}
{% endif %}
{% endif %}
{% if task.priority is not none %}
{% set _ = meta_fields.append('Priority: ' ~ task.priority) %}
{% endif %}
{% if task.completed_at %}
{% if task.completed_at.__class__.__name__ == 'datetime' or task.completed_at.__class__.__name__ == 'date' %}
{% set completed_fmt = task.completed_at.strftime('%Y-%m-%d') %}
@ -146,7 +191,10 @@
{% set _ = meta_fields.append('Completed: ' ~ completed_fmt) %}
{% endif %}
{{ meta_fields|join(' | ') }}
</div>
</span><br>
{% if task.parent_task %}
<div class="meta">Parent task: {{ task.parent_task.content | markdown | safe }}</div>
{% endif %}
{% if task.attachments %}
<div class="attachments">
<span class="field-name">Attachments:</span>
@ -159,15 +207,31 @@
{% endif %}
{% if task.comments %}
<div class="comments">
<span class="field">Comments:</span>
<span class="field-name">Comments:</span>
<ul>
{% for comment in task.comments %}
<li>{{ comment.content }} <span class="meta">({{ comment.posted_at }})</span></li>
<li>
{{ comment.content | markdown | safe }}
<span class="meta">({{ comment.posted_at }})</span>
{% set attachment = comment.attachment %}
{% if attachment and (attachment.local_file or attachment.file_url) %}
<div class="comment-attachment">
Attachment:
{% if attachment.local_file %}
<a class="attachment-link" href="{{ attachment.local_file }}" download>{{ attachment.file_name or attachment.local_file }}</a>
{% elif attachment.file_url %}
<a class="attachment-link" href="{{ attachment.file_url }}" target="_blank">{{ attachment.file_name or attachment.file_url }}</a>
{% endif %}
</div>
{% endif %}
</li>
{% endfor %}
</ul>
</div>
{% endif %}
</div>
{% else %}
<p class="meta">No completed tasks in this period.</p>
{% endfor %}
</div>
</div>