feat(conversions): end-to-end PPT/PPTX/ODP -> PDF pipeline with RQ worker + Gotenberg
DB/model
- Add Conversion model + ConversionStatus enum (pending, processing, ready, failed)
- Alembic migrations: create conversions table, indexes, unique (source_event_media_id, target_format, file_hash), and NOT NULL on file_hash

API
- Enqueue on upload (ppt|pptx|odp) in routes/eventmedia.py: compute sha256, upsert Conversion, enqueue job
- New routes:
  - POST /api/conversions/<media_id>/pdf — ensure/enqueue conversion
  - GET /api/conversions/<media_id>/status — latest status/details
  - GET /api/files/converted/<path> — serve converted PDFs
- Register conversions blueprint in wsgi

Worker
- server/worker.py: convert_event_media_to_pdf
  - Calls Gotenberg /forms/libreoffice/convert, writes to server/media/converted/
  - Updates Conversion status, timestamps, error messages
- Fix media root resolution to /server/media
- Prefer function enqueue over string path; expose server.worker in package init for RQ string compatibility

Queue/infra
- server/task_queue.py: RQ queue helper (REDIS_URL, default redis://redis:6379/0)
- docker-compose:
  - Add redis and gotenberg services
  - Add worker service (rq worker conversions)
  - Pass REDIS_URL and GOTENBERG_URL to server/worker
  - Mount shared media volume in prod for API/worker parity
- docker-compose.override:
  - Add dev redis/gotenberg/worker services
  - Ensure PYTHONPATH + working_dir allow importing server.worker
  - Use rq CLI instead of python -m rq for worker
  - Dashboard dev: run as appropriate user/root and pre-create/chown caches to avoid EACCES

Dashboard dev UX
- Vite: set cacheDir .vite to avoid EACCES in node_modules
- Disable Node inspector by default to avoid port conflicts

Docs
- Update copilot-instructions.md with conversion system: flow, services, env vars, endpoints, storage paths, and data model
This commit is contained in:
94
server/worker.py
Normal file
94
server/worker.py
Normal file
@@ -0,0 +1,94 @@
|
||||
import os
|
||||
import traceback
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import requests
|
||||
from sqlalchemy.orm import Session as SASession
|
||||
|
||||
from server.database import Session
|
||||
from models.models import Conversion, ConversionStatus, EventMedia, MediaType
|
||||
|
||||
# Base URL of the Gotenberg service used for document conversion; taken from
# the GOTENBERG_URL environment variable, with a fallback when it is unset.
GOTENBERG_URL = os.environ.get("GOTENBERG_URL", "http://gotenberg:3000")
|
||||
|
||||
|
||||
def _now():
    """Return the current time as a timezone-aware UTC ``datetime``."""
    return datetime.now(tz=timezone.utc)
|
||||
|
||||
|
||||
def convert_event_media_to_pdf(conversion_id: int):
    """
    Job entry point: convert a single EventMedia to PDF using Gotenberg.

    Steps:
    - Load conversion + source media
    - Set status=processing, started_at
    - POST to Gotenberg /forms/libreoffice/convert with the source file
    - Save response bytes to target_path (written to a temporary file first,
      then moved into place, so a failed write never leaves a partial PDF)
    - Set status=ready, completed_at, target_path
    - On error: set status=failed, error_message

    Args:
        conversion_id: Primary key of the Conversion row to process.

    Returns:
        None. The outcome is recorded on the Conversion row; conversion
        errors are caught and stored rather than re-raised. If the
        conversion row does not exist the job returns without doing anything.
    """
    session: SASession = Session()
    try:
        conv: Conversion = session.query(Conversion).get(conversion_id)
        if not conv:
            return

        media: EventMedia = session.query(
            EventMedia).get(conv.source_event_media_id)
        if not media or not media.file_path:
            conv.status = ConversionStatus.failed
            conv.error_message = "Source media or file_path missing"
            conv.completed_at = _now()
            session.commit()
            return

        conv.status = ConversionStatus.processing
        conv.started_at = _now()
        session.commit()

        # Get the server directory (where this worker.py file is located)
        server_dir = os.path.dirname(os.path.abspath(__file__))
        media_root = os.path.join(server_dir, "media")
        abs_source = os.path.join(media_root, media.file_path)

        # Output target under media/converted
        converted_dir = os.path.join(media_root, "converted")
        os.makedirs(converted_dir, exist_ok=True)
        filename_wo_ext = os.path.splitext(
            os.path.basename(media.file_path))[0]
        pdf_name = f"{filename_wo_ext}.pdf"
        abs_target = os.path.join(converted_dir, pdf_name)

        # Send to Gotenberg
        with open(abs_source, "rb") as f:
            files = {"files": (os.path.basename(abs_source), f)}
            resp = requests.post(
                f"{GOTENBERG_URL}/forms/libreoffice/convert",
                files=files,
                timeout=600,
            )
        resp.raise_for_status()

        # Write to a job-specific temporary file and move it into place, so
        # an interrupted write cannot leave a truncated PDF at the final path.
        tmp_target = f"{abs_target}.{conversion_id}.part"
        try:
            with open(tmp_target, "wb") as out:
                out.write(resp.content)
            os.replace(tmp_target, abs_target)
        except Exception:
            try:
                os.remove(tmp_target)
            except OSError:
                pass
            raise

        conv.status = ConversionStatus.ready
        # Store relative path under media/
        conv.target_path = os.path.relpath(abs_target, media_root)
        conv.completed_at = _now()
        session.commit()
    except requests.exceptions.Timeout:
        _record_conversion_failure(session, conversion_id, "Conversion timeout")
    except Exception as e:
        _record_conversion_failure(
            session, conversion_id, f"{e}\n{traceback.format_exc()}")
    finally:
        session.close()


def _record_conversion_failure(session, conversion_id, message):
    """Mark the conversion as failed with ``message`` and commit.

    The session is rolled back first: if the error came from a failed
    flush/commit, the session cannot be queried or committed again until the
    broken transaction is discarded. The rollback also drops any uncommitted
    changes (e.g. a pending status=ready) so only the failure is persisted.
    """
    session.rollback()
    conv = session.query(Conversion).get(conversion_id)
    if conv:
        conv.status = ConversionStatus.failed
        conv.error_message = message
        conv.completed_at = _now()
        session.commit()
|
||||
Reference in New Issue
Block a user