feat(conversions): end-to-end PPT/PPTX/ODP -> PDF pipeline with RQ worker + Gotenberg
DB/model:
- Add Conversion model + ConversionStatus enum (pending, processing, ready, failed).
- Alembic migrations: create conversions table, indexes, unique (source_event_media_id, target_format, file_hash), and NOT NULL on file_hash.

API:
- Enqueue on upload (ppt|pptx|odp) in routes/eventmedia.py: compute sha256, upsert Conversion, enqueue job.
- New routes:
  - POST /api/conversions/<media_id>/pdf — ensure/enqueue conversion
  - GET /api/conversions/<media_id>/status — latest status/details
  - GET /api/files/converted/<path> — serve converted PDFs
- Register conversions blueprint in wsgi.

Worker:
- server/worker.py: convert_event_media_to_pdf.
- Calls Gotenberg /forms/libreoffice/convert, writes to server/media/converted/.
- Updates Conversion status, timestamps, error messages.
- Fix media root resolution to /server/media.
- Prefer function enqueue over string path; expose server.worker in package init for RQ string compatibility.

Queue/infra:
- server/task_queue.py: RQ queue helper (REDIS_URL, default redis://redis:6379/0).
- docker-compose:
  - Add redis and gotenberg services.
  - Add worker service (rq worker conversions).
  - Pass REDIS_URL and GOTENBERG_URL to server/worker.
  - Mount shared media volume in prod for API/worker parity.
- docker-compose.override:
  - Add dev redis/gotenberg/worker services.
  - Ensure PYTHONPATH + working_dir allow importing server.worker.
  - Use rq CLI instead of python -m rq for worker.
  - Dashboard dev: run as appropriate user/root and pre-create/chown caches to avoid EACCES.

Dashboard dev UX:
- Vite: set cacheDir .vite to avoid EACCES in node_modules.
- Disable Node inspector by default to avoid port conflicts.

Docs:
- Update copilot-instructions.md with conversion system: flow, services, env vars, endpoints, storage paths, and data model.
This commit is contained in:
94
server/routes/conversions.py
Normal file
94
server/routes/conversions.py
Normal file
@@ -0,0 +1,94 @@
|
||||
from flask import Blueprint, jsonify, request
|
||||
from server.database import Session
|
||||
from models.models import Conversion, ConversionStatus, EventMedia, MediaType
|
||||
from server.task_queue import get_queue
|
||||
from server.worker import convert_event_media_to_pdf
|
||||
from datetime import datetime, timezone
|
||||
import hashlib
|
||||
|
||||
conversions_bp = Blueprint("conversions", __name__,
|
||||
url_prefix="/api/conversions")
|
||||
|
||||
|
||||
def sha256_file(abs_path: str) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *abs_path*.

    The file is opened in binary mode and consumed in 8192-byte reads so
    the whole content never has to be held in memory at once.
    """
    digest = hashlib.sha256()
    with open(abs_path, "rb") as stream:
        while True:
            block = stream.read(8192)
            if not block:
                # An empty read marks end-of-file.
                break
            digest.update(block)
    return digest.hexdigest()
|
||||
|
||||
|
||||
@conversions_bp.route("/<int:media_id>/pdf", methods=["POST"])
def ensure_conversion(media_id: int):
    """Ensure a PDF conversion row exists for a media item and enqueue it if needed.

    Responses:
        404 -- the media row is missing, has no ``file_path``, or its source
               file does not exist on disk.
        200 -- the media type is not ppt/pptx/odp (nothing to convert), or the
               conversion row is neither pending nor failed; in the latter
               case the body carries ``id``, ``status`` and ``target_path``.
        202 -- the conversion row is pending or failed and a job was enqueued;
               the body carries ``id``, ``status`` and ``job_id``.
    """
    session = Session()
    try:
        media = session.query(EventMedia).get(media_id)
        if not media or not media.file_path:
            return jsonify({"error": "Media not found or no file"}), 404

        # Only enqueue for office presentation formats
        if media.media_type not in {MediaType.ppt, MediaType.pptx, MediaType.odp}:
            return jsonify({"message": "No conversion required for this media_type"}), 200

        # Compute file hash. The media root is the "media" directory that
        # sits beside this module's parent directory.
        import os

        base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        media_root = os.path.join(base_dir, "media")
        abs_source = os.path.join(media_root, media.file_path)
        try:
            file_hash = sha256_file(abs_source)
        except FileNotFoundError:
            # The DB row can outlive the file; report that as a client-visible
            # 404 instead of letting the request fail with an unhandled 500.
            return jsonify({"error": "Source file not found"}), 404

        # Find or create conversion row (keyed by media, target format and
        # the content hash of the current source file)
        conv = (
            session.query(Conversion)
            .filter_by(
                source_event_media_id=media.id,
                target_format="pdf",
                file_hash=file_hash,
            )
            .one_or_none()
        )
        if not conv:
            conv = Conversion(
                source_event_media_id=media.id,
                target_format="pdf",
                status=ConversionStatus.pending,
                file_hash=file_hash,
            )
            session.add(conv)
            session.commit()

        # Enqueue if not already processing/ready
        if conv.status in {ConversionStatus.pending, ConversionStatus.failed}:
            q = get_queue()
            job = q.enqueue(convert_event_media_to_pdf, conv.id)
            return jsonify({"id": conv.id, "status": conv.status.value, "job_id": job.get_id()}), 202

        return jsonify({"id": conv.id, "status": conv.status.value, "target_path": conv.target_path}), 200
    finally:
        session.close()
|
||||
|
||||
|
||||
@conversions_bp.route("/<int:media_id>/status", methods=["GET"])
def conversion_status(media_id: int):
    """Report the newest PDF conversion row recorded for a media item.

    "Newest" means the row with the highest ``Conversion.id`` for the given
    media id and target format "pdf". Responds with 404 and
    ``{"status": "missing"}`` when no such row exists; otherwise returns the
    row's id, status, target path, ISO-formatted timestamps (or ``None``)
    and error message.
    """
    session = Session()
    try:
        query = session.query(Conversion).filter_by(
            source_event_media_id=media_id, target_format="pdf"
        )
        latest = query.order_by(Conversion.id.desc()).first()
        if not latest:
            return jsonify({"status": "missing"}), 404

        # Timestamps are optional; only format the ones that are set.
        started = latest.started_at.isoformat() if latest.started_at else None
        completed = latest.completed_at.isoformat() if latest.completed_at else None

        payload = {
            "id": latest.id,
            "status": latest.status.value,
            "target_path": latest.target_path,
            "started_at": started,
            "completed_at": completed,
            "error_message": latest.error_message,
        }
        return jsonify(payload)
    finally:
        session.close()
|
||||
@@ -1,7 +1,10 @@
|
||||
from re import A
|
||||
from flask import Blueprint, request, jsonify, send_from_directory
|
||||
from server.database import Session
|
||||
from models.models import EventMedia, MediaType
|
||||
from models.models import EventMedia, MediaType, Conversion, ConversionStatus
|
||||
from server.task_queue import get_queue
|
||||
from server.worker import convert_event_media_to_pdf
|
||||
import hashlib
|
||||
import os
|
||||
|
||||
eventmedia_bp = Blueprint('eventmedia', __name__, url_prefix='/api/eventmedia')
|
||||
@@ -134,6 +137,41 @@ def filemanager_upload():
|
||||
uploaded_at=datetime.now(timezone.utc)
|
||||
)
|
||||
session.add(media)
|
||||
session.commit()
|
||||
|
||||
# Enqueue conversion for office presentation types
|
||||
if media_type in {MediaType.ppt, MediaType.pptx, MediaType.odp}:
|
||||
# compute file hash
|
||||
h = hashlib.sha256()
|
||||
with open(file_path, 'rb') as f:
|
||||
for chunk in iter(lambda: f.read(8192), b""):
|
||||
h.update(chunk)
|
||||
file_hash = h.hexdigest()
|
||||
|
||||
# upsert Conversion row
|
||||
conv = (
|
||||
session.query(Conversion)
|
||||
.filter_by(
|
||||
source_event_media_id=media.id,
|
||||
target_format='pdf',
|
||||
file_hash=file_hash,
|
||||
)
|
||||
.one_or_none()
|
||||
)
|
||||
if not conv:
|
||||
conv = Conversion(
|
||||
source_event_media_id=media.id,
|
||||
target_format='pdf',
|
||||
status=ConversionStatus.pending,
|
||||
file_hash=file_hash,
|
||||
)
|
||||
session.add(conv)
|
||||
session.commit()
|
||||
|
||||
if conv.status in {ConversionStatus.pending, ConversionStatus.failed}:
|
||||
q = get_queue()
|
||||
q.enqueue(convert_event_media_to_pdf, conv.id)
|
||||
|
||||
session.commit()
|
||||
return jsonify({'success': True})
|
||||
|
||||
|
||||
@@ -55,3 +55,14 @@ def download_media_file(media_id: int, filename: str):
|
||||
served_name = os.path.basename(abs_path)
|
||||
session.close()
|
||||
return send_from_directory(directory, served_name, as_attachment=True)
|
||||
|
||||
|
||||
@files_bp.route("/converted/<path:relpath>", methods=["GET"])
def download_converted(relpath: str):
    """Serve a converted file (e.g. a PDF) located below the media root.

    ``relpath`` is resolved against ``MEDIA_ROOT``. The resolved path must
    stay inside the media root, otherwise a 400 is returned; a path that does
    not point to a regular file yields a 404. On success the file is sent as
    an attachment.
    """
    # NOTE(review): the route is named "converted" but the join uses
    # MEDIA_ROOT itself -- presumably callers pass a path that already
    # starts with "converted/"; confirm against how target_path is stored.
    root = os.path.realpath(MEDIA_ROOT)
    # Normalize before checking: a plain join keeps "../" segments, so a
    # string-prefix test on the un-normalized path never rejects traversal.
    abs_path = os.path.realpath(os.path.join(root, relpath))
    try:
        # Compare whole path components so a sibling such as "<root>_x"
        # does not pass just because it shares a string prefix with root.
        inside_root = os.path.commonpath([root, abs_path]) == root
    except ValueError:
        # Raised e.g. for paths on different drives; never inside the root.
        inside_root = False
    if not inside_root:
        return jsonify({"error": "Invalid path"}), 400
    if not os.path.isfile(abs_path):
        return jsonify({"error": "File not found"}), 404
    return send_from_directory(os.path.dirname(abs_path), os.path.basename(abs_path), as_attachment=True)
|
||||
|
||||
Reference in New Issue
Block a user