feat(conversions): end-to-end PPT/PPTX/ODP -> PDF pipeline with RQ worker + Gotenberg

DB/model

Add Conversion model + ConversionStatus enum (pending, processing, ready, failed)
Alembic migrations: create conversions table, indexes, unique (source_event_media_id, target_format, file_hash), and NOT NULL on file_hash
API

Enqueue on upload (ppt|pptx|odp) in routes/eventmedia.py: compute sha256, upsert Conversion, enqueue job
New routes:
POST /api/conversions/<media_id>/pdf — ensure/enqueue conversion
GET /api/conversions/<media_id>/status — latest status/details
GET /api/files/converted/<path> — serve converted PDFs
Register conversions blueprint in wsgi
Worker

server/worker.py: convert_event_media_to_pdf
Calls Gotenberg /forms/libreoffice/convert, writes to server/media/converted/
Updates Conversion status, timestamps, error messages
Fix media root resolution to /server/media
Prefer function enqueue over string path; expose server.worker in package init for RQ string compatibility
Queue/infra

server/task_queue.py: RQ queue helper (REDIS_URL, default redis://redis:6379/0)
docker-compose:
Add redis and gotenberg services
Add worker service (rq worker conversions)
Pass REDIS_URL and GOTENBERG_URL to server/worker
Mount shared media volume in prod for API/worker parity
docker-compose.override:
Add dev redis/gotenberg/worker services
Ensure PYTHONPATH + working_dir allow importing server.worker
Use rq CLI instead of python -m rq for worker
Dashboard dev: run as appropriate user/root and pre-create/chown caches to avoid EACCES
Dashboard dev UX

Vite: set cacheDir .vite to avoid EACCES in node_modules
Disable Node inspector by default to avoid port conflicts
Docs

Update copilot-instructions.md with conversion system: flow, services, env vars, endpoints, storage paths, and data model
This commit is contained in:
2025-10-07 19:06:09 +00:00
parent 80bf8bc58d
commit fcc0dfbb0f
20 changed files with 1809 additions and 422 deletions

View File

@@ -0,0 +1,28 @@
"""merge heads after conversions
Revision ID: 2b627d0885c3
Revises: 5b3c1a2f8d10, 8d1df7199cb7
Create Date: 2025-10-06 20:27:53.974926
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = '2b627d0885c3'
# Merge revision: two parents, one per divergent head being joined.
down_revision: Union[str, Sequence[str], None] = ('5b3c1a2f8d10', '8d1df7199cb7')
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Upgrade schema.

    Merge-point revision: no schema operations are performed here; the
    revision exists solely to join the two divergent Alembic heads.
    """
def downgrade() -> None:
    """Downgrade schema.

    Nothing to undo: the corresponding upgrade performs no schema
    operations, so the downgrade is intentionally a no-op as well.
    """

View File

@@ -0,0 +1,53 @@
"""Add conversions table
Revision ID: 5b3c1a2f8d10
Revises: e6eaede720aa
Create Date: 2025-10-06 12:00:00.000000
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = '5b3c1a2f8d10'
# Parent revision in the migration chain.
down_revision: Union[str, None] = 'e6eaede720aa'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the conversions table, its secondary indexes, and the
    (source, format, hash) uniqueness constraint."""
    op.create_table(
        'conversions',
        sa.Column('id', sa.Integer(), primary_key=True, autoincrement=True),
        sa.Column('source_event_media_id', sa.Integer(), nullable=False),
        sa.Column('target_format', sa.String(length=10), nullable=False),
        sa.Column('target_path', sa.String(length=512), nullable=True),
        # Job lifecycle; new rows default to 'pending'.
        sa.Column('status', sa.Enum('pending', 'processing', 'ready', 'failed', name='conversionstatus'),
                  nullable=False, server_default='pending'),
        # Hash of the source file; nullable here, tightened to NOT NULL
        # in a later revision (b5a6c3d4e7f8).
        sa.Column('file_hash', sa.String(length=64), nullable=True),
        sa.Column('started_at', sa.TIMESTAMP(timezone=True), nullable=True),
        sa.Column('completed_at', sa.TIMESTAMP(timezone=True), nullable=True),
        sa.Column('error_message', sa.Text(), nullable=True),
        # Conversions disappear automatically with their source media row.
        sa.ForeignKeyConstraint(['source_event_media_id'], ['event_media.id'],
                                name='fk_conversions_event_media', ondelete='CASCADE'),
    )
    # Secondary indexes for the expected lookup patterns.
    for index_name, index_columns in (
        ('ix_conv_source_event_media_id', ['source_event_media_id']),
        ('ix_conversions_target_format', ['target_format']),
        ('ix_conv_status_target', ['status', 'target_format']),
        ('ix_conv_source_target', ['source_event_media_id', 'target_format']),
    ):
        op.create_index(index_name, 'conversions', index_columns)
    # At most one conversion per (source media, target format, file hash).
    op.create_unique_constraint('uq_conv_source_target_hash', 'conversions',
                                ['source_event_media_id', 'target_format', 'file_hash'])
def downgrade() -> None:
    """Drop everything created by upgrade(), in reverse order."""
    op.drop_constraint('uq_conv_source_target_hash', 'conversions', type_='unique')
    # Indexes are dropped in the reverse order of their creation.
    for index_name in (
        'ix_conv_source_target',
        'ix_conv_status_target',
        'ix_conversions_target_format',
        'ix_conv_source_event_media_id',
    ):
        op.drop_index(index_name, table_name='conversions')
    op.drop_table('conversions')

View File

@@ -0,0 +1,40 @@
"""Make conversions.file_hash NOT NULL
Revision ID: b5a6c3d4e7f8
Revises: 2b627d0885c3
Create Date: 2025-10-06 21:05:00.000000
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = "b5a6c3d4e7f8"
# Parent is the merge revision that joined the two heads.
down_revision: Union[str, None] = "2b627d0885c3"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Backfill NULL file_hash values, then enforce NOT NULL."""
    # The backfill must run first: an ALTER COLUMN ... NOT NULL would
    # fail if any existing row still carried a NULL hash.
    op.execute("UPDATE conversions SET file_hash = '' WHERE file_hash IS NULL")
    op.alter_column("conversions", "file_hash",
                    existing_type=sa.String(length=64),
                    existing_nullable=True,
                    nullable=False)
def downgrade() -> None:
    """Relax file_hash back to nullable.

    Rows backfilled with '' during upgrade() are left as-is; only the
    column's nullability is reverted.
    """
    op.alter_column("conversions", "file_hash",
                    existing_type=sa.String(length=64),
                    existing_nullable=False,
                    nullable=True)