Source code for app.models.document

"""
Document model — immutable raw file registry.
Every upload creates one row here. Never deleted (soft delete only).
"""
from datetime import datetime
from typing import TYPE_CHECKING

from sqlalchemy import (
    BigInteger, Boolean, CheckConstraint, DateTime,
    Enum as SAEnum, String, Text, func,
)
from sqlalchemy.orm import Mapped, mapped_column, relationship

from app.models.base import Base, TimestampMixin, UUIDPrimaryKeyMixin
from app.models.enums import DocumentType

if TYPE_CHECKING:
    from app.models.processing_job import ProcessingJob


class Document(UUIDPrimaryKeyMixin, TimestampMixin, Base):
    """
    ORM mapping for the ``documents`` table: one row per uploaded file.

    This is the parent side of the ``processing_jobs`` relationship.
    Removal is modelled as a soft delete through the ``is_deleted`` /
    ``deleted_at`` pair, which a table-level check constraint keeps in
    agreement with each other.
    """

    __tablename__ = "documents"

    # Soft-delete invariant enforced by the database: either the row is live
    # (flag FALSE, no timestamp) or it is deleted (flag TRUE, timestamp set).
    __table_args__ = (
        CheckConstraint(
            "(is_deleted = FALSE AND deleted_at IS NULL) OR "
            "(is_deleted = TRUE AND deleted_at IS NOT NULL)",
            name="chk_documents_deleted_at_consistency",
        ),
    )

    # ── File identity ─────────────────────────────────────────────────────────
    filename: Mapped[str] = mapped_column(
        String(512),
        nullable=False,
    )
    original_name: Mapped[str] = mapped_column(
        String(512),
        nullable=False,
    )
    document_type: Mapped[DocumentType] = mapped_column(
        SAEnum(DocumentType, name="document_type_enum"),
        nullable=False,
    )
    # 'pdf' | 'csv'
    file_type: Mapped[str] = mapped_column(
        String(10),
        nullable=False,
    )

    # ── Storage details ───────────────────────────────────────────────────────
    file_size_bytes: Mapped[int] = mapped_column(
        BigInteger,
        nullable=False,
    )
    # 64 characters wide and UNIQUE: two rows cannot share the same checksum.
    checksum_sha256: Mapped[str] = mapped_column(
        String(64),
        nullable=False,
        unique=True,
    )
    storage_path: Mapped[str] = mapped_column(
        Text,
        nullable=False,
    )

    # ── Upload provenance ─────────────────────────────────────────────────────
    # Falls back to "system" on insert when no uploader is supplied.
    uploaded_by: Mapped[str] = mapped_column(
        String(255),
        nullable=False,
        default="system",
    )
    # Timezone-aware; filled in by the database (server-side now()).
    uploaded_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )

    # ── Soft-delete state (see the check constraint in __table_args__) ────────
    is_deleted: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        default=False,
    )
    deleted_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )

    # ── Relationships ─────────────────────────────────────────────────────────
    # NOTE(review): "all, delete-orphan" propagates ORM-level deletes to the
    # jobs; that only comes into play if a Document is ever hard-deleted
    # through the session — confirm this matches the soft-delete policy.
    processing_jobs: Mapped[list["ProcessingJob"]] = relationship(
        "ProcessingJob",
        back_populates="document",
        cascade="all, delete-orphan",
    )

    def __repr__(self) -> str:
        """Return a short debug string with the id, document type and original name."""
        doc_id = self.id
        doc_type = self.document_type
        name = self.original_name
        return f"<Document id={doc_id} type={doc_type} name={name!r}>"