Source code for app.models.bank_statement

"""
BankStatement and BankTransaction ORM models.
"""
import uuid
from datetime import date, datetime
from decimal import Decimal
from typing import TYPE_CHECKING, Any

from sqlalchemy import (
    CheckConstraint, Date, DateTime, Enum as SAEnum,
    ForeignKey, Integer, Numeric, SmallInteger, String, Text, func, JSON,
)
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship

from app.models.base import Base, UUIDPrimaryKeyMixin
from app.models.enums import TransactionType

if TYPE_CHECKING:
    from app.models.document import Document
    from app.models.processing_job import ProcessingJob


[docs] class BankStatement(UUIDPrimaryKeyMixin, Base): """Statement-level metadata extracted from a CSV bank statement.""" __tablename__ = "bank_statements" document_id: Mapped[uuid.UUID] = mapped_column( UUID(as_uuid=True), ForeignKey("documents.id", ondelete="CASCADE"), nullable=False ) processing_job_id: Mapped[uuid.UUID] = mapped_column( UUID(as_uuid=True), ForeignKey("processing_jobs.id"), nullable=False ) # ── Extracted fields ─────────────────────────────────────────────────────── bank_name: Mapped[str | None] = mapped_column(String(255), nullable=True) account_number: Mapped[str | None] = mapped_column( String(20), nullable=True, comment="MASKED — last 4 digits only. e.g. ****4321" ) account_holder: Mapped[str | None] = mapped_column(String(500), nullable=True) currency: Mapped[str | None] = mapped_column(String(3), nullable=True) # TODO: Add base_currency + exchange_rate for cross-currency normalization # base_currency: Mapped[str | None] # exchange_rate: Mapped[Decimal | None] statement_from: Mapped[date | None] = mapped_column(Date, nullable=True) statement_to: Mapped[date | None] = mapped_column(Date, nullable=True) opening_balance: Mapped[Decimal | None] = mapped_column(Numeric(18, 4), nullable=True) closing_balance: Mapped[Decimal | None] = mapped_column(Numeric(18, 4), nullable=True) # ── CSV format metadata (from edge case analysis) ────────────────────────── raw_headers: Mapped[dict[str, Any] | None] = mapped_column( JSON, nullable=True, comment="Original CSV column names as-is, for debugging format mismatches" ) detected_format: Mapped[str | None] = mapped_column( String(100), nullable=True, comment="Detected bank format: HDFC_V2, ICICI_STANDARD, GENERIC, etc." ) detected_delimiter: Mapped[str | None] = mapped_column(String(1), nullable=True) detected_encoding: Mapped[str | None] = mapped_column(String(30), nullable=True) total_rows_parsed: Mapped[int | None] = mapped_column(Integer, nullable=True) total_rows_skipped: Mapped[int | None] = mapped_column(Integer, nullable=True) extracted_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), server_default=func.now(), nullable=False ) updated_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False ) # ── Relationships ────────────────────────────────────────────────────────── document: Mapped["Document"] = relationship("Document") processing_job: Mapped["ProcessingJob"] = relationship( "ProcessingJob", back_populates="bank_statement" ) transactions: Mapped[list["BankTransaction"]] = relationship( "BankTransaction", back_populates="bank_statement", cascade="all, delete-orphan" ) def __repr__(self) -> str: return f"<BankStatement id={self.id} bank={self.bank_name} currency={self.currency}>"
[docs] class BankTransaction(UUIDPrimaryKeyMixin, Base): """Individual transaction row from a bank statement CSV.""" __tablename__ = "bank_transactions" bank_statement_id: Mapped[uuid.UUID] = mapped_column( UUID(as_uuid=True), ForeignKey("bank_statements.id", ondelete="CASCADE"), nullable=False, ) # ── Core transaction fields ──────────────────────────────────────────────── transaction_date: Mapped[date] = mapped_column(Date, nullable=False) value_date: Mapped[date | None] = mapped_column(Date, nullable=True) description: Mapped[str | None] = mapped_column(Text, nullable=True) raw_description: Mapped[str | None] = mapped_column( Text, nullable=True, comment="Original CSV text before cleaning" ) reference_number: Mapped[str | None] = mapped_column(String(255), nullable=True) transaction_type: Mapped[TransactionType] = mapped_column( SAEnum(TransactionType, name="transaction_type_enum"), nullable=False, default=TransactionType.UNKNOWN, ) # ── Amount — always positive; direction is explicit ──────────────────────── amount: Mapped[Decimal] = mapped_column( Numeric(18, 4), nullable=False, comment="Always positive. Use direction column for sign." ) direction: Mapped[str] = mapped_column( String(1), nullable=False, comment="C = Credit (money in), D = Debit (money out)" ) balance_after: Mapped[Decimal | None] = mapped_column(Numeric(18, 4), nullable=True) currency: Mapped[str | None] = mapped_column( String(3), nullable=True, comment="Per-row currency for multi-currency statements" ) # TODO: Add cross-currency normalization columns # base_amount: Mapped[Decimal | None] # exchange_rate: Mapped[Decimal | None] # ── Parser metadata ──────────────────────────────────────────────────────── row_index: Mapped[int] = mapped_column( Integer, nullable=False, comment="Original CSV row number (1-indexed) for debugging" ) parse_warnings: Mapped[list[str] | None] = mapped_column( JSON, nullable=True, comment="Row-level warnings: ['date guessed', 'amount format unusual']" ) created_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), server_default=func.now(), nullable=False ) __table_args__ = ( CheckConstraint("amount >= 0", name="chk_bank_tx_amount_positive"), CheckConstraint("direction IN ('C', 'D')", name="chk_bank_tx_direction"), ) # ── Relationships ────────────────────────────────────────────────────────── bank_statement: Mapped["BankStatement"] = relationship( "BankStatement", back_populates="transactions" ) def __repr__(self) -> str: return ( f"<BankTransaction id={self.id} date={self.transaction_date} " f"amount={self.direction}{self.amount}>" )