"""Source code for app.parsers.schemas."""

from dataclasses import dataclass, field
from datetime import date
from decimal import Decimal
from app.models.enums import TransactionType

@dataclass
class ParsedTransaction:
    """One fully-parsed transaction row.

    Every field except ``parse_warnings`` must be supplied when constructing
    an instance; fields typed ``... | None`` accept ``None`` but still have
    no default.
    """

    row_index: int
    transaction_date: date
    value_date: date | None
    description: str | None
    raw_description: str | None
    reference_number: str | None
    transaction_type: TransactionType
    amount: Decimal
    direction: str  # 'C' | 'D'
    balance_after: Decimal | None
    currency: str | None
    # default_factory gives each instance its own list rather than a shared one.
    parse_warnings: list[str] = field(default_factory=list)
[docs] @dataclass class ColumnMapping: """Maps logical field names to actual CSV column indices.""" date: int | None = None value_date: int | None = None description: int | None = None reference: int | None = None amount: int | None = None # Single signed/unsigned amount column debit: int | None = None # Separate debit column credit: int | None = None # Separate credit column balance: int | None = None currency: int | None = None transaction_type: int | None = None
@dataclass
class ParsedBankStatement:
    """Final result from the full CSV parsing pipeline.

    All fields up to and including ``total_rows_skipped`` are required at
    construction time (those typed ``... | None`` accept ``None``);
    ``parser_version`` and ``warnings`` have defaults.
    """

    bank_name: str | None
    account_number: str | None  # MASKED
    account_holder: str | None
    currency: str | None
    statement_from: date | None
    statement_to: date | None
    opening_balance: Decimal | None
    closing_balance: Decimal | None
    transactions: list[ParsedTransaction]

    # CSV format metadata
    detected_encoding: str
    detected_delimiter: str
    detected_format: str
    raw_headers: dict  # {column_index: header_name}
    column_mapping: ColumnMapping

    # Quality metrics
    total_rows_parsed: int
    total_rows_skipped: int
    parser_version: str = "csv-parser-v1.0.0"

    # Aggregated warnings
    # default_factory gives each instance its own list rather than a shared one.
    warnings: list[dict] = field(default_factory=list)
[docs] @dataclass class ParsedInvoiceLineItem: line_number: int description: str | None = None quantity: Decimal | None = None unit_price: Decimal | None = None line_total: Decimal | None = None tax_rate: Decimal | None = None tax_amount: Decimal | None = None sku: str | None = None unit_of_measure: str | None = None
@dataclass
class ParsedInvoice:
    """Fields extracted from one invoice.

    Every field has a default, so ``ParsedInvoice()`` is valid and can be
    populated afterwards. ``line_items`` and ``warnings`` start as fresh
    empty lists for each instance.
    """

    invoice_number: str | None = None
    invoice_date: date | None = None
    due_date: date | None = None
    currency: str | None = None
    subtotal: Decimal | None = None
    tax_amount: Decimal | None = None
    discount_amount: Decimal | None = None
    total_amount: Decimal | None = None
    raw_vendor_name: str | None = None
    raw_date_text: str | None = None
    raw_total_text: str | None = None
    # Built from the string "1.000", so the default keeps three decimal places.
    confidence: Decimal = Decimal("1.000")
    notes: str | None = None
    line_items: list[ParsedInvoiceLineItem] = field(default_factory=list)
    warnings: list[str] = field(default_factory=list)
    parser_version: str = "pdf-parser-v1.0.0"