# source: https://github.com/hadley/loan-application

version: 0.1.0
tables:
  account:
    description: >
      Static characteristics of bank accounts. Each account belongs to a
      branch in a district and has a statement issuance frequency.
    source:
      parquet: raw-data/account.parquet
    columns:
      - name: account_id
        type: number(id)
        constraints: [primary_key]
        description: Unique identifier for the account.
        examples: [1, 1183, 2367, 3552, 11382]
      - name: district_id
        type: number(id)
        constraints: [required, foreign_key]
        description: District where the account's branch is located.
        examples: [1, 20, 39, 58, 77]
      - name: frequency
        type: enum
        values:
          POPLATEK MESICNE: Monthly
          POPLATEK TYDNE: Weekly
          POPLATEK PO OBRATU: After each transaction
        constraints: [required]
        description: Frequency of statement issuance.
      - name: date
        type: date
        constraints: [required]
        description: Date the account was created.
        range: [1993-01-01, 1997-12-29]

  client:
    description: >
      Characteristics of bank clients. Gender and birth date were originally
      encoded together in a "birth number"; they have been split into
      separate fields in this version.
    source:
      parquet: raw-data/client.parquet
    columns:
      - name: client_id
        type: number(id)
        constraints: [primary_key]
        description: Unique identifier for the client.
        examples: [1, 1418, 2839, 4257, 13998]
      - name: gender
        type: enum
        values: [F, M]
        constraints: [required]
        description: Client's gender.
      - name: birth_date
        type: date
        constraints: [required]
        description: Client's date of birth.
        range: [1911-08-20, 1987-09-27]
      - name: district_id
        type: number(id)
        constraints: [required, foreign_key]
        description: District where the client lives.
        examples: [1, 20, 39, 58, 77]

  disp:
    description: >
      Dispositions linking clients to accounts. Describes the rights of
      clients to operate accounts. Only owners can issue permanent orders
      and apply for loans.
    source:
      parquet: raw-data/disp.parquet
    columns:
      - name: disp_id
        type: number(id)
        constraints: [primary_key]
        description: Unique identifier for the disposition.
        examples: [1, 1418, 2839, 4257, 13690]
      - name: client_id
        type: number(id)
        constraints: [required, foreign_key]
        description: The client who has access to the account.
        examples: [1, 1418, 2839, 4257, 13998]
      - name: account_id
        type: number(id)
        constraints: [required, foreign_key]
        description: The account the client has access to.
        examples: [1, 1183, 2367, 3552, 11382]
      - name: type
        type: enum
        values:
          OWNER: Full rights (can issue orders and apply for loans)
          DISPONENT: Limited access
        constraints: [required]
        description: Type of disposition.

  order:
    description: >
      Permanent (standing) payment orders on accounts. These are recurring
      debits set up by account owners.
    source:
      parquet: raw-data/order.parquet
    columns:
      - name: order_id
        type: number(id)
        constraints: [primary_key]
        description: Unique identifier for the permanent order.
        examples: [29401, 31187, 32988, 34786, 46338]
      - name: account_id
        type: number(id)
        constraints: [required, foreign_key]
        description: Account the order is issued for.
        examples: [1, 1174, 2342, 3502, 11362]
      - name: bank_to
        type: string
        constraints: [required]
        description: Two-letter code identifying the recipient's bank.
        examples: [AB, GH, MN, ST, YZ]
      - name: account_to
        type: number(id)
        constraints: [required]
        description: Account number of the recipient.
        examples: [399, 24175644, 49756062, 73948222, 99994199]
      - name: amount
        type: number(quantity)
        constraints: [required]
        description: Amount debited per payment.
        range: [1, 14882]
      - name: k_symbol
        type: enum
        values:
          POJISTNE: Insurance payment
          SIPO: Household payment
          LEASING: Leasing
          UVER: Loan payment
        description: Characterization of the payment. Empty string if not specified.

  trans:
    description: >
      Individual transactions on accounts, including credits and debits
      from various sources.
    source:
      parquet: raw-data/trans.parquet
    columns:
      - name: trans_id
        type: number(id)
        constraints: [primary_key]
        description: Unique identifier for the transaction.
        examples: [1, 430263, 858506, 2060979, 3682987]
      - name: account_id
        type: number(id)
        constraints: [required, foreign_key]
        description: Account the transaction belongs to.
        examples: [1, 1183, 2367, 3552, 11382]
      - name: date
        type: date
        constraints: [required]
        description: Date of the transaction.
        range: [1993-01-01, 1998-12-31]
      - name: type
        type: enum
        values:
          PRIJEM: Credit
          VYDAJ: Withdrawal (debit)
          VYBER: Withdrawal (cash)
        constraints: [required]
        description: Transaction direction.
      - name: operation
        type: enum
        values:
          VYBER KARTOU: Credit card withdrawal
          VKLAD: Credit in cash
          PREVOD Z UCTU: Collection from another bank
          VYBER: Cash withdrawal
          PREVOD NA UCET: Remittance to another bank
        description: Mode of transaction.
      - name: amount
        type: number(quantity)
        constraints: [required]
        description: Amount of money in the transaction.
        range: [0, 87400]
      - name: balance
        type: number(quantity)
        constraints: [required]
        description: Account balance after the transaction.
        range: [-41126, 209637]
      - name: k_symbol
        type: enum
        values:
          POJISTNE: Insurance payment
          SLUZBY: Payment for statement
          UROK: Interest credited
          SANKC. UROK: Sanction interest (negative balance)
          SIPO: Household payment
          DUCHOD: Old-age pension
          UVER: Loan payment
        description: Characterization of the transaction. Empty string if not specified.
      - name: bank
        type: string
        description: Two-letter code identifying the partner's bank.
        examples: [AB, GH, MN, ST, YZ]
      - name: account
        type: number(id)
        description: Account number of the transaction partner.
        examples: [0, 24118912, 49954766, 74343323, 99994199]

  loan:
    description: >
      Loans granted for accounts. At most one loan can be granted per
      account. This is the primary target table for the PKDD'99 discovery
      challenge.
    source:
      parquet: raw-data/loan.parquet
    columns:
      - name: loan_id
        type: number(id)
        constraints: [primary_key]
        description: Unique identifier for the loan.
        examples: [4959, 5576, 6178, 6753, 7308]
      - name: account_id
        type: number(id)
        constraints: [required, unique, foreign_key]
        description: Account the loan was granted for.
        examples: [2, 2962, 5742, 8688, 11362]
      - name: date
        type: date
        constraints: [required]
        description: Date the loan was granted.
        range: [1993-07-05, 1998-12-08]
      - name: amount
        type: number(quantity)
        constraints: [required]
        description: Total amount of the loan.
        range: [4980, 590820]
      - name: duration
        type: number(ordinal)
        constraints: [required]
        description: Duration of the loan in months.
        range: [12, 60]
      - name: payments
        type: number(quantity)
        constraints: [required]
        description: Monthly payment amount.
        range: [304, 9910]
      - name: status
        type: enum
        values:
          A: Contract finished, no problems
          B: Contract finished, loan not paid
          C: Running contract, OK so far
          D: Running contract, client in debt
        constraints: [required]
        description: Status of the loan.

  card:
    description: >
      Credit cards issued to account holders via their dispositions.
      Multiple cards can be issued per account.
    source:
      parquet: raw-data/card.parquet
    columns:
      - name: card_id
        type: number(id)
        constraints: [primary_key]
        description: Unique identifier for the card.
        examples: [1, 230, 456, 684, 1247]
      - name: disp_id
        type: number(id)
        constraints: [required, foreign_key]
        description: Disposition linking the card to a client and account.
        examples: [9, 1388, 2929, 4457, 13660]
      - name: type
        type: enum
        values: [junior, classic, gold]
        constraints: [required]
        description: Type of credit card.
      - name: issued
        type: date
        constraints: [required]
        description: Date the card was issued.
        range: [1993-11-07, 1998-12-29]

  district:
    description: >
      Demographic data for Czech districts. Column names A2–A16 are
      the original anonymized names from the PKDD'99 challenge.
    source:
      parquet: raw-data/district.parquet
    columns:
      - name: district_id
        type: number(id)
        constraints: [primary_key]
        description: Unique identifier for the district.
        examples: [1, 20, 39, 58, 77]
      - name: A2
        type: string
        constraints: [required, unique]
        description: District name.
        examples: [Benesov, Hradec Kralove, Nachod, Rokycany, Znojmo]
      - name: A3
        type: enum
        values: [Prague, central Bohemia, south Bohemia, west Bohemia, north Bohemia, east Bohemia, south Moravia, north Moravia]
        constraints: [required]
        description: Region name.
      - name: A4
        type: number(quantity)
        constraints: [required]
        description: Number of inhabitants.
        range: [42821, 1204953]
      - name: A5
        type: number(quantity)
        constraints: [required]
        description: Number of municipalities with fewer than 499 inhabitants.
        range: [0, 151]
      - name: A6
        type: number(quantity)
        constraints: [required]
        description: Number of municipalities with 500–1999 inhabitants.
        range: [0, 70]
      - name: A7
        type: number(quantity)
        constraints: [required]
        description: Number of municipalities with 2000–9999 inhabitants.
        range: [0, 20]
      - name: A8
        type: number(quantity)
        constraints: [required]
        description: Number of municipalities with 10,000 or more inhabitants.
        range: [0, 5]
      - name: A9
        type: number(quantity)
        constraints: [required]
        description: Number of cities.
        range: [1, 11]
      - name: A10
        type: number(quantity)
        constraints: [required]
        description: Ratio of urban inhabitants (percentage).
        range: [33.9, 100]
      - name: A11
        type: number(quantity)
        constraints: [required]
        description: Average salary.
        range: [8110, 12541]
      - name: A12
        type: number(quantity)
        description: Unemployment rate in 1995 (percentage).
        range: [0.2, 7.3]
      - name: A13
        type: number(quantity)
        constraints: [required]
        description: Unemployment rate in 1996 (percentage).
        range: [0.43, 9.4]
      - name: A14
        type: number(quantity)
        constraints: [required]
        description: Number of entrepreneurs per 1000 inhabitants.
        range: [81, 167]
      - name: A15
        type: number(quantity)
        description: Number of committed crimes in 1995.
        range: [818, 85677]
      - name: A16
        type: number(quantity)
        constraints: [required]
        description: Number of committed crimes in 1996.
        range: [888, 99107]

relationships:
  - description: Account is located in a district.
    cardinality: many-to-one
    join: account.district_id = district.district_id
  - description: Client lives in a district.
    cardinality: many-to-one
    join: client.district_id = district.district_id
  - description: Disposition links a client to an account.
    cardinality: many-to-one
    join: disp.client_id = client.client_id
  - description: Disposition links a client to an account.
    cardinality: many-to-one
    join: disp.account_id = account.account_id
  - description: Permanent order belongs to an account.
    cardinality: many-to-one
    join: order.account_id = account.account_id
  - description: Transaction belongs to an account.
    cardinality: many-to-one
    join: trans.account_id = account.account_id
  - description: Loan is granted for an account (at most one per account).
    cardinality: one-to-one
    join: loan.account_id = account.account_id
  - description: Credit card is issued via a disposition.
    cardinality: many-to-one
    join: card.disp_id = disp.disp_id

glossary:
  POPLATEK MESICNE: Monthly (statement issuance frequency).
  POPLATEK TYDNE: Weekly (statement issuance frequency).
  POPLATEK PO OBRATU: After each transaction (statement issuance frequency).
  PRIJEM: Credit (incoming transaction).
  VYDAJ: Withdrawal/debit (outgoing transaction).
  VYBER: Cash withdrawal.
  VYBER KARTOU: Credit card withdrawal.
  VKLAD: Cash deposit.
  PREVOD Z UCTU: Collection from another bank.
  PREVOD NA UCET: Remittance to another bank.
  POJISTNE: Insurance payment.
  SIPO: Household payment.
  LEASING: Leasing payment.
  UVER: Loan payment.
  SLUZBY: Payment for statement.
  UROK: Interest credited.
  SANKC. UROK: Sanction interest charged when account has negative balance.
  DUCHOD: Old-age pension.
  DISPONENT: A person with limited access rights to an account.
  OWNER: Account owner with full rights (can issue orders and apply for loans).
