# source: https://github.com/hadley/foodbank

version: 0.1.0

tables:
  # One entry per food, keyed by fdc_id. food_nutrient and food_portion
  # reference this table through their own fdc_id columns, and
  # food_category_id points at food_category.id (see `relationships`).
  food:
    description: >
      Any substance consumed by humans for nutrition, taste and/or aroma.
      Contains only foundation foods from the USDA FoodData Central
      (December 2025).

      <https://fdc.nal.usda.gov/fdc-datasets/FoodData_Central_foundation_food_csv_2025-12-18.zip>

    # Where the data can be loaded from: a parquet file path and,
    # presumably, the R object exported by the foodbank package.
    source:
      parquet: inst/parquet/food.parquet
      R: foodbank::food

    columns:
      # Primary key of the table.
      - name: fdc_id
        type: number(id)
        constraints: [primary_key]
        description: >
          Unique permanent identifier of the food.
        examples: [321358, 746775, 2003599, 2646173, 2747676]
      # Free-text name; food_category also has a column called
      # `description`, which the food/food_category relationship lists
      # under `conflicts`.
      - name: description
        type: string
        constraints: [required]
        description: Description of the food.
        examples:
          - Almond butter, creamy
          - Cheese, mozzarella, low moisture, part-skim
          - Grapefruit juice, red, not fortified, not from concentrate, refrigerated
          - Peppers, banana or Hungarian wax, seeded, raw
          - Yogurt, plain, whole milk
      # Foreign key to food_category.id.
      - name: food_category_id
        type: number(id)
        constraints: [required, foreign_key]
        description: ID of the food's category.
        examples: [1, 7, 12, 16, 25]
      # The range bounds are quoted, presumably so they stay strings
      # instead of being resolved to YAML date values by the parser.
      - name: publication_date
        type: date
        constraints: [required]
        range: ["2019-04-01", "2025-12-18"]
        description: >
          Date when the food was published to FoodData Central.

  # One entry per nutrient value of a food. References food through fdc_id
  # and nutrient through nutrient_id (see `relationships`).
  food_nutrient:
    description: >
      A nutrient value for a food. Amounts are per 100g of food, in the
      unit defined in the nutrient table.

    # Where the data can be loaded from: a parquet file path and,
    # presumably, the R object exported by the foodbank package.
    source:
      parquet: inst/parquet/food_nutrient.parquet
      R: foodbank::food_nutrient

    columns:
      # Primary key of the table.
      - name: id
        type: number(id)
        constraints: [primary_key]
        description: Unique permanent identifier.
        examples: [2219707, 2263067, 8524084, 27797175, 34969298]
      # Foreign key to food.fdc_id.
      - name: fdc_id
        type: number(id)
        constraints: [required, foreign_key]
        description: ID of the food.
        examples: [321358, 746775, 2003599, 2646173, 2747676]
      # Foreign key to nutrient.id.
      - name: nutrient_id
        type: number(id)
        constraints: [required, foreign_key]
        description: ID of the nutrient.
        examples: [1002, 1119, 1227, 1331, 2069]
      # From here on, no column in this table declares a `required`
      # constraint.
      # NOTE(review): the documented lower bound of amount is negative;
      # confirm that negative values are expected in the source data.
      - name: amount
        type: number(quantity)
        range: [-0.705, 38700]
        description: Amount of the nutrient per 100g of food.
      - name: data_points
        type: number(quantity)
        range: [1, 252]
        description: >
          Number of observations on which the value is based.
      # Enum column: each key under `values` is an allowed code, mapped to
      # a short explanation of its meaning.
      - name: derivation
        type: enum
        values:
          Analytical: Measured by chemical analysis
          Summed: Sum of component nutrients
          Calculated: Derived from other data
        description: The technique used to derive the nutrient value.
      # min, max and median share the type of amount; presumably they are
      # expressed in the same unit as amount (confirm against the source).
      - name: min
        type: number(quantity)
        range: [0, 37200]
        description: The minimum amount.
      - name: max
        type: number(quantity)
        range: [0, 40700]
        description: The maximum amount.
      - name: median
        type: number(quantity)
        range: [0, 38500]
        description: The median amount.
      # The examples use `>-` so each long sentence folds onto a single
      # line with no trailing newline.
      - name: footnote
        type: string
        description: >
          Comments on any unusual aspects of the food nutrient.
        examples:
          - >-
            Results for individual samples should be interpreted taking
            into account measurement uncertainty. The percent relative
            standard deviation ranged from 0.8-32.7, mean 10.4, median
            5.9 (n=10).
          - >-
            1,3:1,6 beta-glucans were determined with the Megazyme
            Mushroom and Yeast Test Kit (K-YBGL), using sulfuric acid
            for hydrolysis.
      # food_portion has a column with the same name, but there it is
      # declared `required` and documented with a narrower range.
      - name: min_year_acquired
        type: number(ordinal)
        range: [1999, 2025]
        description: >
          Minimum purchase year of all acquisitions used to derive
          the nutrient value.

  # Reference table of nutrients. food_nutrient.nutrient_id joins to `id`,
  # and `unit_name` gives the unit in which food_nutrient amounts are
  # expressed.
  nutrient:
    description: >
      The chemical constituents of a food officially recognized as
      essential to human health.

    source:
      parquet: inst/parquet/nutrient.parquet
      R: foodbank::nutrient

    columns:
      # Primary key of the table.
      - name: id
        type: number(id)
        constraints:
          - primary_key
        description: Unique permanent identifier.
        examples:
          - 1001
          - 1120
          - 1239
          - 1359
          - 2069

      - name: name
        type: string
        constraints:
          - required
        description: >
          Name of the nutrient.
        examples:
          - (+) -Gallo catechin
          - Fatty acids, total trans-dienoic
          - Nitrogen
          - SFA 8:0
          - Zinc, Zn

      # Enum column: each key under `values` is an allowed code, mapped to
      # the unit it stands for. Key casing is kept exactly as written.
      - name: unit_name
        type: enum
        constraints:
          - required
        values:
          G: Grams
          IU: International units
          KCAL: Kilocalories
          kJ: Kilojoules
          MCG_RE: Micrograms retinol equivalents
          MG: Milligrams
          MG_ATE: Milligrams alpha-tocopherol equivalents
          MG_GAE: Milligrams gallic acid equivalents
          PH: pH
          SP_GR: Specific gravity
          UG: Micrograms
          UMOL_TE: Micromoles Trolox equivalents
        description: The standard unit of measure for the nutrient.

      # No `required` constraint: the description states the code is
      # missing for some newer nutrients. Examples include a non-integer
      # code (321.2).
      - name: nutrient_nbr
        type: number(id)
        description: >
          A unique code identifying a nutrient or food constituent
          (used for historical matching; missing for some newer
          nutrients).
        examples:
          - 200
          - 321.2
          - 511
          - 693
          - 961

      - name: rank
        type: number(ordinal)
        range:
          - 50
          - 999999
        description: >
          Display order used by USDA to sort nutrients in reports;
          more important nutrients appear first.

  # One entry per portion measure of a food. References food through
  # fdc_id and measure_unit through measure_unit_id (see `relationships`).
  food_portion:
    description: >
      Discrete amounts of food, used to convert from per-100g nutrient
      values to common serving sizes. To convert:
      `food_nutrient.amount * food_portion.gram_weight / 100`.

    # Where the data can be loaded from: a parquet file path and,
    # presumably, the R object exported by the foodbank package.
    source:
      parquet: inst/parquet/food_portion.parquet
      R: foodbank::food_portion

    columns:
      # Primary key of the table.
      - name: id
        type: number(id)
        constraints: [primary_key]
        description: Unique permanent identifier.
        examples: [118804, 119351, 121438, 187505, 267795]
      # Foreign key to food.fdc_id.
      - name: fdc_id
        type: number(id)
        constraints: [required, foreign_key]
        description: ID of the food.
        examples: [321358, 326135, 333374, 746773, 1105314]
      - name: seq_num
        type: number(ordinal)
        constraints: [required]
        range: [1, 9]
        description: >
          The order the measure will be displayed on the released
          food.
      # Counts measure units, not grams; the weight in grams is given by
      # gram_weight. food_nutrient also has a column named `amount`, with
      # a different meaning.
      - name: amount
        type: number(quantity)
        constraints: [required]
        range: [0.2, 100]
        description: >
          The number of measure units that comprise the measure
          (e.g. if measure is 3 tsp, the amount is 3).
      # Foreign key to measure_unit.id.
      - name: measure_unit_id
        type: number(id)
        constraints: [required, foreign_key]
        description: ID of the measure unit.
        examples: [1000, 1023, 1043, 1067, 1120]
      # Optional column; only a single example value is listed.
      - name: portion_description
        type: string
        description: >
          Comments that provide more specificity on the measure.
          Mostly missing.
        examples:
          - shredded
      # The first example is single-quoted because it begins with a space
      # and contains a double quote; both are part of the value.
      - name: modifier
        type: string
        description: >
          Qualifier of the measure.
        examples:
          - ' 2-1/2" dia'
          - drained
          - medium
          - slices
          - whole
      # Used as the multiplier in the conversion formula given in the
      # table description.
      - name: gram_weight
        type: number(quantity)
        constraints: [required]
        range: [3.2, 980]
        description: The weight of the measure in grams.
      - name: data_points
        type: number(quantity)
        constraints: [required]
        range: [1, 826]
        description: >
          The number of observations on which the measure is based.
      # food_nutrient has a column with the same name; there it carries no
      # `required` constraint and has a wider documented range.
      - name: min_year_acquired
        type: number(ordinal)
        constraints: [required]
        range: [2000, 2019]
        description: >
          Minimum purchase year of all acquisitions used to derive
          the measure value.

  # Lookup table of measure units; food_portion.measure_unit_id joins to
  # `id`.
  measure_unit:
    description: Units for measuring quantities of foods.

    source:
      parquet: inst/parquet/measure_unit.parquet
      R: foodbank::measure_unit

    columns:
      # Primary key of the table.
      - name: id
        type: number(id)
        constraints:
          - primary_key
        description: Unique permanent identifier.
        examples:
          - 1000
          - 1031
          - 1061
          - 1091
          - 9999

      # Human-readable unit label.
      - name: name
        type: string
        constraints:
          - required
        description: >
          Name of the unit.
        examples:
          - back
          - cup
          - milliliter
          - sandwich
          - whole

  # Lookup table of food groups; food.food_category_id joins to `id`.
  food_category:
    description: Foods of defined similarity.

    source:
      parquet: inst/parquet/food_category.parquet
      R: foodbank::food_category

    columns:
      # Primary key of the table.
      - name: id
        type: number(id)
        constraints:
          - primary_key
        description: Unique permanent identifier.
        examples:
          - 1
          - 8
          - 14
          - 21
          - 28

      # Second identifier for the group; declared unique but not the key
      # that food joins on.
      - name: code
        type: number(id)
        constraints:
          - required
          - unique
        description: Food group code.
        examples:
          - 100
          - 800
          - 1400
          - 2000
          - 4500

      # Shares its column name with food.description.
      - name: description
        type: string
        constraints:
          - required
        description: >
          Description of the food group.
        examples:
          - Alcoholic Beverages
          - Breakfast Cereals
          - Fruits and Fruit Juices
          - Quality Control Materials
          - Vegetables and Vegetable Products

# Joins between the tables declared above. Each entry gives the join
# condition, its cardinality read left to right, and a one-line summary.
relationships:
  # food -> food_nutrient
  - join: 'food.fdc_id = food_nutrient.fdc_id'
    cardinality: one-to-many
    description: Each food has many nutrient values.

  # food -> food_portion
  - join: 'food.fdc_id = food_portion.fdc_id'
    cardinality: one-to-many
    description: Each food has zero or more portion measures.

  # food_nutrient -> nutrient
  - join: 'food_nutrient.nutrient_id = nutrient.id'
    cardinality: many-to-one
    description: Each food nutrient value refers to one nutrient.

  # food_portion -> measure_unit
  - join: 'food_portion.measure_unit_id = measure_unit.id'
    cardinality: many-to-one
    description: Each food portion uses one measure unit.

  # food -> food_category. Both tables have a `description` column, which
  # is what `conflicts` records.
  - join: 'food.food_category_id = food_category.id'
    cardinality: many-to-one
    description: Each food belongs to one food category.
    conflicts:
      - description

# Definitions of domain terms used in the table and column descriptions.
glossary:
  foundation food: >
    A food whose nutrient and food component values are derived
    primarily by chemical analysis. Foundation data include extensive
    underlying metadata such as the number of samples, the location
    and dates on which samples were obtained, analytical approaches
    used, and if appropriate, cultivar, genotype, and production
    practices.
  FDC ID: >
    FoodData Central identifier. A unique permanent integer assigned
    to each food in the USDA FoodData Central database.
  nutrient number: >
    A unique code from the USDA Standard Reference (SR) system
    identifying a specific nutrient or food constituent.
  # Keep the techniques named here in step with the values of the
  # food_nutrient.derivation enum (Analytical, Summed, Calculated).
  derivation: >
    The technique used to derive a nutrient value. In this dataset the
    recorded techniques are chemical analysis, summation of component
    nutrients, and calculation from other data.
  measure unit: >
    A standard unit used to express a food portion size, such as cup,
    tablespoon, or piece.
