diff --git a/metrics-library/MD Format.mdx b/metrics-library/MD Format.mdx new file mode 100644 index 0000000..999710a --- /dev/null +++ b/metrics-library/MD Format.mdx @@ -0,0 +1,15 @@ +> Test Blockquote + + + +
`<details>` & `<summary>`: Create collapsible toggles. + + +`<sub>`: For subscripts. + +`<sup>`: For superscripts. + +`<kbd>`: For keyboard shortcuts (renders as a little button). + + +`<table>`: For more advanced table formatting than standard Markdown pipes. diff --git a/metrics-library/assets/custom.scss b/metrics-library/assets/custom.scss new file mode 100644 index 0000000..18de940 --- /dev/null +++ b/metrics-library/assets/custom.scss @@ -0,0 +1,266 @@ +/* Import Khand, Poppins, and Roboto from Google Fonts */ +@import url('https://fonts.googleapis.com/css2?family=Khand:wght@300;400;500;600;700&family=Poppins:ital,wght@0,300;0,400;0,500;0,600;0,700;1,400&family=Roboto:ital,wght@0,300;0,400;0,500;0,700;1,400&display=swap'); + +* { + margin: 0; + padding: 0; + -webkit-print-color-adjust: exact; +} + +body { + /* Main body font: Roboto, with Helvetica/Arial as fallbacks */ + font-family: "Roboto", helvetica, arial, sans-serif; + font-weight: 300; + font-size: 16px; + color: #163666; + line-height: 1.4em; + background-color: #FAFAFA; + padding: 0.7em; +} + +p { + margin: 0.5em 0; /* Reduced margin to tighten space after headers */ + line-height: 1.5em; +} + +table { + font-size: inherit; + margin: 1em; +} + +table th { + /* Table headers: Uppercase, Khand, Weight 700 (max available) */ + font-family: "Khand", sans-serif; + font-weight: 700; + text-transform: uppercase; + border-bottom: 1px solid #163666; + padding: .2em 1em; + color: #163666; + text-align: left; +} + +table td { + border-bottom: 1px solid #ddd; + padding: .2em 1em; +} + +input[type=text], +input[type=password], +input[type=image], +textarea { + font-family: "Poppins", helvetica, arial, sans-serif; + font-size: 99%; +} + +select, +option { + padding: 0 .25em; +} + +optgroup { + margin-top: .5em; +} + +pre, +code { + font-family: "Poppins", "Roboto", helvetica, arial, sans-serif; +} + +pre { + margin: 1em 0; + font-size: 16px; + /* Entire code block container is now dark blue */ + background-color: #163666; + border: 1px solid #163666; + padding: 
10px; + line-height: 1.5em; + color: #EFF5F6; + overflow: auto; + -webkit-box-shadow: rgba(0, 0, 0, 0.07) 0 1px 2px inset; + -webkit-border-radius: 3px; + -moz-border-radius: 3px; + border-radius: 3px; +} + +pre code { + padding: 0; + font-family: "Poppins", "Roboto", sans-serif; + font-weight: 400; + font-size: 14px; + text-transform: none; /* Keep fenced code in its literal case; the inline `code` rule uppercases text */ + color: #EFF5F6; + background-color: transparent; /* Inherits container color */ + border: none; +} + +code { + /* Inline code style */ + font-family: "Khand", "Poppins", "Roboto", sans-serif; + font-size: 16px; + font-weight: 700; + text-transform: uppercase; + color: #163666; + background-color: #EFF5F6; + padding: 0 .2em; + border: 1px solid #EFF5F6; +} + +img { + border: 0; + max-width: 100%; +} + +abbr { + border-bottom: none; +} + +a { + color: #00A6B6; + text-decoration: none; +} + +a:hover { + text-decoration: underline; +} + +a code, +a:link code, +a:visited code { + color: #047C97; +} + +h1, +h2, +h3, +h4, +h5, +h6 { + color: #163666; + font-family: "Khand", "Roboto", sans-serif; + font-weight: 700; + text-transform: uppercase; + border: 0; + margin-bottom: 0.2em; /* Decreased space to next line */ +} + +h1 { + font-size: 370%; + line-height: 1; + border-top: 4px solid #aaa; + padding-top: .5em; + margin-top: 3.5em; /* Section spacing */ +} + +h1:first-child { + font-weight: 700; + text-transform: uppercase; + margin-top: 0; + padding-top: .25em; + border-top: none; +} + +h2 { + font-size: 250%; + line-height: 1; + margin-top: 3em; /* Section spacing */ + border-top: 4px solid #e0e0e0; + padding-top: .5em; +} + +h3 { + margin-top: 2.5em; /* Spacing for sub-sections */ + letter-spacing: .115em; +} + +/* Logic to prevent double-spacing if a header follows another header */ +h1 + h2, h2 + h3, h1 + h3, h3 + h2, h2 + h1, h1 + h1, h2 + h2, h3 + h3 { + margin-top: 0.5em !important; +} + +hr { + /* Increased margin around horizontal rules (line breaks) by roughly 30px */ + border: 0; + border-top: 1px solid #ddd; + margin: 3em 0; +} + +ul, +ol { + 
margin: 1em 0 1em 2em; +} + +ul li, +ol li { + margin-top: .5em; + margin-bottom: .5em; +} + +ul ul, +ul ol, +ol ol, +ol ul { + margin-top: 0; + margin-bottom: 0; +} + +blockquote { + margin: 3em 2em 3em 2em; + border-left: 5px solid #047C97; + line-height: 1.5; + padding: .15em 0em .15em 1em; + + font-weight: 300; + color: #00A6B6; + text-transform: lowercase; + font-family: "Poppins", "Roboto", sans-serif; +} + +dt { + font-weight: bold; + margin-left: 1em; +} + +dd { + margin-left: 2em; + margin-bottom: 1em; +} + +sup { + font-size: 0.83em; + vertical-align: super; + line-height: 0; +} + +kbd { + display: inline-block; + padding: 3px 5px; + font-size: 11px; + line-height: 10px; + color: #555; + vertical-align: middle; + background-color: #fcfcfc; + border: solid 1px #ccc; + border-bottom-color: #bbb; + border-radius: 3px; + box-shadow: inset 0 -1px 0 #bbb; +} + +@media screen and (min-width: 914px) { + body { + width: 854px; + margin: 0 auto; + } +} + +@media print { + table, + pre { + page-break-inside: avoid; + } + pre { + word-wrap: break-word; + } + body { + padding: 2cm; + } +} \ No newline at end of file diff --git a/metrics-library/assets/images/costar_compset_id_extraction_example.png b/metrics-library/assets/images/costar_compset_id_extraction_example.png new file mode 100644 index 0000000..563009d Binary files /dev/null and b/metrics-library/assets/images/costar_compset_id_extraction_example.png differ diff --git a/metrics-library/column-naming-standards.mdx b/metrics-library/column-naming-standards.mdx index 85e6e61..3248b11 100644 --- a/metrics-library/column-naming-standards.mdx +++ b/metrics-library/column-naming-standards.mdx @@ -1,10 +1,6 @@ ---- -title: "column-naming-standards.mdx" -description: "Canonical naming system used across the REVREBEL Metrics Library and analytics warehouse" ---- - # Database Column Naming Standards + ## Metrics Library & Analytics Warehouse This document defines the canonical naming system used across the REVREBEL 
Metrics Library and analytics warehouse. @@ -15,9 +11,9 @@ The warehouse is designed as a durable semantic layer where analysts, systems, a **Clean naming is not cosmetic, it is operational infrastructure.** ---- -## Core Naming Standard + +# Core Naming Standard All database columns use lowercase `snake_case`. @@ -36,29 +32,27 @@ The structure prioritizes: This ordering keeps related metric families grouped together logically and alphabetically across warehouse schemas. ---- -## Naming Philosophy +# Naming Philosophy The warehouse naming system is designed around semantic predictability. A field name should communicate: - - what the metric represents, - how it is used, - what reporting context applies, - and whether a comparison, ranking, or rolling window exists. -The naming structure intentionally favors consistency over source-system terminology. +The naming structure intentionally favors consistency over source-system terminology. **Hospitality systems rarely agree on naming conventions. The warehouse does.** ---- -## Core Naming Examples + +# Core Naming Examples | Current Variant | Warehouse Standard | Reason | -| --- | --- | --- | +|---|---|---| | `rooms_budget` | `rms_bgt` | Metric first using approved abbreviations. | | `available_rooms_ly` | `available_rms_ly` | Standardized room inventory naming. | | `rev_ly_actual` | `rev_ly` | Comparison context already defines the metric. | @@ -68,28 +62,29 @@ The naming structure intentionally favors consistency over source-system termino | `compset_occ` | `cs_occ` | Standardized competitive-set prefix. | | `adr_index_pct_chg_py` | `adr_index_pct_chg_ly` | Warehouse standard uses `ly`. 
| ---- -## Standard Formatting Rules -### Lowercase Snake Case +# Standard Formatting Rules -All fields use: +## Lowercase Snake Case +All fields use: - lowercase letters - underscores - numeric suffixes where applicable - +### Preferred -```text Preferred +```text property_code arrival_date roomtype_code revpar_rank ``` -```text Avoid +### Avoid + +```text PropertyCode ArrivalDate roomTypeCode @@ -97,16 +92,14 @@ RevPAR_Rank segment_code_Map ``` - ---- -## Approved Metric Abbreviations +# Approved Metric Abbreviations The warehouse standard consolidates recurring business concepts into a single approved naming structure. | Concept | Standard | -| --- | --- | +|---|---| | Rooms | `rms` | | Available Rooms | `available_rms` | | Revenue | `rev` | @@ -127,17 +120,19 @@ The warehouse standard consolidates recurring business concepts into a single ap | No Show | `ns` | | Reservations | `rsvn` | ---- +This standard removes synonym drift across operational systems, vendor exports, reporting tools, and manually built datasets. + -## Metric Ordering Logic -### Metric First +# Metric Ordering Logic + +## Metric First Metric names begin with the measure or business entity being described. - +### Preferred -```text Preferred +```text rms_bgt rev_bgt rms_fct @@ -147,22 +142,24 @@ rev_act rms_pace_stly ``` -```text Avoid +### Avoid + +```text rooms_budget rev_budget budget_rooms actualroomrevenue ``` - -### Comparison & Period Suffixes + +## Comparison & Period Suffixes Reporting context appears after the metric. - +### Preferred -```text Preferred +```text adr_ly adr_cy adr_stly @@ -171,42 +168,46 @@ rms_bgt_ly rms_pace_stly ``` -```text Avoid +### Avoid + +```text ly_adr prior_year_adr stly_rms_pace ``` - +This keeps related metric families grouped together naturally during schema inspection and downstream modeling. + ---- -## Numeric Window Standards +# Numeric Window Standards -Rolling windows and sequence identifiers always appear last. 
Numeric windows use three-digit formatting for predictable sorting behavior. +Rolling windows and sequence identifiers always appear last while numeric windows use three-digit formatting for predictable sorting behavior. - +### Preferred -```text Preferred +```text rev_chg_ly_030 rms_chg_ly_030 adr_chg_ly_030 revpar_pct_chg_ly_090 ``` -```text Avoid +### Avoid + +```text rev_chg_30_day_ly rev_30_day_chg_ly ``` - ---- -## Competitive Set Standards +# Competitive Set Standards Competitive-set metrics use the `cs_` prefix. +### Standard + ```text cs_adr cs_occ @@ -216,14 +217,16 @@ cs_rms cs_adr_yoy ``` ---- +The warehouse standard treats competitive benchmarking as a first-class reporting layer, not an exception case. + -## Revenue Naming Standards + +# Revenue Naming Standards Revenue defaults to room revenue unless another revenue category is explicitly prefixed. | Revenue Type | Standard | -| --- | --- | +|---|---| | Room Revenue | `rev` | | Food & Beverage Revenue | `fb_rev` | | Other Revenue | `other_rev` | @@ -231,14 +234,22 @@ Revenue defaults to room revenue unless another revenue category is explicitly p | Cancellation Revenue | `cx_rev` | | Total Revenue | `total_rev` | ---- +This keeps operational reporting concise while preserving semantic clarity for non-room revenue categories. -## Date Naming Standards + + +# Date Naming Standards Date fields describe business meaning rather than storage type. +The warehouse standard daily grain field is: + +```text +date +``` + | Concept | Standard | -| --- | --- | +|---|---| | Daily Reporting Grain | `date` | | Booking Date | `book_date` | | Arrival Date | `arrival_date` | @@ -247,12 +258,16 @@ Date fields describe business meaning rather than storage type. | Updated Date | `updated_date` | | Snapshot Date | `snap_date` | +The goal is immediate interpretability during reporting, transformation, and operational analysis. 
+ --- -## Identifier Standards +# Identifier Standards + +The warehouse uses standardized identifiers across all operational systems. | Concept | Standard | -| --- | --- | +|---|---| | Universal Reservation Identifier | `source_id` | | Property Identifier | `property_code` | | STR / CoStar Identifier | `str_id` | @@ -261,12 +276,16 @@ Date fields describe business meaning rather than storage type. | Segment Code | `segment_code` | | Source Code | `source_code` | ---- +This structure normalizes identifier inconsistencies across PMS, CRS, booking engine, OTA, and third-party exports. + -## Name & Code Pair Standards + +# Name & Code Pair Standards + +Where a business concept contains both a readable label and a system identifier, the distinction remains explicit. | Concept | Name Field | Code Field | -| --- | --- | --- | +|---|---|---| | Rate | `rate` | `rate_code` | | Source | `source` | `source_code` | | Channel | `channel` | `channel_code` | @@ -276,7 +295,9 @@ Date fields describe business meaning rather than storage type. --- -## Source Mapping Standards +# Source Mapping Standards + +Standardized warehouse values may preserve source-system references when traceability or reconciliation is required. Mapping fields use: @@ -296,12 +317,14 @@ source_map source_code_map ``` +The warehouse prioritizes standardized operational reporting while preserving source-level traceability where necessary. 
+ --- -## Core Metric Dictionary +# Core Metric Dictionary | Standard | Definition | -| --- | --- | +|---|---| | `rms` | Room count / room production metric | | `available_rms` | Available room inventory | | `rev` | Room revenue | @@ -326,11 +349,11 @@ source_code_map | `nts` | Nights | | `rsvn` | Reservations | ---- -## Pattern Library -### Budget / Forecast / Actual +# Pattern Library + +## Budget / Forecast / Actual ```text {metric}_{bgt|fct|act} @@ -348,7 +371,9 @@ rms_act rev_act ``` -### Pace Metrics + + +## Pace Metrics ```text {metric}_pace_{period} @@ -363,7 +388,9 @@ adr_pace_stly revpar_pace_stly ``` -### Ranking Metrics + + +## Ranking Metrics ```text {metric}_rank @@ -371,7 +398,17 @@ revpar_pace_stly cs_{metric}_rank ``` -### Change Metrics +Examples: + +```text +adr_rank +revpar_rank +cs_adr_rank +``` + + + +## Change Metrics ```text {metric}_chg @@ -391,12 +428,12 @@ adr_index_pct_chg_ly rev_chg_ly_030 ``` ---- -## Required Common Columns + +# Required Common Columns | Column | Purpose | -| --- | --- | +|---|---| | `property_code` | Property-level reporting alignment | | `date` | Daily reporting grain | | `snap_date` | Snapshot reporting context | @@ -404,16 +441,20 @@ rev_chg_ly_030 | `insert_date` | Data ingestion timestamp | | `updated_date` | Record update timestamp | ---- +These fields establish baseline reporting consistency across fact, dimension, snapshot, and reporting layers. -## Warehouse Philosophy -The warehouse naming standard exists to create a reporting environment where: +# Warehouse Philosophy + +The warehouse naming standard exists to create a reporting environment where: - metrics group together logically, - schemas remain predictable, - dashboards remain stable, - documentation stays aligned, - and automation workflows can infer semantic meaning directly from structure. -**The objective is not shorter column names. 
The objective is operational clarity at scale.** \ No newline at end of file +**The objective is not shorter column names. The objective is operational clarity at scale.** + + + diff --git a/metrics-library/costar-processing-guide.mdx b/metrics-library/costar-processing-guide.mdx deleted file mode 100644 index 9a3f683..0000000 --- a/metrics-library/costar-processing-guide.mdx +++ /dev/null @@ -1,241 +0,0 @@ ---- -title: "costar-processing-guide.mdx" -description: "Processing standards, schema structure, validation logic, and benchmarking methodology for CoStar STAR reports" ---- - -# CoStar STAR Report Processing Standards - -## REVREBEL Metrics Library & Analytics Warehouse - -This document defines the processing standards, schema structure, validation logic, and benchmarking methodology used for CoStar (STR) performance reporting inside the REVREBEL analytics warehouse. - -The objective is not simply ingesting STAR reports. The objective is preserving competitive context, benchmark integrity, and historical market behavior across operational reporting, forecasting, and performance analysis. - ---- - -## Understanding STAR Reports & Benchmarking Data - -The STAR Report, provided by STR (a CoStar Group company), is the hospitality industry's standard benchmarking framework for measuring hotel performance against a defined competitive set. - -The report centers around three primary performance metrics: - -| Metric | Description | -| --- | --- | -| Occupancy (`occ`) | Percentage of available rooms sold | -| ADR (`adr`) | Average Daily Rate | -| RevPAR (`revpar`) | Revenue Per Available Room | - -These metrics are benchmarked against a selected competitive set to generate industry-standard index metrics: - -| Index | Definition | -| --- | --- | -| MPI | Occupancy Index vs Competitive Set | -| ARI | ADR Index vs Competitive Set | -| RGI | RevPAR Index vs Competitive Set | - - - An index score of `100` represents fair market share. 
`>100` indicates market outperformance. `<100` indicates underperformance relative to the comp set. - - ---- - -## Data Model Philosophy - -The warehouse structure intentionally separates benchmarking logic from operational production data to preserve: - -- historical benchmark continuity, -- comp set integrity, -- market-relative calculations, -- ranking behavior, -- snapshot comparisons, -- and forecasting consistency. - - - **Changing the benchmark changes the meaning of the data.** - - ---- - -## Core Data Fields - -### Property & Metadata Fields - -| Field | Description | -| --- | --- | -| `property_code` | Standardized property identifier | -| `property_name` | Standardized property name | -| `property_shortname` | Short-form operational property name | -| `str_id` | STR / CoStar property identifier | -| `cs_id` | Competitive set identifier | -| `cs_no` | Competitive set grouping (`Set01`, `Set02`, `Set03`) | -| `brand` | Brand classification | - -### Calendar & Date Fields - -| Field | Format / Logic | -| --- | --- | -| `month` | Full month name | -| `cy` | Current year (`YYYY`) | -| `ly` | Prior year (`YYYY`) | -| `week_no` | Numeric week number | -| `dow` | Day-of-week abbreviation (`SUN–SAT`) | -| `date` | Daily reporting grain (`YYYY-MM-DD`) | -| `date_ly` | Prior-year reporting date | - ---- - -## Metric Expansion Logic - -### Base Metric Structure - -```text -{metric} -{metric}_ly -cs_{metric} -cs_{metric}_ly -industry_{metric} -industry_{metric}_ly -``` - -### Percent Change Structure - -```text -{metric}_pct_chg -cs_{metric}_pct_chg -industry_{metric}_pct_chg -``` - -### Index Structure - -```text -{metric}_index -{metric}_index_ly -cs_{metric}_index -industry_{metric}_index -``` - -### Ranking Structure - -```text -{metric}_rank -{metric}_pct_chg_rank -``` - ---- - -## Derived Metrics - -| Field | Formula | -| --- | --- | -| `rms` | `ROUND(occ * (available_rms / 100))` | -| `rms_ly` | `ROUND(occ_ly * (available_rms_ly / 100))` | -| `cs_rms` | 
`ROUND(cs_occ * (cs_available_rms / 100))` | -| `rev` | `rms * adr` | -| `rev_ly` | `rms_ly * adr_ly` | -| `cs_rev` | `cs_rms * cs_adr` | - ---- - -## Competitive Set Handling - -STAR reports may contain multiple competitive sets. Each competitive set processes independently, maintains separate benchmark continuity, and supports independent ranking and index calculations. - -Suffix identifiers: - -```text -_1 → Set01 -_2 → Set02 -_3 → Set03 -``` - ---- - -## Property Identification Logic - - - - Extract `property_code` from the filename. - - Example: `MSPHEH` - - - Extract STR / CoStar identifier (`str_id`). - - ```text - cs_id = STR IDs concatenated with hyphens - ``` - - Example: `65206-54429-55653-44555-56751-39388` - - - Match against standardized `property_name`. - - - ---- - -## Competitive Set Change Scenarios - -### Scenario 1 — Set Reclassification - -A hotel reorders an existing competitive set hierarchy (e.g., `Set03 → Set01`). Detection occurs through `cs_id` validation. - -### Scenario 2 — Competitive Set Composition Change - -When actual members of the competitive set change: - -- a new competitive set record is created, -- historical continuity is preserved, -- prior benchmark relationships remain intact. - ---- - -## Common Failure Points - -| Issue | Impact | -| --- | --- | -| Missing `property_code` | Property matching failure | -| Invalid `cs_id` | Benchmark corruption | -| Partial report tabs | Incomplete metric expansion | -| Null percentage changes | Invalid LY calculations | -| Week logic inconsistency | Reporting drift | -| Duplicate comp set rows | Ranking distortion | - - - These conditions are monitored because hospitality benchmarking data rarely fails loudly. It usually fails quietly and expensively. - - ---- - -## Data Pipeline Flow - -```text -1. Ingest raw CoStar export -2. Normalize OCC / ADR / RevPAR tabs -3. Flatten into row-based structure -4. Apply competitive set segmentation -5. Expand LY, index, and ranking metrics -6. 
Validate against reference tables -7. Load into warehouse -``` - ---- - -## Warehouse Naming Standards - -| Object Type | Standard | -| --- | --- | -| Fact tables | `metrics_fact_*` | -| Views | `vw_metrics_*` | -| Snapshot tables | `snap_metrics_*` | -| Dimensions | `dim_*` | - -Examples: - -```text -metrics_fact_costar -vw_metrics_costar_daily -snap_metrics_costar -dim_property -dim_comp_set -``` \ No newline at end of file diff --git a/metrics-library/costar-processing-spec.mdx b/metrics-library/costar-processing-spec.mdx deleted file mode 100644 index 3aed30e..0000000 --- a/metrics-library/costar-processing-spec.mdx +++ /dev/null @@ -1,160 +0,0 @@ ---- -title: "costar-processing-spec.mdx" -description: "Dimensional structure, benchmark relationships, derived field logic, and processing safeguards for CoStar reporting" ---- - -# CoStar Metrics Processing Specification - -## REVREBEL Metrics Library & Analytics Warehouse - -This document defines the dimensional structure, benchmark relationships, derived field logic, and processing safeguards used throughout the REVREBEL CoStar reporting framework. - -CoStar reporting is not transactional operational data. It is a structured benchmarking layer designed to measure market-relative performance over time. - ---- - -## Benchmarking Data Context - -STAR reporting evaluates hotel performance relative to a competitive market rather than in isolation. - -The dataset combines: - -- property-level metrics, -- competitive-set benchmarks, -- historical comparison periods, -- ranking behavior, -- and index calculations. - -### Processing Flow - -```text -1. Ingest raw CoStar export -2. Normalize OCC / ADR / RevPAR tabs -3. Flatten into row-based structure -4. Apply competitive set segmentation -5. Expand LY, index, and ranking metrics -6. Validate against reference tables -7. 
Load into warehouse -``` - ---- - -## Core Dimension & Derived Fields - -### Property & Benchmark Metadata - -| Field | Description | Notes | -| --- | --- | --- | -| `property_code` | Standardized property identifier | Preferred primary identifier | -| `property_name` | Standardized property name | | -| `property_shortname` | Operational short name | | -| `str_id` | STR / CoStar identifier | | -| `cs_id` | Competitive set identifier | Defines benchmark composition | -| `cs_no` | Competitive set grouping | `Set01`, `Set02`, `Set03` | -| `cs_reference` | External benchmark reference | | -| `brand` | Brand classification | Independent / Soft Brand / Chain | - -### Calendar & Reporting Fields - -| Field | Description | Notes | -| --- | --- | --- | -| `month` | Full month name | Current-year context | -| `cy` | Current year | `YYYY` | -| `ly` | Prior year | `YYYY` | -| `weekday` | Full weekday name | Current-year context | -| `week_no` | Hospitality week number | See week logic | -| `dow` | Day-of-week abbreviation | `SUN–SAT` | -| `date` | Reporting date | `YYYY-MM-DD` | -| `date_ly` | Prior-year reporting date | | -| `no_days` | Represented days | | - -### Capacity & Inventory Fields - -| Field | Description | -| --- | --- | -| `available_rms` | Property available room inventory | -| `cs_available_rms` | Competitive-set available room inventory | -| `physical_capacity` | Property physical capacity | -| `cs_physical_capacity` | Competitive-set physical capacity | - ---- - -## Competitive Set Structure - -Competitive sets are treated as independent benchmark contexts. Each comp set preserves historical continuity, ranking relationships, market positioning, and benchmark integrity. 
- -Suffix identifiers map internally as: - -```text -_1 → Set01 -_2 → Set02 -_3 → Set03 -``` - - - **Changing the benchmark changes the meaning of the data.** - - ---- - -## Derived Metric Logic - -### Room Production - -```text -rms = ROUND(occ * (available_rms / 100)) -``` - -### Revenue Production - -```text -rev = rms * adr -``` - -### Competitive Set Revenue - -```text -cs_rev = cs_rms * cs_adr -``` - ---- - -## Week Logic - -Week calculations follow hospitality reporting standards rather than ISO standards. - -- Week starts on Sunday -- Week 1 contains January 1 -- Each Sunday increments the week number - ---- - -## Benchmark Integrity Standards - -Competitive sets evolve over time: properties enter or exit, benchmark positioning changes, and reporting relationships shift. - -Rather than overwriting historical relationships, the warehouse maintains benchmark separation across reporting periods. - -**Benchmark history should remain historically accurate — not retroactively normalized.** - ---- - -## Operational Risk Conditions - -### Known Risk Conditions - -- Missing `property_code` in filenames -- Invalid STR / CoStar identifiers -- Competitive-set composition changes -- Partial tab-level report exports -- Division edge cases from `-100%` percent-change calculations -- Duplicate benchmark rows -- Week-number inconsistencies - ---- - -## Warehouse Design Philosophy - -The REVREBEL warehouse treats benchmarking data as a strategic intelligence layer rather than a transactional export. - -**The goal is not simply storing STAR reports. 
The goal is preserving the competitive story the data is trying to tell.** \ No newline at end of file diff --git a/metrics-library/costar_metrics_data_model__processing_specification.mdx b/metrics-library/costar_metrics_data_model__processing_specification.mdx new file mode 100644 index 0000000..664554e --- /dev/null +++ b/metrics-library/costar_metrics_data_model__processing_specification.mdx @@ -0,0 +1,244 @@ +# CoStar Metrics Processing Specification + +## REVREBEL Metrics Library & Analytics Warehouse + +This document defines the dimensional structure, benchmark relationships, derived field logic, and processing safeguards used throughout the REVREBEL CoStar reporting framework. + +The purpose of this specification is to preserve consistency across benchmarking, competitive-set analysis, reporting logic, and downstream analytics. + +CoStar reporting is not transactional operational data. + +It is a structured benchmarking layer designed to measure market-relative performance over time. + +That distinction drives how the warehouse models, validates, and processes the dataset. + + + +# Benchmarking Data Context + +STAR reporting evaluates hotel performance relative to a competitive market rather than in isolation. + +The dataset combines: +- property-level metrics, +- competitive-set benchmarks, +- historical comparison periods, +- ranking behavior, +- and index calculations. + +Because of this, the warehouse prioritizes: +- benchmark continuity, +- snapshot consistency, +- competitive-set integrity, +- and historical comparability. + +The structure exists to preserve how the market behaved at a specific point in time — not simply how the hotel performed operationally. + + +## Processing Flow + +```text +1. Ingest raw CoStar export +2. Normalize OCC / ADR / RevPAR tabs +3. Flatten into row-based structure +4. Apply competitive set segmentation +5. Expand LY, index, and ranking metrics +6. Validate against reference tables +7. 
Load into warehouse +``` + + +# Core Dimension & Derived Fields + +## Property & Benchmark Metadata + +| Field | Description | Notes | +|---|---|---| +| `property_code` | Standardized property identifier | Preferred primary identifier | +| `property_name` | Standardized property name | | +| `property_shortname` | Operational short name | | +| `str_id` | STR / CoStar identifier | | +| `cs_id` | Competitive set identifier | Defines benchmark composition | +| `cs_no` | Competitive set grouping | `Set01`, `Set02`, `Set03` | +| `cs_reference` | External benchmark reference | | +| `brand` | Brand classification | Independent / Soft Brand / Chain | + + + +## Calendar & Reporting Fields + +| Field | Description | Notes | +|---|---|---| +| `month` | Full month name | Current-year context | +| `month_name` | Full month name | Prior-year context | +| `cy` | Current year | `YYYY` | +| `ly` | Prior year | `YYYY` | +| `weekday` | Full weekday name | Current-year context | +| `weekday_ly` | Full weekday name | Prior-year context | +| `week_no` | Hospitality week number | See week logic | +| `week_no_ly` | Prior-year week number | | +| `dow` | Day-of-week abbreviation | `SUN–SAT` | +| `day` | Day with leading zero | `01–31` | +| `date` | Reporting date | `YYYY-MM-DD` | +| `date_ly` | Prior-year reporting date | | +| `no_days` | Represented days | | +| `no_days_ly` | Prior-year represented days | | + + + +## Capacity & Inventory Fields + +| Field | Description | +|---|---| +| `available_rms` | Property available room inventory | +| `cs_available_rms` | Competitive-set available room inventory | +| `physical_capacity` | Property physical capacity | +| `cs_physical_capacity` | Competitive-set physical capacity | + +These fields support occupancy calculations, benchmark alignment, and derived revenue metrics. + + + +# Competitive Set Structure + +Competitive sets are treated as independent benchmark contexts. 
+ +Each comp set preserves: +- historical continuity, +- ranking relationships, +- market positioning, +- and benchmark integrity. + +Suffix identifiers: + +```text +_1 +_2 +_3 +``` + + +Map internally as: + +```text +Set01 +Set02 +Set03 +``` + +The warehouse intentionally separates comp-set contexts rather than flattening them into a single benchmark layer. + +**Changing the benchmark changes the meaning of the data.** + + + +# Derived Metric Logic + +Derived metrics preserve standardized hospitality reporting calculations. + +## Room Production + +```text +rms = ROUND(occ * (available_rms / 100)) +``` + +## Revenue Production + +```text +rev = rms * adr +``` + +## Competitive Set Revenue + +```text +cs_rev = cs_rms * cs_adr +``` + +These calculations maintain reproducibility across: +- occupancy, +- ADR, +- RevPAR, +- benchmark indexes, +- and historical comparisons. + + +# Week Logic + +Week calculations follow hospitality reporting standards rather than ISO standards. + +- Week starts on Sunday +- Week 1 contains January 1 +- Each Sunday increments the week number + +This aligns warehouse reporting behavior with standard STR operational reporting. + + + +# Benchmark Integrity Standards + +The warehouse structure preserves competitive-set continuity historically. + +Competitive sets evolve over time: +- properties enter or exit, +- benchmark positioning changes, +- and reporting relationships shift. + +Rather than overwriting historical relationships, the warehouse maintains benchmark separation across reporting periods. + +This preserves: +- historical index accuracy, +- prior ranking logic, +- and market-relative reporting continuity. + +**Benchmark history should remain historically accurate — not retroactively normalized.** + + + +# Operational Risk Conditions + +The processing framework monitors several known hospitality benchmarking edge cases. 
+ +## Known Risk Conditions + +- Missing `property_code` in filenames +- Invalid STR / CoStar identifiers +- Competitive-set composition changes +- Partial tab-level report exports +- Division edge cases from `-100%` percent-change calculations +- Duplicate benchmark rows +- Week-number inconsistencies + +These conditions are monitored because hospitality benchmarking data tends to fail quietly rather than obviously, and quiet failures usually become expensive reporting conversations later. + + + +# Processing Safeguards + +The warehouse processing framework preserves benchmark integrity through: + +- standardized property matching, +- comp-set validation, +- null-safe prior-year calculations, +- benchmark version continuity, +- ingestion logging, +- and reporting-layer validation. + +**The objective is reporting stability across dashboards, forecasts, automation workflows, and downstream analytics.** + + + +# Warehouse Design Philosophy + +The REVREBEL warehouse treats benchmarking data as a strategic intelligence layer rather than a transactional export. + +The structure is intentionally designed so that: +- schemas remain predictable, +- benchmark relationships remain traceable, +- competitive positioning remains historically accurate, +- and downstream systems can interpret market behavior consistently. + +The goal is not simply storing STAR reports. 
+ +**The goal is preserving the competitive story the data is trying to tell.** + + + diff --git a/metrics-library/costar_metrics_data_model_processing_guide.mdx b/metrics-library/costar_metrics_data_model_processing_guide.mdx new file mode 100644 index 0000000..c4f0d5e --- /dev/null +++ b/metrics-library/costar_metrics_data_model_processing_guide.mdx @@ -0,0 +1,401 @@ +# CoStar STAR Report Processing Standards + +## REVREBEL Metrics Library & Analytics Warehouse + +This document defines the processing standards, schema structure, validation logic, and benchmarking methodology used for CoStar (STR) performance reporting inside the REVREBEL analytics warehouse. + +The objective is not simply ingesting STAR reports. The objective is preserving competitive context, benchmark integrity, and historical market behavior across operational reporting, forecasting, and performance analysis. + + + +# Understanding STAR Reports & Benchmarking Data + +The STAR Report, provided by STR (a CoStar Group company), is the hospitality industry’s standard benchmarking framework for measuring hotel performance against a defined competitive set. + +Rather than evaluating performance in isolation, STAR reporting measures how a hotel performs relative to its market across occupancy, rate, and revenue efficiency. + +For hotel operators, ownership groups, asset managers, and revenue teams, it functions as both a market scorecard and a competitive positioning system. + +
+ +The report centers around three primary performance metrics: + +| Metric | Description | +|---|---| +| Occupancy (`occ`) | Percentage of available rooms sold | +| ADR (`adr`) | Average Daily Rate | +| RevPAR (`revpar`) | Revenue Per Available Room | + +These metrics are benchmarked against a selected competitive set (“compset”) to generate industry-standard index metrics: + +| Index | Definition | +|---|---| +| MPI | Occupancy Index vs Competitive Set | +| ARI | ADR Index vs Competitive Set | +| RGI | RevPAR Index vs Competitive Set | + +An index score of: + +- `100` represents fair market share +- `>100` indicates market outperformance +- `<100` indicates underperformance relative to the comp set + + +## How the Report is Generally Used +**Hotels use STAR reporting to:** +- evaluate market share, +- monitor pricing strategy, +- assess revenue efficiency, +- benchmark competitive positioning, +- and identify where performance is gaining or losing ground. + +Unlike transactional PMS data, STAR reporting is observational and benchmark-driven by nature. + +**It contains:** +- market-relative positioning, +- competitive rankings, +- snapshot-based comparisons, +- prior-period benchmarking, +- index calculations, +- and comp set relationships that do not always reconcile directly to operational room-night production. + +Because of this, the REVREBEL warehouse models CoStar data as a benchmarking and market-intelligence layer rather than a transactional operational dataset. + +**The objective is not simply storing performance metrics. The objective is preserving competitive context.** + + + +# Data Model Philosophy + +CoStar data behaves differently than PMS operational reporting. + +**Operational systems primarily record transactions:** +- reservations, +- room nights, +- revenue production, +- guest activity, +- and inventory movement. + +STAR reporting evaluates market-relative performance. That distinction matters. 
+ +**The warehouse structure intentionally separates benchmarking logic from operational production data to preserve:** + +- historical benchmark continuity, +- comp set integrity, +- market-relative calculations, +- ranking behavior, +- snapshot comparisons, +- and forecasting consistency. + +Competitive sets evolve over time. Market positioning changes. Benchmarks shift. The warehouse structure is designed to preserve those changes historically rather than flatten or overwrite them. + +**Changing the benchmark changes the meaning of the data.** + + + +# Core Data Fields +This section defines the required schema, processing logic, and data handling standards for ingesting and transforming CoStar (STR) performance reports into the `metrics_fact_costar` dataset. + +## Property & Metadata Fields + +| Field | Description | +|---|---| +| `property_code` | Standardized property identifier used across REVREBEL systems | +| `property_name` | Standardized property name | +| `property_shortname` | Short-form operational property name | +| `str_id` | STR / CoStar property identifier | +| `cs_id` | Competitive set identifier | +| `cs_no` | Competitive set grouping (`Set01`, `Set02`, `Set03`) | +| `cs_reference` | External competitive set reference | +| `brand` | Brand classification | + + + +## Calendar & Date Fields + +| Field | Format / Logic | +|---|---| +| `month` | Full month name | +| `month_name` | Prior-year month name | +| `cy` | Current year (`YYYY`) | +| `ly` | Prior year (`YYYY`) | +| `weekday` | Full weekday name | +| `weekday_ly` | Prior-year weekday | +| `week_no` | Numeric week number | +| `week_no_ly` | Prior-year week number | +| `dow` | Day-of-week abbreviation (`SUN–SAT`) | +| `day` | Day with leading zero (`01–31`) | +| `date` | Daily reporting grain (`YYYY-MM-DD`) | +| `date_ly` | Prior-year reporting date | +| `no_days` | Number of represented days | +| `no_days_ly` | Prior-year represented days | + + + +# Core Metric Structure + +## Primary Metrics + 
+```text +occ +adr +revpar +``` + +These metrics serve as the foundational benchmarking layer across property, competitive set, and industry performance reporting. + + + +# Metric Expansion Logic + +## Base Metric Structure + +```text +{metric} +{metric}_ly +cs_{metric} +cs_{metric}_ly +industry_{metric} +industry_{metric}_ly +``` + +## Percent Change Structure + +```text +{metric}_pct_chg +cs_{metric}_pct_chg +industry_{metric}_pct_chg +``` + +## Index Structure + +```text +{metric}_index +{metric}_index_ly +cs_{metric}_index +industry_{metric}_index +``` + +## Ranking Structure + +```text +{metric}_rank +{metric}_pct_chg_rank +``` + +The warehouse structure keeps related metric families grouped together logically while preserving semantic consistency across reporting layers. + + + +# Derived Metrics + +## Capacity & Revenue Calculations + +| Field | Formula | +|---|---| +| `rms` | `ROUND(occ * (available_rms / 100))` | +| `rms_ly` | `ROUND(occ_ly * (available_rms_ly / 100))` | +| `cs_rms` | `ROUND(cs_occ * (cs_available_rms / 100))` | +| `cs_rms_ly` | `ROUND(cs_occ_ly * (cs_available_rms_ly / 100))` | +| `rev` | `rms * adr` | +| `rev_ly` | `rms_ly * adr_ly` | +| `cs_rev` | `cs_rms * cs_adr` | +| `cs_rev_ly` | `cs_rms_ly * cs_adr_ly` | + + + +# Competitive Set Handling + +STAR reports may contain multiple competitive sets. + +Competitive sets are identified using suffix-based segmentation: + +```text +_1 +_2 +_3 +``` + +Mapped internally as: + +```text +Set01 +Set02 +Set03 +``` + +Each competitive set: +- processes independently, +- maintains separate benchmark continuity, +- preserves historical positioning, +- and supports independent ranking and index calculations. + +The warehouse intentionally treats comp sets as independent benchmarking contexts rather than interchangeable reporting dimensions. 
+ + + +# Property Identification Logic + +Property identification follows a structured fallback hierarchy designed to maintain ingestion reliability and benchmark continuity. + +## Primary Identification Method + +Extract `property_code` from the filename. + +Example: + +```text +MSPHEH +``` + + + +## Secondary Identification Method + +Extract STR / CoStar identifier (`str_id`). + +``` +cs_id = STR IDs concatenated with hyphens +``` + +![alt](https://raw.githubusercontent.com/REVREBEL/Metrics-Library/main/assets/images/costar_compset_id_extraction_example.png) + + +Example: +``` +65206-54429-55653-44555-56751-39388 +``` + +> This uniquely defines the comp set composition. + + + + +## Tertiary Identification Method + +Match against standardized `property_name`. + +Once identified, the property reference becomes the session-level lookup key throughout processing and validation. + + + +# Validation Standards + +Every processed row validates against the standardized CoStar reference layer. + +Validation includes: +- property alignment, +- competitive set alignment, +- capacity consistency, +- metadata integrity, +- ranking continuity, +- and index calculation stability. + +This validation layer exists to prevent silent reporting drift across dashboards and downstream analytics. + +Because benchmarking data has a remarkable ability to fail quietly right before someone screenshots it for ownership. + + + +# Competitive Set Change Scenarios + +## Scenario 1 — Set Reclassification + +A hotel reorders an existing competitive set hierarchy. + +Example: + +```text +Set03 → Set01 +``` + +This changes benchmark relationships and index interpretation. + +Detection occurs through `cs_id` validation. + + + +## Scenario 2 — Competitive Set Composition Change + +The actual members of the competitive set change. 
+ +When this occurs: +- a new competitive set record is created, +- historical continuity is preserved, +- prior benchmark relationships remain intact, +- and reporting logic maintains separation between benchmark versions. + +Benchmark history remains historically accurate rather than retroactively rewritten. + + + +# Week Logic + +Week structure follows hospitality reporting standards rather than ISO week standards. + +- Week starts on Sunday +- Week 1 contains January 1 +- Each Sunday increments the week number + +This aligns reporting behavior with standard STR operational reporting. + + + +# Warehouse Naming Standards + +The CoStar reporting pipeline follows the REVREBEL warehouse naming framework. + +| Object Type | Standard | +|---|---| +| Fact tables | `metrics_fact_*` | +| Views | `vw_metrics_*` | +| Snapshot tables | `snap_metrics_*` | +| Dimensions | `dim_*` | + +Examples: + +```text +metrics_fact_costar +vw_metrics_costar_daily +snap_metrics_costar +dim_property +dim_comp_set +``` + + + +# Data Pipeline Flow + +```text +1. Ingest raw CoStar export +2. Normalize OCC / ADR / RevPAR tabs +3. Flatten into row-based structure +4. Apply competitive set segmentation +5. Expand LY, index, and ranking metrics +6. Validate against reference tables +7. Load into warehouse +``` + +The pipeline prioritizes: +- reproducibility, +- benchmark integrity, +- traceability, +- semantic consistency, +- and reporting stability. + + + +# Common Failure Points + +| Issue | Impact | +|---|---| +| Missing `property_code` | Property matching failure | +| Invalid `cs_id` | Benchmark corruption | +| Partial report tabs | Incomplete metric expansion | +| Null percentage changes | Invalid LY calculations | +| Week logic inconsistency | Reporting drift | +| Duplicate comp set rows | Ranking distortion | + +These conditions are monitored because hospitality benchmarking data rarely fails loudly. It usually fails quietly and expensively. 
+ + diff --git a/metrics-library/demand-table-models.mdx b/metrics-library/demand-table-models.mdx index aadff26..5ad0d82 100644 --- a/metrics-library/demand-table-models.mdx +++ b/metrics-library/demand-table-models.mdx @@ -1,6 +1,8 @@ --- -title: "demand-table-models.mdx" -description: "Demand data table family for market signal, compset benchmarking, and occupancy index analysis" +title: Demand Table Models +nav_order: 7 +has_toc: true +permalink: /demand-table-models/ --- # Demand Table Models @@ -9,17 +11,13 @@ Demand data has its own table family because it does not behave like PMS actuals The demand model supports market demand analysis, occupancy index review, ADR and RevPAR rank tracking, comp-set benchmarking, market-excluding-property comparisons, and demand variance analysis. ---- - ## How We View Demand Data -We intentionally separate demand data from pace and actual performance datasets because demand behaves differently. - -Demand data is observational by nature. It includes market and compset intelligence, occupancy indexing, ADR and RevPAR rankings, prior-period comparisons, and market-level performance metrics. +We intentionally separate demand data from pace and actual performance datasets because demand behaves differently — and forcing it into operational fact tables creates more confusion than clarity. -By structuring demand as its own dataset, we preserve the integrity of operational reporting while creating a cleaner framework for forecasting, benchmarking, and strategic analysis. +Demand data is observational by nature. It includes market and compset intelligence, occupancy indexing, ADR and RevPAR rankings, prior-period comparisons, and market-level performance metrics that don’t always reconcile directly to PMS revenue or room-night production. It also captures snapshot-based competitive positioning data that reflects market movement at a specific point in time. 
---- +By structuring demand as its own dataset, we preserve the integrity of operational reporting while creating a cleaner framework for forecasting, benchmarking, and strategic analysis. It allows us to evaluate not just how the hotel performed — but how the market behaved around it. ## Demand Table Family @@ -31,34 +29,30 @@ snap_demand_segment snap_demand_source ``` - - The source report often named `Demand Channel` maps to `snap_demand_source`. In the REVREBEL model, `source` represents the distribution-oriented view of demand, while `channel` remains available as a standardized grouping when the data supports it. - - ---- +The source report often named `Demand Channel` maps to `snap_demand_source`. In the REVREBEL model, `source` represents the distribution-oriented view of demand, while `channel` remains available as a standardized grouping when the data supports it. ## Naming Standard Notes +Demand tables follow the same column naming standards as the rest of the Metrics Library. + | Source Pattern | Standard Pattern | Usage | -| --- | --- | --- | +|---|---|---| | `snapshot_date` | `snap_date` | Point-in-time date when the demand observation was captured. | | `stay_date` or source `date` | `stay_date` | Hotel stay / occupancy / service date. | | `room_nights` | `rms` | Room nights or rooms sold. | -| `compset` | `cs` | Competitive set. | +| `compset` | `cs` | Competitive set. Short, readable, and mercifully not thirty characters long. | | `prior_year` | `ly` | Prior-year comparison. | | `prior_week` | `lw` | Prior-week comparison. | | `chg` | `chg` | Absolute change / variance. | | `pct_chg` | `pct_chg` | Percent change. | | `market_excl` | `market_excl` | Market excluding the subject property. | | `booking_source` | `source` | Source-oriented distribution cut. | -| `etl_date` | `etl_date` | Technical ETL timestamp. | - ---- +| `etl_date` | `etl_date` | Technical ETL timestamp when it needs to be preserved from the source. 
| ## Table List | Table | Purpose | -| --- | --- | +|---|---| | `snap_demand_property` | Property-level demand, market, compset, index, and rank metrics. | | `snap_demand_segment` | Segment-level demand, market, compset, index, rank, and variance metrics. | | `snap_demand_source` | Source/channel-level demand, market, compset, index, rank, and variance metrics. | @@ -69,25 +63,51 @@ snap_demand_source ### Grain +The property demand table is keyed at: + ```text property_code + snap_date + stay_date + market_segment + segment ``` +When the source data is truly property-wide and `market_segment` or `segment` are null, total, or not supplied, the effective grain becomes: + +```text +property_code + snap_date + stay_date +``` + ### Standardized Columns | Source Field | Standard Field | Type | Notes | -| --- | --- | --- | --- | +|---|---|---|---| | `market_segment` | `market_segment` | STRING | Market segment from the demand source. | +| `segment` | `segment` | STRING | Standard or source segment value, depending on mapping maturity. | +| `month` | `month` | DATE | Month value normalized to the first day of month. | | `stay_date`, `date` | `stay_date` | DATE | Stay date for the demand observation. | | `compset_rooms_sold` | `cs_rms_sold` | FLOAT64 | Competitive-set rooms sold. | | `occ_index` | `occ_index` | FLOAT64 | Property occupancy index versus compset or market. | -| `occ_rank` | `occ_rank` | STRING | Occupancy rank. | +| `occ_rank` | `occ_rank` | STRING | Occupancy rank. Cast in views if the source is consistently numeric. | | `property_adr` | `adr` | FLOAT64 | Property ADR. | | `compset_adr` | `cs_adr` | FLOAT64 | Competitive-set ADR. | -| `adr_rank` | `adr_rank` | STRING | ADR rank. | -| `revpar_rank` | `revpar_rank` | STRING | RevPAR rank. | +| `adr_rank` | `adr_rank` | STRING | ADR rank. Cast in views if the source is consistently numeric. | +| `revpar_rank` | `revpar_rank` | STRING | RevPAR rank. Cast in views if the source is consistently numeric. 
| +| `property_occ_yoy` | `occ_yoy` | FLOAT64 | Property occupancy year-over-year value or change as provided by source. | +| `compset_occ_yoy` | `cs_occ_yoy` | FLOAT64 | Competitive-set occupancy year-over-year value or change as provided by source. | +| `occ_index_yoy` | `occ_index_yoy` | FLOAT64 | Occupancy index year-over-year value or change. | +| `property_adr_yoy` | `adr_yoy` | FLOAT64 | Property ADR year-over-year value or change. | +| `compset_adr_yoy` | `cs_adr_yoy` | FLOAT64 | Competitive-set ADR year-over-year value or change. | | `snapshot_date` | `snap_date` | DATE | Demand snapshot date. | | `property_code` | `property_code` | STRING | Property code. | +| `etl_date` | `etl_date` | TIMESTAMP | Technical ETL timestamp from the source process. | + +### Metadata Columns + +| Column | Type | Notes | +|---|---|---| +| `source_system` | STRING | Source system or provider. | +| `source_report` | STRING | Source report or feed name. | +| `source_file` | STRING | Source file path or name. | +| `insert_date` | DATE | Insert date. | +| `updated_date` | DATE | Updated date. | --- @@ -95,22 +115,61 @@ property_code + snap_date + stay_date + market_segment + segment ### Grain +The segment demand table is keyed at: + ```text property_code + snap_date + stay_date + market_segment + segment_detail ``` +`segment_detail` preserves the source detail field. Where mapping exists, it resolves into the standard segment model using `segment`, `segment_code`, and related segment attributes. + ### Standardized Columns | Source Field | Standard Field | Type | Notes | -| --- | --- | --- | --- | +|---|---|---|---| +| `month` | `month` | DATE | Month value normalized to the first day of month. | | `date`, `stay_date` | `stay_date` | DATE | Demand observation date / stay date. | | `occ` | `occ` | FLOAT64 | Property occupancy. | | `compset_occ` | `cs_occ` | FLOAT64 | Competitive-set occupancy. | -| `occ_index` | `occ_index` | FLOAT64 | Property occupancy index. 
| -| `room_nights_current_my_hotel_totals` | `rms` | INT64 | Current property room nights. | +| `occ_index` | `occ_index` | FLOAT64 | Property occupancy index versus compset or market. | +| `occ_rank` | `occ_rank` | STRING | Occupancy rank. Cast in views if the source is consistently numeric. | +| `occ_index_vs_prior_year_pct` | `occ_index_pct_chg_ly` | FLOAT64 | Occupancy index percent change versus prior year. | +| `occ_index_chg_vs_prior_week_pct` | `occ_index_pct_chg_lw` | FLOAT64 | Occupancy index percent change versus prior week. | +| `room_nights_current_my_hotel_totals` | `rms` | INT64 | Current property room nights / rooms sold. | +| `room_nights_chg_from_last_wk_my_hotel_totals` | `rms_chg_lw` | INT64 | Property room nights change versus last week. | +| `room_nights_var_pct_to_last_yr_my_hotel_totals` | `rms_pct_chg_ly` | FLOAT64 | Property room nights percent variance versus last year. | +| `room_nights_var_pct_to_last_yr_market_excl_totals` | `market_excl_rms_pct_chg_ly` | FLOAT64 | Market-excluding-property room nights percent variance versus last year. | +| `room_nights_chg_pct_from_last_wk_my_hotel_totals` | `rms_pct_chg_lw` | FLOAT64 | Property room nights percent change versus last week. | +| `room_nights_chg_pct_from_last_wk_market_excl_totals` | `market_excl_rms_pct_chg_lw` | FLOAT64 | Market-excluding-property room nights percent change versus last week. | | `adr` | `adr` | FLOAT64 | Property ADR. | +| `adr_rank` | `adr_rank` | FLOAT64 | ADR rank. | | `revpar` | `revpar` | FLOAT64 | Property RevPAR. | +| `revpar_rank` | `revpar_rank` | FLOAT64 | RevPAR rank. | +| `market_segment` | `market_segment` | STRING | Market segment from source. | +| `detail` | `segment_detail` | STRING | Source detail field. | +| `compset_no` | `cs_no` | INT64 | Competitive-set number or identifier. | | `snapshot_date` | `snap_date` | DATE | Demand snapshot date. | +| `property_code` | `property_code` | STRING | Property code. 
| + +### Segment Mapping Columns + +| Column | Type | Notes | +|---|---|---| +| `segment` | STRING | Standard segment, when mapped. | +| `segment_code` | STRING | Standard segment code, when mapped. | +| `segment_category` | STRING | Standard segment category, when mapped. | +| `segment_map` | STRING | Source segment/detail value. | +| `segment_code_map` | STRING | Source segment code, when supplied. | + +### Metadata Columns + +| Column | Type | Notes | +|---|---|---| +| `source_system` | STRING | Source system or provider. | +| `source_report` | STRING | Source report or feed name. | +| `source_file` | STRING | Source file path or name. | +| `insert_date` | DATE | Insert date. | +| `updated_date` | DATE | Updated date. | --- @@ -118,26 +177,78 @@ property_code + snap_date + stay_date + market_segment + segment_detail ### Grain +The source demand table is keyed at: + ```text property_code + snap_date + stay_date + source ``` +Source values can be mapped into the broader source/channel model as the library matures. + ### Standardized Columns | Source Field | Standard Field | Type | Notes | -| --- | --- | --- | --- | +|---|---|---|---| +| `month` | `month` | DATE | Month value normalized to the first day of month. | | `booking_source` | `source` | STRING | Booking source from the source table. | | `date`, `stay_date` | `stay_date` | DATE | Demand observation date / stay date. | | `occ` | `occ` | FLOAT64 | Property occupancy. | -| `occ_index` | `occ_index` | FLOAT64 | Property occupancy index. | +| `compset_occ` | `cs_occ` | FLOAT64 | Competitive-set occupancy. | +| `occ_index` | `occ_index` | FLOAT64 | Property occupancy index versus compset or market. | +| `occ_rank` | `occ_rank` | STRING | Occupancy rank. Cast in views if the source is consistently numeric. | +| `occ_index_vs_prior_year_pct` | `occ_index_pct_chg_ly` | FLOAT64 | Occupancy index percent change versus prior year. 
| +| `occ_index_chg_vs_prior_week_pct` | `occ_index_pct_chg_lw` | FLOAT64 | Occupancy index percent change versus prior week. | +| `room_nights_current_my_hotel_totals` | `rms` | INT64 | Current property room nights / rooms sold. | +| `room_nights_var_pct_to_last_yr_my_hotel_totals` | `rms_pct_chg_ly` | FLOAT64 | Property room nights percent variance versus last year. | +| `room_nights_var_pct_to_last_yr_market_excl_totals` | `market_excl_rms_pct_chg_ly` | FLOAT64 | Market-excluding-property room nights percent variance versus last year. | +| `room_nights_chg_from_last_wk_my_hotel_totals` | `rms_chg_lw` | INT64 | Property room nights change versus last week. | +| `room_nights_chg_pct_from_last_wk_my_hotel_totals` | `rms_pct_chg_lw` | FLOAT64 | Property room nights percent change versus last week. | +| `room_nights_chg_pct_from_last_wk_market_excl_totals` | `market_excl_rms_pct_chg_lw` | FLOAT64 | Market-excluding-property room nights percent change versus last week. | | `adr` | `adr` | FLOAT64 | Property ADR. | +| `adr_rank` | `adr_rank` | FLOAT64 | ADR rank. | | `revpar` | `revpar` | FLOAT64 | Property RevPAR. | +| `revpar_rank` | `revpar_rank` | FLOAT64 | RevPAR rank. | +| `compset_no` | `cs_no` | INT64 | Competitive-set number or identifier. | | `snapshot_date` | `snap_date` | DATE | Demand snapshot date. | +| `property_code` | `property_code` | STRING | Property code. | + +### Source and Channel Mapping Columns + +| Column | Type | Notes | +|---|---|---| +| `source` | STRING | Standard source. | +| `source_code` | STRING | Standard source code, when mapped. | +| `source_group` | STRING | Standard source group, when mapped. | +| `source_group_code` | STRING | Standard source group code, when mapped. | +| `channel` | STRING | Standard channel. | +| `channel_code` | STRING | Standard channel code. | +| `source_map` | STRING | Source booking source value. | +| `source_code_map` | STRING | Source booking source code, when supplied. 
| + +### Metadata Columns + +| Column | Type | Notes | +|---|---|---| +| `source_system` | STRING | Source system or provider. | +| `source_report` | STRING | Source report or feed name. | +| `source_file` | STRING | Source file path or name. | +| `insert_date` | DATE | Insert date. | +| `updated_date` | DATE | Updated date. | --- ## BigQuery DDL Pattern +The Dataform implementation creates these tables in the demand dataset using the same naming logic: + +```text +metrics_demand.snap_demand_property +metrics_demand.snap_demand_segment +metrics_demand.snap_demand_source +``` + +Example pattern: + ```sql CREATE TABLE IF NOT EXISTS `PROJECT_ID.metrics_demand.snap_demand_property` ( property_code STRING OPTIONS(description="Property code."), @@ -145,7 +256,7 @@ CREATE TABLE IF NOT EXISTS `PROJECT_ID.metrics_demand.snap_demand_property` ( stay_date DATE OPTIONS(description="Stay date for the demand observation."), month DATE OPTIONS(description="Month value normalized to the first day of month."), market_segment STRING OPTIONS(description="Market segment from the demand source."), - segment STRING OPTIONS(description="Standard or source segment value."), + segment STRING OPTIONS(description="Standard or source segment value, depending on mapping maturity."), cs_no STRING OPTIONS(description="Competitive set number or identifier."), cs_rms_sold FLOAT64 OPTIONS(description="Competitive-set rooms sold."), occ_index FLOAT64 OPTIONS(description="Property occupancy index versus compset or market."), @@ -154,6 +265,15 @@ CREATE TABLE IF NOT EXISTS `PROJECT_ID.metrics_demand.snap_demand_property` ( cs_adr FLOAT64 OPTIONS(description="Competitive-set ADR."), adr_rank STRING OPTIONS(description="ADR rank."), revpar_rank STRING OPTIONS(description="RevPAR rank."), + occ_yoy FLOAT64 OPTIONS(description="Property occupancy year-over-year value or change as provided by the source."), + cs_occ_yoy FLOAT64 OPTIONS(description="Competitive-set occupancy year-over-year value or change 
as provided by the source."), + occ_index_yoy FLOAT64 OPTIONS(description="Occupancy index year-over-year value or change."), + adr_yoy FLOAT64 OPTIONS(description="Property ADR year-over-year value or change."), + cs_adr_yoy FLOAT64 OPTIONS(description="Competitive-set ADR year-over-year value or change."), + etl_date TIMESTAMP OPTIONS(description="ETL timestamp/date from source process."), + source_system STRING OPTIONS(description="Source system or provider."), + source_report STRING OPTIONS(description="Source report or feed name."), + source_file STRING OPTIONS(description="Source file path or name."), insert_date DATE OPTIONS(description="Insert date."), updated_date DATE OPTIONS(description="Updated date.") ) @@ -162,8 +282,6 @@ CLUSTER BY property_code, stay_date OPTIONS(description="Property-level demand, compset, index, and rank metrics."); ``` ---- - ## Relationship to Other Tables Demand connects to property, event, price, and pace data by: @@ -172,6 +290,18 @@ Demand connects to property, event, price, and pace data by: property_code + stay_date ``` +Demand segment connects by: + +```text +property_code + stay_date + segment / segment_code +``` + +Demand source connects by: + +```text +property_code + stay_date + source / source_code / channel +``` + BI-facing demand views use the `vw_` prefix: ```text @@ -180,13 +310,11 @@ vw_demand_segment vw_demand_source ``` ---- - ## Operating Notes -1. Rank fields arrive as strings in some sources and numbers in others. Preserve the source-compatible type in snapshot tables and cast in BI views when values are consistently numeric. -2. `month` is stored as a DATE using the first day of the month. +1. Rank fields arrive as strings in some sources and numbers in others. Preserve the source-compatible type in snapshot tables and cast in BI views when the values are consistently numeric. +2. `month` is stored as a DATE using the first day of the month. 
This avoids the slow-motion chaos of mixing strings, datetimes, and calendar labels. 3. `snapshot_date` standardizes to `snap_date`. 4. `date` or `stay_date` standardizes to `stay_date`. -5. Demand Channel maps to `snap_demand_source`, not `snap_demand_channel`. -6. `market_excl` means market excluding the subject property. \ No newline at end of file +5. Demand Channel maps to `snap_demand_source`, not `snap_demand_channel`, because source is the standard distribution cut in the library. +6. `market_excl` means market excluding the subject property. diff --git a/metrics-library/events-table-model.mdx b/metrics-library/events-table-model.mdx index 0da1032..043f010 100644 --- a/metrics-library/events-table-model.mdx +++ b/metrics-library/events-table-model.mdx @@ -1,8 +1,3 @@ ---- -title: "events-table-model.mdx" -description: "Event modeling framework used throughout the Metrics Platform" ---- - # Events Table Model ## Metrics Platform @@ -11,49 +6,116 @@ This document defines the event-modeling framework used throughout the Metrics P Events are modeled as a dedicated contextual intelligence layer rather than embedded as free-form text fields inside pace, actual, pickup, forecast, or pricing fact tables. -**Events influence hotel performance. They are not performance metrics themselves — and that distinction drives how the warehouse models event data.** +The objective is preserving reusable event context across: +- forecasting, +- demand analysis, +- compression tracking, +- pricing analysis, +- operational reporting, +- and BI workflows. + +**Events influence hotel performance. They are not performance metrics themselves, and that distinction drives how the warehouse models event data.** + ---- -## Event Modeling Philosophy +# Event Modeling Philosophy -Hospitality demand does not exist in isolation. 
Performance is constantly influenced by conventions, concerts, citywide events, sporting events, holidays, weather events, renovations, closures, and market disruptions. +Hospitality demand does not exist in isolation. + +Performance is constantly influenced by: +- conventions, +- concerts, +- citywide events, +- sporting events, +- holidays, +- weather events, +- renovations, +- closures, +- and market disruptions. + +These conditions create operational context around: +- occupancy, +- pricing, +- booking pace, +- compression behavior, +- and market demand. Because of this, the Metrics Platform models events as a reusable contextual layer that can integrate across multiple reporting datasets without duplicating event text across operational fact tables. ---- +**The objective is not simply attaching notes to dates. The objective is preserving the market conditions surrounding hotel performance.** + -## Event Architecture Structure + +# Event Architecture Structure + +The event framework uses two core tables: | Table | Purpose | -| --- | --- | +|---|---| | `dim_event` | Master event calendar and standardized event metadata | | `bridge_property_event_date` | Property/date-level event relationship layer | This structure allows: - - one event to impact multiple properties, - multiple events to affect the same date, - market-wide event analysis, - property-specific impact classification, - and reusable event intelligence across reporting layers. ---- -## `dim_event` -### Table Purpose +# Why Events Remain Separate + +Events behave differently than transactional or additive operational metrics. + +They are contextual drivers that explain changes in demand and performance behavior. + +Separating events from operational fact tables preserves: +- reporting clarity, +- event reusability, +- historical context, +- and dimensional flexibility. + +The structure supports: + +1. One event affecting multiple properties and multiple dates +2. 
Multiple simultaneous events impacting the same market +3. Shared event intelligence across dashboards, forecasting, and pricing workflows +4. Start and end date ranges +5. Event categories and impact classifications +6. Property-specific event relationships +7. Market-wide compression analysis +8. Operational overlays such as closures and renovations + +The warehouse keeps pace, actual, pickup, and pricing datasets focused on measurable operational performance while events provide the contextual layer around those metrics. + + + +# `dim_event` + +## Table Purpose `dim_event` stores the centralized event calendar and standardized event metadata used throughout the Metrics Platform. -### Table Grain +This table acts as the master event reference layer for: +- demand analysis, +- pricing analysis, +- compression tracking, +- forecasting, +- and BI reporting. + + + +## Table Grain One row per unique event. -### Core Event Fields + + +## Core Event Fields | Column | Type | Notes | -| --- | --- | --- | +|---|---|---| | `event_id` | STRING | Unique event identifier | | `event` | STRING | Standardized event name | | `event_category` | STRING | Standardized event classification | @@ -66,20 +128,34 @@ One row per unique event. | `state` | STRING | State or province | | `country` | STRING | Country | | `event_notes` | STRING | Supporting operational notes | +| `source_system` | STRING | Source system or imported source | +| `source_report` | STRING | Source report or feed | +| `source_file` | STRING | Imported source file reference | | `insert_date` | DATE | Insert timestamp | | `updated_date` | DATE | Last update timestamp | ---- -## `bridge_property_event_date` -### Table Purpose +# `bridge_property_event_date` + +## Table Purpose `bridge_property_event_date` connects events to specific properties and dates. 
-A citywide convention may strongly impact one property while creating minimal demand movement for another depending on location, segment mix, pricing strategy, brand positioning, and proximity to demand generators. +This bridge layer exists because events rarely affect every hotel equally. + +A citywide convention may strongly impact one property while creating minimal demand movement for another depending on: +- location, +- segment mix, +- pricing strategy, +- brand positioning, +- and proximity to demand generators. -### Table Grain +The bridge structure preserves those differences. + + + +## Table Grain One row per: @@ -87,10 +163,12 @@ One row per: property_code + date + event_id ``` -### Core Bridge Fields + + +## Core Bridge Fields | Column | Type | Notes | -| --- | --- | --- | +|---|---|---| | `property_code` | STRING | Property impacted by the event | | `date` | DATE | Impacted reporting date | | `event_id` | STRING | Linked event identifier | @@ -103,12 +181,20 @@ property_code + date + event_id | `insert_date` | DATE | Insert timestamp | | `updated_date` | DATE | Last update timestamp | ---- -## Standard Event Categories + +# Standard Event Categories + +Event categories use standardized classifications to maintain reporting consistency across operational systems and dashboards. + +Use field: + +```text +event_category +``` | Value | -| --- | +|---| | `Convention` | | `Holiday` | | `Observance` | @@ -123,12 +209,20 @@ property_code + date + event_id | `Special Event` | | `Natural Event` | ---- -## Standard Event Impact Levels + +# Standard Event Impact Levels + +Event impact classifications standardize operational demand expectations. 
+ +Use field: + +```text +event_impact +``` | Value | -| --- | +|---| | `Citywide Sellout` | | `Citywide Limited` | | `High Impact` | @@ -139,9 +233,29 @@ property_code + date + event_id | `No Impact` | | `Note` | ---- +These classifications support: +- forecast adjustments, +- compression analysis, +- pricing evaluation, +- and demand interpretation. + + + +# Event Relationships to Fact Tables + +Events are joined into operational datasets through standardized dimensional relationships. -## Event Relationships to Fact Tables +## Property-Level Event Relationships + +```text +property_code + date +``` + +## Market-Level Event Relationships + +```text +market + date +``` Recommended reporting join path: @@ -151,12 +265,53 @@ fact_pace_property.date -> dim_event.event_id ``` ---- +This structure keeps event intelligence reusable across: +- pace reporting, +- pricing analysis, +- forecasting, +- pickup analysis, +- and dashboard reporting. + + + +# Pace Table Event Standards + +The Metrics Platform intentionally avoids storing free-form event fields directly inside operational fact tables as the primary event model. + +Legacy fields such as: + +```text +special_events +special_events_ly +``` -## BI Mart Convenience Fields +are replaced by standardized event relationships. + +BI marts may still expose convenience fields such as: + +```text +primary_event +primary_event_category +primary_event_impact +``` + +but the warehouse source of truth remains: + +```text +dim_event +bridge_property_event_date +``` + +This preserves consistency across all downstream reporting layers. + + + +# BI Mart Convenience Fields + +BI marts may flatten event relationships into reporting-friendly fields for dashboard simplicity. 
| Column | Type | Purpose | -| --- | --- | --- | +|---|---|---| | `primary_event` | STRING | Main event affecting the property/date | | `primary_event_category` | STRING | Category of primary event | | `primary_event_impact` | STRING | Primary event impact level | @@ -165,9 +320,13 @@ fact_pace_property.date | `has_citywide_event` | BOOL | Citywide event impact flag | | `has_property_disruption` | BOOL | Renovation or operational disruption flag | ---- +These fields improve dashboard usability while preserving normalized warehouse relationships underneath. + + -## Operational Use Cases +# Operational Use Cases + +The event framework supports: 1. Demand explanation analysis 2. Forecast variance interpretation @@ -180,12 +339,20 @@ fact_pace_property.date 9. Market demand intelligence 10. Operational commentary workflows -**Because sometimes the answer is not "the forecast was wrong." Sometimes Beyoncé announced a stadium tour.** +**Because sometimes the answer is not “the forecast was wrong.” Sometimes Beyoncé announced a stadium tour.** + ---- -## Warehouse Design Philosophy +# Warehouse Design Philosophy The Metrics Platform treats events as contextual market intelligence rather than operational production metrics. -**The objective is not simply storing event names — the objective is preserving the operational story surrounding hotel performance.** \ No newline at end of file +This structure preserves: +- reusable event relationships, +- historical market context, +- property-specific impact behavior, +- and forecasting continuity. 
+ +**The objective is not simply storing event names. The objective is preserving the operational story surrounding hotel performance.** + + diff --git a/metrics-library/forecast-budget-and-pricing-table-models.mdx b/metrics-library/forecast-budget-and-pricing-table-models.mdx new file mode 100644 index 0000000..ce55f7e --- /dev/null +++ b/metrics-library/forecast-budget-and-pricing-table-models.mdx @@ -0,0 +1,332 @@ +# Forecast Budget & Pricing Table Models + +## Metrics Platform + +This document defines the planning, pricing, and writeback table models used throughout the Metrics Platform. + +These models support forecasting, budgeting, pricing intelligence, and user-managed planning workflows across hospitality reporting and revenue operations. + +Unlike operational fact tables, these datasets represent: +- planning inputs, +- observed market pricing, +- user-generated assumptions, +- override logic, +- and strategic forecasting scenarios. + +Because of this, they operate independently from pace, actual, pickup, and transactional reporting datasets. + +The structure intentionally separates operational production data from planning and market-observation layers to preserve: +- auditability, +- version control, +- pricing history, +- scenario modeling, +- and forecasting flexibility. + +**The objective is not simply storing values. The objective is preserving planning context.** + + + +# Manual Forecast & Budget Writeback + +## Planning Layer Philosophy + +Forecasting and budgeting data behaves differently than operational reporting data. + +Operational systems record what happened. + +Planning systems model what may happen. + +That distinction matters. + +Manual forecasts, overrides, budgets, and strategic planning assumptions require: +- versioning, +- scenario management, +- workflow tracking, +- approval states, +- and user-level auditability. 
+ +The Metrics Platform models these inputs as a dedicated writeback layer rather than embedding them directly into operational fact tables. + +This structure preserves: +- source-system integrity, +- historical planning continuity, +- and forecasting traceability. + + + +# `fact_manual_plan` + +## Table Purpose + +`fact_manual_plan` stores: +- forecast values, +- budget submissions, +- override logic, +- strategic targets, +- planning scenarios, +- and user-managed forecasting inputs. + +The table is designed to support: +- Google Sheets writeback, +- Apps Script integrations, +- workflow automation, +- API submissions, +- and future Metrics Platform planning interfaces. + + + +## Table Grain + +One row per: + +```text +property_code + date + plan_type + scenario + version + grain_type + grain_key + metric_code +``` + +The table intentionally uses a long-format structure because planning inputs vary significantly across operational and strategic use cases. + +Some forecasts may include: +- rooms, +- revenue, +- ADR, +- OCC, +- demand, +- wash, +- LRV, +- or custom planning assumptions. + +The structure preserves flexibility without forcing rigid schema expansion every time the business evolves. + + + +## Standard Planning Dimensions + +| Field | Purpose | +|---|---| +| `plan_type` | Forecast, budget, override, target, scenario, reforecast | +| `scenario` | Planning scenario grouping | +| `version` | Version tracking (`v001`, `v002`, `final`) | +| `status` | Draft, submitted, approved, locked, archived | +| `grain_type` | Property, segment, source, roomtype, roompool, event | +| `metric_code` | Standardized metric reference | +| `metric_value` | User-submitted planning value | + + + +# Forecasting & Planning Standards + +The planning framework supports multiple forecasting layers simultaneously. 
+ +Examples include: + +| Planning Type | Purpose | +|---|---| +| `Forecast` | Operational forecast values | +| `Budget` | Approved budget values | +| `Reforecast` | Updated operational forecast | +| `Override` | Manual adjustment to modeled forecasts | +| `Target` | Goal-based planning values | +| `Scenario` | Strategic planning simulations | + +This structure allows planning workflows to evolve independently from transactional reporting. + +Because forecasting is rarely static for long. + + + +# Planning Workflow States + +Planning records preserve operational workflow status through: + +```text +Draft +Submitted +Approved +Locked +Archived +``` + +This supports: +- approval tracking, +- forecasting governance, +- scenario comparison, +- and historical planning continuity. + + + +# Planning Views & Reporting Layers + +The Metrics Platform generates reporting-friendly views from the long-format planning structure. + +Examples: + +```text +vw_manual_plan_property_daily +vw_manual_plan_segment_daily +vw_manual_plan_source_daily +vw_manual_plan_roomtype_daily +``` + +These reporting views pivot standardized `metric_code` values into operational reporting structures. + +Examples: + +```text +rms_bgt +rev_bgt +adr_bgt +rms_fct +rev_fct +adr_fct +occ_fct +``` + +This preserves operational reporting usability while maintaining flexible planning architecture underneath. + + + +# Pricing Observation Framework + +## Pricing Data Philosophy + +Pricing observations behave differently than: +- operational production, +- pace reporting, +- or forecasting data. + +Pricing data represents observed market conditions at a specific point in time. + +That distinction matters. + +Observed pricing is influenced by: +- booking channel, +- LOS, +- guest count, +- cancellation rules, +- membership access, +- room type, +- market compression, +- and competitor positioning. 
+ +The Metrics Platform models pricing observations as an independent market-intelligence layer rather than embedding pricing snapshots into operational fact tables. + +This preserves: +- pricing history, +- rate-position analysis, +- compression analysis, +- and competitive pricing behavior. + + + +# `fact_price_shop` + +## Table Purpose + +`fact_price_shop` stores observed market pricing from: +- OTA observations, +- rate shopping tools, +- CRS exports, +- booking engine captures, +- public pricing checks, +- and competitive pricing snapshots. + +The table supports: +- pricing intelligence, +- rate-position analysis, +- competitive pricing analysis, +- and pricing trend evaluation. + + + +## Table Grain + +One row per observed pricing event. + +```text +property_code + shop_date + date + los + guest_count + shop_channel + roomtype_code + rate_plan_code + price_type +``` + +This structure preserves the full pricing context of the observed market condition. + + + +# Standard Pricing Dimensions + +| Field | Purpose | +|---|---| +| `shop_date` | Date the pricing observation occurred | +| `shop_channel` | OTA, Brand.com, CRS, Google Hotels, etc. | +| `price_type` | BAR, Lowest Available, Package, Promo, Member Rate | +| `los` | Length of stay used during pricing observation | +| `guest_count` | Guest occupancy used in pricing request | +| `availability_status` | Available, soldout, unavailable, closed | +| `is_comp` | Competitive property observation flag | + + + +# Pricing Intelligence Standards + +The pricing framework supports: +- BAR analysis, +- competitive pricing comparisons, +- rate-position tracking, +- price-index calculations, +- compression analysis, +- and historical pricing trends. 
+ +Derived pricing analytics include: + +| Metric | Purpose | +|---|---| +| `price_chg_ly` | Price change versus prior year | +| `price_pct_chg_ly` | Price percentage change versus prior year | +| `price_vs_bar` | Variance against BAR | +| `price_vs_comp_avg` | Variance against comp average | +| `price_index` | Relative pricing index | +| `is_price_leader` | Highest-priced competitive position | +| `is_price_lagging` | Below-market pricing position | + + + +# Relationship to Operational Fact Tables + +Planning and pricing datasets integrate with operational reporting layers through standardized dimensional joins. + +## Planning Layer Relationships + +```text +property_code + date + grain_type + grain_key + metric_code +``` + +## Pricing Layer Relationships + +```text +property_code + date +``` + +Additional dimensional joins support: +- roomtype-level pricing, +- roompool analysis, +- event impact evaluation, +- and competitive market positioning. + + + +# Warehouse Design Philosophy + +The Metrics Platform separates: +- operational production, +- forecasting assumptions, +- user-managed planning, +- and observed market pricing + +into distinct but interoperable reporting layers. + +This structure preserves: +- reporting stability, +- pricing history, +- planning auditability, +- forecasting flexibility, +- and market-intelligence continuity. + +**The goal is not simply storing forecasts or pricing observations. 
The goal is preserving the operational and strategic context behind the numbers.** \ No newline at end of file diff --git a/metrics-library/forecast-budget-pricing.mdx b/metrics-library/forecast-budget-pricing.mdx deleted file mode 100644 index b20919d..0000000 --- a/metrics-library/forecast-budget-pricing.mdx +++ /dev/null @@ -1,174 +0,0 @@ ---- -title: "forecast-budget-pricing.mdx" -description: "Planning, pricing, and writeback table models used throughout the Metrics Platform" ---- - -# Forecast Budget & Pricing Table Models - -## Metrics Platform - -This document defines the planning, pricing, and writeback table models used throughout the Metrics Platform. - -Unlike operational fact tables, these datasets represent planning inputs, observed market pricing, user-generated assumptions, override logic, and strategic forecasting scenarios. - -The structure intentionally separates operational production data from planning and market-observation layers to preserve auditability, version control, pricing history, scenario modeling, and forecasting flexibility. - -**The objective is not simply storing values. The objective is preserving planning context.** - ---- - -## Manual Forecast & Budget Writeback - -### Planning Layer Philosophy - -Operational systems record what happened. Planning systems model what may happen. - -Manual forecasts, overrides, budgets, and strategic planning assumptions require versioning, scenario management, workflow tracking, approval states, and user-level auditability. - -The Metrics Platform models these inputs as a dedicated writeback layer rather than embedding them directly into operational fact tables. - ---- - -## `fact_manual_plan` - -### Table Purpose - -`fact_manual_plan` stores forecast values, budget submissions, override logic, strategic targets, planning scenarios, and user-managed forecasting inputs. 
- -The table is designed to support: - -- Google Sheets writeback, -- Apps Script integrations, -- workflow automation, -- API submissions, -- and future Metrics Platform planning interfaces. - -### Table Grain - -One row per: - -```text -property_code + date + plan_type + scenario + version + grain_type + grain_key + metric_code -``` - -### Standard Planning Dimensions - -| Field | Purpose | -| --- | --- | -| `plan_type` | Forecast, budget, override, target, scenario, reforecast | -| `scenario` | Planning scenario grouping | -| `version` | Version tracking (`v001`, `v002`, `final`) | -| `status` | Draft, submitted, approved, locked, archived | -| `grain_type` | Property, segment, source, roomtype, roompool, event | -| `metric_code` | Standardized metric reference | -| `metric_value` | User-submitted planning value | - ---- - -## Forecasting & Planning Standards - -| Planning Type | Purpose | -| --- | --- | -| `Forecast` | Operational forecast values | -| `Budget` | Approved budget values | -| `Reforecast` | Updated operational forecast | -| `Override` | Manual adjustment to modeled forecasts | -| `Target` | Goal-based planning values | -| `Scenario` | Strategic planning simulations | - ---- - -## Planning Workflow States - -```text -Draft -Submitted -Approved -Locked -Archived -``` - ---- - -## Planning Views & Reporting Layers - -The Metrics Platform generates reporting-friendly views from the long-format planning structure. - -```text -vw_manual_plan_property_daily -vw_manual_plan_segment_daily -vw_manual_plan_source_daily -vw_manual_plan_roomtype_daily -``` - -These reporting views pivot standardized `metric_code` values into operational reporting structures: - -```text -rms_bgt -rev_bgt -adr_bgt -rms_fct -rev_fct -adr_fct -occ_fct -``` - ---- - -## Pricing Observation Framework - -### Pricing Data Philosophy - -Pricing data represents observed market conditions at a specific point in time. 
- -Observed pricing is influenced by booking channel, LOS, guest count, cancellation rules, membership access, room type, market compression, and competitor positioning. - -The Metrics Platform models pricing observations as an independent market-intelligence layer. - ---- - -## `fact_price_shop` - -### Table Purpose - -`fact_price_shop` stores observed market pricing from OTA observations, rate shopping tools, CRS exports, booking engine captures, public pricing checks, and competitive pricing snapshots. - -### Table Grain - -```text -property_code + shop_date + date + los + guest_count + shop_channel + roomtype_code + rate_plan_code + price_type -``` - -### Standard Pricing Dimensions - -| Field | Purpose | -| --- | --- | -| `shop_date` | Date the pricing observation occurred | -| `shop_channel` | OTA, Brand.com, CRS, Google Hotels, etc. | -| `price_type` | BAR, Lowest Available, Package, Promo, Member Rate | -| `los` | Length of stay used during pricing observation | -| `guest_count` | Guest occupancy used in pricing request | -| `availability_status` | Available, soldout, unavailable, closed | -| `is_comp` | Competitive property observation flag | - ---- - -## Pricing Intelligence Standards - -| Metric | Purpose | -| --- | --- | -| `price_chg_ly` | Price change versus prior year | -| `price_pct_chg_ly` | Price percentage change versus prior year | -| `price_vs_bar` | Variance against BAR | -| `price_vs_comp_avg` | Variance against comp average | -| `price_index` | Relative pricing index | -| `is_price_leader` | Highest-priced competitive position | -| `is_price_lagging` | Below-market pricing position | - ---- - -## Warehouse Design Philosophy - -The Metrics Platform separates operational production, forecasting assumptions, user-managed planning, and observed market pricing into distinct but interoperable reporting layers. - -**The goal is not simply storing forecasts or pricing observations. 
The goal is preserving the operational and strategic context behind the numbers.** \ No newline at end of file diff --git a/metrics-library/index.mdx b/metrics-library/index.mdx index a1da910..23f8eda 100644 --- a/metrics-library/index.mdx +++ b/metrics-library/index.mdx @@ -1,13 +1,14 @@ --- -title: "Metrics Library" -description: "REVREBEL's internal business intelligence and analytics platform" +title: Metrics Library +layout: home +nav_order: 1 --- # Metrics Library ## Metrics Platform -Metrics is REVREBEL's internal business intelligence and analytics platform designed to centralize, standardize, and analyze hospitality data across operational systems. +Metrics is REVREBEL’s internal business intelligence and analytics platform designed to centralize, standardize, and analyze hospitality data across operational systems. The platform exists because hotel data is rarely clean, consistent, or organized the same way twice. @@ -17,67 +18,48 @@ The objective is not simply consolidating data, the objective is creating a reli Metrics supports: -- revenue strategy, -- forecasting, -- pace analysis, -- pickup reporting, -- pricing intelligence, -- distribution analysis, -- benchmarking, -- and operational performance tracking. +* revenue strategy, +* forecasting, +* pace analysis, +* pickup reporting, +* pricing intelligence, +* distribution analysis, +* benchmarking, +* and operational performance tracking. The platform is intentionally structured around how hospitality reporting behaves operationally — including snapshot-based pace reporting, comp-set benchmarking, forecast versioning, event overlays, and multi-source reconciliation workflows. -**Because hospitality reporting is complicated enough already. The infrastructure underneath it shouldn't make it worse.** +**Because hospitality reporting is complicated enough already. 
The infrastructure underneath it shouldn’t make it worse.** + ---- -## Documentation Standards +# Documentation Standards This documentation library defines the canonical standards used throughout the Metrics Platform, including: -- warehouse architecture, -- naming conventions, -- table structures, -- dimensional modeling, -- benchmark processing, -- event intelligence, -- planning frameworks, -- and semantic reporting standards. +* warehouse architecture, +* naming conventions, +* table structures, +* dimensional modeling, +* benchmark processing, +* event intelligence, +* planning frameworks, +* and semantic reporting standards. These standards exist to preserve: -- reporting consistency, -- benchmark integrity, -- operational traceability, -- and downstream analytical stability across the platform. - ---- - -## Standards - - - - Canonical naming system used across the warehouse - +* reporting consistency, +* benchmark integrity, +* operational traceability, +* and downstream analytical stability across the platform. 
- - Standardized table prefixes used throughout the warehouse - - - Canonical table structure used throughout the platform - - - Processing standards for CoStar STAR report benchmarking - +# Standards - - Planning, pricing, and writeback table models - +* [Database Column Naming Standards](./column-naming-standards/) +* [Metrics Table Architecture](./metrics-table-architecture/) +* [CoStar Processing Standards](./costar-processing-standards/) +* [Forecast & Pricing Table Models](./forecast-pricing-models/) +* [Events Table Model](./events-table-model/) - - Event modeling framework for the Metrics Platform - - \ No newline at end of file diff --git a/metrics-library/revrebel-bi-table-architecture.mdx b/metrics-library/revrebel-bi-table-architecture.mdx new file mode 100644 index 0000000..be23261 --- /dev/null +++ b/metrics-library/revrebel-bi-table-architecture.mdx @@ -0,0 +1,333 @@ + +title: Metrics Table Architecture +nav_order: 3 +has_toc: true +permalink: /metrics-table-architecture/ + + +# Metrics Table Architecture + +## Metrics Platform + +This document defines the canonical table structure used throughout the Metrics Platform. + +The objective is to maintain a stable, predictable reporting framework across hospitality systems that rarely speak the same language. + +PMS exports, CRS reports, RMS feeds, booking engine datasets, spreadsheets, and vendor reports all structure data differently. Some arrive wide. Some arrive sparse. Some arrive held together with merged Excel cells and optimism. + +The Metrics Platform standardizes those inputs into a consistent warehouse architecture designed for: +- operational reporting, +- benchmarking, +- forecasting, +- dashboarding, +- automation workflows, +- and downstream analytics. 
+ +**The goal is not simply storing hotel data. The goal is creating a durable semantic layer where metrics remain interpretable regardless of source system.** + + + +# Architecture Philosophy + +The warehouse structure separates datasets by operational grain rather than forcing all reporting into a single generalized fact table. + +Hospitality reporting behaves differently depending on: +- reporting cadence, +- snapshot behavior, +- dimensional complexity, +- pickup logic, +- and forecasting requirements. + +Because of this, the Metrics Platform intentionally separates: +- pace reporting, +- actual production, +- pickup activity, +- pricing observations, +- benchmark intelligence, +- and planning data + +into independent but interoperable reporting layers. + +This preserves: +- reporting consistency, +- dimensional flexibility, +- historical continuity, +- and operational clarity. + + + +# Core Design Principles + +## Grain-First Architecture + +Fact tables are separated by business grain: +- property, +- segment, +- source/channel, +- and roomtype. + +This structure prevents unnecessary dimensional sparsity while preserving reporting flexibility. + + + +## Wide Pace & Actual Tables + +Core pace and actual tables remain intentionally wide. + +Hospitality reporting frequently relies on: +- same-time-last-year comparisons, +- forecast overlays, +- budget variance, +- pickup analysis, +- and operational KPI calculations. + +Wide structures simplify dashboard development, semantic modeling, and BI tool performance. + + + +## Long Pickup Structures + +Pickup datasets remain long/narrow because pickup windows and comparison periods expand rapidly. + +This structure prevents schema explosion while preserving reporting flexibility across: +- rolling pickup windows, +- comparison periods, +- and dynamic pace analysis. + + + +## Source Metrics vs Derived Metrics + +Fact tables preserve additive and source-provided metrics. 
+ +Derived KPIs such as: +- ADR, +- OCC, +- RevPAR, +- indexes, +- and percentage changes + +are calculated within marts or semantic reporting views when base metrics are available. + +This preserves: +- calculation consistency, +- warehouse flexibility, +- and reporting reproducibility. + + + +## Mapping & Normalization + +Source systems rarely align on naming conventions. + +The Metrics Platform uses mapping layers to normalize: +- segments, +- sources, +- roomtypes, +- channels, +- and raw metric structures. + +This preserves source traceability while maintaining standardized reporting behavior downstream. + + + +# Warehouse Table Structure + +## Core Dimensions + +Core dimensions establish the standardized operational reference layer used throughout the warehouse. + +| Table | Purpose | +|---|---| +| `dim_property` | Standardized hotel and property reference layer | +| `dim_date` | Calendar and reporting date dimension | +| `dim_segment` | Standardized segment reference layer | +| `dim_source` | Standardized booking source and channel layer | +| `dim_roomtype` | Standardized roomtype and room classification layer | +| `dim_metric` | Centralized metric dictionary and calculation framework | +| `dim_source_report` | Source-report metadata and ingestion tracking | + + + +## Mapping Tables + +Mapping tables preserve source-system relationships while standardizing operational reporting structures. + +| Table | Purpose | +|---|---| +| `map_segment` | Maps source segment values to standardized segment structures | +| `map_source` | Maps source/channel values to standardized channel structures | +| `map_roomtype` | Maps source roomtypes to standardized roomtype structures | +| `map_source_metric` | Maps raw report columns to standardized metric structures | + +These layers allow inconsistent vendor exports and operational systems to resolve into a unified reporting framework. 
+ +Because hospitality systems have an almost artistic relationship with inconsistent naming conventions. + + + +# Pace Fact Tables + +Pace tables store snapshot-based on-the-books performance data. + +These datasets preserve booking-position behavior at a specific point in time. + +Pace reporting supports: +- demand analysis, +- forecasting, +- pickup tracking, +- booking velocity, +- wash analysis, +- and operational pacing. + +| Table | Purpose | +|---|---| +| `fact_pace_property` | Property-level pace snapshots | +| `fact_pace_segment` | Segment-level pace snapshots | +| `fact_pace_source` | Source/channel-level pace snapshots | +| `fact_pace_roomtype` | Roomtype-level pace snapshots | + + + +# Actual Fact Tables + +Actual tables store finalized operational production. + +These datasets represent: +- realized room production, +- revenue production, +- occupancy, +- arrivals, +- cancellations, +- no-shows, +- and transactional operational behavior. + +Actual datasets maintain the same dimensional grain as their matching pace tables while removing unnecessary snapshot behavior. + +| Table | Purpose | +|---|---| +| `fact_actual_property` | Property-level actual performance | +| `fact_actual_segment` | Segment-level actual performance | +| `fact_actual_source` | Source/channel-level actual performance | +| `fact_actual_roomtype` | Roomtype-level actual performance | + + + +# Pickup Fact Tables + +Pickup reporting measures booking movement between reporting snapshots. + +Pickup tables intentionally use a long-format structure because: +- pickup windows expand over time, +- comparison periods vary, +- and reporting flexibility matters more than fixed-schema simplicity. 
+ +| Table | Purpose | +|---|---| +| `fact_pickup_property` | Property-level pickup activity | +| `fact_pickup_segment` | Segment-level pickup activity | +| `fact_pickup_source` | Source/channel-level pickup activity | +| `fact_pickup_roomtype` | Roomtype-level pickup activity | + +Pickup tables support: +- booking velocity analysis, +- trend monitoring, +- demand pacing, +- and forecast movement analysis. + + + +# Flexible Metric Observation Layer + +Some hospitality metrics behave inconsistently across systems. + +Rather than forcing every irregular metric into standardized wide tables, the Metrics Platform uses a flexible observation framework. + +| Table | Purpose | +|---|---| +| `fact_metric_observation` | Flexible storage layer for sparse, irregular, or evolving metrics | + +This structure supports: +- experimental metrics, +- vendor-specific reporting, +- one-off operational metrics, +- dynamic benchmarking fields, +- and evolving reporting requirements. + +The warehouse remains structured without becoming rigid. + + + +# BI Marts & Semantic Views + +BI marts provide dashboard-ready reporting layers derived from operational fact tables. + +These views standardize: +- KPI calculations, +- comparison logic, +- reporting relationships, +- and dashboard consumption. + +| View | Purpose | +|---|---| +| `mart_property_daily` | Property-level daily reporting mart | +| `mart_segment_daily` | Segment-level daily reporting mart | +| `mart_source_daily` | Source/channel daily reporting mart | +| `mart_roomtype_daily` | Roomtype-level daily reporting mart | + +These marts calculate: +- ADR, +- OCC, +- RevPAR, +- variance metrics, +- pace comparisons, +- and percentage-change calculations. + +The objective is creating stable reporting logic once — instead of rebuilding KPI calculations inside every dashboard. 
+ + + +# Shared Warehouse Standards + +All warehouse objects follow: +- standardized lowercase `snake_case`, +- approved metric abbreviations, +- standardized dimensional relationships, +- and canonical semantic naming structures. + +Reference: +- Database Column Naming Standards +- CoStar Processing Standards +- Forecast & Pricing Table Models + + + +# Warehouse Design Philosophy + +The Metrics Platform warehouse is intentionally structured around how hospitality reporting behaves operationally. + +Different datasets: +- update differently, +- snapshot differently, +- aggregate differently, +- and support different business questions. + +The warehouse architecture preserves those distinctions rather than flattening everything into a single reporting layer. + +The objective is not simply organizing tables. + +The objective is creating a reporting framework where: +- metrics remain interpretable, +- dashboards remain stable, +- benchmark logic remains trustworthy, +- and operational decisions remain grounded in consistent data behavior. + +Because clean architecture matters. + +Especially when the source file arrived named: + +```text +final_v2_actual_USE_THIS_latest(3).xlsx +``` + + diff --git a/metrics-library/roomtype-model.mdx b/metrics-library/roomtype-model.mdx index 22a7631..e7eecdb 100644 --- a/metrics-library/roomtype-model.mdx +++ b/metrics-library/roomtype-model.mdx @@ -1,28 +1,30 @@ --- -title: "roomtype-model.mdx" -description: "Standardized roomtype modeling framework for the Metrics Platform" +title: Roomtype Model +nav_order: 9 +has_toc: true +permalink: /roomtype-model/ --- # Roomtype Model -Roomtype data looks simple until every PMS, RMS, CRS, and booking engine decides to describe the same physical room in a slightly different way. One system sees a room code. Another sees a sellable room name. Another groups rooms into pools. Another adds bed type, class, feature, and category in one free-text label and calls it a day. 
+Roomtype data looks simple until every PMS, RMS, CRS, and booking engine decides to describe the same physical room in a slightly different way. One system sees a room code. Another sees a sellable room name. Another groups rooms into pools. Another adds bed type, class, feature, and category in one free-text label and calls it a day. Very generous of them. -The Metrics roomtype model separates those concepts into controlled building blocks. Room category, room class, bed type, and room feature stay independent so they can be reused in different combinations across properties and systems. +The Metrics roomtype model separates those concepts into controlled building blocks. Room category, room class, bed type, and room feature stay independent so they can be reused in different combinations across properties and systems. The standardized roomtype is then generated from those mapped attributes. ---- +This keeps source-system messiness in the mapping layer and gives Metrics a stable roomtype structure for pace, actuals, pricing, demand, availability, room pools, and reporting views. ## How We View Roomtype Data Roomtype is not just a label. It is a structured commercial object. -The model starts with independent lookup tables, maps source room values into standard attributes, and publishes `dim_roomtype` as the production reference used by ingestion and reporting. +A roomtype tells us what can be sold, how it should be grouped, how it behaves in pricing and demand analysis, and how it rolls into room pools or room classes. A standard roomtype needs enough structure to support rate strategy, inventory analysis, room mix reporting, price sensitivity, and demand calculations without forcing every property into the same naming pattern. ---- +The model starts with independent lookup tables, maps source room values into standard attributes, and publishes `dim_roomtype` as the production reference used by ingestion and reporting. 
## Table Family | Table | Purpose | -| --- | --- | +|---|---| | `lkp_roomcategory` | Controlled list of broad room categories, such as Room or Suite. | | `lkp_roomclass` | Controlled list of commercial room class levels, such as Standard, Upgrade, or Best. | | `lkp_bedtype` | Controlled list of bed types, such as King, Queen, or Double. | @@ -37,12 +39,21 @@ The model starts with independent lookup tables, maps source room values into st ## Lookup Tables -Lookup tables define the controlled vocabulary. They should stay small, stable, and independent. +Lookup tables define the controlled vocabulary. They should stay small, stable, and independent. The table name provides the context, so the columns can remain simple: `code`, `name`, and `description`. ### Shared Lookup Columns +Applies to: + +```text +lkp_roomcategory +lkp_roomclass +lkp_bedtype +lkp_roomfeature +``` + | Column | Type | Definition | -| --- | --- | --- | +|---|---|---| | `code` | STRING | Standard lookup code. | | `name` | STRING | Display name. | | `description` | STRING | Definition or usage notes. | @@ -51,36 +62,38 @@ Lookup tables define the controlled vocabulary. They should stay small, stable, | `insert_date` | DATE | Insert date. | | `updated_date` | DATE | Updated date. | -### `lkp_roomcategory` +### Lookup Examples + +#### `lkp_roomcategory` | code | name | description | -| --- | --- | --- | +|---|---|---| | `ROOM` | Room | Standard hotel room or guestroom product. | | `SUITE` | Suite | Larger or separated room product generally sold at a premium. | | `VILLA` | Villa | Villa, residence, or standalone accommodation product. | | `OTHER` | Other | Used when the product does not fit an existing category. | -### `lkp_roomclass` +#### `lkp_roomclass` | code | name | description | -| --- | --- | --- | +|---|---|---| | `STD` | Standard | Base or entry room class. | | `UPGRADE` | Upgrade | Enhanced room class above base. | | `BEST` | Best | Highest commercial room class or premium sellable tier. 
| -### `lkp_bedtype` +#### `lkp_bedtype` | code | name | description | -| --- | --- | --- | +|---|---|---| | `KG` | King | King bed configuration. | | `QN` | Queen | Queen bed configuration. | | `DB` | Double | Double bed configuration. | | `TW` | Twin | Twin bed configuration. | -### `lkp_roomfeature` +#### `lkp_roomfeature` | code | name | description | -| --- | --- | --- | +|---|---|---| | `VIEW` | View | Room has a meaningful view premium. | | `BALC` | Balcony | Room includes a balcony or terrace. | | `ADA` | Accessible | Accessible room configuration. | @@ -91,7 +104,11 @@ Lookup tables define the controlled vocabulary. They should stay small, stable, ## `map_roomtype` -`map_roomtype` is the source-to-standard translation layer. +`map_roomtype` is the source-to-standard translation layer. It stores the roomtype value as it appears in a source system and maps it into the standard room attributes used to create `dim_roomtype`. + +The mapping is universal by property and system. It is not tied to a specific ingestion report because the same source roomtype can appear across multiple files, tabs, exports, and workflows. The report that delivered the value belongs in ingestion/file metadata, not in the mapping table. + +The source fields in this table use plain names because the table itself provides the context. Inside `map_roomtype`, `code`, `name`, and `description` mean the mapped source roomtype code, name, and description. ### Grain @@ -99,75 +116,158 @@ Lookup tables define the controlled vocabulary. They should stay small, stable, property_code + system + code ``` +If a source system does not provide a reliable code, the effective grain may use: + +```text +property_code + system + name +``` + ### Columns | Column | Type | Definition | -| --- | --- | --- | +|---|---|---| | `property_code` | STRING | Property code. | | `system` | STRING | Source system providing the roomtype value. | | `code` | STRING | Source-system roomtype code. 
| | `name` | STRING | Source-system roomtype name or label. | -| `roomcategory_code` | STRING | Standard room category code. | -| `roomclass_code` | STRING | Standard room class code. | -| `bedtype_code` | STRING | Standard bed type code. | -| `roomfeature_code` | STRING | Standard primary room feature code. | -| `roompool` | STRING | Optional roompool grouping label. | -| `roomtype_code` | STRING | Generated standard roomtype code. | +| `description` | STRING | Source-system roomtype description, if supplied. | +| `no_beds` | INT64 | Number of beds represented by the mapped roomtype. | +| `roomcategory_code` | STRING | Standard room category code. Joins to `lkp_roomcategory.code`. | +| `roomclass_code` | STRING | Standard room class code. Joins to `lkp_roomclass.code`. | +| `bedtype_code` | STRING | Standard bed type code. Joins to `lkp_bedtype.code`. | +| `roomfeature_code` | STRING | Standard primary room feature code. Joins to `lkp_roomfeature.code`. | +| `roompool` | STRING | Optional roompool grouping label. Used to build `dim_roompool`. | +| `roomtype_code` | STRING | Generated standard roomtype code. This becomes `dim_roomtype.code`. | | `is_active` | BOOL | Indicates whether the mapping is active. | +| `insert_date` | DATE | Insert date. | +| `updated_date` | DATE | Updated date. | + +### Working Field Alignment + +| Working Field | Standard Field | Notes | +|---|---|---| +| `code` | `code` | Source-system roomtype code. | +| `name` | `name` | Source-system roomtype name. | +| `description` | `description` | Source-system roomtype description. | +| `no_rooms` | `no_beds` | Rename if the value represents bed count. Use `no_rooms` only if it truly means room count. | +| `roomclass_code` | `roomclass_code` | Standard room class. | +| `bedtype_code` | `bedtype_code` | Standard bed type. | +| `roomfeature_code` | `roomfeature_code` | Standard room feature. | +| `roomcategory` | `roomcategory_code` | Store the code, not the label. 
| +| `roompool` | `roompool` | Used to group roomtypes into room pools. | --- ## `dim_roomtype` -`dim_roomtype` is the production roomtype reference table. It is generated from `map_roomtype`. +`dim_roomtype` is the production roomtype reference table. It is generated from `map_roomtype`, but it should not carry every source field from the map table. + +The dimension only needs the single mapped source lookup value required during processing: + +```text +map_roomtype_code +``` + +That value connects incoming cleaned data back to the source roomtype code that was mapped, while keeping the dimension focused on the standardized roomtype attributes. This avoids the three-hop lookup problem without turning the dimension into a duplicate copy of the mapping table. ### Processing Logic +Incoming source files can resolve roomtypes directly through `dim_roomtype.map_roomtype_code`: + ```sql LEFT JOIN metrics_core.dim_roomtype rt ON incoming.property_code = rt.property_code AND incoming.roomtype_code = rt.map_roomtype_code ``` +Once resolved, the pipeline writes the standard code to the target table: + +```text +incoming source roomtype code → dim_roomtype.map_roomtype_code → dim_roomtype.code → target.roomtype_code +``` + +The full source context remains in `map_roomtype`: + +```text +map_roomtype.system +map_roomtype.code +map_roomtype.name +map_roomtype.description +``` + ### Code Generation -The standard roomtype code is deterministic: +The standard roomtype code is deterministic. It is built from the commercial components that define the roomtype: -```sql -CONCAT( - CAST(no_beds AS STRING), - bedtype_code, - roomclass_code, - roomfeature_code, - roomcategory_code -) AS code +```text +CONCAT(no_beds, bedtype_code, roomclass_code, roomfeature_code, roomcategory_code) ``` +If `roomfeature_code` is null or not meaningful, use `NONE`; omit it only if the code-generation standard explicitly allows that. The important part is consistency. 
The system should not generate three versions of the same room because one source forgot to say “none.” + ### Code Examples -| no\_beds | bedtype\_code | roomclass\_code | roomfeature\_code | roomcategory\_code | Generated code | -| --: | --- | --- | --- | --- | --- | +| no_beds | bedtype_code | roomclass_code | roomfeature_code | roomcategory_code | Generated roomtype code | +|---:|---|---|---|---|---| | 1 | `KG` | `STD` | `NONE` | `SUITE` | `1KGSTDNONESUITE` | | 2 | `QN` | `UPGRADE` | `NONE` | `ROOM` | `2QNUPGRADENONEROOM` | | 1 | `KG` | `BEST` | `VIEW` | `ROOM` | `1KGBESTVIEWROOM` | +If the library decides to omit `NONE` from generated codes, the same examples become: + +| no_beds | bedtype_code | roomclass_code | roomfeature_code | roomcategory_code | Generated roomtype code | +|---:|---|---|---|---|---| +| 1 | `KG` | `STD` | `NONE` | `SUITE` | `1KGSTDSUITE` | +| 2 | `QN` | `UPGRADE` | `NONE` | `ROOM` | `2QNUPGRADEROOM` | + +The shorter format is easier to read. The stricter format is easier to audit. Pick one and make it boringly consistent. + ### Columns | Column | Type | Definition | -| --- | --- | --- | +|---|---|---| | `property_code` | STRING | Property code. | -| `map_roomtype_code` | STRING | Source roomtype code from `map_roomtype.code`. | -| `code` | STRING | Standard roomtype code. | +| `map_roomtype_code` | STRING | Source roomtype code from `map_roomtype.code`. Used to resolve incoming source data. | +| `code` | STRING | Standard roomtype code generated from mapped attributes. This is the value written to target tables as `roomtype_code`. | | `name` | STRING | Standard roomtype display name. | +| `description` | STRING | Standard roomtype description. | | `no_beds` | INT64 | Number of beds represented by the roomtype. | | `roomcategory_code` | STRING | Standard room category code. | | `roomclass_code` | STRING | Standard room class code. | | `bedtype_code` | STRING | Standard bed type code. 
| | `roomfeature_code` | STRING | Standard primary room feature code. | | `roompool` | STRING | Optional roompool grouping label. | -| `available_rms` | INT64 | Room inventory for the roomtype. | +| `available_rms` | INT64 | Room inventory for the roomtype, when stable and known. | | `sort` | INT64 | Display order for reporting. | | `is_active` | BOOL | Indicates whether the roomtype is active. | +| `insert_date` | DATE | Insert date. | +| `updated_date` | DATE | Updated date. | + +### Build Logic + +The standard build logic is: + +```sql +CONCAT( + CAST(no_beds AS STRING), + bedtype_code, + roomclass_code, + roomfeature_code, + roomcategory_code +) AS code +``` + +If the approved standard omits `NONE` room features: + +```sql +CONCAT( + CAST(no_beds AS STRING), + bedtype_code, + roomclass_code, + IF(roomfeature_code = 'NONE', '', roomfeature_code), + roomcategory_code +) AS code +``` --- @@ -175,33 +275,145 @@ CONCAT( Room pools group related roomtypes that can be treated together for pricing, demand, inventory, or operational analysis. +A room pool can represent interchangeable roomtypes, a strategic grouping, or a source-system room pool. The roompool model should preserve which roomtypes are included, but it should not replace the roomtype dimension. Room pools are groupings. Roomtypes are the sellable product. + ### Columns | Column | Type | Definition | -| --- | --- | --- | +|---|---|---| | `property_code` | STRING | Property code. | -| `code` | STRING | Standard roompool code. | +| `code` | STRING | Standard roompool code. Built from the included roomtype codes. | | `name` | STRING | Roompool display name. | -| `related_roomtypes` | STRING | Comma-separated list of roomtype codes in the pool. | +| `description` | STRING | Roompool description or usage notes. | +| `related_roomtypes` | STRING | Comma-separated list of roomtype codes included in the pool. | | `roompool` | STRING | Source or mapped roompool grouping label. 
| | `sort` | INT64 | Display order for reporting. | | `is_active` | BOOL | Indicates whether the roompool is active. | +| `insert_date` | DATE | Insert date. | +| `updated_date` | DATE | Updated date. | ### Build Logic +From the working notes: + +```text +map_roomtype.roompool = group 1, group 2, group 3 +``` + +Each roompool can be built from the mapped roomtype codes included in that group. + +Roompool code: + +```text +CONCAT(roomtype_code, roomtype_code, roomtype_code) +``` + +Related roomtypes: + +```text +roomtype_code, roomtype_code, roomtype_code +``` + +In BigQuery, the roompool code should be generated from sorted roomtype codes so the same pool does not get different codes because rows arrived in a different order. Order should not be allowed to create chaos. We have enough of that from source systems. + +Example: + ```sql STRING_AGG(roomtype_code, '' ORDER BY roomtype_code) AS code, STRING_AGG(roomtype_code, ',' ORDER BY roomtype_code) AS related_roomtypes ``` +Example output: + +| roompool | Included roomtype codes | dim_roompool.code | dim_roompool.related_roomtypes | +|---|---|---|---| +| `group 1` | `1KGSTDSUITE`, `2QNUPGRADEROOM` | `1KGSTDSUITE2QNUPGRADEROOM` | `1KGSTDSUITE,2QNUPGRADEROOM` | +| `group 2` | `1KGBESTVIEWROOM`, `2QNUPGRADEROOM` | `1KGBESTVIEWROOM2QNUPGRADEROOM` | `1KGBESTVIEWROOM,2QNUPGRADEROOM` | + --- +## Enriched Views + +Views expose sort fields so BI tools can display roomtypes, room pools, and lookup labels in the intended operating order instead of alphabetizing everything into polite nonsense. + +### `vw_roomtype` + +`vw_roomtype` joins the standard roomtype dimension to lookup labels and descriptions. + +| Column | Source | Definition | +|---|---|---| +| `property_code` | `dim_roomtype` | Property code. | +| `map_roomtype_code` | `dim_roomtype` | Source roomtype code used for ingestion lookup. | +| `roomtype_code` | `dim_roomtype.code` | Standard roomtype code. | +| `roomtype` | `dim_roomtype.name` | Standard roomtype name. 
| +| `roomtype_description` | `dim_roomtype.description` | Standard roomtype description. | +| `no_beds` | `dim_roomtype.no_beds` | Number of beds. | +| `roomcategory_code` | `dim_roomtype.roomcategory_code` | Standard room category code. | +| `roomcategory` | `lkp_roomcategory.name` | Standard room category name. | +| `roomcategory_description` | `lkp_roomcategory.description` | Room category description. | +| `roomclass_code` | `dim_roomtype.roomclass_code` | Standard room class code. | +| `roomclass` | `lkp_roomclass.name` | Standard room class name. | +| `roomclass_description` | `lkp_roomclass.description` | Room class description. | +| `bedtype_code` | `dim_roomtype.bedtype_code` | Standard bed type code. | +| `bedtype` | `lkp_bedtype.name` | Standard bed type name. | +| `bedtype_description` | `lkp_bedtype.description` | Bed type description. | +| `roomfeature_code` | `dim_roomtype.roomfeature_code` | Standard room feature code. | +| `roomfeature` | `lkp_roomfeature.name` | Standard room feature name. | +| `roomfeature_description` | `lkp_roomfeature.description` | Room feature description. | +| `roompool` | `dim_roomtype.roompool` | Roompool grouping label. | +| `available_rms` | `dim_roomtype.available_rms` | Roomtype inventory. | +| `sort` | `dim_roomtype.sort` | Display order for BI/reporting. | +| `is_active` | `dim_roomtype.is_active` | Active flag. | + +### `vw_roompool` + +`vw_roompool` exposes the roompool groupings in a readable format. + +| Column | Source | Definition | +|---|---|---| +| `property_code` | `dim_roompool` | Property code. | +| `roompool_code` | `dim_roompool.code` | Standard roompool code. | +| `roompool` | `dim_roompool.name` | Roompool name. | +| `roompool_description` | `dim_roompool.description` | Roompool description. | +| `related_roomtypes` | `dim_roompool.related_roomtypes` | Comma-separated roomtype codes in the pool. | +| `sort` | `dim_roompool.sort` | Display order for BI/reporting. 
| +| `is_active` | `dim_roompool.is_active` | Active flag. | + +--- + +## Relationship to Pace, Actuals, Pricing, and Demand + +Roomtype-level source files should resolve against `dim_roomtype` during processing. The target metric table should store the standard roomtype code, not every descriptive attribute. + +For example: + +```text +source file roomtype code + → dim_roomtype.map_roomtype_code + → dim_roomtype.code + → snap_pace_roomtype.roomtype_code +``` + +Target tables then use: + +```text +snap_pace_roomtype.roomtype_code +fact_actual_roomtype.roomtype_code +fact_price_shop.roomtype_code +snap_demand_roomtype.roomtype_code +``` + +The reporting layer can join to `vw_roomtype` to bring in room category, room class, bed type, feature labels, sort order, and the mapped roomtype lookup code. + ## Operating Notes 1. `lkp_` tables define controlled values. They do not know anything about a specific property. 2. `map_roomtype` translates and governs source-system room values before they are published into the production dimension. -3. `dim_roomtype` carries `map_roomtype_code` as the single lookup back to the mapped source roomtype code. -4. `dim_roompool` groups roomtype codes into strategic or interchangeable room pools. -5. Use `roomcategory_code`, not `roomcategory`, in modeled tables. -6. Use `no_beds`, not `no_rooms`, when the value means bed count. -7. Generated codes should be deterministic. Same attributes in, same code out. Always. \ No newline at end of file +3. `map_roomtype` uses `system`, `code`, `name`, and `description`; it does not use the `source_` prefix because the table context already makes those fields source-oriented. +4. Mapping tables do not include `report`; source reports belong in ingestion metadata because the same mapping may support multiple ingestion files. +5. `dim_roomtype` carries `map_roomtype_code` as the single lookup back to the mapped source roomtype code. +6. 
`dim_roompool` groups roomtype codes into strategic or interchangeable room pools. +7. `vw_roomtype` and `vw_roompool` expose sort fields so reporting tools display values in the intended order. +8. Use `roomcategory_code`, not `roomcategory`, in modeled tables. Labels belong in lookup joins and views. +9. Use `no_beds`, not `no_rooms`, when the value means bed count. +10. Generated codes should be deterministic. Same attributes in, same code out. Always. diff --git a/metrics-library/segment-model.mdx b/metrics-library/segment-model.mdx index 0ac85a0..7ce7d3b 100644 --- a/metrics-library/segment-model.mdx +++ b/metrics-library/segment-model.mdx @@ -1,30 +1,28 @@ --- -title: "segment-model.mdx" -description: "Standardized segment modeling framework for commercial and finance reporting" +title: Segment Model +nav_order: 10 +has_toc: true +permalink: /segment-model/ --- # Segment Model -Segment data is one of the first places hotel reporting starts to drift. PMS, CRS, RMS, finance, and BI tools all use segment logic, but they rarely agree on names, grouping, codes, or ledger treatment. +Segment data is one of the first places hotel reporting starts to drift. PMS, CRS, RMS, finance, and BI tools all use segment logic, but they rarely agree on names, grouping, codes, or ledger treatment. Delightful. The Metrics segment model separates source-system segment values from the standardized commercial and finance structure. Source values are mapped once, then published into a production segment dimension that can be used consistently by pace, actuals, forecast, budget, pickup, and finance reporting. ---- - ## How We View Segment Data Segment is both a commercial lens and a financial reporting bridge. -Commercial teams need segment logic to understand demand behavior, booking mix, pricing strategy, and pace. Finance needs segment logic to align production to ledger treatment, budget structure, and reporting rollups. 
+Commercial teams need segment logic to understand demand behavior, booking mix, pricing strategy, and pace. Finance needs segment logic to align production to ledger treatment, budget structure, and reporting rollups. Those needs overlap, but they are not always identical. -The model keeps segment group, segment, and finance segment as separate controlled structures. - ---- +The model keeps segment group, segment, and finance segment as separate controlled structures. That gives Metrics enough flexibility to support revenue strategy without breaking finance alignment or forcing every source system into one flat label. ## Table Family | Table | Purpose | -| --- | --- | +|---|---| | `lkp_segment_group` | Controlled list of high-level commercial segment groups. | | `lkp_segment` | Controlled list of standard commercial segments. Each segment belongs to a segment group. | | `lkp_finance_segment` | Controlled list of finance reporting segments. | @@ -34,12 +32,30 @@ The model keeps segment group, segment, and finance segment as separate controll --- +## Lookup Tables + +Lookup tables define controlled values. They should stay small, readable, and stable. The table name provides the context, so the columns can remain simple: `code`, `name`, `description`, and `sort`. + ## `lkp_segment_group` +Segment group defines the highest commercial rollup for standard segment reporting. + +### Columns + +| Column | Type | Definition | +|---|---|---| +| `code` | STRING | Standard segment group code. | +| `name` | STRING | Segment group display name. | +| `description` | STRING | Definition or usage notes for the segment group. | +| `sort` | INT64 | Display order for reporting. | +| `is_active` | BOOL | Indicates whether the segment group is active. | +| `insert_date` | DATE | Insert date. | +| `updated_date` | DATE | Updated date. 
| + ### Example Values | code | name | description | sort | -| --- | --- | --- | --: | +|---|---|---|---:| | `TRANSIENT` | Transient | Individual transient demand not contracted as group or long-term business. | 10 | | `GROUP` | Group | Group blocks, meetings, events, and contracted group production. | 20 | | `CONTRACT` | Contract | Contracted or negotiated long-term production. | 30 | @@ -49,28 +65,57 @@ The model keeps segment group, segment, and finance segment as separate controll ## `lkp_segment` +Segment defines the standard commercial segment used by Metrics. + Each segment belongs to a segment group through `segment_group_code`. +### Columns + +| Column | Type | Definition | +|---|---|---| +| `code` | STRING | Standard segment code. | +| `name` | STRING | Segment display name. | +| `description` | STRING | Definition or usage notes for the segment. | +| `sort` | INT64 | Display order for reporting. | +| `segment_group_code` | STRING | Standard segment group code. Joins to `lkp_segment_group.code`. | +| `is_active` | BOOL | Indicates whether the segment is active. | +| `insert_date` | DATE | Insert date. | +| `updated_date` | DATE | Updated date. | + ### Example Values -| code | name | description | sort | segment\_group\_code | -| --- | --- | --- | --: | --- | +| code | name | description | sort | segment_group_code | +|---|---|---|---:|---| | `BAR` | BAR / Retail | Best available or retail transient production. | 10 | `TRANSIENT` | | `DISCOUNT` | Discount | Discounted transient production. | 20 | `TRANSIENT` | | `NEGOTIATED` | Negotiated | Negotiated transient or corporate production. | 30 | `TRANSIENT` | | `GROUP` | Group | Standard group production. | 10 | `GROUP` | -| `WHOLESALE` | Wholesale | Wholesale, tour, or net-rated production. | 40 | `CONTRACT` | +| `WHOLESALE` | Wholesale | Wholesale, tour, or net-rated production where applicable. 
| 40 | `CONTRACT` | --- ## `lkp_finance_segment` -Finance segment is intentionally separate from commercial segment. When they do not align, pretending they are the same creates reporting debt. And reporting debt always collects interest. +Finance segment defines the segment structure used for finance, accounting, ledger reporting, or budget alignment. + +This is intentionally separate from `lkp_segment`. Commercial segment and finance segment often align, but when they do not, pretending they are the same usually creates reporting debt. And reporting debt always collects interest. + +### Columns + +| Column | Type | Definition | +|---|---|---| +| `code` | STRING | Standard finance segment code. | +| `name` | STRING | Finance segment display name. | +| `description` | STRING | Definition or usage notes for the finance segment. | +| `sort` | INT64 | Display order for finance reporting. | +| `is_active` | BOOL | Indicates whether the finance segment is active. | +| `insert_date` | DATE | Insert date. | +| `updated_date` | DATE | Updated date. | ### Example Values | code | name | description | sort | -| --- | --- | --- | --: | +|---|---|---|---:| | `TRANSIENT` | Transient | Finance transient segment rollup. | 10 | | `GROUP` | Group | Finance group segment rollup. | 20 | | `CONTRACT` | Contract | Finance contract segment rollup. | 30 | @@ -80,7 +125,9 @@ Finance segment is intentionally separate from commercial segment. When they do ## `map_segment` -`map_segment` is the source-to-standard translation layer. +`map_segment` is the source-to-standard translation layer. It stores the segment value as it appears in a source system and maps it into the standard commercial and finance segment structures. + +The source fields use plain names because the table provides the context. Inside `map_segment`, `system`, `code`, `name`, and `description` refer to the source-system segment values. 
### Grain @@ -88,47 +135,101 @@ Finance segment is intentionally separate from commercial segment. When they do property_code + system + code ``` +If a source system does not provide a reliable segment code, the effective grain may use: + +```text +property_code + system + name +``` + ### Columns | Column | Type | Definition | -| --- | --- | --- | +|---|---|---| | `property_code` | STRING | Property code. | | `system` | STRING | Source system providing the segment value. | | `code` | STRING | Source-system segment code. | | `name` | STRING | Source-system segment name or label. | -| `segment_code` | STRING | Standard commercial segment code. | -| `finance_segment_code` | STRING | Standard finance segment code. | -| `gl_code` | STRING | General ledger or guest ledger code associated with the mapped segment. | +| `description` | STRING | Source-system segment description, if supplied. | +| `segment_code` | STRING | Standard commercial segment code. Joins to `lkp_segment.code`. | +| `finance_segment_code` | STRING | Standard finance segment code. Joins to `lkp_finance_segment.code`. | +| `gl_code` | STRING | General ledger or guest ledger code associated with the mapped segment, when available. | | `is_active` | BOOL | Indicates whether the mapping is active. | +| `insert_date` | DATE | Insert date. | +| `updated_date` | DATE | Updated date. | + +### Working Field Alignment + +| Working Field | Standard Field | Notes | +|---|---|---| +| `property_code` | `property_code` | Property code. | +| `system` | `system` | Source system. | +| `code` | `code` | Source-system segment code. | +| `name` | `name` | Source-system segment name. | +| `description` | `description` | Source-system segment description. | +| `segment_code` | `segment_code` | Standard commercial segment code. | +| `finance_segment_code` | `finance_segment_code` | Standard finance segment code. | +| `gl_code` | `gl_code` | General ledger or guest ledger code. 
| +| `is_active` | `is_active` | Active mapping flag. | +| `insert_date` | `insert_date` | Insert date. | +| `updated_date` | `updated_date` | Updated date. | --- ## `dim_segment` -`dim_segment` is the production segment reference table generated from `map_segment`. +`dim_segment` is the production segment reference table. It is generated from `map_segment` and the segment lookup tables. + +The dimension carries the single mapped source lookup value needed during processing: + +```text +map_segment_code +``` + +That value connects incoming cleaned data back to the source segment code that was mapped, while keeping the dimension focused on standardized commercial and finance structure. ### Processing Logic +Incoming source files can resolve segments directly through `dim_segment.map_segment_code`: + ```sql LEFT JOIN metrics_core.dim_segment s ON incoming.property_code = s.property_code AND incoming.segment_code = s.map_segment_code ``` +Once resolved, the pipeline writes the standard commercial segment code to the target table: + +```text +incoming source segment code → dim_segment.map_segment_code → dim_segment.segment_code → target.segment_code +``` + +The full source context remains in `map_segment`: + +```text +map_segment.system +map_segment.code +map_segment.name +map_segment.description +``` + ### Columns | Column | Type | Definition | -| --- | --- | --- | +|---|---|---| | `property_code` | STRING | Property code. | -| `map_segment_code` | STRING | Source segment code from `map_segment.code`. | -| `segment_code` | STRING | Standard commercial segment code. | -| `segment_group_code` | STRING | Standard segment group code. | -| `finance_segment_code` | STRING | Standard finance segment code. | -| `gl_code` | STRING | General ledger or guest ledger code. | -| `is_active` | BOOL | Active indicator. | +| `map_segment_code` | STRING | Source segment code from `map_segment.code`. Used to resolve incoming source data. 
| +| `segment_code` | STRING | Standard commercial segment code. Joins to `lkp_segment.code`. | +| `segment_group_code` | STRING | Standard segment group code. Joins to `lkp_segment_group.code`. Usually inherited from `lkp_segment.segment_group_code`. | +| `finance_segment_code` | STRING | Standard finance segment code. Joins to `lkp_finance_segment.code`. | +| `gl_code` | STRING | General ledger or guest ledger code associated with the segment, when available. | +| `is_active` | BOOL | Indicates whether the segment reference is active. | +| `insert_date` | DATE | Insert date. | +| `updated_date` | DATE | Updated date. | ### Build Logic +The production dimension should inherit `segment_group_code` from `lkp_segment` wherever possible. + ```sql SELECT m.property_code, @@ -137,35 +238,77 @@ SELECT sg.code AS segment_group_code, m.finance_segment_code, m.gl_code, - m.is_active + m.is_active, + CURRENT_DATE() AS insert_date, + CURRENT_DATE() AS updated_date FROM metrics_core.map_segment m -LEFT JOIN metrics_core.lkp_segment s ON m.segment_code = s.code -LEFT JOIN metrics_core.lkp_segment_group sg ON s.segment_group_code = sg.code +LEFT JOIN metrics_core.lkp_segment s + ON m.segment_code = s.code +LEFT JOIN metrics_core.lkp_segment_group sg + ON s.segment_group_code = sg.code ``` +This keeps group assignment controlled by the segment lookup instead of manually repeating the group in every mapped source row. One source row should not get to freelance the segment hierarchy. + --- ## `vw_segment` `vw_segment` joins the production dimension to lookup labels and descriptions. This is the version reporting, QA, and exports should use when users need readable segment names. +### Columns + | Column | Source | Definition | -| --- | --- | --- | +|---|---|---| +| `property_code` | `dim_segment` | Property code. | +| `map_segment_code` | `dim_segment` | Source segment code used for ingestion lookup. | | `segment_code` | `dim_segment` | Standard commercial segment code. 
| | `segment` | `lkp_segment.name` | Standard commercial segment name. | +| `segment_description` | `lkp_segment.description` | Standard commercial segment description. | | `segment_group_code` | `dim_segment` | Standard segment group code. | | `segment_group` | `lkp_segment_group.name` | Standard segment group name. | +| `segment_group_description` | `lkp_segment_group.description` | Standard segment group description. | | `finance_segment_code` | `dim_segment` | Standard finance segment code. | | `finance_segment` | `lkp_finance_segment.name` | Finance segment name. | +| `finance_segment_description` | `lkp_finance_segment.description` | Finance segment description. | | `gl_code` | `dim_segment` | General ledger or guest ledger code. | +| `sort` | `lkp_segment.sort` | Display order for reporting. | +| `is_active` | `dim_segment` | Active flag. | --- +## Relationship to Pace, Actuals, Forecast, Budget, and Finance + +Segment-level source files should resolve against `dim_segment` during processing. The target metric table should store the standard segment code and any finance/GL values required by the table’s purpose. + +For example: + +```text +source file segment code + → dim_segment.map_segment_code + → dim_segment.segment_code + → snap_pace_segment.segment_code +``` + +Target tables then use: + +```text +snap_pace_segment.segment_code +fact_actual_segment.segment_code +fact_pickup_segment.segment_code +fact_manual_plan.segment_code +fact_finance_gl.segment_code +``` + +The reporting layer can join to `vw_segment` to bring in segment name, group name, finance segment, and GL context. + ## Operating Notes 1. `lkp_segment_group`, `lkp_segment`, and `lkp_finance_segment` define controlled values. 2. `lkp_segment.segment_group_code` controls the commercial segment hierarchy. -3. `dim_segment` carries `map_segment_code` as the single lookup back to the mapped source segment code. -4. 
`dim_segment.segment_group_code` should be inherited from `lkp_segment` unless there is a specific, documented exception. -5. Finance segment is separate from commercial segment on purpose. -6. Target metric tables should store `segment_code`; readable names belong in `vw_segment`. \ No newline at end of file +3. `map_segment` translates and governs source-system segment values before they are published into the production dimension. +4. `map_segment` uses `system`, `code`, `name`, and `description`; it does not use the `source_` prefix because the table context already makes those fields source-oriented. +5. `dim_segment` carries `map_segment_code` as the single lookup back to the mapped source segment code. +6. `dim_segment.segment_group_code` should be inherited from `lkp_segment` unless there is a specific, documented exception. +7. Finance segment is separate from commercial segment on purpose. Sometimes finance and revenue strategy agree. Sometimes they have met, briefly, in a hallway. +8. Target metric tables should store `segment_code`; readable names belong in `vw_segment`. diff --git a/metrics-library/source-channel-rate-model.mdx b/metrics-library/source-channel-rate-model.mdx index 13717da..9c735be 100644 --- a/metrics-library/source-channel-rate-model.mdx +++ b/metrics-library/source-channel-rate-model.mdx @@ -1,28 +1,28 @@ --- -title: "source-channel-rate-model.mdx" -description: "Source, channel, agency, company, consortia, and rate data modeling for the Metrics Platform" +title: Source, Channel, and Rate Model +nav_order: 11 +has_toc: true +permalink: /source-channel-rate-model/ --- # Source, Channel, and Rate Model Source, channel, agency, company, consortia, and rate data sit at the messy intersection of distribution, sales, marketing, revenue strategy, and finance. Every hotel system has its own version of the truth. Sometimes it is a source. Sometimes it is a subsource. Sometimes it is a channel. 
Sometimes it is a rate code wearing a trench coat. -The Metrics model separates these concepts into controlled lookup tables, then uses mapping tables to translate source-system values into the standard structure. - ---- +The Metrics model separates these concepts into controlled lookup tables, then uses mapping tables to translate source-system values into the standard structure. The goal is not to force every source system into the same shape. The goal is to give Metrics a clean, consistent reporting layer while preserving enough flexibility to handle systems that do not have every level of hierarchy. ## How We View Source and Rate Data -Source and channel data explain how demand enters the hotel. Rate and company data explain why the booking exists, how it should be priced, and how it should roll into commercial and finance reporting. +Source and channel data explain how demand enters the hotel. Rate and company data explain why the booking exists, how it should be priced, and how it should roll into commercial and finance reporting. Consortia and agency data add another layer of distribution context, especially for GDS and negotiated travel business. -Metrics keeps those relationships explicit so reporting can answer the practical questions: Where did the booking come from? What commercial bucket does it belong to? Which rate structure drove it? Which company or agency influenced it? +These concepts overlap, but they should not be collapsed into one flat field. A booking can have a channel, source, subsource, rate type, rate code, company, agency, consortia, segment, and finance treatment. Each one answers a different business question. ---- +Metrics keeps those relationships explicit so reporting can answer the practical questions: Where did the booking come from? What commercial bucket does it belong to? Which rate structure drove it? Which company or agency influenced it? And does finance agree, or are we having that meeting again? 
## Table Family | Table | Purpose | -| --- | --- | +|---|---| | `lkp_consortia_category` | Controlled list of consortia categories. | | `lkp_consortia_focus` | Controlled list of consortia focus areas. | | `lkp_consortia` | Controlled consortia reference table. | @@ -42,49 +42,203 @@ Metrics keeps those relationships explicit so reporting can answer the practical --- +## Consortia Lookup Tables + +## `lkp_consortia_category` + +Consortia category defines the broad type of consortia relationship. + +| Column | Type | Definition | +|---|---|---| +| `code` | STRING | Standard consortia category code. | +| `name` | STRING | Consortia category display name. | +| `description` | STRING | Definition or usage notes. | +| `sort` | INT64 | Display order for reporting. | +| `is_active` | BOOL | Indicates whether the value is active. | +| `insert_date` | DATE | Insert date. | +| `updated_date` | DATE | Updated date. | + +## `lkp_consortia_focus` + +Consortia focus defines the primary commercial focus or audience of the consortia relationship. + +| Column | Type | Definition | +|---|---|---| +| `code` | STRING | Standard consortia focus code. | +| `name` | STRING | Consortia focus display name. | +| `description` | STRING | Definition or usage notes. | +| `sort` | INT64 | Display order for reporting. | +| `is_active` | BOOL | Indicates whether the value is active. | +| `insert_date` | DATE | Insert date. | +| `updated_date` | DATE | Updated date. | + +## `lkp_consortia` + +`lkp_consortia` stores the standard consortia reference list. + +| Column | Type | Definition | +|---|---|---| +| `code` | STRING | Standard consortia code. | +| `name` | STRING | Full consortia name. | +| `short_name` | STRING | Short display name used in reporting. | +| `description` | STRING | Definition or usage notes. | +| `sort` | INT64 | Display order for reporting. | +| `consortia_focus_code` | STRING | Standard consortia focus code. Joins to `lkp_consortia_focus.code`. 
| +| `consortia_category_code` | STRING | Standard consortia category code. Joins to `lkp_consortia_category.code`. | +| `is_active` | BOOL | Indicates whether the consortia value is active. | +| `insert_date` | DATE | Insert date. | +| `updated_date` | DATE | Updated date. | + +--- + +## Agency, Industry, and Company Tables + +## `lkp_agency` + +`lkp_agency` stores agency and IATA reference values. This is especially useful for GDS, consortia, corporate, and negotiated production analysis. + +| Column | Type | Definition | +|---|---|---| +| `iata` | STRING | IATA number or agency identifier. | +| `name` | STRING | Agency name. | +| `corp_profile` | STRING | Corporate profile or agency profile reference, when supplied. | +| `corporate_code` | STRING | Corporate code associated with the agency, when available. | +| `consortia_code` | STRING | Standard consortia code. Joins to `lkp_consortia.code`. | +| `consortia_short_name` | STRING | Short consortia name for easy display and source compatibility. | +| `rate_type` | STRING | Rate type or negotiated rate classification associated with the agency, when supplied. | +| `is_active` | BOOL | Indicates whether the agency record is active. | +| `insert_date` | DATE | Insert date. | +| `updated_date` | DATE | Updated date. | + +## `lkp_industry` + +`lkp_industry` defines controlled industry values used by company and account reporting. + +| Column | Type | Definition | +|---|---|---| +| `category` | STRING | High-level industry category. | +| `industry` | STRING | Industry name. | +| `sort` | INT64 | Display order for reporting. | +| `is_active` | BOOL | Indicates whether the industry value is active. | +| `insert_date` | DATE | Insert date. | +| `updated_date` | DATE | Updated date. | + +## `lkp_company` + +`lkp_company` stores company, corporate account, and profile reference values. + +| Column | Type | Definition | +|---|---|---| +| `code` | STRING | Standard company or account code. 
| +| `name` | STRING | Company or account name. | +| `description` | STRING | Company description or usage notes. | +| `category` | STRING | Company category. | +| `industry` | STRING | Industry value. Can join to `lkp_industry.industry` when standardized. | +| `sort` | INT64 | Display order for reporting. | +| `is_active` | BOOL | Indicates whether the company value is active. | +| `insert_date` | DATE | Insert date. | +| `updated_date` | DATE | Updated date. | + +--- + ## Source and Channel Lookup Tables -### `lkp_source` +## `lkp_source` + +`lkp_source` defines the standard source value used by Metrics. | Column | Type | Definition | -| --- | --- | --- | +|---|---|---| | `code` | STRING | Standard source code. | | `name` | STRING | Source display name. | | `description` | STRING | Definition or usage notes. | | `sort` | INT64 | Display order for reporting. | -| `source` | STRING | Optional source family or legacy source label. | +| `source` | STRING | Optional source family or legacy source label when a source needs to be grouped under a broader source value. | | `is_active` | BOOL | Indicates whether the source value is active. | +| `insert_date` | DATE | Insert date. | +| `updated_date` | DATE | Updated date. | + +## `lkp_subsource` + +`lkp_subsource` defines the standard subsource value used by Metrics when the source system supports a subsource level. + +| Column | Type | Definition | +|---|---|---| +| `code` | STRING | Standard subsource code. | +| `name` | STRING | Subsource display name. | +| `description` | STRING | Definition or usage notes. | +| `sort` | INT64 | Display order for reporting. | +| `is_active` | BOOL | Indicates whether the subsource value is active. | +| `insert_date` | DATE | Insert date. | +| `updated_date` | DATE | Updated date. | + +## `lkp_channel_group` -### `lkp_channel` +`lkp_channel_group` defines the high-level channel grouping used by reporting. 
| Column | Type | Definition | -| --- | --- | --- | +|---|---|---| +| `code` | STRING | Standard channel group code. | +| `name` | STRING | Channel group display name. | +| `description` | STRING | Definition or usage notes. | +| `sort` | INT64 | Display order for reporting. | +| `is_active` | BOOL | Indicates whether the channel group is active. | +| `insert_date` | DATE | Insert date. | +| `updated_date` | DATE | Updated date. | + +## `lkp_channel` + +`lkp_channel` defines the standard channel used by Metrics. + +| Column | Type | Definition | +|---|---|---| | `code` | STRING | Standard channel code. | | `name` | STRING | Channel display name. | | `description` | STRING | Definition or usage notes. | | `sort` | INT64 | Display order for reporting. | -| `channel_group_code` | STRING | Standard channel group code. | +| `channel_group_code` | STRING | Standard channel group code. Joins to `lkp_channel_group.code`. | | `is_active` | BOOL | Indicates whether the channel value is active. | +| `insert_date` | DATE | Insert date. | +| `updated_date` | DATE | Updated date. | --- ## Rate Lookup Tables -### `lkp_rate` +## `lkp_ratetype` + +`lkp_ratetype` defines the standard rate type used by Metrics. + +| Column | Type | Definition | +|---|---|---| +| `code` | STRING | Standard rate type code. | +| `name` | STRING | Rate type display name. | +| `description` | STRING | Definition or usage notes. | +| `sort` | INT64 | Display order for reporting. | +| `is_active` | BOOL | Indicates whether the rate type is active. | +| `insert_date` | DATE | Insert date. | +| `updated_date` | DATE | Updated date. | + +## `lkp_rate` + +`lkp_rate` defines standard rate codes and their related commercial attributes. | Column | Type | Definition | -| --- | --- | --- | +|---|---|---| | `code` | STRING | Standard rate code. | | `name` | STRING | Rate display name. | | `description` | STRING | Rate description or usage notes. | -| `ratetype_code` | STRING | Standard rate type code. 
| -| `company_code` | STRING | Standard company code, when rate is associated with a company. | -| `segment_code` | STRING | Standard segment code. | -| `channel_code` | STRING | Standard channel code. | -| `source_code` | STRING | Standard source code. | -| `subsource_code` | STRING | Standard subsource code, when applicable. | -| `consortia_code` | STRING | Standard consortia code, when applicable. | +| `ratetype_code` | STRING | Standard rate type code. Joins to `lkp_ratetype.code`. | +| `company_code` | STRING | Standard company code, when the rate is associated with a company. Joins to `lkp_company.code`. | +| `segment_code` | STRING | Standard segment code. Joins to `lkp_segment.code`. | +| `channel_code` | STRING | Standard channel code. Joins to `lkp_channel.code`. | +| `source_code` | STRING | Standard source code. Joins to `lkp_source.code`. | +| `subsource_code` | STRING | Standard subsource code, when applicable. Joins to `lkp_subsource.code`. | +| `consortia_code` | STRING | Standard consortia code, when applicable. Joins to `lkp_consortia.code`. | +| `sort` | INT64 | Display order for reporting. | | `is_active` | BOOL | Indicates whether the rate value is active. | +| `insert_date` | DATE | Insert date. | +| `updated_date` | DATE | Updated date. | --- @@ -92,18 +246,26 @@ Metrics keeps those relationships explicit so reporting can answer the practical `map_source` is the source-to-standard translation table for source and subsource values. +The mapping is universal by property and system. It is not tied to a specific ingestion report because the same source or subsource code can appear across many reports and workflows. + ### Why `code_reference` Exists +Some hotel systems provide both source and subsource. Others only provide one code. Metrics needs to map both patterns without inventing a fake hierarchy. 
+ `code_reference` tells the pipeline what the incoming source-system `code` represents: -| code\_reference | Meaning | -| --- | --- | +| code_reference | Meaning | +|---|---| | `source_code` | The source-system code maps to the standard `source_code`. | | `subsource_code` | The source-system code maps to the standard `subsource_code`. | - - When a system does not provide a true subsource, the target fact or snapshot table should populate `subsource_code = source_code` to keep the model structurally consistent. - +When a system does not provide a true subsource, the target fact or snapshot table should populate: + +```text +subsource_code = source_code +``` + +That keeps fact tables structurally consistent while making it clear that no separate subsource was supplied. It is better than nulls everywhere, and much better than pretending the PMS had nuance it absolutely did not have. ### Grain @@ -114,22 +276,27 @@ property_code + system + code ### Columns | Column | Type | Definition | -| --- | --- | --- | +|---|---|---| | `property_code` | STRING | Property code. | | `system` | STRING | Source system providing the source/subsource value. | | `code` | STRING | Source-system source or subsource code. | | `name` | STRING | Source-system source or subsource name. | -| `code_reference` | STRING | Indicates whether `code` maps to `source_code` or `subsource_code`. | -| `source_code` | STRING | Standard source code. | -| `subsource_code` | STRING | Standard subsource code. | -| `channel_code` | STRING | Standard channel code. | +| `description` | STRING | Source-system source or subsource description, if supplied. | +| `code_reference` | STRING | Indicates whether `code` maps to `source_code` or `subsource_code`. Allowed values: `source_code`, `subsource_code`. | +| `source_code` | STRING | Standard source code. Joins to `lkp_source.code`. | +| `subsource_code` | STRING | Standard subsource code. Joins to `lkp_subsource.code`. 
If the source system does not have subsource, this may be filled with `source_code` in target fact tables. | +| `channel_code` | STRING | Standard channel code. Joins to `lkp_channel.code`. | | `is_active` | BOOL | Indicates whether the mapping is active. | +| `insert_date` | DATE | Insert date. | +| `updated_date` | DATE | Updated date. | --- ## `map_rate` -Rate mapping connects the source-system rate code to rate type, company, segment, channel, source, subsource, and consortia context. +`map_rate` is the source-to-standard translation table for rate codes. + +Rate mapping does more than rename a rate. It connects the source-system rate code to rate type, company, segment, channel, source, subsource, and consortia context where available. ### Grain @@ -140,23 +307,98 @@ property_code + system + code ### Columns | Column | Type | Definition | -| --- | --- | --- | +|---|---|---| | `property_code` | STRING | Property code. | | `system` | STRING | Source system providing the rate value. | | `code` | STRING | Source-system rate code. | | `name` | STRING | Source-system rate name. | -| `ratetype_code` | STRING | Standard rate type code. | -| `company_code` | STRING | Standard company code. | -| `segment_code` | STRING | Standard segment code. | -| `channel_code` | STRING | Standard channel code. | -| `source_code` | STRING | Standard source code. | -| `subsource_code` | STRING | Standard subsource code, when applicable. | -| `consortia_code` | STRING | Standard consortia code, when applicable. | +| `description` | STRING | Source-system rate description, if supplied. | +| `ratetype_code` | STRING | Standard rate type code. Joins to `lkp_ratetype.code`. | +| `company_code` | STRING | Standard company code. Joins to `lkp_company.code`. | +| `segment_code` | STRING | Standard segment code. Joins to `lkp_segment.code`. | +| `channel_code` | STRING | Standard channel code. Joins to `lkp_channel.code`. | +| `source_code` | STRING | Standard source code. 
Joins to `lkp_source.code`. | +| `subsource_code` | STRING | Standard subsource code, when applicable. Joins to `lkp_subsource.code`. | +| `consortia_code` | STRING | Standard consortia code, when applicable. Joins to `lkp_consortia.code`. | | `is_active` | BOOL | Indicates whether the mapping is active. | +| `insert_date` | DATE | Insert date. | +| `updated_date` | DATE | Updated date. | --- -## Source Resolution Example +## Enriched Views + +Views expose readable labels and `sort` fields so reporting tools display sources, channels, rates, companies, and consortia in the intended operating order instead of alphabetizing a commercial strategy into nonsense. + +## `vw_source` + +`vw_source` joins mapped source/subsource values to source, subsource, channel, and channel group labels. + +| Column | Source | Definition | +|---|---|---| +| `property_code` | `map_source` | Property code. | +| `system` | `map_source` | Source system. | +| `map_source_code` | `map_source.code` | Source-system source/subsource code. | +| `map_source_name` | `map_source.name` | Source-system source/subsource name. | +| `code_reference` | `map_source` | Indicates whether the mapped code resolves to source or subsource. | +| `source_code` | `map_source` | Standard source code. | +| `source` | `lkp_source.name` | Standard source name. | +| `source_description` | `lkp_source.description` | Source description. | +| `source_sort` | `lkp_source.sort` | Source display order. | +| `subsource_code` | `map_source` | Standard subsource code. | +| `subsource` | `lkp_subsource.name` | Standard subsource name. | +| `subsource_description` | `lkp_subsource.description` | Subsource description. | +| `subsource_sort` | `lkp_subsource.sort` | Subsource display order. | +| `channel_code` | `map_source` | Standard channel code. | +| `channel` | `lkp_channel.name` | Standard channel name. | +| `channel_sort` | `lkp_channel.sort` | Channel display order. 
| +| `channel_group_code` | `lkp_channel.channel_group_code` | Standard channel group code. | +| `channel_group` | `lkp_channel_group.name` | Standard channel group name. | +| `channel_group_sort` | `lkp_channel_group.sort` | Channel group display order. | +| `is_active` | `map_source` | Active mapping flag. | + +## `vw_rate` + +`vw_rate` joins mapped rate values to rate type, company, segment, channel, source, subsource, and consortia labels. + +| Column | Source | Definition | +|---|---|---| +| `property_code` | `map_rate` | Property code. | +| `system` | `map_rate` | Source system. | +| `map_rate_code` | `map_rate.code` | Source-system rate code. | +| `map_rate_name` | `map_rate.name` | Source-system rate name. | +| `ratetype_code` | `map_rate` | Standard rate type code. | +| `ratetype` | `lkp_ratetype.name` | Standard rate type name. | +| `ratetype_sort` | `lkp_ratetype.sort` | Rate type display order. | +| `company_code` | `map_rate` | Standard company code. | +| `company` | `lkp_company.name` | Company name. | +| `company_category` | `lkp_company.category` | Company category. | +| `industry` | `lkp_company.industry` | Company industry. | +| `segment_code` | `map_rate` | Standard segment code. | +| `segment` | `lkp_segment.name` | Standard segment name. | +| `segment_sort` | `lkp_segment.sort` | Segment display order. | +| `channel_code` | `map_rate` | Standard channel code. | +| `channel` | `lkp_channel.name` | Standard channel name. | +| `channel_sort` | `lkp_channel.sort` | Channel display order. | +| `source_code` | `map_rate` | Standard source code. | +| `source` | `lkp_source.name` | Standard source name. | +| `source_sort` | `lkp_source.sort` | Source display order. | +| `subsource_code` | `map_rate` | Standard subsource code. | +| `subsource` | `lkp_subsource.name` | Standard subsource name. | +| `subsource_sort` | `lkp_subsource.sort` | Subsource display order. | +| `consortia_code` | `map_rate` | Standard consortia code. 
| +| `consortia` | `lkp_consortia.name` | Consortia name. | +| `consortia_short_name` | `lkp_consortia.short_name` | Consortia short name. | +| `consortia_sort` | `lkp_consortia.sort` | Consortia display order. | +| `is_active` | `map_rate` | Active mapping flag. | + +--- + +## Relationship to Booking, Pace, Actuals, and Pricing + +Source and rate mappings are used during ingestion to resolve source-system values into standard reporting codes. + +Example source resolution: ```text source file source/subsource code @@ -165,22 +407,36 @@ source file source/subsource code → target table source_code, subsource_code, channel_code ``` -## Rate Resolution Example +Example rate resolution: ```text source file rate code → map_rate.code - → ratetype_code / company_code / segment_code / channel_code / source_code / consortia_code + → ratetype_code / company_code / segment_code / channel_code / source_code / subsource_code / consortia_code → target table rate and commercial attributes ``` ---- +Target tables may use these fields depending on grain and source availability: + +```text +source_code +subsource_code +channel_code +channel_group_code +rate_code +ratetype_code +company_code +segment_code +consortia_code +iata +``` ## Operating Notes 1. Use `lkp_`, not `lpk_`, for lookup tables. 2. Mapping tables are universal by `property_code + system + code`; they do not include report-level grain. 3. Source reports belong in ingestion metadata, not source or rate mapping tables. -4. `code_reference` is required in `map_source` because some systems map source and subsource differently. -5. When no subsource exists, target fact/snapshot tables may set `subsource_code = source_code`. -6. Keep agency, company, consortia, source, channel, and rate separate. They answer different questions. \ No newline at end of file +4. `code_reference` is required in `map_source` because some systems map source and subsource differently, and some do not provide subsource at all. +5. 
When no subsource exists, target fact/snapshot tables may set `subsource_code = source_code` to keep the model structurally consistent. +6. Views should expose sort fields from lookup tables so Metrics can display values in the intended order. +7. Keep agency, company, consortia, source, channel, and rate separate. They answer different questions, even when a source system tries to stuff them into one field. diff --git a/metrics-library/source-file-ingestion-model.mdx b/metrics-library/source-file-ingestion-model.mdx new file mode 100644 index 0000000..1474f90 --- /dev/null +++ b/metrics-library/source-file-ingestion-model.mdx @@ -0,0 +1,451 @@ +--- +title: Source File Ingestion Model +nav_order: 8 +has_toc: true +permalink: /source-file-ingestion-model/ +--- + +# Source File Ingestion Model + +## Metrics Platform + +This document defines the file ingestion architecture used throughout the Metrics Platform. Hospitality data rarely arrives in a clean, consistent, warehouse-ready format. + +**Source data may originate from:** + +* PMS exports, +* RMS systems, +* CRS reports, +* Booking engines, +* OTA extranets, +* Rate shopping tools, +* Google Sheets, +* Finance systems, +* Manual operational files, +* or vendor-generated Excel workbooks that appear to have survived several ownership transitions. + +The Metrics Platform standardizes these disconnected source files into a unified reporting framework used for: + +* operational reporting, +* forecasting, +* pricing analysis, +* benchmark intelligence, +* automation workflows, +* and downstream analytics. 
+ +**The objective is not simply loading files into BigQuery; the objective is preserving operational meaning, source traceability, and reporting consistency throughout the ingestion pipeline.** + + + +# Ingestion Architecture Philosophy + +The Metrics Platform intentionally separates: + +* file ingestion, +* raw source preservation, +* staging transformations, +* semantic standardization, +* and reporting-layer modeling + +into independent architectural layers. + +This separation preserves: + +* source traceability, +* ingestion flexibility, +* warehouse stability, +* transformation reproducibility, +* and operational auditability. + +Because hospitality source systems rarely agree on: + +* naming conventions, +* reporting grains, +* date formats, +* metric structures, +* or whether “final_final_v2_USE_THIS.xlsx” was actually the final file. + + + +# Core Ingestion Architecture + +```text +Source CSV / Excel Files + ↓ +Landing Location + ↓ +Raw BigQuery Tables + ↓ +Staging & Standardization + ↓ +Dataform Transformations + ↓ +Standardized Metrics Tables + ↓ +BI Marts & Reporting Views +``` + +The architecture intentionally separates ingestion from transformation responsibilities. + + + +# Core Principle + +Dataform owns: + +* warehouse structure, +* transformations, +* semantic modeling, +* standardized reporting layers, +* and downstream marts/views. + +A separate ingestion process owns: + +* file detection, +* CSV/XLSX parsing, +* workbook conversion, +* raw table loading, +* metadata preservation, +* and file-level processing workflows. + +This separation keeps warehouse logic stable even as source systems evolve. + + + +# Source File Landing Standards + +Raw source files are stored in standardized landing locations before ingestion. 
+ +Supported landing patterns include: + +```text +Google Drive / Shared Drive +Google Cloud Storage +Manual Upload Folder +SFTP Landing Folder +API Export Storage +``` + +Landing locations preserve original source files before transformation occurs. + +Because once someone overwrites the “corrected final export,” archaeology becomes part of the analytics workflow. + + + +# Source File Metadata Standards + +Every ingested file preserves operational metadata for auditability and traceability. + +| Metadata | Purpose | +|---|---| +| `source_file` | Original filename or storage path | +| `source_system` | PMS, RMS, CRS, OTA, rate shop, manual, etc. | +| `source_report` | Original report/export name | +| `property_code` | Property associated with the file | +| `extract_date` | Date the report was generated/exported | +| `load_ts` | Timestamp loaded into the warehouse | +| `source_file_id` | External file identifier (Drive/GCS/etc.) | + +This metadata layer preserves lineage throughout the ingestion and reporting pipeline. + + + +# Raw & Staging Architecture + +## Raw Layer Philosophy + +Raw tables preserve source-system structures as closely as possible. + +The raw layer acts as: + +* an ingestion checkpoint, +* a recovery layer, +* and a source-of-truth archive for imported operational data. + +Raw tables intentionally avoid aggressive transformation logic. + + + +## Raw Table Standards + +```text +raw_{source_system}_{source_report} +``` + +Examples: + +```text +raw_duetto_rms_otb_segment +raw_duetto_rms_pickup_roomtype +raw_costar_demand_segment +raw_costar_demand_channel +raw_bookingdotcom_bar_price_shop +raw_bookingdotcom_lowest_price_shop +``` + + + +## Staging Layer Philosophy + +Staging tables normalize: + +* data types, +* date structures, +* metric formatting, +* source-system inconsistencies, +* and operational naming conventions + +before transformation into standardized warehouse models. 
+ +Staging preserves enough source structure for traceability while making the data usable for semantic modeling. + + + +# BigQuery Dataset Structure + +The Metrics Platform separates datasets by operational domain and architectural responsibility. + +| Dataset | Purpose | +|---|---| +| `raw` | Raw source-loaded tables preserving original structure | +| `stg` | Typed and normalized staging tables | +| `metrics_core` | Shared dimensions, mappings, and reference tables | +| `metrics_pace` | Pace, pickup, forecast, and snapshot reporting | +| `metrics_demand` | Demand, benchmarking, comp-set, and index reporting | +| `metrics_booking` | Booking engine, CRS, reservation, and pricing data | +| `metrics_web` | Web analytics, landing page, GA4, and search data | +| `metrics_finance` | Financial reporting, payroll, budgets, and expenses | +| `metrics_sales` | Sales activity, RFPs, accounts, and lead tracking | +| `metrics_social` | Campaign, engagement, and social platform metrics | + +This separation keeps reporting domains organized while preserving warehouse scalability. + + + +# CSV Handling Standards + +CSV files may be loaded through: + +* BigQuery UI uploads, +* `bq load`, +* Cloud Storage ingestion, +* Python loaders, +* n8n workflows, +* Apps Script integrations, +* or scheduled ingestion pipelines. + +## Standard CSV Flow + +```text +CSV File + → Landing Location + → Raw BigQuery Table + → Staging Layer + → Dataform Transformation + → Standardized Metrics Tables +``` + + + +# Excel Handling Standards + +Excel files require conversion or custom parsing before warehouse ingestion. + +The ingestion framework supports: + +* workbook tab parsing, +* tab-level extraction, +* Google Sheets conversion, +* Python loaders, +* Apps Script workflows, +* and n8n ingestion pipelines. 
+ +## Standard Excel Flow + +```text +Excel File + → Workbook Parsing + → Raw BigQuery Table + → Staging Layer + → Dataform Transformation + → Standardized Metrics Tables +``` + +Alternative path: + +```text +Excel File + → Google Sheets Conversion + → BigQuery External Table + → Dataform Transformation + → Standardized Metrics Tables +``` + +Because hospitality reporting still has an intense emotional attachment to Excel tabs named: + +```text +OTB_FINAL_USE_THIS_v7 +``` + + + +# Google Sheets Writeback Standards + +Google Sheets may remain the user-facing planning layer for: + +* forecasting, +* budgeting, +* overrides, +* targets, +* and operational planning workflows. + +## Standard Flow + +```text +Google Sheets + → Apps Script / n8n / Connector + → fact_manual_plan + → Dataform Marts & Reporting Views +``` + +The planning framework preserves: + +* user attribution, +* approval tracking, +* source sheet lineage, +* and planning auditability. + + + +# Dataform Responsibilities + +Dataform owns: + +* standardized warehouse structures, +* transformations, +* semantic modeling, +* staging logic, +* dimensional mapping, +* mart creation, +* KPI standardization, +* and reporting-layer calculations. + +Examples include: + +* pace transformations, +* benchmark calculations, +* metric standardization, +* semantic reporting views, +* and BI marts. + + + +# Ingestion Framework Responsibilities + +The ingestion framework owns: + +* file detection, +* workbook parsing, +* CSV/XLSX conversion, +* raw table loading, +* metadata preservation, +* malformed-row handling, +* processing workflows, +* and file archival management. + +This separation keeps warehouse transformations stable regardless of source-file complexity. 
+ + + +# Automation Workflow Pattern + +A standard automation workflow follows: + +```text +Landing Folder / GCS / Drive Watcher + → Ingestion Workflow + → File Metadata Extraction + → Workbook/CSV Parsing + → Raw BigQuery Load + → Dataform Execution + → File Archival + → Success / Failure Notification +``` + +The ingestion framework parses: + +* property, +* source system, +* report type, +* extract date, +* and operational metadata + +directly from filenames and workbook structures whenever possible. + + + +# File Registry Framework + +The Metrics Platform maintains a centralized file registry table: + +```text +ctl_file_load +``` + +This table tracks: + +* ingestion status, +* row counts, +* source lineage, +* load timestamps, +* processing outcomes, +* and archival references. + + + +# `ctl_file_load` + +| Column | Type | Notes | +|---|---|---| +| `file_load_id` | STRING | Unique file load identifier | +| `source_file` | STRING | Original filename or storage path | +| `source_file_id` | STRING | External Drive/GCS/file identifier | +| `source_system` | STRING | PMS, RMS, CRS, OTA, etc. | +| `source_report` | STRING | Original report/export name | +| `property_code` | STRING | Associated property | +| `extract_date` | DATE | Report extraction date | +| `load_ts` | TIMESTAMP | Warehouse load timestamp | +| `raw_table` | STRING | Raw table destination | +| `row_count` | INT64 | Loaded row count | +| `load_status` | STRING | Pending, Loaded, Transformed, Error, Archived | +| `error_message` | STRING | Processing error details | +| `processed_file_path` | STRING | Archived file location | +| `insert_date` | DATE | Insert date | +| `updated_date` | DATE | Last update date | + + + +# Warehouse Design Philosophy + +The Metrics Platform treats source ingestion as an operational intelligence pipeline rather than a simple file-upload process. 
+ +**The architecture intentionally separates:** + +* ingestion, +* normalization, +* semantic transformation, +* benchmark logic, +* and reporting consumption + +into independent but connected layers. + +**This structure preserves:** + +* operational traceability, +* warehouse stability, +* reporting consistency, +* source lineage, +* and analytical reproducibility. + +**The goal is not simply loading files; the goal is creating a reliable operational reporting system from source data that was never designed to work together in the first place.** + diff --git a/metrics-library/source-file-ingestion.mdx b/metrics-library/source-file-ingestion.mdx deleted file mode 100644 index 5fc09af..0000000 --- a/metrics-library/source-file-ingestion.mdx +++ /dev/null @@ -1,243 +0,0 @@ ---- -title: "source-file-ingestion.mdx" -description: "File ingestion architecture used throughout the Metrics Platform" ---- - -# Source File Ingestion Model - -## Metrics Platform - -This document defines the file ingestion architecture used throughout the Metrics Platform. Hospitality data rarely arrives in a clean, consistent, warehouse-ready format. - -**Source data may originate from:** - -- PMS exports, -- RMS systems, -- CRS reports, -- Booking engines, -- OTA extranets, -- Rate shopping tools, -- Google Sheets, -- Finance systems, -- Manual operational files, -- or vendor-generated Excel workbooks that appear to have survived several ownership transitions. - -**The objective is not simply loading files into BigQuery, the objective is preserving operational meaning, source traceability, and reporting consistency throughout the ingestion pipeline.** - ---- - -## Ingestion Architecture Philosophy - -The Metrics Platform intentionally separates: - -- file ingestion, -- raw source preservation, -- staging transformations, -- semantic standardization, -- and reporting-layer modeling - -into independent architectural layers. 
- ---- - -## Core Ingestion Architecture - -```text -Source CSV / Excel Files - ↓ -Landing Location - ↓ -Raw BigQuery Tables - ↓ -Staging & Standardization - ↓ -Dataform Transformations - ↓ -Standardized Metrics Tables - ↓ -BI Marts & Reporting Views -``` - ---- - -## Core Principle - - - - - warehouse structure - - transformations - - semantic modeling - - standardized reporting layers - - downstream marts/views - - - - - file detection - - CSV/XLSX parsing - - workbook conversion - - raw table loading - - metadata preservation - - file-level processing workflows - - - ---- - -## Source File Landing Standards - -Supported landing patterns: - -```text -Google Drive / Shared Drive -Google Cloud Storage -Manual Upload Folder -SFTP Landing Folder -API Export Storage -``` - ---- - -## Source File Metadata Standards - -| Metadata | Purpose | -| --- | --- | -| `source_file` | Original filename or storage path | -| `source_system` | PMS, RMS, CRS, OTA, rate shop, manual, etc. | -| `source_report` | Original report/export name | -| `property_code` | Property associated with the file | -| `extract_date` | Date the report was generated/exported | -| `load_ts` | Timestamp loaded into the warehouse | -| `source_file_id` | External file identifier (Drive/GCS/etc.) 
| - ---- - -## Raw & Staging Architecture - -### Raw Table Standards - -```text -raw_{source_system}_{source_report} -``` - -Examples: - -```text -raw_duetto_rms_otb_segment -raw_duetto_rms_pickup_roomtype -raw_costar_demand_segment -raw_costar_demand_channel -raw_bookingdotcom_bar_price_shop -``` - ---- - -## BigQuery Dataset Structure - -| Dataset | Purpose | -| --- | --- | -| `raw` | Raw source-loaded tables preserving original structure | -| `stg` | Typed and normalized staging tables | -| `metrics_core` | Shared dimensions, mappings, and reference tables | -| `metrics_pace` | Pace, pickup, forecast, and snapshot reporting | -| `metrics_demand` | Demand, benchmarking, comp-set, and index reporting | -| `metrics_booking` | Booking engine, CRS, reservation, and pricing data | -| `metrics_web` | Web analytics, landing page, GA4, and search data | -| `metrics_finance` | Financial reporting, payroll, budgets, and expenses | -| `metrics_sales` | Sales activity, RFPs, accounts, and lead tracking | -| `metrics_social` | Campaign, engagement, and social platform metrics | - ---- - -## CSV Handling Standards - -### Standard CSV Flow - -```text -CSV File - → Landing Location - → Raw BigQuery Table - → Staging Layer - → Dataform Transformation - → Standardized Metrics Tables -``` - ---- - -## Excel Handling Standards - -### Standard Excel Flow - -```text -Excel File - → Workbook Parsing - → Raw BigQuery Table - → Staging Layer - → Dataform Transformation - → Standardized Metrics Tables -``` - -Alternative path: - -```text -Excel File - → Google Sheets Conversion - → BigQuery External Table - → Dataform Transformation - → Standardized Metrics Tables -``` - ---- - -## Google Sheets Writeback Standards - -```text -Google Sheets - → Apps Script / n8n / Connector - → fact_manual_plan - → Dataform Marts & Reporting Views -``` - ---- - -## Automation Workflow Pattern - -```text -Landing Folder / GCS / Drive Watcher - → Ingestion Workflow - → File Metadata Extraction - → 
Workbook/CSV Parsing - → Raw BigQuery Load - → Dataform Execution - → File Archival - → Success / Failure Notification -``` - ---- - -## `ctl_file_load` - -| Column | Type | Notes | -| --- | --- | --- | -| `file_load_id` | STRING | Unique file load identifier | -| `source_file` | STRING | Original filename or storage path | -| `source_file_id` | STRING | External Drive/GCS/file identifier | -| `source_system` | STRING | PMS, RMS, CRS, OTA, etc. | -| `source_report` | STRING | Original report/export name | -| `property_code` | STRING | Associated property | -| `extract_date` | DATE | Report extraction date | -| `load_ts` | TIMESTAMP | Warehouse load timestamp | -| `raw_table` | STRING | Raw table destination | -| `row_count` | INT64 | Loaded row count | -| `load_status` | STRING | Pending, Loaded, Transformed, Error, Archived | -| `error_message` | STRING | Processing error details | -| `processed_file_path` | STRING | Archived file location | -| `insert_date` | DATE | Insert timestamp | -| `updated_date` | DATE | Last update timestamp | - ---- - -## Warehouse Design Philosophy - -The Metrics Platform treats source ingestion as an operational intelligence pipeline rather than a simple file-upload process. 
- -**The goal is not simply loading files, the goal is creating a reliable operational reporting system from source data that was never designed to work together in the first place.** \ No newline at end of file diff --git a/metrics-library/column-propagation-audit.mdx b/metrics-library/table-architecture-column-propagation-audit.mdx similarity index 53% rename from metrics-library/column-propagation-audit.mdx rename to metrics-library/table-architecture-column-propagation-audit.mdx index 85b1742..4c89a45 100644 --- a/metrics-library/column-propagation-audit.mdx +++ b/metrics-library/table-architecture-column-propagation-audit.mdx @@ -1,14 +1,14 @@ --- -title: "column-propagation-audit.mdx" -description: "Audit of newly added columns and their propagation across the REVREBEL BI Table Architecture" +title: Table Architecture Column Propagation Audit +nav_order: 4 +has_toc: true +permalink: /table-architecture-column-propagation-audit/ --- # Table Architecture Column Propagation Audit This audit checks whether newly added columns in the REVREBEL BI Table Architecture are represented in the required related tables. ---- - ## Summary Several newly added fields are correctly present in their core dimensions, but some were not yet propagated into the corresponding mapping tables and fact table grains. @@ -21,16 +21,16 @@ The affected areas are: 4. Roomtype detail fields 5. Room pool / related-roomtype grouping fields ---- - ## Propagation Rules ### Dimension-only fields Some fields should live only in dimensions and should not be repeated in every fact table unless they are needed for downstream BI convenience. +Examples: + | Field | Recommended Home | Notes | -| --- | --- | --- | +|---|---|---| | `property_shortname` | `dim_property` | Can be joined in marts. Not required in raw fact tables. | | `rating` | `dim_property` | Descriptive property attribute. Dimension only. | | `crs_id` | `dim_property` | System identifier. 
Dimension only unless needed in source reconciliation. | @@ -43,169 +43,276 @@ If a field changes how data is grouped, mapped, modeled, priced, or interpreted, Room pool fields fall into this category because they support price sensitivity, room demand, upgrade/downgrade behavior, sell-through analysis, and demand calculations across related room types. ---- - ## Findings -### 1. `dim_property` +## 1. `dim_property` + +### Newly added fields found | Field | Present in `dim_property` | Should propagate? | Recommendation | -| --- | --: | --: | --- | +|---|---:|---:|---| | `property_shortname` | Yes | Optional | Keep in `dim_property`; add to marts only if useful for dashboards. | | `rating` | Yes | No | Keep in `dim_property`. | | `crs_id` | Yes | No | Keep in `dim_property`. | | `pms_id` | Yes | No | Keep in `dim_property`. | | `rateshop_id` | Yes | No | Keep in `dim_property`. | -**Status:** No required fact-table propagation needed. +### Status ---- +No required fact-table propagation needed. -### 2. `dim_segment` +## 2. `dim_segment` + +### Newly added fields found | Field | Present in `dim_segment` | Present in `map_segment` | Present in segment facts | Recommendation | -| --- | --: | --: | --: | --- | +|---|---:|---:|---:|---| | `segment_group` | Yes | No | No | Add to `map_segment`, `fact_pace_segment`, `fact_actual_segment`, and `fact_pickup_segment`. | | `segment_group_code` | Yes | No | No | Add to `map_segment`, `fact_pace_segment`, `fact_actual_segment`, and `fact_pickup_segment`. | | `finance_segment` | Yes | No | No | Add to `map_segment`; optional in facts, recommended in marts. | | `finance_segment_code` | Yes | No | No | Add to `map_segment`; optional in facts, recommended in marts. | | `gl_code` | Yes | No | No | Add to `map_segment`; not required in pace facts unless used for finance reconciliation. | -| `rate_basis` | Yes | No | No | Add to `map_segment`, `fact_pace_segment`, and `fact_actual_segment`. 
| +| `rate_basis` | Yes | No | No | Add to `map_segment`, `fact_pace_segment`, and `fact_actual_segment` because it affects interpretation of revenue. | -**Status:** Needs propagation. +### Status ---- +Needs propagation. + +## 3. `dim_source` + +### Newly added / related fields found -### 3. `dim_source` +The field pair `source_group` and `source_group_code` appears in `fact_pace_source`, but not yet in `dim_source`, `map_source`, `fact_actual_source`, or `fact_pickup_source`. | Field | Present in `dim_source` | Present in `map_source` | Present in source facts | Recommendation | -| --- | --: | --: | --: | --- | +|---|---:|---:|---:|---| | `source_group` | No | No | Pace only | Add to `dim_source`, `map_source`, `fact_actual_source`, and `fact_pickup_source`. | | `source_group_code` | No | No | Pace only | Add to `dim_source`, `map_source`, `fact_actual_source`, and `fact_pickup_source`. | -**Status:** Needs propagation. +### Status ---- +Needs propagation. + +## 4. `dim_roomtype` -### 4. `dim_roomtype` +### Newly added fields found | Field | Present in `dim_roomtype` | Present in `map_roomtype` | Present in roomtype facts | Recommendation | -| --- | --: | --: | --: | --- | +|---|---:|---:|---:|---| | `bedtype` | Yes | No | No | Add to `map_roomtype`, `fact_pace_roomtype`, `fact_actual_roomtype`, and `fact_pickup_roomtype`. | | `bedtype_code` | Yes | No | No | Add to `map_roomtype`, `fact_pace_roomtype`, `fact_actual_roomtype`, and `fact_pickup_roomtype`. | | `roomfeature` | Yes | No | No | Add to `map_roomtype`, `fact_pace_roomtype`, `fact_actual_roomtype`, and `fact_pickup_roomtype`. | -| `related_roomtypes` | Yes | No | No | Replace as core analysis field with `roompool` / `roompool_code`. | +| `related_roomtypes` | Yes | No | No | Replace as a core analysis field with `roompool` / `roompool_code`; keep `related_roomtypes` as descriptive/reference metadata if needed. | -**Status:** Needs propagation. +### Status ---- +Needs propagation. + +## 5. 
Room pool / related-roomtype modeling -### 5. Room Pool / Related-Roomtype Modeling +`related_roomtypes` is analytically important for room pools, price sensitivity, demand calculations, and inventory/product grouping. However, a free-form `related_roomtypes` field is not ideal as the primary fact-table grouping key. -The recommended standard is to model room pools explicitly using `roompool` and `roompool_code`, using `related_roomtypes` as supporting metadata only. +The recommended standard is to model room pools explicitly using: -#### Recommended room pool fields +```text +roompool +roompool_code +``` + +Use `related_roomtypes` as supporting metadata that describes which room types are grouped together, but use `roompool` and `roompool_code` as the actual analytical grain fields. + +### Recommended room pool fields | Field | Purpose | -| --- | --- | +|---|---| | `roompool` | Standard room pool name used for analysis. | | `roompool_code` | Standard room pool code used for joins, filtering, and BI grouping. | | `roompool_map` | Source-system room pool or related-roomtype value, if provided. | | `roompool_code_map` | Source-system room pool code, if provided. | -| `related_roomtypes` | Descriptive list of roomtypes included in the pool. | +| `related_roomtypes` | Descriptive list of roomtypes included in the pool. Useful for documentation, not ideal as the primary join key. | ---- +### Why this should propagate + +Room pools are used to answer questions like: + +1. Which related room products are showing pricing sensitivity? +2. Is demand shifting between bed types, views, premium tiers, or accessible variants? +3. Are room types within the same sellable pool behaving differently by ADR or pickup? +4. Should pricing or restrictions be adjusted at roomtype level or pool level? +5. Are upgrades, substitutions, or inventory constraints distorting roomtype-level demand? 
+ +Because of this, `roompool` and `roompool_code` should be available in roomtype mapping and roomtype fact tables. ## Recommended Updates - - - Add: - - ```text - segment_group - segment_group_code - finance_segment - finance_segment_code - gl_code - rate_basis - ``` - - - - Add: - - ```text - segment_group - segment_group_code - finance_segment - finance_segment_code - rate_basis - ``` - - - - Add: - - ```text - source_group - source_group_code - ``` - - - - Add to `dim_roomtype`: - - ```text - roompool - roompool_code - ``` - - Add to `map_roomtype`: - - ```text - bedtype - bedtype_code - roomfeature - roompool - roompool_code - roompool_map - roompool_code_map - related_roomtypes - ``` - - - - Add: - - ```text - bedtype - bedtype_code - roomfeature - roompool - roompool_code - ``` - - - - Add: - - ```text - roompool - roompool_code - roompool_available_rms - roompool_rms_otb - roompool_rev_otb - roompool_adr_otb - roompool_occ_otb - roompool_rms_pickup - roompool_rev_pickup - ``` - - +## `map_segment` ---- +Add: + +```text +segment_group +segment_group_code +finance_segment +finance_segment_code +gl_code +rate_basis +``` + +## `fact_pace_segment` + +Add: + +```text +segment_group +segment_group_code +finance_segment +finance_segment_code +rate_basis +``` + +## `fact_actual_segment` + +Add: + +```text +segment_group +segment_group_code +finance_segment +finance_segment_code +rate_basis +``` + +## `fact_pickup_segment` + +Add: + +```text +segment_group +segment_group_code +``` + +## `dim_source` + +Add: + +```text +source_group +source_group_code +``` + +## `map_source` + +Add: + +```text +source_group +source_group_code +``` + +## `fact_actual_source` + +Add: + +```text +source_group +source_group_code +``` + +## `fact_pickup_source` + +Add: + +```text +source_group +source_group_code +``` + +## `dim_roomtype` + +Add: + +```text +roompool +roompool_code +``` + +Keep: + +```text +related_roomtypes +``` + +## `map_roomtype` + +Add: + +```text +bedtype 
+bedtype_code +roomfeature +roompool +roompool_code +roompool_map +roompool_code_map +related_roomtypes +``` + +## `fact_pace_roomtype` + +Add: + +```text +bedtype +bedtype_code +roomfeature +roompool +roompool_code +``` + +## `fact_actual_roomtype` + +Add: + +```text +bedtype +bedtype_code +roomfeature +roompool +roompool_code +``` + +## `fact_pickup_roomtype` + +Add: + +```text +bedtype +bedtype_code +roomfeature +roompool +roompool_code +``` + +## BI mart recommendation + +Roomtype marts should include both roomtype-level and roompool-level fields. + +Add to `mart_roomtype_daily`: + +```text +roompool +roompool_code +roompool_available_rms +roompool_rms_otb +roompool_rev_otb +roompool_adr_otb +roompool_occ_otb +roompool_rms_pickup +roompool_rev_pickup +``` + +The `roompool_*` metrics may be calculated in the mart by aggregating roomtype facts across `roompool_code`. ## Notes -The property fields `property_shortname`, `rating`, `crs_id`, `pms_id`, and `rateshop_id` are best kept in `dim_property` and joined into BI marts/views when needed. Repeating them in every fact table would make the facts wider without improving metric storage quality. \ No newline at end of file +The property fields `property_shortname`, `rating`, `crs_id`, `pms_id`, and `rateshop_id` are best kept in `dim_property` and joined into BI marts/views when needed. Repeating them in every fact table would make the facts wider without improving metric storage quality. diff --git a/metrics-library/table-architecture.mdx b/metrics-library/table-architecture.mdx deleted file mode 100644 index c1fbbb7..0000000 --- a/metrics-library/table-architecture.mdx +++ /dev/null @@ -1,170 +0,0 @@ ---- -title: "table-architecture.mdx" -description: "Canonical table structure used throughout the Metrics Platform" ---- - -# Metrics Table Architecture - -## Metrics Platform - -This document defines the canonical table structure used throughout the Metrics Platform. 
- -The objective is to maintain a stable, predictable reporting framework across hospitality systems that rarely speak the same language. - -PMS exports, CRS reports, RMS feeds, booking engine datasets, spreadsheets, and vendor reports all structure data differently. The Metrics Platform standardizes those inputs into a consistent warehouse architecture designed for operational reporting, benchmarking, forecasting, dashboarding, automation workflows, and downstream analytics. - -**The goal is not simply storing hotel data, the goal is creating a durable semantic layer where metrics remain interpretable regardless of source system.** - ---- - -## Architecture Philosophy - -The warehouse structure separates datasets by operational grain rather than forcing all reporting into a single generalized fact table. - -Hospitality reporting behaves differently depending on reporting cadence, snapshot behavior, dimensional complexity, pickup logic, and forecasting requirements. - -Because of this, the Metrics Platform intentionally separates pace reporting, actual production, pickup activity, pricing observations, benchmark intelligence, and planning data into independent but interoperable reporting layers. - ---- - -## Core Design Principles - - - - Fact tables are separated by business grain: - - - property, - - segment, - - source/channel, - - and roomtype. - - This structure prevents unnecessary dimensional sparsity while preserving reporting flexibility. - - - - Core pace and actual tables remain intentionally wide. - - Hospitality reporting frequently relies on same-time-last-year comparisons, forecast overlays, budget variance, pickup analysis, and operational KPI calculations. Wide structures simplify dashboard development, semantic modeling, and BI tool performance. - - - - Pickup datasets remain long/narrow because pickup windows and comparison periods expand rapidly. 
- - This structure prevents schema explosion while preserving reporting flexibility across rolling pickup windows, comparison periods, and dynamic pace analysis. - - - - Fact tables preserve additive and source-provided metrics. - - Derived KPIs such as ADR, OCC, RevPAR, indexes, and percentage changes are calculated within marts or semantic reporting views when base metrics are available. - - - - Source systems rarely align on naming conventions. - - The Metrics Platform uses mapping layers to normalize segments, sources, roomtypes, channels, and raw metric structures while preserving source traceability. - - - ---- - -## Warehouse Table Structure - -### Core Dimensions - -| Table | Purpose | -| --- | --- | -| `dim_property` | Standardized hotel and property reference layer | -| `dim_date` | Calendar and reporting date dimension | -| `dim_segment` | Standardized segment reference layer | -| `dim_source` | Standardized booking source and channel layer | -| `dim_roomtype` | Standardized roomtype and room classification layer | -| `dim_metric` | Centralized metric dictionary and calculation framework | -| `dim_source_report` | Source-report metadata and ingestion tracking | - -### Mapping Tables - -| Table | Purpose | -| --- | --- | -| `map_segment` | Maps source segment values to standardized segment structures | -| `map_source` | Maps source/channel values to standardized channel structures | -| `map_roomtype` | Maps source roomtypes to standardized roomtype structures | -| `map_source_metric` | Maps raw report columns to standardized metric structures | - ---- - -## Pace Fact Tables - -Pace tables store snapshot-based on-the-books performance data. 
- -| Table | Purpose | -| --- | --- | -| `fact_pace_property` | Property-level pace snapshots | -| `fact_pace_segment` | Segment-level pace snapshots | -| `fact_pace_source` | Source/channel-level pace snapshots | -| `fact_pace_roomtype` | Roomtype-level pace snapshots | - ---- - -## Actual Fact Tables - -Actual tables store finalized operational production. - -| Table | Purpose | -| --- | --- | -| `fact_actual_property` | Property-level actual performance | -| `fact_actual_segment` | Segment-level actual performance | -| `fact_actual_source` | Source/channel-level actual performance | -| `fact_actual_roomtype` | Roomtype-level actual performance | - ---- - -## Pickup Fact Tables - -Pickup reporting measures booking movement between reporting snapshots. - -| Table | Purpose | -| --- | --- | -| `fact_pickup_property` | Property-level pickup activity | -| `fact_pickup_segment` | Segment-level pickup activity | -| `fact_pickup_source` | Source/channel-level pickup activity | -| `fact_pickup_roomtype` | Roomtype-level pickup activity | - ---- - -## Flexible Metric Observation Layer - -| Table | Purpose | -| --- | --- | -| `fact_metric_observation` | Flexible storage layer for sparse, irregular, or evolving metrics | - ---- - -## BI Marts & Semantic Views - -| View | Purpose | -| --- | --- | -| `mart_property_daily` | Property-level daily reporting mart | -| `mart_segment_daily` | Segment-level daily reporting mart | -| `mart_source_daily` | Source/channel daily reporting mart | -| `mart_roomtype_daily` | Roomtype-level daily reporting mart | - -These marts calculate ADR, OCC, RevPAR, variance metrics, pace comparisons, and percentage-change calculations. - ---- - -## Warehouse Design Philosophy - -The Metrics Platform warehouse is intentionally structured around how hospitality reporting behaves operationally. - -Different datasets update differently, snapshot differently, aggregate differently, and support different business questions. 
The warehouse architecture preserves those distinctions rather than flattening everything into a single reporting layer. - -The objective is creating a reporting framework where metrics remain interpretable, dashboards remain stable, benchmark logic remains trustworthy, and operational decisions remain grounded in consistent data behavior. - -Because clean architecture matters. - -Especially when the source file arrived named: - -```text -final_v2_actual_USE_THIS_latest(3).xlsx -``` \ No newline at end of file diff --git a/metrics-library/table-prefix-standards.mdx b/metrics-library/table-prefix-standards.mdx deleted file mode 100644 index 7df37ba..0000000 --- a/metrics-library/table-prefix-standards.mdx +++ /dev/null @@ -1,217 +0,0 @@ ---- -title: "table-prefix-standards.mdx" -description: "Standardized table prefixes used throughout the Metrics Platform warehouse architecture" ---- - -# Table Prefix Standards - -## Metrics Platform - -This document defines the standardized table prefixes used throughout the Metrics Platform warehouse architecture. - -**Prefixes are used to communicate:** - -- the purpose of a table, -- how the data behaves operationally, -- where the table exists in the pipeline, -- and how the dataset should be consumed downstream. - -The objective is immediate interpretability. - -A table name should communicate enough context that analysts, pipelines, dashboards, and automation workflows can infer how the dataset behaves without opening the schema first. - -**Because in a healthy warehouse, structure should explain intent.** - ---- - -## Prefix Philosophy - -The Metrics Platform separates datasets by operational responsibility rather than forcing every table into a generic reporting structure. - -**Different tables serve different purposes:** - -- some preserve raw operational behavior, -- some normalize source inconsistencies, -- some store finalized production metrics, -- and others exist purely for reporting convenience. 
- -Prefixes make those distinctions visible immediately. - ---- - -## Standard Prefix Framework - -| Prefix | Use For | Examples | -| --- | --- | --- | -| `stg_` | Cleaned and typed staging tables before business modeling and semantic transformation | `stg_ideas_pace_property` | -| `snap_` | Snapshot-based datasets that change over time, including pace, OTB, forecast, and other fluid reporting states | `snap_pace_property` | -| `vw_` | BI-facing reporting views layered over fact and snapshot tables | `vw_pace_property` | -| `dim_` | Shared descriptive dimensions and standardized reference entities | `dim_property`, `dim_roomtype` | -| `map_` | Source-to-standard mapping relationships used for normalization | `map_segment`, `map_roomtype` | -| `lkp_` | Controlled lookup lists and allowed-value reference tables | `lkp_plan_type`, `lkp_event_category` | -| `fact_` | Finalized operational fact tables containing measurable business activity | `fact_actual_property`, `fact_booking`, `fact_finance_gl` | - ---- - -## Staging Tables (`stg_`) - -Staging tables normalize raw source data before semantic modeling occurs. - -**These tables typically handle:** - -- type casting, -- date normalization, -- field cleanup, -- source-system inconsistencies, -- and initial structural transformations. - -Staging tables preserve enough source-system behavior for traceability while making the data usable for warehouse processing. - -```text -stg_ideas_pace_property -stg_duetto_rms_segment -stg_costar_market_summary -``` - ---- - -## Snapshot Tables (`snap_`) - -Snapshot tables store reporting states captured at a specific point in time. - -**These datasets are commonly used for:** - -- pace reporting, -- OTB analysis, -- pickup tracking, -- forecast snapshots, -- and evolving operational states. - -Snapshot data is intentionally fluid. The same stay date may appear multiple times across different snapshot dates because the business position changes over time. 
- -```text -snap_pace_property -snap_pace_segment -snap_forecast_daily -``` - ---- - -## Reporting Views (`vw_`) - -Views provide BI-facing reporting layers designed for dashboards, semantic models, and downstream analytics. - -**Views typically:** - -- simplify joins, -- standardize KPI calculations, -- flatten reporting relationships, -- and expose reporting-friendly structures. - -```text -vw_pace_property -vw_actual_segment -vw_forecast_summary -``` - ---- - -## Dimension Tables (`dim_`) - -Dimension tables store descriptive reference entities shared throughout the warehouse. - -**These tables provide:** - -- standardized labels, -- controlled business definitions, -- dimensional hierarchies, -- and reporting relationships. - -```text -dim_property -dim_roomtype -dim_source -dim_segment -``` - ---- - -## Mapping Tables (`map_`) - -Mapping tables normalize source-system values into standardized warehouse structures. - -**Hospitality systems rarely agree on:** - -- segment names, -- source/channel naming, -- roomtype structures, -- or operational classifications. - -Mapping tables resolve those inconsistencies while preserving source traceability. - -```text -map_segment -map_roomtype -map_source -map_rate_code -``` - ---- - -## Lookup Tables (`lkp_`) - -Lookup tables define controlled value sets used throughout the warehouse. - -**These tables standardize:** - -- enums, -- workflow states, -- classifications, -- allowed values, -- and operational categories. - -```text -lkp_plan_type -lkp_event_category -lkp_forecast_status -``` - ---- - -## Fact Tables (`fact_`) - -Fact tables store measurable operational business activity. - -**These datasets represent:** - -- transactions, -- production metrics, -- financial activity, -- booking behavior, -- operational performance, -- and finalized reporting outcomes. 
- -```text -fact_actual_property -fact_booking -fact_finance_gl -fact_price_shop -``` - ---- - -## Prefix Standards in Practice - -Prefixes are intentionally concise and operationally descriptive. - -**The objective is immediate readability during:** - -- warehouse development, -- dashboard modeling, -- SQL analysis, -- automation workflows, -- and debugging. - -A well-structured warehouse should allow someone to infer what the table contains, how stable the data is, and how the dataset behaves operationally directly from the table name itself. - -**Because good warehouse architecture scales. Mystery tables do not.** \ No newline at end of file diff --git a/metrics-library/table_prefix_standards.mdx b/metrics-library/table_prefix_standards.mdx new file mode 100644 index 0000000..ab97e8b --- /dev/null +++ b/metrics-library/table_prefix_standards.mdx @@ -0,0 +1,255 @@ +--- +title: Table Prefix Standards +has_toc: false +nav_order: 2 +--- + +# Table Prefix Standards + +## Metrics Platform + +This document defines the standardized table prefixes used throughout the Metrics Platform warehouse architecture. + +**Prefixes are used to communicate:** + +* the purpose of a table, +* how the data behaves operationally, +* where the table exists in the pipeline, +* and how the dataset should be consumed downstream. + +The objective is immediate interpretability. + +A table name should communicate enough context that analysts, pipelines, dashboards, and automation workflows can infer how the dataset behaves without opening the schema first. + +**Because in a healthy warehouse, structure should explain intent.** + + + +# Prefix Philosophy + +The Metrics Platform separates datasets by operational responsibility rather than forcing every table into a generic reporting structure. 
+ +**Different tables serve different purposes:** + +* some preserve raw operational behavior, +* some normalize source inconsistencies, +* some store finalized production metrics, +* and others exist purely for reporting convenience. + +Prefixes make those distinctions visible immediately. + + + +# Standard Prefix Framework + +| Prefix | Use For | Examples | +|---|---|---| +| `stg_` | Cleaned and typed staging tables before business modeling and semantic transformation| `stg_ideas_pace_property` | +| `snap_` | Snapshot-based datasets that change over time, including pace, OTB, forecast, and other fluid reporting states | `snap_pace_property` | +| `vw_` | BI-facing reporting views layered over fact and snapshot tables | `vw_pace_property` | +| `dim_` | Shared descriptive dimensions and standardized reference entities| `dim_property`, `dim_roomtype` | +| `map_` | Source-to-standard mapping relationships used for normalization | `map_segment`, `map_roomtype` | +| `lkp_` | Controlled lookup lists and allowed-value reference tables| `lkp_plan_type`, `lkp_event_category` | +| `fact_` | Finalized operational fact tables containing measurable business activity| `fact_actual_property`, `fact_booking`, `fact_finance_gl` | + + + +# Staging Tables (stg_) + +Staging tables normalize raw source data before semantic modeling occurs. + +**These tables typically handle:** + +* type casting, +* date normalization, +* field cleanup, +* source-system inconsistencies, +* and initial structural transformations. + +Staging tables preserve enough source-system behavior for traceability while making the data usable for warehouse processing. + +## Examples + +```text id="v88jwl" +stg_ideas_pace_property +stg_duetto_rms_segment +stg_costar_market_summary +``` + + + +# Snapshot Tables (snap_) + +Snapshot tables store reporting states captured at a specific point in time. 
+ +**These datasets are commonly used for:** + +* pace reporting, +* OTB analysis, +* pickup tracking, +* forecast snapshots, +* and evolving operational states. + +Snapshot data is intentionally fluid. The same stay date may appear multiple times across different snapshot dates because the business position changes over time. + +## Examples + +```text id="36z33t" +snap_pace_property +snap_pace_segment +snap_forecast_daily +``` + + + +# Reporting Views (vw_) + +Views provide BI-facing reporting layers designed for dashboards, semantic models, and downstream analytics. + +**Views typically:** + +* simplify joins, +* standardize KPI calculations, +* flatten reporting relationships, +* and expose reporting-friendly structures. + +These layers improve reporting consistency while reducing duplicated dashboard logic. + +## Examples + +```text id="yzn5bw" +vw_pace_property +vw_actual_segment +vw_forecast_summary +``` + + + +# Dimension Tables (dim_) + +Dimension tables store descriptive reference entities shared throughout the warehouse. + +**These tables provide:** + +* standardized labels, +* controlled business definitions, +* dimensional hierarchies, +* and reporting relationships. + +Dimensions create consistency across operational systems that may structure the same concepts differently. + +## Examples + +```text id="lm6x1n" +dim_property +dim_roomtype +dim_source +dim_segment +``` + + + +# Mapping Tables (map_) + +Mapping tables normalize source-system values into standardized warehouse structures. + +**Hospitality systems rarely agree on:** + +* segment names, +* source/channel naming, +* roomtype structures, +* or operational classifications. + +Mapping tables resolve those inconsistencies while preserving source traceability. + +## Examples + +```text id="l1jwd7" +map_segment +map_roomtype +map_source +map_rate_code +``` + + + +# Lookup Tables (lkp_) + +Lookup tables define controlled value sets used throughout the warehouse. 
+ +**These tables standardize:** + +* enums, +* workflow states, +* classifications, +* allowed values, +* and operational categories. + +**Lookup tables improve:** + +* validation consistency, +* reporting integrity, +* and semantic predictability. + +## Examples + +```text id="9okbvu" +lkp_plan_type +lkp_event_category +lkp_forecast_status +``` + + + +# Fact Tables (fact_) + +Fact tables store measurable operational business activity. + +**These datasets represent:** + +* transactions, +* production metrics, +* financial activity, +* booking behavior, +* operational performance, +* and finalized reporting outcomes. + +**Fact tables typically contain:** + +* additive metrics, +* dimensional foreign keys, +* dates, +* and operational KPIs. + +## Examples + +```text id="r8w5lb" +fact_actual_property +fact_booking +fact_finance_gl +fact_price_shop +``` + + + +# Prefix Standards in Practice + +Prefixes are intentionally concise and operationally descriptive. + +**The objective is immediate readability during:** + +* warehouse development, +* dashboard modeling, +* SQL analysis, +* automation workflows, +* and debugging. + +**A well-structured warehouse should allow someone to infer:** + +* what the table contains, +* how stable the data is, +* and how the dataset behaves operationally + +directly from the table name itself. + +**Because good warehouse architecture scales. Mystery tables do not.**