diff --git a/functions/theme.py b/functions/theme.py
new file mode 100644
index 0000000..3e67298
--- /dev/null
+++ b/functions/theme.py
@@ -0,0 +1,204 @@
+from __future__ import annotations
+
+import plotly.graph_objects as go
+import streamlit as st
+
+# ── Color palette ─────────────────────────────────────────────────────────────
+# Core palette. Every colour in this module is a "#RRGGBB" hex string.
+WHITE = "#FFFFFF"
+INK_BLACK = "#00171F"
+DEEP_BLUE = "#003459"
+CERULEAN = "#007EA7"
+FRESH_SKY = "#00A8E8"
+
+# Extended palette — same cool aquatic vibe
+LIGHT_BLUE = "#E0F4FB" # very light sky — backgrounds, subtle fills
+MID_BLUE = "#005F8A" # between deep and cerulean
+TEAL = "#00B4A6" # warm teal accent
+SLATE = "#2C4A52" # muted dark — secondary text, borders
+STEEL = "#8DB7C7" # mid-tone for neutral chart fills
+ICE = "#D6EFF7" # lightest blue — card backgrounds
+
+# Semantic colors
+SUCCESS = "#00B894" # green — safe / positive
+WARNING = "#FDCB6E" # amber — SWARMP / caution
+DANGER = "#D63031" # red — unsafe / critical
+
+# Background
+BG = WHITE
+
+# Color sequences for charts (ordered by visual weight)
+# Used as the discrete colour order for categorical traces.
+COLOR_SEQUENCE = [DEEP_BLUE, CERULEAN, FRESH_SKY, TEAL, MID_BLUE, STEEL]
+
+# Color scales
+# Each scale is a list of [position, colour] stops with positions 0, 0.5 and 1.
+COLOR_SCALE = [[0, ICE], [0.5, CERULEAN], [1, INK_BLACK]]
+# NOTE(review): despite the name, every stop here is a blue (LIGHT_BLUE → FRESH_SKY → DEEP_BLUE).
+COLOR_SCALE_WARM = [[0, LIGHT_BLUE], [0.5, FRESH_SKY], [1, DEEP_BLUE]]
+COLOR_SCALE_RISK = [[0, ICE], [0.5, WARNING], [1, DANGER]]
+
+# Facade status colors
+# Keys are upper-case status labels; "NO REPORT FILED" and "UNKNOWN" share the neutral STEEL.
+FACADE_COLORS = {
+ "SAFE": SUCCESS,
+ "SWARMP": WARNING,
+ "UNSAFE": DANGER,
+ "NO REPORT FILED": STEEL,
+ "UNKNOWN": STEEL,
+}
+
+# Analytics threshold
+# Expressed as a fraction (0.30 = 30 %), not a percentage.
+UNSAFE_THRESHOLD = 0.30
+
+
+# ── CSS injection ─────────────────────────────────────────────────────────────
+def apply_css() -> None:
+ """Inject the dashboard's markup via ``st.markdown(..., unsafe_allow_html=True)``.
+
+ NOTE(review): the f-string body is empty in this view — presumably it holds a
+ ``<style>`` block that was stripped; confirm against the real file.
+ """
+ st.markdown(
+ f"""
+
+ """,
+ unsafe_allow_html=True,
+ )
+
+
+# ── Chart theme ───────────────────────────────────────────────────────────────
+def apply_chart_theme(fig) -> go.Figure:
+    """Apply the shared dashboard styling to ``fig`` and return the same figure.
+
+    Sets background, font, title, legend and margin options through
+    ``update_layout``, then gives the x and y axes identical grid, line and
+    label colours. The figure passed in is the object that is returned, so the
+    call can be nested directly inside ``st.plotly_chart(...)``.
+    """
+    # Layout-level options (backgrounds, fonts, legend frame, tight margins).
+    layout_style = {
+        "paper_bgcolor": WHITE,
+        "plot_bgcolor": WHITE,
+        "font_color": INK_BLACK,
+        "font_family": "Arial, sans-serif",
+        "title_font_color": INK_BLACK,
+        "title_font_size": 15,
+        "title_font_family": "Arial, sans-serif",
+        "legend_bgcolor": WHITE,
+        "legend_bordercolor": LIGHT_BLUE,
+        "legend_borderwidth": 1,
+        "margin": {"r": 10, "t": 50, "l": 10, "b": 10},
+    }
+    # Both axes use exactly the same colours, so the options are defined once.
+    axis_style = {
+        "gridcolor": LIGHT_BLUE,
+        "linecolor": ICE,
+        "tickfont_color": SLATE,
+        "title_font_color": SLATE,
+    }
+
+    fig.update_layout(**layout_style)
+    fig.update_xaxes(**axis_style)
+    fig.update_yaxes(**axis_style)
+    return fig
+
+
+# ── Alert / info boxes ────────────────────────────────────────────────────────
+def warning_box(message: str) -> None:
+ """Render ``message`` behind a warning icon through ``st.markdown``.
+
+ NOTE(review): ``message`` is interpolated into the markup unescaped and rendered
+ with ``unsafe_allow_html=True`` — pass trusted text only. The surrounding HTML
+ is not visible in this view.
+ """
+ st.markdown(
+ f"
"
+ f"⚠️ {message}
",
+ unsafe_allow_html=True,
+ )
+
+
+def caution_box(message: str) -> None:
+ """Render ``message`` behind a warning icon through ``st.markdown``.
+
+ NOTE(review): in this view the body matches ``warning_box``; any difference
+ presumably lives in the wrapper markup, which is not visible here — confirm.
+ ``message`` is interpolated unescaped with ``unsafe_allow_html=True``.
+ """
+ st.markdown(
+ f""
+ f"⚠️ {message}
",
+ unsafe_allow_html=True,
+ )
+
+
+def info_box(message: str) -> None:
+ """Render ``message`` behind a light-bulb icon through ``st.markdown``.
+
+ ``message`` is interpolated into the markup unescaped and rendered with
+ ``unsafe_allow_html=True``, so callers should pass trusted text only.
+ """
+ st.markdown(
+ f""
+ f"💡 {message}
",
+ unsafe_allow_html=True,
+ )
+
+
+def success_box(message: str) -> None:
+ """Render ``message`` behind a check-mark icon through ``st.markdown``.
+
+ ``message`` is interpolated into the markup unescaped and rendered with
+ ``unsafe_allow_html=True``, so callers should pass trusted text only.
+ """
+ st.markdown(
+ f""
+ f"✅ {message}
",
+ unsafe_allow_html=True,
+ )
+
+
+# ── Page header ───────────────────────────────────────────────────────────────
+def page_header(title: str, subtitle: str = "") -> None:
+ """Render the page title and, when ``subtitle`` is non-empty, a subtitle.
+
+ Each part goes through its own ``st.markdown`` call with
+ ``unsafe_allow_html=True``; both strings are interpolated as-is.
+ """
+ st.markdown(
+ f""
+ f"{title}
",
+ unsafe_allow_html=True,
+ )
+ # An empty subtitle (the default) skips the second markdown call entirely.
+ if subtitle:
+ st.markdown(
+ f""
+ f"{subtitle}
",
+ unsafe_allow_html=True,
+ )
+
+
+# ── Section divider ───────────────────────────────────────────────────────────
+def section_header(title: str) -> None:
+ """Render ``title`` as a section heading through ``st.markdown``.
+
+ ``title`` is interpolated into the markup unescaped and rendered with
+ ``unsafe_allow_html=True``.
+ """
+ st.markdown(
+ f""
+ f"{title}
",
+ unsafe_allow_html=True,
+ )
diff --git a/pages/0_Proposal.py b/pages/0_Proposal.py
index 8cbc874..16172c6 100644
--- a/pages/0_Proposal.py
+++ b/pages/0_Proposal.py
@@ -1,96 +1,104 @@
import time
-from contextlib import contextmanager
import streamlit as st
-st.set_page_config(page_title="Proposal")
+from functions.theme import apply_css, page_header
+st.set_page_config(page_title="Proposal", layout="wide")
+apply_css()
-# add page load time
-@contextmanager
-def display_load_time():
- start_time = time.time()
+start_time = time.time()
- try:
- yield
- finally:
- elapsed = time.time() - start_time
- st.caption(f"Page loaded in {elapsed:.2f} seconds")
+page_header(
+ "Project Proposal",
+ "NYC Building Insights: Unraveling the Web of NYC Building Data",
+)
-
-with display_load_time():
- st.title("Project Proposal: NYC Building Insights: Unraveling the Web of NYC Building Data")
-
- # Original Proposal
- st.subheader("Project Overview")
-
- st.write("""
+# ── Project Overview ──────────────────────────────────────────────────────────
+st.markdown("### Project Overview")
+st.write("""
We are planning to explore several NYC open datasets to better understand the relationship
-between construction activity, housing conditions, and socioeconomic factors across New York City.
+between construction activity, housing conditions, and socioeconomic factors across
+New York City.
""")
- st.divider()
- # Main datasets
- with st.container(border=True):
- st.subheader("Main Datasets")
+st.divider()
- st.markdown("""
-**1. DOB-Now Job Permit**
-Records historical job permit in NYC such as work type (new building, demolition, etc.).
+# ── Main Datasets ─────────────────────────────────────────────────────────────
+with st.container(border=True):
+ st.markdown("### Main Datasets")
+ st.markdown("""
+**1. DOB NOW Build: Approved Permits**
+Records historical job permits in NYC such as work type (new building, demolition, etc.).
Provides insights about construction patterns across NYC.
-[Dataset](https://data.cityofnewyork.us/Housing-Development/DOB-NOW-Build-Approved-Permits/rbx6-tga4/about_data)
+[View Dataset](https://data.cityofnewyork.us/Housing-Development/DOB-NOW-Build-Approved-Permits/rbx6-tga4/about_data)
-**2. NYC Evictions**
+**2. DOB Permit Issuance**
+Records permits issued by the NYC Department of Buildings including job type,
+borough, and filing dates. Covers new buildings, alterations, and demolitions.
+[View Dataset](https://data.cityofnewyork.us/Housing-Development/DOB-Permit-Issuance/ipu4-2q9a/about_data)
+
+**3. NYC Evictions**
Executed residential evictions across the five boroughs since 2017, sortable by borough,
building type, and date.
-[Dataset](https://data.cityofnewyork.us/City-Government/Evictions/6z8x-wfk4/about_data)
+[View Dataset](https://data.cityofnewyork.us/City-Government/Evictions/6z8x-wfk4/about_data)
-**3. ACS Census Income Data**
-Median household income aggregated at the county (borough) level from the U.S. Census Bureau's
-American Community Survey. Updated annually or every five years.
-""")
+**4. DOB Complaints Received**
+Records complaints submitted by tenants or members of the public. Includes complaint category,
+source, location, and response status.
+[View Dataset](https://data.cityofnewyork.us/Housing-Development/DOB-Complaints-Received/eabe-havv/about_data)
- st.write("")
+**5. DOB NOW: Safety Facade Compliance Filings (FISP)**
+Records facade inspection filings for buildings taller than 6 stories, including
+filing status (Safe, SWARMP, Unsafe) and inspection cycle.
+[View Dataset](https://data.cityofnewyork.us/Housing-Development/DOB-NOW-Safety-Facade-Compliance-Filings/xubg-57si/about_data)
- # Additional / Potential Datasets Box
- with st.container(border=True):
- st.subheader("Additional datasets being considered")
+**6. NYC Building Footprints**
+Contains building geometry, construction year, height, and borough for all
+buildings in New York City.
+[View Dataset](https://data.cityofnewyork.us/Housing-Development/Building-Footprints/nqwf-w8eh/about_data)
+""")
+
+st.write("")
- st.markdown("""
+# ── Additional Datasets ───────────────────────────────────────────────────────
+with st.container(border=True):
+ st.markdown("### Additional Datasets Considered")
+ st.markdown("""
**1. DOB Violations**
Records violations recorded by the DOB including violation type, severity, location, and status.
Reflects compliance issues related to building safety, zoning, and construction regulations.
-[Dataset](https://data.cityofnewyork.us/Housing-Development/DOB-Violations/3h2n-5cm9/about_data)
+[View Dataset](https://data.cityofnewyork.us/Housing-Development/DOB-Violations/3h2n-5cm9/about_data)
-**2. DOB Complaints Received**
-Records complaints submitted by tenants or members of the public. Includes complaint category,
-source, location, and response status.
-[Dataset](https://data.cityofnewyork.us/Housing-Development/DOB-Complaints-Received/eabe-havv/about_data)
-
-**3. DOB Disciplinary Actions**
-Records disciplinary actions taken against professionals or entities (e.g., contractors, engineers)
+**2. DOB Disciplinary Actions**
+Records disciplinary actions taken against professionals or entities (e.g. contractors, engineers)
for violations or misconduct. Includes action types, outcomes, and associated cases.
-[Dataset](https://data.cityofnewyork.us/Housing-Development/DOB-Disciplinary-Actions/ndq3-kuef/about_data)
+[View Dataset](https://data.cityofnewyork.us/Housing-Development/DOB-Disciplinary-Actions/ndq3-kuef/about_data)
+
+**3. ACS Census Income Data**
+Median household income aggregated at the county (borough) level from the U.S. Census Bureau's
+American Community Survey. Updated annually or every five years.
""")
- st.divider()
+st.divider()
- # Research Questions
- with st.container(border=True):
- st.subheader("Research Questions")
- st.write("""Following the feedback on our initial proposal, we are planning to combine
- different datasets to better understand relationships between construction activity,
- housing conditions, and eviction trends across New York City.
+# ── Research Questions ────────────────────────────────────────────────────────
+with st.container(border=True):
+ st.markdown("### Research Questions")
+ st.write("""
+Following the feedback on our initial proposal, we combined multiple datasets to better
+understand relationships between construction activity, housing conditions, and eviction
+trends across New York City.
""")
- st.markdown("""
+ st.markdown("""
**1. How does construction activity relate to eviction patterns across NYC boroughs?**
We are interested in exploring whether areas with higher levels of construction activity
(such as new building permits or major renovations) also experience higher eviction rates.
This may reveal patterns related to redevelopment or potential housing displacement.
-**2. Are building complaints and violations associated with eviction outcomes?**
-The dashboard will allow users to investigate whether buildings with more complaints or safety
-violations are also more likely to experience evictions, helping identify possible links
+**2. Are building complaints and facade conditions associated with eviction outcomes?**
+The dashboard allows users to investigate whether buildings with more complaints or unsafe
+facade filings are also more likely to experience evictions, helping identify possible links
between housing conditions and tenant displacement.
**3. How do socioeconomic conditions relate to housing enforcement and evictions?**
@@ -98,43 +106,63 @@ def display_load_time():
experience higher rates of complaints, violations, or evictions.
""")
- st.write("")
+st.write("")
- # Target Visualization
- with st.container(border=True):
- st.subheader("Target Visualizations")
+# ── Target Visualizations ─────────────────────────────────────────────────────
+with st.container(border=True):
+ st.markdown("### Target Visualizations")
+ st.markdown("""
+Our main visualization is an **interactive dashboard** displaying housing and building-related
+data by borough across New York City. Users can explore patterns in construction activity,
+evictions, complaints, and facade inspections geographically and over time.
- st.markdown("""
-Our main visualization will be an **interactive map of New York City** displaying housing
-and building-related data by borough or neighborhood. Users will be able to explore patterns
-in construction activity, evictions, and housing violations geographically.
+Additional visualizations include:
-Additional visualizations may include:
-
-- **Time-series charts** showing trends in construction filings and evictions over time.
-- **Bar charts** comparing boroughs across indicators such as complaints, violations,
- and income levels.
+- **Choropleth maps** showing eviction rates, unsafe facade counts, and permit activity by borough.
+- **Time-series charts** showing trends in construction filings, complaints, and evictions over time.
+- **Heatmaps** comparing boroughs across indicators such as permit types and construction decades.
+- **Anomaly detection charts** flagging months with unusually high eviction activity.
+- **Gauge charts** showing facade safety rates (Safe, SWARMP, Unsafe).
""")
- st.write("")
+st.write("")
+
+# ── How the App Evolved ───────────────────────────────────────────────────────
+with st.container(border=True):
+ st.markdown("### How the Project Evolved")
+ st.markdown("""
+Our original proposal focused primarily on evictions and income data. Through the course of
+the project, we expanded the scope significantly to include:
+
+- **Building footprints** to analyze construction density by borough.
+- **DOB permit data** from two separate systems (DOB NOW and DOB Permit Issuance)
+ that required schema normalization and combined loading.
+- **Facade inspection data (FISP)** to track building safety trends across inspection cycles.
+- **DOB complaints data** with priority classification and response time analysis.
- # Known Unknowns + Challenges
- with st.container(border=True):
- st.subheader("Known Unknowns and Anticipated Challenges")
+We also built a full ETL pipeline with automated daily refresh via GitHub Actions,
+storing all data in Google BigQuery for fast dashboard queries.
+""")
+
+st.write("")
- st.markdown("""
+# ── Known Unknowns + Challenges ───────────────────────────────────────────────
+with st.container(border=True):
+ st.markdown("### Known Unknowns and Anticipated Challenges")
+ st.markdown("""
**Known Unknowns**
- Differences in geographic granularity across datasets (borough vs ZIP vs address level).
-- Difficulty joining datasets due to inconsistent formats or identifiers.
+- Difficulty joining datasets due to inconsistent column names and date formats across APIs.
- Uncertainty about how strong relationships between datasets will appear in the data.
-- Changes in API that affects functions and/or visualization pages.
+- Changes in NYC Open Data APIs that affect column availability or data freshness.
**Anticipated Challenges**
-- Cleaning and standardizing large NYC open datasets.
-- Handling large dataset sizes efficiently in the dashboard.
-- Designing visualizations that communicate complex relationships clearly without
- overwhelming users.
-- Understanding the difference of loading streamlit in local computer and web.
+- Cleaning and standardizing large NYC open datasets with inconsistent schemas.
+- Handling API limitations (no SELECT on geometry columns, null date fields, rate limits).
+- Designing visualizations that communicate complex relationships clearly.
+- Managing BigQuery free tier constraints (no DML, truncate-only refresh strategy).
""")
+
+# Elapsed wall-clock time since ``start_time`` was captured near the top of the
+# script (after ``set_page_config`` and ``apply_css()``); covers the script run only.
+st.caption(f"Page loaded in {time.time() - start_time:.2f} seconds")
diff --git a/pages/1_Building Overview.py b/pages/1_Building Overview.py
index fa7c143..89d0cd8 100644
--- a/pages/1_Building Overview.py
+++ b/pages/1_Building Overview.py
@@ -1,11 +1,11 @@
from __future__ import annotations
import time
-from datetime import datetime, timedelta
import pandas as pd
import pandas_gbq
import plotly.express as px
+import plotly.graph_objects as go
import requests
import streamlit as st
from google.oauth2 import service_account
@@ -17,9 +17,24 @@
first_column,
permit_timeseries_by_borough,
)
+from functions.theme import (
+ COLOR_SCALE,
+ COLOR_SCALE_RISK,
+ COLOR_SEQUENCE,
+ DANGER,
+ DEEP_BLUE,
+ FACADE_COLORS,
+ FRESH_SKY,
+ UNSAFE_THRESHOLD,
+ apply_chart_theme,
+ apply_css,
+ info_box,
+ page_header,
+ warning_box,
+)
st.set_page_config(page_title="NYC Buildings Overview", layout="wide")
-st.title("NYC Buildings Overview")
+apply_css()
start_time = time.time()
@@ -27,6 +42,16 @@
DATASET = "cosmic_spaghetti"
MIN_CONSTRUCTION_YEAR = 1900
MAX_CONSTRUCTION_YEAR = 2025
+# NOTE(review): neither threshold is referenced in the visible part of this file —
+# presumably percentages compared against unsafe_rate / at_risk_rate; confirm.
+UNSAFE_RATE_WARNING = 15
+AT_RISK_RATE_WARNING = 40
+
+# Borough area in square miles, keyed by upper-case borough name so it can be
+# looked up from ``borough.str.upper()`` when computing per-square-mile densities.
+BOROUGH_AREA_SQ_MI = {
+ "MANHATTAN": 22.8,
+ "BROOKLYN": 71.0,
+ "QUEENS": 109.0,
+ "BRONX": 42.0,
+ "STATEN ISLAND": 58.0,
+}
@st.cache_data(ttl=86400, show_spinner=False)
@@ -64,11 +89,13 @@ def load_buildings_summary() -> pd.DataFrame:
def load_new_buildings() -> pd.DataFrame:
query = f"""
SELECT borough, permit_date, permit_type, permit_type_desc,
- status, latitude, longitude
+ status, latitude, longitude
FROM `{PROJECT_ID}.{DATASET}.permits`
WHERE permit_type = 'NB'
AND borough IS NOT NULL
AND permit_date IS NOT NULL
+ AND permit_date >= '2008-01-01'
+ AND permit_date <= '2020-12-31'
"""
df = pandas_gbq.read_gbq(
query,
@@ -85,15 +112,12 @@ def load_new_buildings() -> pd.DataFrame:
@st.cache_data(ttl=3600, show_spinner=False)
def load_other_permits() -> pd.DataFrame:
- one_year_ago = (datetime.now() - timedelta(days=365)).strftime("%Y-%m-%d")
query = f"""
SELECT borough, permit_date, permit_type, permit_type_desc,
status, latitude, longitude, source
FROM `{PROJECT_ID}.{DATASET}.permits`
WHERE permit_type != 'NB'
- AND permit_date >= '{one_year_ago}'
AND borough IS NOT NULL
- LIMIT 50000
"""
df = pandas_gbq.read_gbq(
query,
@@ -134,58 +158,97 @@ def load_facades() -> pd.DataFrame:
df_facades = load_facades()
nyc_geo = get_geojson()
-df_other_2025 = df_other[df_other["permit_date"] >= "2025-01-01"]
+# Keep only permits dated 2025-01-01 or later so this frame matches the "Jan 2025+"
+# labels used below; load_other_permits() no longer applies any date filter or LIMIT,
+# so without this the count would cover every non-NB permit in the table.
+# .copy() gives an independent frame instead of a slice of df_other.
+df_other_2025 = df_other[df_other["permit_date"] >= "2025-01-01"].copy()
-# ── Top metrics ───────────────────────────────────────────────────────────────
-total_unsafe = (
- len(df_facades[df_facades["filing_status"].str.contains("UNSAFE", na=False)])
+# ── Pre-compute key stats ─────────────────────────────────────────────────────
+total_buildings = int(df_summary["total_buildings"].sum())
+total_new = len(df_new)
+total_other_2025 = len(df_other_2025)
+
+# "UNSAFE" contains the substring "SAFE", so a plain substring match would count
+# every unsafe filing as safe as well, inflating total_filed and understating
+# unsafe_rate / at_risk_rate. The \b anchors restrict the match to the whole word.
+safe_count = (
+ len(df_facades[df_facades["filing_status"].str.contains(r"\bSAFE\b", na=False)])
 if not df_facades.empty
 else 0
)
-
-col1, col2, col3, col4 = st.columns(4)
-col1.metric(
- label="Total Buildings (end of 2025)",
- value=f"{int(df_summary['total_buildings'].sum()):,}",
- border=True,
-)
-col2.metric(
- label="New Building Jobs (2008–2020)",
- value=f"{len(df_new):,}",
- border=True,
+swarmp_count = (
+ len(df_facades[df_facades["filing_status"].str.contains("SWARMP", na=False)])
+ if not df_facades.empty
+ else 0
)
-col3.metric(
- label="Other Permits (Jan 2025+)",
- value=f"{len(df_other_2025):,}",
- border=True,
+unsafe_count = (
+ len(df_facades[df_facades["filing_status"].str.contains("UNSAFE", na=False)])
+ if not df_facades.empty
+ else 0
)
-col4.metric(
- label="Unsafe Facade Filings",
- value=f"{total_unsafe:,}",
- border=True,
+total_filed = safe_count + swarmp_count + unsafe_count
+unsafe_rate = unsafe_count / total_filed * 100 if total_filed > 0 else 0
+at_risk_rate = (unsafe_count + swarmp_count) / total_filed * 100 if total_filed > 0 else 0
+
+# ── Page header ───────────────────────────────────────────────────────────────
+page_header(
+ "🏙️ NYC Buildings Overview",
+ "Explore building activity across New York City's five boroughs — "
+ "total building stock, new construction, active permits, and facade safety inspections.",
)
+# ── Unsafe facade warning ─────────────────────────────────────────────────────
+if not df_facades.empty:
+    # Evaluate the UNSAFE test once for the whole frame, then slice it per borough.
+    is_unsafe = df_facades["filing_status"].str.contains("UNSAFE", na=False)
+    unsafe_boros = []
+    for name in df_facades["borough"].unique():
+        in_boro = df_facades["borough"] == name
+        n_rows = int(in_boro.sum())
+        # Share of this borough's filings that are UNSAFE (0 when it has no rows).
+        share = int((is_unsafe & in_boro).sum()) / n_rows if n_rows > 0 else 0
+        if share > UNSAFE_THRESHOLD:
+            unsafe_boros.append(f"{name.title()} ({share:.0%})")
+    # Only show the banner when at least one borough crossed the threshold.
+    if unsafe_boros:
+        warning_box(f"High unsafe facade rate detected — {' · '.join(unsafe_boros)}")
+
+# ── KPI row ───────────────────────────────────────────────────────────────────
+c1, c2, c3, c4, c5 = st.columns(5)
+c1.metric("Total Buildings", f"{total_buildings:,}", border=True)
+c2.metric("New Building Jobs (2008–2020)", f"{total_new:,}", border=True)
+c3.metric("Active Construction Jobs (Jan 2025+)", f"{total_other_2025:,}", border=True)
+c4.metric("Unsafe Facade Filings", f"{unsafe_count:,}", border=True)
+c5.metric("Overall Unsafe Rate", f"{unsafe_rate:.1f}%", border=True)
+
st.divider()
+# ── Tabs ──────────────────────────────────────────────────────────────────────
tab1, tab2, tab3, tab4 = st.tabs(
[
- "🏙️ Total Buildings",
- "🏗️ New Building Jobs",
- "🔨 Other Building Jobs",
- "🔍 Facade Inspection (FISP)",
+ "🏙️ Total Buildings",
+ "🏗️ New Building Jobs",
+ "🔨 Construction & Renovation",
+ "🔍 Facade Inspection (FISP)",
]
)
# ══════════════════════════════════════════════════════════════════════════════
-# TAB 1 — TOTAL BUILDINGS IN NYC
+# TAB 1 — TOTAL BUILDINGS
# ══════════════════════════════════════════════════════════════════════════════
with tab1:
- st.subheader("Total Buildings in NYC (end of 2025)")
- st.caption("Source: NYC Building Footprints (5zhs-2jue) — cnstrct_yr ≤ 2025")
+ st.markdown("### Building Stock by Borough")
+ st.caption(
+ "Source: NYC Building Footprints (5zhs-2jue) — all buildings with construction year ≤ 2025"
+ )
- boro_summary = df_summary.groupby("borough")["total_buildings"].sum().reset_index()
+ boro_summary = (
+ df_summary.groupby("borough")
+ .agg(
+ total_buildings=("total_buildings", "sum"),
+ avg_height=("avg_height", "mean"),
+ )
+ .reset_index()
+ )
boro_summary["Borough"] = boro_summary["borough"].str.title()
+ boro_summary["Area (Square Mile)"] = boro_summary["borough"].str.upper().map(BOROUGH_AREA_SQ_MI)
+ boro_summary["Buildings per Square Mile"] = (
+ (boro_summary["total_buildings"] / boro_summary["Area (Square Mile)"]).round(0).astype(int)
+ )
col1, col2 = st.columns(2)
with col1:
@@ -195,59 +258,168 @@ def load_facades() -> pd.DataFrame:
locations="Borough",
featureidkey="properties.BoroName",
color="total_buildings",
- color_continuous_scale="Blues",
+ color_continuous_scale=COLOR_SCALE,
mapbox_style="carto-positron",
zoom=9.5,
center={"lat": 40.7128, "lon": -74.0060},
title="Total Buildings by Borough",
hover_name="Borough",
- labels={"total_buildings": "Buildings"},
+ hover_data={
+ "total_buildings": True,
+ "Buildings per Square Mile": True,
+ "avg_height": ":.0f",
+ },
+ labels={"total_buildings": "Buildings", "avg_height": "Avg Height (ft)"},
)
- fig_map.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
- st.plotly_chart(fig_map, use_container_width=True)
+ st.plotly_chart(apply_chart_theme(fig_map), use_container_width=True)
+
with col2:
- fig_bar = px.bar(
- boro_summary.sort_values("total_buildings", ascending=False),
- x="Borough",
- y="total_buildings",
- title="Total Buildings by Borough",
- color="Borough",
- color_discrete_sequence=px.colors.qualitative.Dark24_r,
- labels={"total_buildings": "Buildings"},
+ fig_tree = px.treemap(
+ boro_summary,
+ path=["Borough"],
+ values="total_buildings",
+ color="Buildings per Square Mile",
+ color_continuous_scale=COLOR_SCALE,
+ title="Building Stock — Size = Count · Color = Density per Square Mile",
+ hover_data={"Buildings per Square Mile": True, "Area (Square Mile)": True},
+ )
+ fig_tree.update_traces(
+ texttemplate="%{label}
%{value:,} buildings
%{percentRoot:.1%} of NYC",
+ textfont_size=13,
)
- st.plotly_chart(fig_bar, use_container_width=True)
+ st.plotly_chart(apply_chart_theme(fig_tree), use_container_width=True)
+
+ # density bar
+ st.markdown("### Building Density — Buildings per Square Mile")
+ fig_density = px.bar(
+ boro_summary.sort_values("Buildings per Square Mile", ascending=False),
+ x="Borough",
+ y="Buildings per Square Mile",
+ color="Buildings per Square Mile",
+ color_continuous_scale=COLOR_SCALE,
+ text="Buildings per Square Mile",
+ title="How Densely Built Is Each Borough?",
+ )
+ fig_density.update_traces(texttemplate="%{text:,}", textposition="outside")
+ fig_density.update_layout(showlegend=False)
+ st.plotly_chart(apply_chart_theme(fig_density), use_container_width=True)
+
+ top_dense = boro_summary.sort_values("Buildings per Square Mile", ascending=False).iloc[0]
+ info_box(
+ f"{top_dense['Borough']} is the most densely built borough — "
+ f"{top_dense['Buildings per Square Mile']:,} buildings per square mile "
+ f"across just {top_dense['Area (Square Mile)']} Square Mile."
+ )
+
+ st.divider()
+
+ # construction trend
+ st.markdown("### Construction History")
+
+ # ── Year range slider ─────────────────────────────────────────────────────
+ year_range = st.slider(
+ "Filter by construction year range",
+ min_value=MIN_CONSTRUCTION_YEAR,
+ max_value=MAX_CONSTRUCTION_YEAR,
+ value=(1940, MAX_CONSTRUCTION_YEAR),
+ step=5,
+ key="yr_slider",
+ )
- st.subheader("Buildings Constructed Per Year")
df_yr = (
df_summary[
- (df_summary["cnstrct_yr"] >= MIN_CONSTRUCTION_YEAR)
- & (df_summary["cnstrct_yr"] <= MAX_CONSTRUCTION_YEAR)
+ (df_summary["cnstrct_yr"] >= year_range[0])
+ & (df_summary["cnstrct_yr"] <= year_range[1])
]
.groupby(["cnstrct_yr", "borough"])["total_buildings"]
.sum()
.reset_index()
.rename(columns={"cnstrct_yr": "Year", "total_buildings": "Buildings"})
)
- fig_yr = px.line(
- df_yr,
- x="Year",
- y="Buildings",
- color="borough",
- title="Buildings Constructed Per Year by Borough",
- labels={"borough": "Borough"},
+
+ yr_total = int(df_yr["Buildings"].sum())
+ info_box(
+ f"Showing buildings constructed between {year_range[0]} and "
+ f"{year_range[1]} — {yr_total:,} buildings total"
+ )
+
+ col1, col2 = st.columns(2)
+ with col1:
+ fig_area = px.area(
+ df_yr,
+ x="Year",
+ y="Buildings",
+ color="borough",
+ title=f"Buildings Constructed Per Year ({year_range[0]}–{year_range[1]})",
+ color_discrete_sequence=COLOR_SEQUENCE,
+ labels={"borough": "Borough"},
+ )
+ fig_area.update_traces(line_width=1)
+ st.plotly_chart(apply_chart_theme(fig_area), use_container_width=True)
+
+ with col2:
+ # heatmap: borough × decade
+ df_decade = df_yr.copy()
+ df_decade["Decade"] = (df_decade["Year"] // 10 * 10).astype(str) + "s"
+ pivot = (
+ df_decade.groupby(["borough", "Decade"])["Buildings"]
+ .sum()
+ .reset_index()
+ .pivot_table(index="borough", columns="Decade", values="Buildings", aggfunc="sum")
+ .fillna(0)
+ )
+ fig_heat = go.Figure(
+ go.Heatmap(
+ z=pivot.to_numpy(),
+ x=pivot.columns.tolist(),
+ y=[b.title() for b in pivot.index.tolist()],
+ colorscale=COLOR_SCALE,
+ text=pivot.to_numpy().astype(int),
+ texttemplate="%{text:,}",
+ hoverongaps=False,
+ )
+ )
+ fig_heat.update_layout(
+ title="Construction Intensity by Borough and Decade",
+ xaxis_title="Decade",
+ yaxis_title="",
+ )
+ st.plotly_chart(apply_chart_theme(fig_heat), use_container_width=True)
+
+ # Aggregate per decade once; the peak label and its count both come from it.
+ decade_totals = df_decade.groupby("Decade")["Buildings"].sum()
+ # idxmax() raises ValueError on an empty Series, which happens when the selected
+ # year range contains no buildings, so the call-out is skipped in that case.
+ if not decade_totals.empty:
+     peak_decade = decade_totals.idxmax()
+     peak_count = decade_totals.max()
+     info_box(
+         f"Peak construction decade: {peak_decade} — "
+         f"{int(peak_count):,} buildings added to NYC"
 )
- st.plotly_chart(fig_yr, use_container_width=True)
# ══════════════════════════════════════════════════════════════════════════════
-# TAB 2 — NEW BUILDING JOBS (2008–2020)
+# TAB 2 — NEW BUILDING JOBS
# ══════════════════════════════════════════════════════════════════════════════
with tab2:
- st.subheader("New Building Jobs (2008–2020)")
- st.caption(
- "Source: DOB Permit Issuance (ipu4-2q9a) — job_type = NB. "
- "Data available 2008–2020. New buildings after 2020 are not yet in any public dataset."
- )
+ st.markdown("### New Building Jobs (2008–2020)")
+ st.caption("Source: DOB Permit Issuance (ipu4-2q9a) — job_type = NB · 2008–2020 only")
+
+ with st.expander("📋 What is a New Building Job?", expanded=False):
+ st.markdown("""
+ A **New Building (NB)** job is filed with the NYC Department of Buildings (DOB)
+ when a property owner or developer intends to construct a brand-new building on a lot.
+
+ | Job Type | Description |
+ |---|---|
+ | **NB** | New Building — construct a new structure from scratch |
+ | **A1** | Major Alteration — change in use, egress, or occupancy |
+ | **A2** | Minor Alteration — no change in use or occupancy |
+ | **A3** | Minor Work — no plans required |
+ | **DM** | Demolition — full or partial removal of an existing building |
+
+ 🔗 Learn more at the
+ [NYC Department of Buildings](https://www.nyc.gov/site/buildings/index.page)
+ or browse job filings at
+ [NYC Open Data — DOB Permit Issuance](https://data.cityofnewyork.us/Housing-Development/DOB-Permit-Issuance/ipu4-2q9a/about_data).
+ """)
+ st.markdown("")
if df_new.empty:
st.info("No new building permits found.")
@@ -259,6 +431,29 @@ def load_facades() -> pd.DataFrame:
.rename(columns={"borough": "Borough", "count": "New Buildings"})
)
new_by_boro["Borough"] = new_by_boro["Borough"].str.title()
+ new_by_boro["Area (Square Mile)"] = (
+ new_by_boro["Borough"].str.upper().map(BOROUGH_AREA_SQ_MI)
+ )
+ new_by_boro["New Bldgs per Square Mile"] = (
+ new_by_boro["New Buildings"] / new_by_boro["Area (Square Mile)"]
+ ).round(1)
+
+ col1, col2, col3 = st.columns(3)
+ col1.metric("New Building Jobs (2008–2020)", f"{total_new:,}", border=True)
+ col2.metric(
+ "Most Active Borough",
+ new_by_boro.sort_values("New Buildings", ascending=False).iloc[0]["Borough"],
+ border=True,
+ )
+ col3.metric(
+ "Peak Year",
+ str(int(df_new.dropna(subset=["permit_date"])["permit_date"].dt.year.mode()[0]))
+ if not df_new.dropna(subset=["permit_date"]).empty
+ else "N/A",
+ border=True,
+ )
+
+ st.divider()
col1, col2 = st.columns(2)
with col1:
@@ -268,29 +463,63 @@ def load_facades() -> pd.DataFrame:
locations="Borough",
featureidkey="properties.BoroName",
color="New Buildings",
- color_continuous_scale="Greens",
+ color_continuous_scale=COLOR_SCALE,
mapbox_style="carto-positron",
zoom=9.5,
center={"lat": 40.7128, "lon": -74.0060},
title="New Building Permits by Borough",
hover_name="Borough",
+ hover_data={"New Buildings": True, "New Bldgs per Square Mile": True},
)
- fig_new_map.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
- st.plotly_chart(fig_new_map, use_container_width=True)
+ st.plotly_chart(apply_chart_theme(fig_new_map), use_container_width=True)
+
with col2:
- fig_new_bar = px.bar(
- new_by_boro.sort_values("New Buildings", ascending=False),
- x="Borough",
- y="New Buildings",
- title="New Building Permits by Borough",
- color="Borough",
- color_discrete_sequence=px.colors.qualitative.Dark24_r,
+ fig_donut = go.Figure(
+ data=[
+ go.Pie(
+ labels=new_by_boro["Borough"],
+ values=new_by_boro["New Buildings"],
+ hole=0.6,
+ marker_colors=COLOR_SEQUENCE[: len(new_by_boro)],
+ textinfo="label+percent",
+ textfont_size=13,
+ )
+ ]
+ )
+ fig_donut.update_layout(
+ title="New Building Jobs by Borough (2008–2020)",
+ # Centre annotation: total job count shown inside the donut hole
+ annotations=[
+ {
+ "text": f"{total_new:,}
jobs",
+ "x": 0.5,
+ "y": 0.5,
+ "font_size": 18,
+ "showarrow": False,
+ "font_color": DEEP_BLUE,
+ }
+ ],
)
- st.plotly_chart(fig_new_bar, use_container_width=True)
+ st.plotly_chart(apply_chart_theme(fig_donut), use_container_width=True)
+
+ # density comparison
+ st.markdown("### New Building Density by Borough")
+ fig_nb_density = px.bar(
+ new_by_boro.sort_values("New Bldgs per Square Mile", ascending=False),
+ x="Borough",
+ y="New Bldgs per Square Mile",
+ color="New Bldgs per Square Mile",
+ color_continuous_scale=COLOR_SCALE,
+ text="New Bldgs per Square Mile",
+ title="New Building Jobs per Square Mile (2008–2020)",
+ )
+ fig_nb_density.update_traces(texttemplate="%{text:.1f}", textposition="outside")
+ st.plotly_chart(apply_chart_theme(fig_nb_density), use_container_width=True)
+ # scatter map
df_new_coords = df_new.dropna(subset=["latitude", "longitude"])
if not df_new_coords.empty:
- st.subheader("Where Are New Buildings Being Built?")
+ st.markdown("### Where Are New Buildings Being Built?")
fig_scatter = px.scatter_mapbox(
df_new_coords,
lat="latitude",
@@ -300,109 +529,191 @@ def load_facades() -> pd.DataFrame:
zoom=10,
center={"lat": 40.7128, "lon": -74.0060},
title="New Building Permit Locations",
- opacity=0.6,
+ opacity=0.65,
+ color_discrete_sequence=COLOR_SEQUENCE,
hover_data={"borough": True, "status": True, "permit_date": True},
)
- fig_scatter.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
- st.plotly_chart(fig_scatter, use_container_width=True)
+ st.plotly_chart(apply_chart_theme(fig_scatter), use_container_width=True)
+ # yearly trend
df_new_yr = df_new.dropna(subset=["permit_date"]).copy()
if not df_new_yr.empty:
df_new_yr["Year"] = df_new_yr["permit_date"].dt.year
- yearly = df_new_yr.groupby("Year").size().reset_index(name="Count")
+ yearly = df_new_yr.groupby(["Year", "borough"]).size().reset_index(name="Count")
fig_trend = px.bar(
yearly,
x="Year",
y="Count",
- title="New Building Permits Per Year (2008–2020)",
- color_discrete_sequence=["#1D9E75"],
+ color="borough",
+ title="New Building Permits Per Year by Borough",
+ barmode="stack",
+ color_discrete_sequence=COLOR_SEQUENCE,
+ labels={"borough": "Borough"},
+ )
+ st.plotly_chart(apply_chart_theme(fig_trend), use_container_width=True)
+
+ peak_yr = df_new_yr.groupby("Year").size().idxmax()
+ peak_yr_count = df_new_yr.groupby("Year").size().max()
+ info_box(
+ f"Peak new building year: {peak_yr} — "
+ f"{peak_yr_count:,} new building permits filed"
)
- st.plotly_chart(fig_trend, use_container_width=True)
# ══════════════════════════════════════════════════════════════════════════════
# TAB 3 — OTHER BUILDING JOBS
# ══════════════════════════════════════════════════════════════════════════════
with tab3:
- st.subheader("Other Building Jobs")
+ st.markdown("### Construction & Renovation Jobs (January 2025 onwards)")
st.caption(
- "Source: DOB NOW (rbx6-tga4) — all work permit types except New Building. "
- "Filtered to January 2025 onwards."
+ "Source: DOB NOW Build — Approved Permits (rbx6-tga4) — " # noqa: E501
+ "all construction job types except New Building"
)
+ with st.expander("📋 What are these construction job types?", expanded=False):
+ st.markdown("""
+ These are **work permits** issued by the NYC Department of Buildings for active
+ construction or renovation work on existing buildings.
+
+ | Job Type | Description |
+ |---|---|
+ | **General Construction** | Structural, facade, or major building work |
+ | **Plumbing** | Water supply, drainage, gas piping |
+ | **Mechanical Systems** | HVAC, ventilation, fire suppression |
+ | **Structural** | Beams, columns, foundations |
+ | **Full Demolition** | Complete removal of an existing building |
+ | **Foundation** | Footings, piles, or underpinning |
+ | **Sidewalk Shed** | Temporary protective structure over sidewalk |
+ | **Solar** | Solar panel installation |
+ | **Sign** | Signage installation or alteration |
+
+ 🔗 Learn more at
+ [NYC DOB NOW Build](https://www.nyc.gov/site/buildings/industry/dob-now-build.page)
+ or browse job filings at
+ [NYC Open Data — DOB NOW Approved Permits](https://data.cityofnewyork.us/Housing-Development/DOB-NOW-Build-Approved-Permits/rbx6-tga4/about_data).
+ """)
+ st.markdown("")
+
if df_other.empty:
st.info("No other permits found.")
else:
- st.markdown("#### Overview (January 2025 onwards)")
-
- other_by_boro_2025 = (
+ other_by_boro = (
df_other_2025["borough"]
.value_counts()
.reset_index()
.rename(columns={"borough": "Borough", "count": "Permits"})
)
- other_by_boro_2025["Borough"] = other_by_boro_2025["Borough"].str.title()
+ other_by_boro["Borough"] = other_by_boro["Borough"].str.title()
- by_type_2025 = (
+ top_type = (
df_other_2025["permit_type_desc"]
.fillna(df_other_2025["permit_type"])
.value_counts()
- .head(10)
- .reset_index()
- .rename(columns={"permit_type_desc": "Permit Type", "count": "Count"})
+ .idxmax()
+ )
+
+ col1, col2, col3 = st.columns(3)
+ col1.metric("Total Construction Jobs (Jan 2025+)", f"{total_other_2025:,}", border=True)
+ col2.metric("Most Active Borough", other_by_boro.iloc[0]["Borough"], border=True)
+ col3.metric("Top Job Type", top_type, border=True)
+
+ st.divider()
+
+ # sunburst + map
+ sunburst_data = (
+ df_other_2025.assign(
+ borough=df_other_2025["borough"].str.title(),
+ permit_type_desc=df_other_2025["permit_type_desc"].fillna(
+ df_other_2025["permit_type"]
+ ),
+ )
+ .groupby(["borough", "permit_type_desc"])
+ .size()
+ .reset_index(name="Count")
)
col1, col2 = st.columns(2)
with col1:
- fig_other_map = px.choropleth_mapbox(
- other_by_boro_2025,
+ fig_map3 = px.choropleth_mapbox(
+ other_by_boro,
geojson=nyc_geo,
locations="Borough",
featureidkey="properties.BoroName",
color="Permits",
- color_continuous_scale="Oranges",
+ color_continuous_scale=COLOR_SCALE,
mapbox_style="carto-positron",
zoom=9.5,
center={"lat": 40.7128, "lon": -74.0060},
- title="Other Permits by Borough (Jan 2025+)",
+ title="Active Construction Jobs by Borough (Jan 2025+)",
hover_name="Borough",
)
- fig_other_map.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
- st.plotly_chart(fig_other_map, use_container_width=True)
+ st.plotly_chart(apply_chart_theme(fig_map3), use_container_width=True)
+
with col2:
- fig_type = px.bar(
- by_type_2025,
- x="Count",
- y="Permit Type",
- orientation="h",
- title="Top 10 Permit Types (Jan 2025+)",
- color="Count",
- color_continuous_scale="Oranges",
+ fig_sun = px.sunburst(
+ sunburst_data,
+ path=["borough", "permit_type_desc"],
+ values="Count",
+ title="Permit Breakdown by Borough and Type",
+ color_discrete_sequence=COLOR_SEQUENCE,
+ )
+ fig_sun.update_traces(
+ textinfo="label+percent parent",
+ insidetextfont_size=12,
)
- fig_type.update_layout(yaxis={"categoryorder": "total ascending"})
- st.plotly_chart(fig_type, use_container_width=True)
+ st.plotly_chart(apply_chart_theme(fig_sun), use_container_width=True)
- df_other_coords = df_other_2025.dropna(subset=["latitude", "longitude"])
- if not df_other_coords.empty:
- st.subheader("Where Are Permits Being Filed? (Jan 2025+)")
- fig_other_scatter = px.scatter_mapbox(
- df_other_coords,
+ # heatmap
+ st.markdown("### Permit Type Heatmap by Borough")
+ top_types = (
+ sunburst_data.groupby("permit_type_desc")["Count"].sum().nlargest(10).index.tolist()
+ )
+ pivot = (
+ sunburst_data[sunburst_data["permit_type_desc"].isin(top_types)]
+ .pivot_table(index="borough", columns="permit_type_desc", values="Count", aggfunc="sum")
+ .fillna(0)
+ )
+ fig_pheat = go.Figure(
+ go.Heatmap(
+ z=pivot.to_numpy(),
+ x=pivot.columns.tolist(),
+ y=pivot.index.tolist(),
+ colorscale=COLOR_SCALE,
+ text=pivot.to_numpy().astype(int),
+ texttemplate="%{text:,}",
+ hoverongaps=False,
+ )
+ )
+ fig_pheat.update_layout(
+ title="Top 10 Permit Types by Borough (Jan 2025+)",
+ xaxis_title="Permit Type",
+ yaxis_title="",
+ xaxis_tickangle=-30,
+ height=350,
+ )
+ st.plotly_chart(apply_chart_theme(fig_pheat), use_container_width=True)
+
+ # scatter map
+ df_coords = df_other_2025.dropna(subset=["latitude", "longitude"])
+ if not df_coords.empty:
+ st.markdown("### Permit Locations (Jan 2025+)")
+ fig_scat = px.scatter_mapbox(
+ df_coords,
lat="latitude",
lon="longitude",
color="permit_type_desc",
mapbox_style="carto-positron",
zoom=10,
center={"lat": 40.7128, "lon": -74.0060},
- title="Permit Locations",
- opacity=0.5,
+ title="Where Are Permits Being Filed?",
+ opacity=0.55,
+ color_discrete_sequence=COLOR_SEQUENCE,
hover_data={"borough": True, "permit_type_desc": True, "status": True},
)
- fig_other_scatter.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
- st.plotly_chart(fig_other_scatter, use_container_width=True)
+ st.plotly_chart(apply_chart_theme(fig_scat), use_container_width=True)
st.divider()
- st.markdown("#### Detailed View (Last 12 Months)")
- st.success(f"Loaded {len(df_other):,} rows (last 12 months)")
+ st.markdown("### Detailed View — Construction & Renovation Jobs (Last 12 Months)")
date_col = first_column(df_other, ["permit_date"])
borough_col = "borough" if "borough" in df_other.columns else None
@@ -412,40 +723,26 @@ def load_facades() -> pd.DataFrame:
status_col = first_column(df_other, ["status", "permit_status"])
df_detail = filter_last_12_months(df_other, date_col=date_col)
- with st.expander("Set Filters", expanded=False):
+ with st.expander("Filters", expanded=False):
+ cols = st.columns(3)
if borough_col:
- borough_options = sorted(
- df_detail[borough_col].dropna().astype(str).unique().tolist()
- )
- selected_borough = st.multiselect(
- "Borough",
- borough_options,
- default=borough_options,
- key="t3_boro",
+ boro_opts = sorted(df_detail[borough_col].dropna().astype(str).unique())
+ selected_borough = cols[0].multiselect(
+ "Borough", boro_opts, default=boro_opts, key="t3b"
)
else:
selected_borough = None
-
if type_col:
- type_options = sorted(df_detail[type_col].dropna().astype(str).unique().tolist())
- selected_types = st.multiselect(
- type_col.replace("_", " ").title(),
- type_options,
- default=type_options,
- key="t3_type",
+ type_opts = sorted(df_detail[type_col].dropna().astype(str).unique())
+ selected_types = cols[1].multiselect(
+ "Permit Type", type_opts, default=type_opts, key="t3t"
)
else:
selected_types = None
-
if status_col:
- status_options = sorted(
- df_detail[status_col].dropna().astype(str).unique().tolist()
- )
- selected_status = st.multiselect(
- "Permit Status",
- status_options,
- default=status_options,
- key="t3_status",
+ stat_opts = sorted(df_detail[status_col].dropna().astype(str).unique())
+ selected_status = cols[2].multiselect(
+ "Status", stat_opts, default=stat_opts, key="t3s"
)
else:
selected_status = None
@@ -459,113 +756,65 @@ def load_facades() -> pd.DataFrame:
status_col=status_col,
selected_status=selected_status,
)
- st.caption(f"Filtered rows: {len(df_filtered):,}")
+ st.caption(f"{len(df_filtered):,} rows after filtering")
- bucket = st.selectbox(
- "Time bucket",
- ["Monthly", "Weekly", "Daily"],
- index=0,
- key="bucket_permits",
- )
+ bucket = st.selectbox("Time bucket", ["Monthly", "Weekly", "Daily"], key="t3bucket")
freq = {"Monthly": "MS", "Weekly": "W-MON", "Daily": "D"}[bucket]
if not df_filtered.empty and date_col in df_filtered.columns:
df_filtered[date_col] = pd.to_datetime(df_filtered[date_col], errors="coerce")
- end_period = df_filtered[date_col].max()
- offsets_map = {
+ end_p = df_filtered[date_col].max()
+ off = {
"Monthly": pd.DateOffset(months=1),
"Weekly": pd.DateOffset(weeks=1),
"Daily": pd.DateOffset(days=1),
- }
- offsets = offsets_map[bucket]
- period_label = bucket.lower().rstrip("ly")
- start_period = end_period - offsets
- start_prev = start_period - offsets
-
- current = df_filtered[df_filtered[date_col] > start_period]
- previous = df_filtered[
- (df_filtered[date_col] > start_prev) & (df_filtered[date_col] <= start_period)
+ }[bucket]
+ pl = bucket.lower().rstrip("ly")
+ cur = df_filtered[df_filtered[date_col] > end_p - off]
+ prev = df_filtered[
+ (df_filtered[date_col] > end_p - 2 * off) & (df_filtered[date_col] <= end_p - off)
]
- current_total = len(current)
- previous_total = len(previous)
- total_delta = (
- f"{((current_total - previous_total) / previous_total * 100):+.1f}%"
- if previous_total > 0
- else None
- )
-
- if borough_col and borough_col in df_filtered.columns:
- current_boro = current[borough_col].value_counts()
- previous_boro = previous[borough_col].value_counts()
- top_boro = current_boro.idxmax() if not current_boro.empty else "N/A"
- top_boro_count = int(current_boro.max()) if not current_boro.empty else 0
- prev_boro_count = int(previous_boro.get(top_boro, 0))
- boro_delta = f"{top_boro_count - prev_boro_count:+,}"
- else:
- top_boro, boro_delta = "N/A", None
-
- if type_col and type_col in df_filtered.columns:
- current_type = current[type_col].value_counts()
- previous_type = previous[type_col].value_counts()
- top_type = current_type.idxmax() if not current_type.empty else "N/A"
- top_type_count = int(current_type.max()) if not current_type.empty else 0
- prev_type_count = int(previous_type.get(top_type, 0))
- type_delta = (
- f"{((top_type_count - prev_type_count) / prev_type_count * 100):+.1f}%"
- if prev_type_count > 0
- else None
- )
- else:
- top_type, type_delta = "N/A", None
+ cur_n, prev_n = len(cur), len(prev)
+ delta = f"{((cur_n - prev_n) / prev_n * 100):+.1f}%" if prev_n > 0 else None
- st.subheader(f"Summary — Current {bucket} Period")
- col1, col2, col3 = st.columns(3)
- col1.metric(
- label=f"Total Permits (vs previous {period_label})",
- value=f"{current_total:,}",
- delta=total_delta,
- border=True,
- )
- col2.metric(
- label=f"Borough with Highest Permits (vs previous {period_label})",
- value=top_boro,
- delta=boro_delta,
- border=True,
+ top_b = (
+ cur[borough_col].value_counts().idxmax() if borough_col and not cur.empty else "N/A"
)
- col3.metric(
- label=f"Most Common Work Type (vs previous {period_label})",
- value=top_type,
- delta=type_delta,
- border=True,
+ top_b_prev = prev[borough_col].value_counts().get(top_b, 0) if borough_col else 0
+ top_b_delta = (
+ f"{len(cur[cur[borough_col] == top_b]) - top_b_prev:+,}" if borough_col else None
)
+ top_t = cur[type_col].value_counts().idxmax() if type_col and not cur.empty else "N/A"
+
+ c1, c2, c3 = st.columns(3)
+ c1.metric(f"Construction Jobs (vs prev {pl})", f"{cur_n:,}", delta=delta, border=True)
+ c2.metric(f"Top Borough (vs prev {pl})", top_b, delta=top_b_delta, border=True)
+ c3.metric("Top Job Type", top_t, border=True)
+
if not df_filtered.empty and borough_col:
- boro_counts = (
+ bc = (
df_filtered[borough_col]
.value_counts()
.reset_index()
.rename(columns={borough_col: "Borough", "count": "Count"})
)
- boro_counts["Borough"] = boro_counts["Borough"].str.strip().str.title()
- fig_detail_map = px.choropleth_mapbox(
- boro_counts,
+ bc["Borough"] = bc["Borough"].str.strip().str.title()
+ fig_dm = px.choropleth_mapbox(
+ bc,
geojson=nyc_geo,
locations="Borough",
featureidkey="properties.BoroName",
color="Count",
- color_continuous_scale="Reds",
+ color_continuous_scale=COLOR_SCALE,
mapbox_style="carto-positron",
zoom=9.5,
center={"lat": 40.7128, "lon": -74.0060},
- title="Number of Permits by Borough",
+ title="Construction Jobs by Borough (Filtered)",
hover_name="Borough",
- hover_data={"Count": True},
)
- fig_detail_map.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
- st.plotly_chart(fig_detail_map, use_container_width=True)
- else:
- st.info("No data to display")
+ st.plotly_chart(apply_chart_theme(fig_dm), use_container_width=True)
ts = permit_timeseries_by_borough(
df_filtered,
@@ -574,25 +823,24 @@ def load_facades() -> pd.DataFrame:
status_col=None,
freq=freq,
)
- if ts.empty:
- st.info("No rows to visualize")
- else:
+ if not ts.empty:
fig_ts = px.line(
ts,
x="Period",
y="Count",
color="Borough",
markers=True,
- title="Building Job Permits by Borough Over Time",
+ title="Permit Activity by Borough Over Time",
+ color_discrete_sequence=COLOR_SEQUENCE,
)
- st.plotly_chart(fig_ts, use_container_width=True)
+ st.plotly_chart(apply_chart_theme(fig_ts), use_container_width=True)
# ══════════════════════════════════════════════════════════════════════════════
-# TAB 4 — FACADE INSPECTION (FISP)
+# TAB 4 — FACADE INSPECTION
# ══════════════════════════════════════════════════════════════════════════════
with tab4:
- st.subheader("Facade Inspection Safety Program (FISP)")
+ st.markdown("### Facade Inspection Safety Program (FISP)")
st.caption(
"Buildings taller than 6 stories must be inspected every 5 years. "
"Current cycle: Cycle 10 (2025–2030). Data available: 2001–present. "
@@ -602,40 +850,135 @@ def load_facades() -> pd.DataFrame:
if df_facades.empty:
st.info("No facade inspection data found.")
else:
- unsafe_count = len(df_facades[df_facades["filing_status"].str.contains("UNSAFE", na=False)])
- swarmp_count = len(df_facades[df_facades["filing_status"].str.contains("SWARMP", na=False)])
- safe_count = len(df_facades[df_facades["filing_status"].str.contains("SAFE", na=False)])
+ with st.expander("📋 What is FISP and why does it matter?", expanded=False):
+ st.markdown("""
+ The **Facade Inspection Safety Program (FISP)**, also known as **Local Law 11**,
+ requires all buildings taller than 6 stories in NYC to have their facades
+ inspected by a Qualified Exterior Wall Inspector (QEWI) every **5 years**.
+
+ **Classifications:**
+
+ | Status | Meaning | Action Required |
+ |---|---|---|
+ | ✅ **SAFE** | No problems found | None — next inspection in 5 years |
+ | ⚠️ **SWARMP** | Safe now but needs repair | Repairs required before next cycle |
+ | 🚨 **UNSAFE** | Immediate danger to public | Emergency repairs required; violations |
+ | 📋 **No Report Filed** | Owner has not submitted | Subject to $1,000/month penalty |
+
+ **Current Cycle:** Cycle 10 (2025–2030)
+
+ 🔗 Learn more:
+ - [NYC DOB — Facade Inspection Safety Program](https://www.nyc.gov/site/buildings/property/facades.page)
+ - [Local Law 11 Overview](https://www.nyc.gov/site/buildings/property/facades.page)
+ - [NYC Open Data — FISP Filings](https://data.cityofnewyork.us/Housing-Development/DOB-NOW-Safety-Facade-Compliance-Filings/xubg-57si/about_data)
+ """)
+ st.markdown("")
+ if unsafe_rate > UNSAFE_RATE_WARNING:
+ warning_box(
+ f"{unsafe_rate:.1f}% of all facade filings are UNSAFE — " # noqa: E501
+ "immediate attention required."
+ )
+ elif at_risk_rate > AT_RISK_RATE_WARNING:
+ warning_box(
+ f"{at_risk_rate:.1f}% of all facade filings are at risk (UNSAFE or SWARMP)."
+ )
- col1, col2, col3 = st.columns(3)
- col1.metric(label="Safe Filings", value=f"{safe_count:,}", border=True)
- col2.metric(label="SWARMP Filings", value=f"{swarmp_count:,}", border=True)
- col3.metric(label="Unsafe Filings", value=f"{unsafe_count:,}", border=True)
-
- df_cycle9 = df_facades[df_facades["cycle"] == "9"]
- status_counts = (
- df_cycle9["filing_status"]
- .fillna("UNKNOWN")
- .value_counts()
- .reset_index()
- .rename(columns={"filing_status": "Status", "count": "Count"})
+ # gauge row
+ def make_gauge(value, title, color, suffix="%", max_val=100):
+ fig = go.Figure(
+ go.Indicator(
+ mode="gauge+number",
+ value=round(value, 1),
+ title={"text": title, "font": {"color": DEEP_BLUE, "size": 13}},
+ number={
+ "suffix": suffix,
+ "font": {"color": DEEP_BLUE, "size": 22},
+ "valueformat": ".1f",
+ },
+ gauge={
+ "axis": {
+ "range": [0, max_val],
+ "tickcolor": DEEP_BLUE,
+ "tickfont": {"size": 10},
+ },
+ "bar": {"color": color, "thickness": 0.7},
+ "bordercolor": DEEP_BLUE,
+ "borderwidth": 1,
+ "steps": [{"range": [0, max_val * 0.5], "color": "rgba(0,0,0,0.03)"}],
+ },
+ )
+ )
+ fig.update_layout(
+ height=180,
+ margin={"t": 50, "b": 10, "l": 20, "r": 20},
+ paper_bgcolor="rgba(0,0,0,0)",
+ )
+ return fig
+
+ safe_rate = safe_count / total_filed * 100 if total_filed > 0 else 0
+ swarmp_rate = swarmp_count / total_filed * 100 if total_filed > 0 else 0
+
+ g1, g2, g3, g4 = st.columns(4)
+ with g1:
+ st.plotly_chart(make_gauge(safe_rate, "Safe Rate", "#1D9E75"), use_container_width=True)
+ with g2:
+ st.plotly_chart(
+ make_gauge(swarmp_rate, "SWARMP Rate", FRESH_SKY), use_container_width=True
+ )
+ with g3:
+ st.plotly_chart(
+ make_gauge(unsafe_rate, "Unsafe Rate", DANGER), use_container_width=True
+ )
+ with g4:
+ st.plotly_chart(
+ make_gauge(at_risk_rate, "At Risk Rate", DEEP_BLUE), use_container_width=True
+ )
+
+ st.divider()
+
+ available_cycles = sorted(
+ [int(c) for c in df_facades["cycle"].dropna().unique() if str(c).isdigit()]
+ )
+ min_c, max_c = available_cycles[0], available_cycles[-1]
+ selected_cycles = st.slider(
+ "Filter by inspection cycle",
+ min_value=min_c,
+ max_value=max_c,
+ value=(min_c, max_c),
+ step=1,
+ key="cycle_slider",
+ help="Cycle 9 = 2020–2024 · Cycle 10 = 2025–2030",
+ )
+ df_cycle9 = df_facades[
+ df_facades["cycle"].dropna().astype(int).between(selected_cycles[0], selected_cycles[1])
+ ]
+ st.caption(
+ f"Showing cycles {selected_cycles[0]}–{selected_cycles[1]} · {len(df_cycle9):,} filings"
)
col1, col2 = st.columns(2)
with col1:
+ sc9 = (
+ df_cycle9["filing_status"]
+ .fillna("UNKNOWN")
+ .value_counts()
+ .reset_index()
+ .rename(columns={"filing_status": "Status", "count": "Count"})
+ )
fig_pie = px.pie(
- status_counts,
+ sc9,
names="Status",
values="Count",
- title="Facade Filing Status (Cycle 9, Previous Cycle)",
+ title=f"Filing Status — Cycles {selected_cycles[0]}–{selected_cycles[1]}",
color="Status",
- color_discrete_map={
- "SAFE": "#639922",
- "SWARMP": "#BA7517",
- "UNSAFE": "#E24B4A",
- "NO REPORT FILED": "#888780",
- },
+ color_discrete_map=FACADE_COLORS,
+ hole=0.5,
)
- st.plotly_chart(fig_pie, use_container_width=True)
+ fig_pie.update_traces(
+ textinfo="label+percent",
+ textfont_size=12,
+ )
+ st.plotly_chart(apply_chart_theme(fig_pie), use_container_width=True)
with col2:
unsafe_by_boro = (
@@ -649,56 +992,106 @@ def load_facades() -> pd.DataFrame:
unsafe_by_boro.sort_values("Unsafe Filings", ascending=False),
x="Borough",
y="Unsafe Filings",
- title="Unsafe Facade Filings by Borough",
- color="Borough",
- color_discrete_sequence=px.colors.qualitative.Dark24_r,
+ title=f"Unsafe Filings by Borough, Cycle {selected_cycles[0]}–{selected_cycles[1]}",
+ color="Unsafe Filings",
+ color_continuous_scale=COLOR_SCALE_RISK,
+ text="Unsafe Filings",
)
- st.plotly_chart(fig_unsafe, use_container_width=True)
+ fig_unsafe.update_traces(textposition="outside")
+ st.plotly_chart(apply_chart_theme(fig_unsafe), use_container_width=True)
- fisp_by_boro = (
- df_facades[df_facades["filing_status"].str.contains("UNSAFE|SWARMP", na=False)]
+ # at-risk map — connected to slider
+ fisp_risk = (
+ df_cycle9[df_cycle9["filing_status"].str.contains("UNSAFE|SWARMP", na=False)]
.groupby("borough")["filing_status"]
.count()
.reset_index()
.rename(columns={"borough": "Borough", "filing_status": "At Risk Filings"})
)
- fisp_by_boro["Borough"] = fisp_by_boro["Borough"].str.title()
-
- fig_fisp_map = px.choropleth_mapbox(
- fisp_by_boro,
+ fisp_risk["Borough"] = fisp_risk["Borough"].str.title()
+ fig_risk_map = px.choropleth_mapbox(
+ fisp_risk,
geojson=nyc_geo,
locations="Borough",
featureidkey="properties.BoroName",
color="At Risk Filings",
- color_continuous_scale="Reds",
+ color_continuous_scale=COLOR_SCALE_RISK,
mapbox_style="carto-positron",
zoom=9.5,
center={"lat": 40.7128, "lon": -74.0060},
- title="At Risk Facades (SWARMP + Unsafe) by Borough",
+ title=f"At Risk Facade(SWARMP+Unsafe):Cycles {selected_cycles[0]}–{selected_cycles[1]}",
hover_name="Borough",
- hover_data={"At Risk Filings": True},
)
- fig_fisp_map.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
- st.plotly_chart(fig_fisp_map, use_container_width=True)
+ st.plotly_chart(apply_chart_theme(fig_risk_map), use_container_width=True)
+
+ # unsafe rate per borough — connected to slider
+ st.markdown("### Unsafe Rate by Borough")
+ rates = []
+ for boro in df_cycle9["borough"].unique():
+ b = df_cycle9[df_cycle9["borough"] == boro]
+ u = len(b[b["filing_status"].str.contains("UNSAFE", na=False)])
+ t = len(b)
+ rates.append(
+ {
+ "Borough": boro.title(),
+ "Unsafe Rate (%)": round(u / t * 100, 1) if t > 0 else 0,
+ "Total Filings": t,
+ }
+ )
+ df_rates = pd.DataFrame(rates).sort_values("Unsafe Rate (%)", ascending=False)
+ fig_rate = px.bar(
+ df_rates,
+ x="Borough",
+ y="Unsafe Rate (%)",
+ title=f"Unsafe Facade Rate by Borough: Cycle {selected_cycles[0]}–{selected_cycles[1]}",
+ color="Unsafe Rate (%)",
+ color_continuous_scale=COLOR_SCALE_RISK,
+ text="Unsafe Rate (%)",
+ )
+ fig_rate.update_traces(texttemplate="%{text:.1f}%", textposition="outside")
+ fig_rate.add_hline(
+ y=UNSAFE_THRESHOLD * 100,
+ line_dash="dash",
+ line_color=DANGER,
+ annotation_text=f"Warning threshold ({UNSAFE_THRESHOLD:.0%})",
+ annotation_font_color=DANGER,
+ )
+ st.plotly_chart(apply_chart_theme(fig_rate), use_container_width=True)
+ # cycle trend — stacked
if "cycle" in df_facades.columns:
- cycle_counts = (
+ st.markdown("### Filing Trends Across Inspection Cycles")
+ cycle_data = (
df_facades.groupby(["cycle", "filing_status"]).size().reset_index(name="Count")
)
fig_cycle = px.bar(
- cycle_counts,
+ cycle_data,
x="cycle",
y="Count",
color="filing_status",
- title="Facade Filing Status by Cycle",
- barmode="group",
- color_discrete_map={
- "SAFE": "#639922",
- "SWARMP": "#BA7517",
- "UNSAFE": "#E24B4A",
- },
+ title="Facade Filing Status by Inspection Cycle",
+ barmode="stack",
+ color_discrete_map=FACADE_COLORS,
labels={"filing_status": "Status", "cycle": "Cycle"},
)
- st.plotly_chart(fig_cycle, use_container_width=True)
+ st.plotly_chart(apply_chart_theme(fig_cycle), use_container_width=True)
+
+ # trend line: unsafe count over cycles
+ unsafe_trend = (
+ df_facades[df_facades["filing_status"].str.contains("UNSAFE", na=False)]
+ .groupby("cycle")
+ .size()
+ .reset_index(name="Unsafe Count")
+ )
+ fig_ut = px.line(
+ unsafe_trend,
+ x="cycle",
+ y="Unsafe Count",
+ title="Unsafe Filing Count Over Inspection Cycles",
+ markers=True,
+ color_discrete_sequence=[DANGER],
+ )
+ fig_ut.update_traces(line_width=2.5, marker_size=8)
+ st.plotly_chart(apply_chart_theme(fig_ut), use_container_width=True)
-st.caption(f"Page loaded in {time.time() - start_time:.2f} seconds")
+st.caption(f"⏱ Page loaded in {time.time() - start_time:.2f} seconds")
diff --git a/pages/2_Building_Eviction.py b/pages/2_Building_Eviction.py
index ec5e930..17f71ee 100644
--- a/pages/2_Building_Eviction.py
+++ b/pages/2_Building_Eviction.py
@@ -1,253 +1,643 @@
+from __future__ import annotations
+
import time
-from contextlib import contextmanager
import pandas as pd
import pandas_gbq
import plotly.express as px
+import plotly.graph_objects as go
import requests
import streamlit as st
from google.oauth2 import service_account
-st.set_page_config(page_title="NYC Evictions", layout="wide")
-st.title("NYC Evictions Dashboard")
-
-date_col = "executed_date"
-borough_col = "borough"
-building_col = "residential_commercial_ind"
-
-
-# add page load time
-@contextmanager
-def display_load_time():
- start_time = time.time()
-
- try:
- yield
- finally:
- elapsed = time.time() - start_time
- st.caption(f"Page loaded in {elapsed:.2f} seconds")
-
-
-# --- Cache GeoJSON ---
-@st.cache_data(ttl=86400)
+from functions.theme import (
+ COLOR_SCALE,
+ COLOR_SCALE_RISK,
+ COLOR_SEQUENCE,
+ DANGER,
+ DEEP_BLUE,
+ FRESH_SKY,
+ INK_BLACK,
+ LIGHT_BLUE,
+ SLATE,
+ apply_chart_theme,
+ apply_css,
+ info_box,
+ page_header,
+ warning_box,
+)
+
+st.set_page_config(page_title="NYC Building Evictions", layout="wide")
+apply_css()
+
+start_time = time.time()
+
+PROJECT_ID = "sipa-adv-c-cosmic-spaghetti"
+DATASET = "cosmic_spaghetti"
+DATE_COL = "executed_date"
+BOROUGH_COL = "borough"
+TYPE_COL = "residential_commercial_ind"
+
+ANOMALY_STD = 2.0 # flag months > 2 std dev above mean
+
+
+@st.cache_data(ttl=86400, show_spinner=False)
def get_geojson():
- geojson_url = "https://raw.githubusercontent.com/dwillis/nyc-maps/master/boroughs.geojson"
- response = requests.get(geojson_url)
+    """Download the NYC borough boundaries GeoJSON (cached for 24 hours).
+
+    Returns:
+        The decoded JSON document returned by the server.
+
+    Raises:
+        requests.HTTPError: if the server answers with an error status, so an
+            error page is never parsed as map data.
+        requests.Timeout: if the server does not respond within 10 seconds.
+    """
+    response = requests.get(
+        "https://raw.githubusercontent.com/dwillis/nyc-maps/master/boroughs.geojson",
+        timeout=10,  # requests has no default timeout; do not hang the page
+    )
+    response.raise_for_status()
return response.json()
-@st.cache_data(ttl=3600)
-def load_data_from_bq():
- credentials = service_account.Credentials.from_service_account_info(
+# Build Google service-account credentials from the "gcp_service_account"
+# entry in Streamlit secrets, requesting the BigQuery OAuth scope.
+def get_credentials():
+ return service_account.Credentials.from_service_account_info(
st.secrets["gcp_service_account"],
scopes=["https://www.googleapis.com/auth/bigquery"],
)
- query = """
- SELECT
- executed_date,
- borough,
- residential_commercial_ind
- FROM `sipa-adv-c-cosmic-spaghetti.cosmic_spaghetti.evictions`
- WHERE executed_date >= '2025-01-01'
- AND borough IS NOT NULL
+
+# Load all eviction rows that have a borough and an executed_date from
+# BigQuery and derive the helper columns the page relies on:
+#   - "year":    calendar year of executed_date
+#   - "month":   executed_date truncated to a monthly period, as a timestamp
+#   - "borough": stripped / title-cased, with county names mapped to boroughs
+#   - "type":    "Commercial", "Residential" or "Unknown"
+# The result is cached by Streamlit for one hour (ttl=3600).
+@st.cache_data(ttl=3600, show_spinner=False)
+def load_evictions() -> pd.DataFrame:
+ query = f"""
+ SELECT executed_date, borough, residential_commercial_ind
+ FROM `{PROJECT_ID}.{DATASET}.evictions`
+ WHERE borough IS NOT NULL
AND executed_date IS NOT NULL
- LIMIT 10000
"""
df = pandas_gbq.read_gbq(
query,
- project_id=credentials.project_id,
- credentials=credentials,
- dtypes={
- "borough": "str",
- "residential_commercial_ind": "str",
- },
+ project_id=PROJECT_ID,
+ credentials=get_credentials(),
+ progress_bar_type=None,
+ dtypes={"borough": "str", "residential_commercial_ind": "str"},
)
- # df = client.query(query).to_dataframe()
-
df["executed_date"] = pd.to_datetime(df["executed_date"], errors="coerce")
df = df.dropna(subset=["executed_date"])
df["year"] = df["executed_date"].dt.year
+ df["month"] = df["executed_date"].dt.to_period("M").dt.to_timestamp()
+
+ # normalize borough names — county names → borough names
+ borough_name_map = {
+ "Kings": "Brooklyn",
+ "Richmond": "Staten Island",
+ "New York": "Manhattan",
+ "Bronx": "Bronx",
+ "Queens": "Queens",
+ "Brooklyn": "Brooklyn",
+ "Manhattan": "Manhattan",
+ "Staten Island": "Staten Island",
+ }
+    # Values not present in the map fall back to their stripped, title-cased
+    # form instead of becoming NaN.
+ df["borough"] = (
+ df["borough"]
+ .str.strip()
+ .str.title()
+ .map(borough_name_map)
+ .fillna(df["borough"].str.strip().str.title()) # noqa: E501
+ )
+
+ # normalize type — combine C/Commercial and R/Residential
+ type_map = {
+ "C": "Commercial",
+ "R": "Residential",
+ "Commercial": "Commercial",
+ "Residential": "Residential",
+ }
+    # Anything that does not match a type_map key (including nulls) is labelled
+    # "Unknown".
+ df["type"] = df[TYPE_COL].str.strip().str.title().map(type_map).fillna("Unknown")
return df
-# --- Load data ---
-with display_load_time():
- with st.spinner("Loading eviction data from BigQuery..."):
- df_evic = load_data_from_bq()
+# ── Load data ─────────────────────────────────────────────────────────────────
+with st.spinner("Loading eviction data..."):
+ df = load_evictions()
+ nyc_geo = get_geojson()
+
+if df.empty:
+ st.error("No data returned from BigQuery.")
+ st.stop()
+
+# ── Page header ───────────────────────────────────────────────────────────────
+page_header(
+ "🏠 NYC Building Evictions",
+ "Explore eviction trends across New York City's five boroughs : "
+ "residential vs commercial, seasonal patterns, and anomaly detection.",
+)
+
+# ── Pre-compute stats ─────────────────────────────────────────────────────────
+total_evictions = len(df)
+top_boro = df[BOROUGH_COL].value_counts().idxmax()
+residential = len(df[df["type"].str.contains("Residential", na=False)])
+commercial = len(df[df["type"].str.contains("Commercial", na=False)])
+res_pct = residential / total_evictions * 100 if total_evictions > 0 else 0
+
+# anomaly detection — monthly counts
+monthly_total = df.groupby("month").size().reset_index(name="Count")
+mean_m = monthly_total["Count"].mean()
+std_m = monthly_total["Count"].std()
+anomaly_months = monthly_total[monthly_total["Count"] > mean_m + ANOMALY_STD * std_m]
+
+if not anomaly_months.empty:
+ anomaly_labels = ", ".join(anomaly_months["month"].dt.strftime("%b %Y").tolist())
+ warning_box(
+ f"Unusually high eviction activity detected in: {anomaly_labels} "
+ f"— more than {ANOMALY_STD:.0f} standard deviations above the monthly average."
+ )
- st.success(f"Loaded {len(df_evic):,} rows (2025 → today)")
- st.dataframe(df_evic.head(5), use_container_width=True)
+# ── KPI row ───────────────────────────────────────────────────────────────────
+c1, c2, c3, c4, c5 = st.columns(5)
+c1.metric("Total Evictions", f"{total_evictions:,}", border=True)
+c2.metric("Most Affected Borough", top_boro, border=True)
+c3.metric("Residential Evictions", f"{residential:,}", border=True)
+c4.metric("Commercial Evictions", f"{commercial:,}", border=True)
+c5.metric("Residential Share", f"{res_pct:.1f}%", border=True)
+
+st.divider()
+
+# ── Filters ───────────────────────────────────────────────────────────────────
+with st.expander("🔍 Filters", expanded=False):
+ fcols = st.columns(3)
+ boro_opts = sorted(df[BOROUGH_COL].dropna().unique())
+ selected_borough = fcols[0].multiselect("Borough", boro_opts, default=boro_opts)
+
+ type_opts = sorted(df["type"].dropna().unique())
+ selected_type = fcols[1].multiselect("Building Type", type_opts, default=type_opts)
+
+ year_opts = sorted(df["year"].dropna().unique())
+ selected_years = fcols[2].multiselect("Year", year_opts, default=year_opts)
+
+# Apply the filters. An empty multiselect means "no filter" for that dimension
+# rather than "exclude everything".
+df_f = df
+if selected_borough:
+    df_f = df_f[df_f[BOROUGH_COL].isin(selected_borough)]
+if selected_type:
+    df_f = df_f[df_f["type"].isin(selected_type)]
+if selected_years:
+    df_f = df_f[df_f["year"].isin(selected_years)]
+# Copy *after* filtering: the tabs below add helper columns (Period2, Period3)
+# to df_f, and assigning to a boolean-mask slice triggers pandas'
+# SettingWithCopyWarning.
+df_f = df_f.copy()
+
+st.caption(f"{len(df_f):,} evictions after filtering")
+st.divider()
+
+# Every tab assumes at least one row (min/max of year, iloc[0] on grouped
+# results); stop with a clear message instead of crashing when the combined
+# filters match nothing.
+if df_f.empty:
+    st.warning("No evictions match the current filters. Adjust the filters to see results.")
+    st.stop()
+
+# ── Tabs ──────────────────────────────────────────────────────────────────────
+tab1, tab2, tab3, tab4 = st.tabs(
+ [
+ "📊 Overview",
+ "🗺️ Borough Analysis",
+ "🏠 Residential vs Commercial",
+ "🔍 Anomaly Detection",
+ ]
+)
+
+
+# ══════════════════════════════════════════════════════════════════════════════
+# TAB 1 — OVERVIEW
+# ══════════════════════════════════════════════════════════════════════════════
+with tab1:
+ st.markdown("### Eviction Trends Over Time")
+
+    # Year range slider. A slider needs min < max, so it is only rendered when
+    # the filtered data spans more than one year; otherwise the single
+    # available year is used as the range. selected_year_range is bound in
+    # both cases because the trend chart title below interpolates it.
+    year_min = int(df_f["year"].min())
+    year_max = int(df_f["year"].max())
+    selected_year_range = (year_min, year_max)
+    if year_min < year_max:
+        selected_year_range = st.slider(
+            "Filter by year range",
+            min_value=year_min,
+            max_value=year_max,
+            value=(year_min, year_max),
+            step=1,
+            key="yr_range_slider",
+        )
+    # .copy() so the helper columns added below (Period, month_name, month_num)
+    # are written to an independent frame rather than to a slice of df_f.
+    df_tab1 = df_f[
+        df_f["year"].between(selected_year_range[0], selected_year_range[1])
+    ].copy()
- # --- Filter options ---
- borough_options = sorted(df_evic[borough_col].dropna().astype(str).unique().tolist())
- building_options = sorted(df_evic[building_col].dropna().astype(str).unique().tolist())
+ bucket = st.selectbox("Time bucket", ["Monthly", "Weekly", "Daily"], key="t1_bucket")
+ freq_map = {"Monthly": "M", "Weekly": "W", "Daily": "D"}
+ df_tab1["Period"] = df_tab1[DATE_COL].dt.to_period(freq_map[bucket]).dt.to_timestamp()
- selected_borough = st.multiselect("Select Borough(s)", borough_options, default=borough_options)
- selected_building = st.multiselect(
- "Select Building Type(s)", building_options, default=building_options
+ ts = df_tab1.groupby(["Period", BOROUGH_COL]).size().reset_index(name="Evictions")
+ fig_ts = px.line(
+ ts,
+ x="Period",
+ y="Evictions",
+ color=BOROUGH_COL,
+ markers=True,
+ title=f"Evic by Boro: {bucket} Trend ({selected_year_range[0]}–{selected_year_range[1]})",
+ color_discrete_sequence=COLOR_SEQUENCE,
+ labels={BOROUGH_COL: "Borough"},
)
+ st.plotly_chart(apply_chart_theme(fig_ts), use_container_width=True)
+
+ # year over year
+ st.markdown("### Year-over-Year Comparison")
+ yearly = df_tab1.groupby("year").size().reset_index(name="Evictions")
+ fig_yoy = px.bar(
+ yearly,
+ x="year",
+ y="Evictions",
+ title="Total Evictions by Year",
+ color="Evictions",
+ color_continuous_scale=COLOR_SCALE,
+ text="Evictions",
+ )
+ fig_yoy.update_traces(texttemplate="%{text:,}", textposition="outside")
+ st.plotly_chart(apply_chart_theme(fig_yoy), use_container_width=True)
- df_filtered = df_evic.copy()
- if selected_borough:
- df_filtered = df_filtered[df_filtered[borough_col].astype(str).isin(selected_borough)]
- if selected_building:
- df_filtered = df_filtered[df_filtered[building_col].astype(str).isin(selected_building)]
+ peak_yr = yearly.sort_values("Evictions", ascending=False).iloc[0]
+ info_box(
+ f"Peak eviction year: {int(peak_yr['year'])} — "
+ f"{int(peak_yr['Evictions']):,} evictions executed"
+ )
- st.caption(f"Filtered rows: {len(df_filtered):,}")
- st.dataframe(df_filtered.head(20), use_container_width=True)
+    # seasonal pattern — total evictions per calendar month across all years
+    st.markdown("### Seasonal Pattern")
+    df_tab1["month_name"] = df_tab1[DATE_COL].dt.strftime("%b")
+    df_tab1["month_num"] = df_tab1[DATE_COL].dt.month
+    # .size() counts rows, so each value is a total (not an average) for that
+    # calendar month; the column is named "Evictions" so the axis and colour
+    # bar are labelled accurately.
+    seasonal = (
+        df_tab1.groupby(["month_num", "month_name"])
+        .size()
+        .reset_index(name="Evictions")
+        .sort_values("month_num")
+    )
+    fig_seasonal = px.bar(
+        seasonal,
+        x="month_name",
+        y="Evictions",
+        title="Eviction Volume by Month (All Years)",
+        color="Evictions",
+        color_continuous_scale=COLOR_SCALE,
+        text="Evictions",
+    )
+    fig_seasonal.update_traces(texttemplate="%{text:,}", textposition="outside")
+    st.plotly_chart(apply_chart_theme(fig_seasonal), use_container_width=True)
+
+    peak_month = seasonal.sort_values("Evictions", ascending=False).iloc[0]
+    info_box(
+        f"Historically highest eviction month: "
+        f"{peak_month['month_name']} — "
+        f"{int(peak_month['Evictions']):,} evictions on record"
+    )
- # --- Summary Metrics ---
- bucket = st.selectbox(
- "Time bucket", ["Monthly", "Weekly", "Daily"], index=0, key="metrics_bucket"
- ) # noqa: E501
- st.subheader(f"Summary of Evictions — Current {bucket}")
+# ══════════════════════════════════════════════════════════════════════════════
+# TAB 2 — BOROUGH ANALYSIS
+# ══════════════════════════════════════════════════════════════════════════════
+with tab2:
+ st.markdown("### Evictions by Borough")
- if not df_filtered.empty and date_col in df_filtered.columns:
- df_metrics = df_filtered.copy()
- df_metrics[date_col] = pd.to_datetime(df_metrics[date_col], errors="coerce")
+ boro_counts = (
+ df_f[BOROUGH_COL]
+ .value_counts()
+ .reset_index()
+ .rename(columns={BOROUGH_COL: "Borough", "count": "Evictions"})
+ )
- end_period = df_metrics[date_col].max()
+ col1, col2 = st.columns(2)
+ with col1:
+ fig_map = px.choropleth_mapbox(
+ boro_counts,
+ geojson=nyc_geo,
+ locations="Borough",
+ featureidkey="properties.BoroName",
+ color="Evictions",
+ color_continuous_scale=COLOR_SCALE,
+ mapbox_style="carto-positron",
+ zoom=9.5,
+ center={"lat": 40.7128, "lon": -74.0060},
+ title="Total Evictions by Borough",
+ hover_name="Borough",
+ )
+ st.plotly_chart(apply_chart_theme(fig_map), use_container_width=True)
+
+ with col2:
+ fig_bbar = px.bar(
+ boro_counts.sort_values("Evictions", ascending=True),
+ x="Evictions",
+ y="Borough",
+ orientation="h",
+ title="Evictions by Borough",
+ color="Evictions",
+ color_continuous_scale=COLOR_SCALE,
+ text="Evictions",
+ )
+ fig_bbar.update_traces(texttemplate="%{text:,}", textposition="outside")
+ st.plotly_chart(apply_chart_theme(fig_bbar), use_container_width=True)
+
+ # borough × year heatmap
+ st.markdown("### Eviction Heatmap : Borough by Year")
+ heat = df_f.groupby([BOROUGH_COL, "year"]).size().reset_index(name="Count")
+ heat_pivot = heat.pivot_table(
+ index=BOROUGH_COL, columns="year", values="Count", aggfunc="sum"
+ ).fillna(0)
+
+ fig_heat = go.Figure(
+ go.Heatmap(
+ z=heat_pivot.to_numpy(),
+ x=[str(c) for c in heat_pivot.columns.tolist()],
+ y=heat_pivot.index.tolist(),
+ colorscale=[[0, LIGHT_BLUE], [0.5, FRESH_SKY], [1, DEEP_BLUE]],
+ text=heat_pivot.to_numpy().astype(int),
+ texttemplate="%{text:,}",
+ hoverongaps=False,
+ )
+ )
+ fig_heat.update_layout(
+ title="Evictions per Borough per Year",
+ xaxis_title="Year",
+ yaxis_title="",
+ )
+ st.plotly_chart(apply_chart_theme(fig_heat), use_container_width=True)
+
+ # borough time series
+ st.markdown("### Borough Trends Over Time")
+ bucket2 = st.selectbox("Time bucket", ["Monthly", "Weekly", "Daily"], key="t2_bucket")
+ freq2 = {"Monthly": "M", "Weekly": "W", "Daily": "D"}[bucket2]
+ df_f["Period2"] = df_f[DATE_COL].dt.to_period(freq2).dt.to_timestamp()
+ ts2 = df_f.groupby(["Period2", BOROUGH_COL]).size().reset_index(name="Evictions")
+ fig_ts2 = px.area(
+ ts2,
+ x="Period2",
+ y="Evictions",
+ color=BOROUGH_COL,
+ title=f"Eviction Trends by Borough ({bucket2})",
+ color_discrete_sequence=COLOR_SEQUENCE,
+ labels={BOROUGH_COL: "Borough", "Period2": "Period"},
+ )
+ fig_ts2.update_traces(line_width=1)
+ st.plotly_chart(apply_chart_theme(fig_ts2), use_container_width=True)
- offsets_map = {
- "Monthly": pd.DateOffset(months=1),
- "Weekly": pd.DateOffset(weeks=1),
- "Daily": pd.DateOffset(days=1),
- }
- offsets = offsets_map[bucket]
- period_label = bucket.lower().rstrip("ly")
+# ══════════════════════════════════════════════════════════════════════════════
+# TAB 3 — RESIDENTIAL VS COMMERCIAL
+# ══════════════════════════════════════════════════════════════════════════════
+with tab3:
+ st.markdown("### Residential vs Commercial Evictions")
- start_period = end_period - offsets
- start_prev = start_period - offsets
+ type_counts = (
+ df_f["type"]
+ .value_counts()
+ .reset_index()
+ .rename(columns={"type": "Type", "count": "Evictions"})
+ )
- current = df_metrics[df_metrics[date_col] > start_period]
- previous = df_metrics[
- (df_metrics[date_col] > start_prev) & (df_metrics[date_col] <= start_period)
- ]
+ col1, col2 = st.columns(2)
+ with col1:
+ fig_type_pie = go.Figure(
+ data=[
+ go.Pie(
+ labels=type_counts["Type"],
+ values=type_counts["Evictions"],
+ hole=0.55,
+ marker_colors=[DEEP_BLUE, FRESH_SKY, SLATE],
+ textinfo="label+percent",
+ textfont_size=13,
+ )
+ ]
+ )
+        # Centre label inside the donut hole: the filtered row count above the
+        # word "total". "<br>" is Plotly's line-break markup; the literal must
+        # stay on one source line to be a valid f-string.
+        fig_type_pie.update_layout(
+            title="Eviction Type Breakdown",
+            annotations=[
+                {
+                    "text": f"{len(df_f):,}<br>total",
+                    "x": 0.5,
+                    "y": 0.5,
+                    "font_size": 16,
+                    "showarrow": False,
+                    "font_color": INK_BLACK,
+                }
+            ],
+        )
+ st.plotly_chart(apply_chart_theme(fig_type_pie), use_container_width=True)
+
+ with col2:
+ # type by borough stacked bar
+ type_boro = df_f.groupby([BOROUGH_COL, "type"]).size().reset_index(name="Evictions")
+ fig_type_boro = px.bar(
+ type_boro,
+ x=BOROUGH_COL,
+ y="Evictions",
+ color="type",
+ title="Eviction Type by Borough",
+ barmode="stack",
+ color_discrete_sequence=[DEEP_BLUE, FRESH_SKY, SLATE],
+ labels={"type": "Type"},
+ )
+ st.plotly_chart(apply_chart_theme(fig_type_boro), use_container_width=True)
+
+ # residential vs commercial over time
+ st.markdown("### Type Trends Over Time")
+ bucket3 = st.selectbox("Time bucket", ["Monthly", "Weekly", "Daily"], key="t3_bucket")
+ freq3 = {"Monthly": "M", "Weekly": "W", "Daily": "D"}[bucket3]
+ df_f["Period3"] = df_f[DATE_COL].dt.to_period(freq3).dt.to_timestamp()
+ ts3 = df_f.groupby(["Period3", "type"]).size().reset_index(name="Evictions")
+ fig_ts3 = px.line(
+ ts3,
+ x="Period3",
+ y="Evictions",
+ color="type",
+ markers=True,
+ title=f"Residential vs Commercial Evictions Over Time ({bucket3})",
+ color_discrete_sequence=[DEEP_BLUE, FRESH_SKY, SLATE],
+ labels={"type": "Type", "Period3": "Period"},
+ )
+ st.plotly_chart(apply_chart_theme(fig_ts3), use_container_width=True)
+
+ # residential share over time
+ st.markdown("### Residential Share Over Time")
+ res_share = df_f.groupby(["Period3", "type"]).size().reset_index(name="Count")
+ res_total = res_share.groupby("Period3")["Count"].transform("sum")
+ res_share["Share (%)"] = (res_share["Count"] / res_total * 100).round(1)
+ res_only = res_share[res_share["type"].str.contains("Residential", na=False)]
+ fig_share = px.line(
+ res_only,
+ x="Period3",
+ y="Share (%)",
+ title="Residential Eviction Share Over Time (%)",
+ markers=True,
+ color_discrete_sequence=[DEEP_BLUE],
+ labels={"Period3": "Period"},
+ )
+ fig_share.add_hline(
+ y=50,
+ line_dash="dash",
+ line_color=SLATE,
+ annotation_text="50% threshold",
+ annotation_font_color=SLATE,
+ )
+ st.plotly_chart(apply_chart_theme(fig_share), use_container_width=True)
- # --- 1. Total evictions ---
- current_total = len(current)
- previous_total = len(previous)
+ res_avg = res_only["Share (%)"].mean()
+ info_box(f"On average, {res_avg:.1f}% of all evictions are residential")
- if previous_total > 0:
- total_pct = ((current_total - previous_total) / previous_total) * 100
- total_delta = f"{total_pct:+.1f}%"
- else:
- total_delta = None
- # --- 2. Borough with highest evictions ---
- current_boro = current[borough_col].value_counts()
- previous_boro = previous[borough_col].value_counts()
+# ══════════════════════════════════════════════════════════════════════════════
+# TAB 4 — ANOMALY DETECTION
+# ══════════════════════════════════════════════════════════════════════════════
+with tab4:
+ st.markdown("### Anomaly Detection — Unusual Eviction Months")
- top_boro = current_boro.idxmax() if not current_boro.empty else "N/A"
- top_boro_count = int(current_boro.max()) if not current_boro.empty else 0
- prev_boro_count = int(previous_boro.get(top_boro, 0))
- boro_delta = f"{top_boro_count - prev_boro_count:+,}"
+ with st.expander("📋 How is anomaly detection calculated?", expanded=False):
+ st.markdown(f"""
+ We calculate the **mean** and **standard deviation** of monthly eviction counts
+ across all available data. Any month where evictions exceed
+ **mean + {ANOMALY_STD:.0f} × standard deviation** is flagged as an anomaly.
- # --- 3. Building type with highest evictions ---
- current_build = current[building_col].value_counts()
- previous_build = previous[building_col].value_counts()
+ This method helps identify months with unusually high eviction activity
+ that may warrant policy attention or further investigation.
- top_build = current_build.idxmax() if not current_build.empty else "N/A"
- top_build_count = int(current_build.max()) if not current_build.empty else 0
- prev_build_count = int(previous_build.get(top_build, 0))
+ > ⚠️ Note: Anomalies may reflect real spikes in evictions, or data reporting
+ > artifacts (e.g. backlogs being processed in a single month).
+ """)
- if prev_build_count > 0:
- build_pct = ((top_build_count - prev_build_count) / prev_build_count) * 100
- build_delta = f"{build_pct:+.1f}%"
- else:
- build_delta = None
+ monthly = df_f.groupby("month").size().reset_index(name="Count")
+ mean_val = monthly["Count"].mean()
+ std_val = monthly["Count"].std()
+ threshold = mean_val + ANOMALY_STD * std_val
- # --- Display metrics ---
- col1, col2, col3 = st.columns(3)
+ monthly["Anomaly"] = monthly["Count"] > threshold
+ monthly["Color"] = monthly["Anomaly"].map({True: DANGER, False: DEEP_BLUE})
- col1.metric(
- label=f"Total Evictions (vs previous {period_label})",
- value=f"{current_total:,}",
- delta=total_delta,
- border=True,
- )
+ # main anomaly chart
+ fig_anomaly = go.Figure()
- col2.metric(
- label="Borough with Highest Evictions",
- value=top_boro,
- delta=boro_delta,
- border=True,
- )
+ # normal bars
+ normal = monthly[~monthly["Anomaly"]]
+ anomaly = monthly[monthly["Anomaly"]]
- col3.metric(
- label="Most Affected Building Type",
- value=top_build,
- delta=build_delta,
- border=True,
+ fig_anomaly.add_trace(
+ go.Bar(
+ x=normal["month"],
+ y=normal["Count"],
+ name="Normal",
+ marker_color=DEEP_BLUE,
+ opacity=0.8,
)
-
- # --- Evictions by Borough (Bar) ---
- st.subheader("Evictions by Borough (Filtered)")
- st.info(
- "Currently showing borough-level data. "
- "Community District (sub-borough) map is under development."
)
-
- counts = df_filtered[borough_col].fillna("Missing").value_counts().reset_index()
- counts.columns = ["Borough", "Total Evictions"]
-
- fig = px.bar(
- counts,
- x="Borough",
- y="Total Evictions",
- title="NYC Evictions by Borough",
- color="Borough",
- color_discrete_sequence=px.colors.qualitative.Dark24_r,
+ fig_anomaly.add_trace(
+ go.Bar(
+ x=anomaly["month"],
+ y=anomaly["Count"],
+ name="Anomaly",
+ marker_color=DANGER,
+ opacity=0.9,
+ )
)
- st.plotly_chart(fig, use_container_width=True)
-
- # --- Evictions by Borough (Map) ---
- st.subheader("Evictions by Borough (Map)")
-
- if not df_filtered.empty:
- nyc_geo = get_geojson()
- borough_counts = df_filtered[borough_col].value_counts().reset_index()
- borough_counts.columns = ["Borough", "Evictions"]
- borough_counts["Borough"] = borough_counts["Borough"].str.strip().str.title()
-
- fig_map = px.choropleth_mapbox(
- borough_counts,
- geojson=nyc_geo,
- locations="Borough",
- featureidkey="properties.BoroName",
- color="Evictions",
- color_continuous_scale="Reds",
- mapbox_style="carto-positron",
- zoom=9.5,
- center={"lat": 40.7128, "lon": -74.0060},
- title="Evictions by Borough (Map)",
- hover_name="Borough",
- hover_data={"Evictions": True},
+ fig_anomaly.add_hline(
+ y=mean_val,
+ line_dash="dot",
+ line_color=SLATE,
+ annotation_text=f"Mean ({mean_val:.0f})",
+ annotation_font_color=SLATE,
+ )
+ fig_anomaly.add_hline(
+ y=threshold,
+ line_dash="dash",
+ line_color=DANGER,
+ annotation_text=f"Threshold ({threshold:.0f})",
+ annotation_font_color=DANGER,
+ )
+ fig_anomaly.update_layout(
+ title="Monthly Eviction Counts with Anomaly Detection",
+ xaxis_title="Month",
+ yaxis_title="Evictions",
+ barmode="overlay",
+ showlegend=True,
+ )
+ st.plotly_chart(apply_chart_theme(fig_anomaly), use_container_width=True)
+
+ if anomaly.empty:
+ info_box("No anomalous months detected in the filtered data.")
+ else:
+ anomaly_list = anomaly.sort_values("Count", ascending=False)
+ warning_box(
+ f"{len(anomaly)} anomalous month(s) detected: "
+ + ", ".join(
+ [
+ f"{row['month'].strftime('%b %Y')} ({row['Count']:,})"
+ for _, row in anomaly_list.iterrows()
+ ]
+ )
)
- fig_map.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
- st.plotly_chart(fig_map, use_container_width=True)
- # --- Evictions Over Time ---
- st.subheader("Evictions Over Time")
- bucket = st.selectbox(
- "Time bucket", ["Monthly", "Weekly", "Daily"], index=0, key="timeseries_bucket"
+ # anomaly by borough — which boroughs drive spikes
+ st.markdown("### Which Boroughs Drive the Spikes?")
+ if not anomaly.empty:
+ anomaly_months_list = anomaly["month"].tolist()
+ df_anomaly = df_f[df_f["month"].isin(anomaly_months_list)]
+ anomaly_boro = (
+ df_anomaly.groupby(BOROUGH_COL).size().reset_index(name="Evictions in Anomaly Months")
+ )
+ fig_ab = px.bar(
+ anomaly_boro.sort_values("Evictions in Anomaly Months", ascending=False),
+ x=BOROUGH_COL,
+ y="Evictions in Anomaly Months",
+ title="Evictions by Borough During Anomalous Months",
+ color="Evictions in Anomaly Months",
+ color_continuous_scale=COLOR_SCALE_RISK,
+ text="Evictions in Anomaly Months",
+ )
+ fig_ab.update_traces(texttemplate="%{text:,}", textposition="outside")
+ st.plotly_chart(apply_chart_theme(fig_ab), use_container_width=True)
+ else:
+ st.info("No anomalous months in current filter selection.")
+
+ # rolling average
+ st.markdown("### Rolling Average: Smoothed Trend")
+
+ with st.expander("📋 What is a rolling average?", expanded=False):
+ st.markdown("""
+ A **rolling average** smooths out short-term fluctuations by averaging
+ each month's eviction count with preceding months.
+
+ This helps identify the **underlying trend** in eviction activity, removing
+ noise caused by seasonal spikes, data reporting delays, or one-off events.
+
+ | Term | Meaning |
+ |---|---|
+ | **Monthly Count** (bars) | Actual evictions recorded that month |
+ | **Rolling Avg** (line) | Average of current + N prior months |
+
+ > 📌 When the rolling average is **rising**, eviction activity is trending up.
+ > When it is **falling**, the trend is improving.
+ """)
+
+ roll_window = st.slider(
+ "Rolling window (months)",
+ min_value=2,
+ max_value=12,
+ value=3,
+ step=1,
+ key="roll_slider",
)
- freq_map = {"Monthly": "M", "Weekly": "W-MON", "Daily": "D"}
- freq = freq_map[bucket]
- df_ts = df_filtered.copy()
- df_ts["Period"] = df_ts[date_col].dt.to_period(freq).dt.to_timestamp()
- ts = df_ts.groupby(["Period", borough_col]).size().reset_index(name="Evictions")
+ monthly_sorted = monthly.sort_values("month").copy()
+ monthly_sorted["Rolling Avg"] = (
+ monthly_sorted["Count"].rolling(roll_window, min_periods=1).mean()
+ )
- fig_ts = px.line(
- ts,
- x="Period",
- y="Evictions",
- color=borough_col,
- markers=True,
- title="Evictions by Borough Over Time",
+ fig_roll = go.Figure()
+ fig_roll.add_trace(
+ go.Bar(
+ x=monthly_sorted["month"],
+ y=monthly_sorted["Count"],
+ name="Monthly Count",
+ marker_color=LIGHT_BLUE,
+ opacity=0.6,
+ )
+ )
+    # Smoothed trend line. The legend label follows the slider value so it
+    # always matches the window actually used (and the chart title below).
+    fig_roll.add_trace(
+        go.Scatter(
+            x=monthly_sorted["month"],
+            y=monthly_sorted["Rolling Avg"],
+            name=f"{roll_window}-Month Rolling Avg",
+            line_color=DEEP_BLUE,
+            line_width=2.5,
+            mode="lines",
+        )
)
- st.plotly_chart(fig_ts, use_container_width=True)
+ fig_roll.update_layout(
+ title=f"Monthly Evictions with {roll_window}-Month Rolling Average",
+ xaxis_title="Month",
+ yaxis_title="Evictions",
+ showlegend=True,
+ )
+ st.plotly_chart(apply_chart_theme(fig_roll), use_container_width=True)
+
+st.caption(f"⏱ Page loaded in {time.time() - start_time:.2f} seconds")
diff --git a/pages/3_Building_Complaints.py b/pages/3_Building_Complaints.py
index 03ae9c2..30e34e4 100644
--- a/pages/3_Building_Complaints.py
+++ b/pages/3_Building_Complaints.py
@@ -1,71 +1,94 @@
from __future__ import annotations
import time
-from contextlib import contextmanager
import pandas as pd
import pandas_gbq
import plotly.express as px
+import plotly.graph_objects as go
import requests
import streamlit as st
from google.oauth2 import service_account
from complaint_categories import COMPLAINT_CATEGORY_MAP
+from functions.theme import (
+ COLOR_SCALE,
+ COLOR_SCALE_RISK,
+ COLOR_SEQUENCE,
+ DANGER,
+ DEEP_BLUE,
+ FRESH_SKY,
+ LIGHT_BLUE,
+ SLATE,
+ WARNING,
+ apply_chart_theme,
+ apply_css,
+ caution_box,
+ info_box,
+ page_header,
+ warning_box,
+)
st.set_page_config(page_title="NYC Building Complaints", layout="wide")
-st.title("NYC Building Complaints Dashboard")
+apply_css()
+
+start_time = time.time()
-# Configuration
PROJECT_ID = "sipa-adv-c-cosmic-spaghetti"
DATASET = "cosmic_spaghetti"
TABLE = "complaints"
-date_col = "date_entered"
-borough_col = "borough"
-category_col = "complaint_category"
-status_col = "status"
+DATE_COL = "date_entered"
+BOROUGH_COL = "borough"
+CATEGORY_COL = "complaint_category"
+STATUS_COL = "status"
+# Priority labels from DOB
+PRIORITY_LABELS = {"A": "Emergency (A)", "B": "Urgent (B)", "C": "Normal (C)", "D": "Low (D)"}
+PRIORITY_COLORS = {
+ "Emergency (A)": DANGER,
+ "Urgent (B)": WARNING,
+ "Normal (C)": FRESH_SKY,
+ "Low (D)": SLATE,
+}
-# Page load time context manager
-@contextmanager
-def display_load_time():
- start_time = time.time()
- try:
- yield
- finally:
- elapsed = time.time() - start_time
- st.caption(f"Page loaded in {elapsed:.2f} seconds")
+# Analytics thresholds
+HIGH_COMPLAINT_THRESHOLD = 500
-# GeoJSON loader with caching
@st.cache_data(ttl=86400, show_spinner=False)
def get_geojson():
- geojson_url = "https://raw.githubusercontent.com/dwillis/nyc-maps/master/boroughs.geojson"
- response = requests.get(geojson_url)
+    """Download the NYC borough boundaries GeoJSON (cached for 24 hours).
+
+    Returns:
+        The decoded JSON document returned by the server.
+
+    Raises:
+        requests.HTTPError: if the server answers with an error status, so an
+            error page is never parsed as map data.
+        requests.Timeout: if the server does not respond within 10 seconds.
+    """
+    response = requests.get(
+        "https://raw.githubusercontent.com/dwillis/nyc-maps/master/boroughs.geojson",
+        timeout=10,  # requests has no default timeout; do not hang the page
+    )
+    response.raise_for_status()
return response.json()
-# Load from BQ
-@st.cache_data(ttl=3600, show_spinner=False)
-def load_complaints() -> pd.DataFrame:
- credentials = service_account.Credentials.from_service_account_info(
+# Build Google service-account credentials from the "gcp_service_account"
+# entry in Streamlit secrets, requesting the BigQuery OAuth scope.
+def get_credentials():
+ return service_account.Credentials.from_service_account_info(
st.secrets["gcp_service_account"],
scopes=["https://www.googleapis.com/auth/bigquery"],
)
+
+
+@st.cache_data(ttl=3600, show_spinner=False)
+def load_complaints() -> pd.DataFrame:
query = f"""
SELECT
community_board,
date_entered,
complaint_category,
- status
- FROM `{PROJECT_ID}.{DATASET}.complaints`
+ status,
+ disposition_date,
+ inspection_date
+ FROM `{PROJECT_ID}.{DATASET}.{TABLE}`
WHERE date_entered IS NOT NULL
- LIMIT 10000
+ LIMIT 200000
"""
df = pandas_gbq.read_gbq(
query,
project_id=PROJECT_ID,
- credentials=credentials,
+ credentials=get_credentials(),
progress_bar_type=None,
dtypes={
"community_board": "str",
@@ -74,8 +97,11 @@ def load_complaints() -> pd.DataFrame:
},
)
df["date_entered"] = pd.to_datetime(df["date_entered"], errors="coerce")
+ df["disposition_date"] = pd.to_datetime(df["disposition_date"], errors="coerce")
+ df["inspection_date"] = pd.to_datetime(df["inspection_date"], errors="coerce")
+ df = df.dropna(subset=["date_entered"])
- # extract borough from community_board (first digit = borough code)
+ # extract borough from community_board first digit
borough_map = {
"1": "Manhattan",
"2": "Bronx",
@@ -84,194 +110,628 @@ def load_complaints() -> pd.DataFrame:
"5": "Staten Island",
}
df["borough"] = df["community_board"].str[0].map(borough_map).fillna("Unknown")
+
+ # add readable category description
df["complaint_desc"] = (
df["complaint_category"].map(COMPLAINT_CATEGORY_MAP).fillna(df["complaint_category"])
)
- return df
-
-
-# Main page
-with display_load_time():
- with st.spinner("Loading complaints data from BigQuery..."):
- df = load_complaints()
- if df.empty:
- st.error("No rows returned from BigQuery")
- st.stop()
+ # add priority from category map (A/B/C/D)
+ priority_map = {
+ "01": "A",
+ "03": "A",
+ "10": "A",
+ "12": "A",
+ "13": "A",
+ "14": "A",
+ "16": "A",
+ "18": "A",
+ "20": "A",
+ "30": "A",
+ "37": "A",
+ "50": "A",
+ "56": "A",
+ "62": "A",
+ "65": "A",
+ "67": "A",
+ "76": "A",
+ "81": "A",
+ "82": "A",
+ "86": "A",
+ "89": "A",
+ "91": "A",
+ "5B": "A",
+ "5C": "A",
+ "2B": "A",
+ "1E": "A",
+ "2E": "A",
+ "04": "B",
+ "05": "B",
+ "06": "B",
+ "09": "B",
+ "15": "B",
+ "21": "B",
+ "23": "B",
+ "45": "B",
+ "52": "B",
+ "54": "B",
+ "58": "B",
+ "59": "B",
+ "63": "B",
+ "66": "B",
+ "71": "B",
+ "75": "B",
+ "78": "B",
+ "83": "B",
+ "88": "B",
+ "92": "B",
+ "93": "B",
+ "1A": "B",
+ "1B": "B",
+ "1D": "B",
+ "1G": "B",
+ "2A": "B",
+ "2C": "B",
+ "2D": "B",
+ "3A": "B",
+ "4A": "B",
+ "4B": "B",
+ "4G": "B",
+ "5A": "B",
+ "5F": "B",
+ "5G": "B",
+ "29": "C",
+ "31": "C",
+ "49": "C",
+ "73": "C",
+ "74": "C",
+ "77": "C",
+ "79": "C",
+ "85": "C",
+ "90": "C",
+ "94": "C",
+ "2G": "C",
+ "4W": "C",
+ "6A": "C",
+ "35": "D",
+ "53": "D",
+ "55": "D",
+ "80": "D",
+ "1K": "D",
+ "1Z": "D",
+ "2F": "D",
+ "2H": "D",
+ "2J": "D",
+ "2K": "D",
+ "2L": "D",
+ "2M": "D",
+ "4C": "D",
+ "4D": "D",
+ "4F": "D",
+ "4J": "D",
+ "4K": "D",
+ "4L": "D",
+ "4M": "D",
+ "4N": "D",
+ "4P": "D",
+ }
+ df["priority"] = df["complaint_category"].map(priority_map).fillna("C")
+ df["priority_label"] = df["priority"].map(PRIORITY_LABELS).fillna("Normal (C)")
- st.success(f"Loaded {len(df):,} rows (last 12 months)")
+ # response time in days
+ df["resp_days"] = (df["disposition_date"] - df["date_entered"]).dt.days.clip(lower=0)
- # ── Filters ───────────────────────────────────────────────────────────────
- st.subheader("Filters")
+ df["year"] = df["date_entered"].dt.year
+ df["month"] = df["date_entered"].dt.to_period("M").dt.to_timestamp()
+ return df
- with st.expander("Set Filters", expanded=False):
- borough_options = sorted(df[borough_col].dropna().astype(str).unique().tolist())
- selected_borough = st.multiselect("Borough", borough_options, default=borough_options)
- category_options = sorted(df[category_col].dropna().astype(str).unique().tolist())
- selected_category = st.multiselect(
- "Complaint Category", category_options, default=category_options
- )
+# ── Load data ─────────────────────────────────────────────────────────────────
+with st.spinner("Loading complaints data..."):
+ df = load_complaints()
+ nyc_geo = get_geojson()
+
+if df.empty:
+ st.error("No data returned from BigQuery.")
+ st.stop()
+
+# ── Page header ───────────────────────────────────────────────────────────────
+page_header(
+ "⚠️ NYC Building Complaints",
+ "Explore building complaints filed with the NYC Department of Buildings — "
+ "categories, priorities, borough trends, and response times.",
+)
+
+# ── Top-level warning ─────────────────────────────────────────────────────────
+# Count Priority A rows once on the unfiltered frame; the KPI row reuses this value.
+emergency_count = int((df["priority"] == "A").sum())
+# Only show the banner when the emergency total exceeds the page-level threshold.
+if emergency_count > HIGH_COMPLAINT_THRESHOLD:
+    warning_box(
+        f"{emergency_count:,} emergency (Priority A) complaints on record — "
+        "these require immediate DOB response."
+    )
- status_options = sorted(df[status_col].dropna().astype(str).unique().tolist())
- selected_status = st.multiselect("Status", status_options, default=status_options)
+# ── KPI row ───────────────────────────────────────────────────────────────────
+# Headline figures are computed on the unfiltered frame.
+total = len(df)
+resolved = len(df[df["status"].str.upper().str.contains("CLOSED|RESOLVED|DONE", na=False)])
+resolution_rate = resolved / total * 100 if total > 0 else 0
+# mean() is NaN when no row has a response time.
+avg_response = df["resp_days"].dropna().mean()
+top_boro = df["borough"].value_counts().idxmax() if not df.empty else "N/A"
+# NaN is truthy and 0.0 is falsy, so test for a missing value explicitly
+# instead of relying on truthiness.
+avg_response_text = f"{avg_response:.0f} days" if pd.notna(avg_response) else "N/A"
+
+c1, c2, c3, c4, c5 = st.columns(5)
+c1.metric("Total Complaints", f"{total:,}", border=True)
+c2.metric("Emergency (Priority A)", f"{emergency_count:,}", border=True)
+c3.metric("Resolution Rate", f"{resolution_rate:.1f}%", border=True)
+c4.metric("Avg Response Time", avg_response_text, border=True)
+c5.metric("Most Complaints", top_boro, border=True)
+
+st.divider()
+
+# ── Filters ───────────────────────────────────────────────────────────────────
+# Three multiselects, one per column; each defaults to every value present in the data.
+with st.expander("🔍 Filters", expanded=False):
+ fcols = st.columns(3)
+ borough_opts = sorted(df[BOROUGH_COL].dropna().unique())
+ selected_borough = fcols[0].multiselect("Borough", borough_opts, default=borough_opts)
+
+ status_opts = sorted(df[STATUS_COL].dropna().unique())
+ selected_status = fcols[1].multiselect("Status", status_opts, default=status_opts)
+
+ priority_opts = sorted(df["priority_label"].dropna().unique())
+ selected_priority = fcols[2].multiselect("Priority", priority_opts, default=priority_opts)
+
+# df_f is the filtered frame used by the tabs below.
+# An empty selection is falsy, so clearing a multiselect disables that filter
+# rather than filtering everything out.
+df_f = df.copy()
+if selected_borough:
+ df_f = df_f[df_f[BOROUGH_COL].isin(selected_borough)]
+if selected_status:
+ df_f = df_f[df_f[STATUS_COL].isin(selected_status)]
+if selected_priority:
+ df_f = df_f[df_f["priority_label"].isin(selected_priority)]
+
+st.caption(f"{len(df_f):,} complaints after filtering")
+st.divider()
+
+# ── Tabs ──────────────────────────────────────────────────────────────────────
+tab1, tab2, tab3, tab4 = st.tabs(
+ [
+ "📊 Overview",
+ "🗺️ Borough Analysis",
+ "📋 Complaint Categories",
+ "⏱️ Response & Status",
+ ]
+)
+
+
+# ══════════════════════════════════════════════════════════════════════════════
+# TAB 1 — OVERVIEW
+# ══════════════════════════════════════════════════════════════════════════════
+with tab1:
+ st.markdown("### Complaint Volume Over Time")
+
+ with st.expander("📋 Understanding Complaint Priorities", expanded=False):
+ st.markdown("""
+ The NYC Department of Buildings assigns a **priority level** to each complaint
+ based on the potential risk to public safety:
+
+ | Priority | Label | Meaning | Target Response |
+ |---|---|---|---|
+ | **A** | Emergency | Immediate danger to life or property | Same day inspection |
+ | **B** | Urgent | Significant safety risk | Within 5 business days |
+ | **C** | Normal | Non-urgent code violation | Within 30 days |
+ | **D** | Low | Administrative or tracking complaint | As resources allow |
+
+ 🔗 Source: [NYC DOB Complaint Categories](https://www.nyc.gov/assets/buildings/pdf/complaint_category.pdf)
+ """)
+
+    bucket = st.selectbox("Time bucket", ["Monthly", "Weekly", "Daily"], key="t1_bucket")
+    # pandas period alias for the bucket label chosen in the selectbox.
+    period_alias = {"Monthly": "M", "Weekly": "W", "Daily": "D"}[bucket]
+
+    # Rebind df_f through assign() instead of writing a column into a frame that
+    # came from boolean indexing; this avoids pandas' SettingWithCopyWarning.
+    # The "Period" column is also grouped on by the priority trend chart in the
+    # Complaint Categories tab.
+    df_f = df_f.assign(
+        Period=df_f[DATE_COL].dt.to_period(period_alias).dt.to_timestamp()
+    )
- df_filtered = df.copy()
- if selected_borough:
- df_filtered = df_filtered[df_filtered[borough_col].astype(str).isin(selected_borough)]
- if selected_category:
- df_filtered = df_filtered[df_filtered[category_col].astype(str).isin(selected_category)]
- if selected_status:
- df_filtered = df_filtered[df_filtered[status_col].astype(str).isin(selected_status)]
+ ts = df_f.groupby(["Period", BOROUGH_COL]).size().reset_index(name="Complaints")
- st.caption(f"Filtered rows: {len(df_filtered):,}")
+ fig_ts = px.line(
+ ts,
+ x="Period",
+ y="Complaints",
+ color=BOROUGH_COL,
+ markers=True,
+ title=f"Complaints by Borough — {bucket} Trend",
+ color_discrete_sequence=COLOR_SEQUENCE,
+ labels={BOROUGH_COL: "Borough"},
+ )
+ st.plotly_chart(apply_chart_theme(fig_ts), use_container_width=True)
- # summary metrics
- bucket = st.selectbox(
- "Time bucket", ["Monthly", "Weekly", "Daily"], index=0, key="complaints_bucket"
+ # priority breakdown over time — stacked area
+ st.markdown("### Priority Breakdown Over Time")
+ ts_priority = df_f.groupby(["Period", "priority_label"]).size().reset_index(name="Count")
+ fig_pri_ts = px.area(
+ ts_priority,
+ x="Period",
+ y="Count",
+ color="priority_label",
+ title=f"Complaint Priority Over Time ({bucket})",
+ color_discrete_map=PRIORITY_COLORS,
+ labels={"priority_label": "Priority"},
+ )
+ fig_pri_ts.update_traces(line_width=1)
+ st.plotly_chart(apply_chart_theme(fig_pri_ts), use_container_width=True)
+
+ # year over year comparison
+ st.markdown("### Year-over-Year Comparison")
+ yearly = df_f.groupby("year").size().reset_index(name="Complaints")
+ fig_yoy = px.bar(
+ yearly,
+ x="year",
+ y="Complaints",
+ title="Total Complaints by Year",
+ color="Complaints",
+ color_continuous_scale=COLOR_SCALE,
+ text="Complaints",
)
+ fig_yoy.update_traces(texttemplate="%{text:,}", textposition="outside")
+ st.plotly_chart(apply_chart_theme(fig_yoy), use_container_width=True)
- st.subheader(f"Summary of Complaints — Current {bucket}")
-
- if not df_filtered.empty:
- end_period = df_filtered[date_col].max()
- offsets_map = {
- "Monthly": pd.DateOffset(months=1),
- "Weekly": pd.DateOffset(weeks=1),
- "Daily": pd.DateOffset(days=1),
- }
- offsets = offsets_map[bucket]
- period_label = bucket.lower().rstrip("ly")
-
- start_period = end_period - offsets
- start_prev = start_period - offsets
-
- current = df_filtered[df_filtered[date_col] > start_period]
- previous = df_filtered[
- (df_filtered[date_col] > start_prev) & (df_filtered[date_col] <= start_period)
- ]
-
- # total complaints
- current_total = len(current)
- previous_total = len(previous)
- if previous_total > 0:
- total_pct = ((current_total - previous_total) / previous_total) * 100
- total_delta = f"{total_pct:+.1f}%"
- else:
- total_delta = None
-
- # borough with most complaints
- current_boro = current[borough_col].value_counts()
- previous_boro = previous[borough_col].value_counts()
- top_boro = current_boro.idxmax() if not current_boro.empty else "N/A"
- top_boro_count = int(current_boro.max()) if not current_boro.empty else 0
- prev_boro_count = int(previous_boro.get(top_boro, 0))
- boro_delta = f"{top_boro_count - prev_boro_count:+,}"
-
- # most common complaint category
- current_cat = current[category_col].value_counts()
- top_cat = current_cat.idxmax() if not current_cat.empty else "N/A"
-
- col1, col2, col3 = st.columns(3)
- col1.metric(
- label=f"Total Complaints (vs previous {period_label})",
- value=f"{current_total:,}",
- delta=total_delta,
- border=True,
- )
- col2.metric(
- label="Borough with Most Complaints",
- value=top_boro,
- delta=boro_delta,
- border=True,
- )
- col3.metric(
- label="Most Common Category",
- value=top_cat,
- border=True,
- )
+    # A filter combination can leave zero rows; without this guard .iloc[0]
+    # raises IndexError and the whole page fails to render.
+    if not yearly.empty:
+        peak_yr = yearly.sort_values("Complaints", ascending=False).iloc[0]
+        info_box(
+            f"Peak complaint year: {int(peak_yr['year'])} — "
+            f"{int(peak_yr['Complaints']):,} complaints filed"
+        )
- # complaints by borough
- st.subheader("Complaints by Borough")
- counts = df_filtered[borough_col].fillna("Missing").value_counts().reset_index()
- counts.columns = ["Borough", "Total Complaints"]
+# ══════════════════════════════════════════════════════════════════════════════
+# TAB 2 — BOROUGH ANALYSIS
+# ══════════════════════════════════════════════════════════════════════════════
+with tab2:
+ st.markdown("### Complaints by Borough")
- fig_bar = px.bar(
- counts,
- x="Borough",
- y="Total Complaints",
- title="NYC Building Complaints by Borough",
- color="Borough",
- color_discrete_sequence=px.colors.qualitative.Dark24_r,
+ boro_counts = (
+ df_f[BOROUGH_COL]
+ .value_counts()
+ .reset_index()
+ .rename(columns={BOROUGH_COL: "Borough", "count": "Complaints"})
)
- st.plotly_chart(fig_bar, use_container_width=True)
-
- # complaints by borough (map)
- st.subheader("Complaints by Borough (Map)")
+ boro_counts["Borough"] = boro_counts["Borough"].str.title()
- if not df_filtered.empty:
- nyc_geo = get_geojson()
- borough_counts = df_filtered[borough_col].value_counts().reset_index()
- borough_counts.columns = ["Borough", "Complaints"]
- borough_counts["Borough"] = borough_counts["Borough"].str.strip().str.title()
-
- fig_map = px.choropleth_mapbox(
- borough_counts,
+ col1, col2 = st.columns(2)
+ with col1:
+ fig_bmap = px.choropleth_mapbox(
+ boro_counts,
geojson=nyc_geo,
locations="Borough",
featureidkey="properties.BoroName",
color="Complaints",
- color_continuous_scale="Reds",
+ color_continuous_scale=COLOR_SCALE,
mapbox_style="carto-positron",
zoom=9.5,
center={"lat": 40.7128, "lon": -74.0060},
- title="Building Complaints by Borough (Map)",
+ title="Total Complaints by Borough",
hover_name="Borough",
- hover_data={"Complaints": True},
)
- fig_map.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
- st.plotly_chart(fig_map, use_container_width=True)
+ st.plotly_chart(apply_chart_theme(fig_bmap), use_container_width=True)
+
+ with col2:
+ # bar chart instead of redundant donut
+ fig_bbar = px.bar(
+ boro_counts.sort_values("Complaints", ascending=True),
+ x="Complaints",
+ y="Borough",
+ orientation="h",
+ title="Complaints by Borough",
+ color="Complaints",
+ color_continuous_scale=COLOR_SCALE,
+ text="Complaints",
+ )
+ fig_bbar.update_traces(texttemplate="%{text:,}", textposition="outside")
+ st.plotly_chart(apply_chart_theme(fig_bbar), use_container_width=True)
+
+ # borough × priority heatmap
+ st.markdown("### Priority Heatmap by Borough")
+ heat_data = df_f.groupby([BOROUGH_COL, "priority_label"]).size().reset_index(name="Count")
+ heat_pivot = heat_data.pivot_table(
+ index=BOROUGH_COL, columns="priority_label", values="Count", aggfunc="sum"
+ ).fillna(0)
+
+ fig_heat = go.Figure(
+ go.Heatmap(
+ z=heat_pivot.to_numpy(),
+ x=heat_pivot.columns.tolist(),
+ y=[b.title() for b in heat_pivot.index.tolist()],
+ colorscale=[[0, LIGHT_BLUE], [0.5, FRESH_SKY], [1, DEEP_BLUE]],
+ text=heat_pivot.to_numpy().astype(int),
+ texttemplate="%{text:,}",
+ hoverongaps=False,
+ )
+ )
+ fig_heat.update_layout(
+ title="Complaint Priority by Borough",
+ xaxis_title="Priority",
+ yaxis_title="",
+ )
+ st.plotly_chart(apply_chart_theme(fig_heat), use_container_width=True)
+
+ # complaint type by borough — sunburst
+ st.markdown("### Complaint Types by Borough")
+ sun_data = df_f.groupby([BOROUGH_COL, "complaint_desc"]).size().reset_index(name="Count")
+ sun_data[BOROUGH_COL] = sun_data[BOROUGH_COL].str.title()
+ # keep top 15 categories to avoid clutter
+ top15 = df_f["complaint_desc"].value_counts().head(15).index.tolist()
+ sun_data = sun_data[sun_data["complaint_desc"].isin(top15)]
+
+ fig_sun = px.sunburst(
+ sun_data,
+ path=[BOROUGH_COL, "complaint_desc"],
+ values="Count",
+ title="Top 15 Complaint Types by Borough",
+ color_discrete_sequence=COLOR_SEQUENCE,
+ )
+ fig_sun.update_traces(textinfo="label+percent parent", insidetextfont_size=11)
+ st.plotly_chart(apply_chart_theme(fig_sun), use_container_width=True)
+
+
+# ══════════════════════════════════════════════════════════════════════════════
+# TAB 3 — COMPLAINT CATEGORIES
+# ══════════════════════════════════════════════════════════════════════════════
+with tab3:
+ st.markdown("### Top Complaint Categories")
+ st.caption(
+ "Complaint categories follow NYC DOB classification codes. "
+ "Priority A = Emergency, B = Urgent, C = Normal, D = Low."
+ )
- # Complaint categories
- st.subheader("Top 10 Complaint Categories")
+ n_top = st.slider("Number of top categories to show", 5, 30, 15, key="cat_slider")
- top_categories = (
- df_filtered["complaint_desc"].fillna("Unknown").value_counts().head(10).reset_index()
- )
- top_categories.columns = ["Category", "Count"]
-
- fig_cat = px.bar(
- top_categories,
- x="Count",
- y="Category",
- orientation="h",
- title="Top 10 Complaint Categories",
- color="Count",
- color_continuous_scale="Reds",
+ top_cats = (
+ df_f["complaint_desc"]
+ .value_counts()
+ .head(n_top)
+ .reset_index()
+ .rename(columns={"complaint_desc": "Category", "count": "Count"})
)
- fig_cat.update_layout(yaxis={"categoryorder": "total ascending"})
- st.plotly_chart(fig_cat, use_container_width=True)
- # Complaints over time
- st.subheader("Complaints Over Time")
+ col1, col2 = st.columns(2)
+ with col1:
+ fig_hbar = px.bar(
+ top_cats,
+ x="Count",
+ y="Category",
+ orientation="h",
+ title=f"Top {n_top} Complaint Categories",
+ color="Count",
+ color_continuous_scale=COLOR_SCALE,
+ text="Count",
+ )
+ fig_hbar.update_layout(yaxis={"categoryorder": "total ascending"})
+ fig_hbar.update_traces(texttemplate="%{text:,}", textposition="outside")
+ st.plotly_chart(apply_chart_theme(fig_hbar), use_container_width=True)
+
+    with col2:
+        # treemap of categories
+        fig_tree = px.treemap(
+            top_cats,
+            path=["Category"],
+            values="Count",
+            title=f"Top {n_top} Categories — Treemap",
+            color="Count",
+            color_continuous_scale=COLOR_SCALE,
+        )
+        # "<br>" is Plotly's line break inside a text template: category label
+        # on the first line, thousands-separated count on the second.
+        fig_tree.update_traces(
+            texttemplate="%{label}<br>%{value:,}",
+            textfont_size=12,
+        )
+        st.plotly_chart(apply_chart_theme(fig_tree), use_container_width=True)
+
+ # priority distribution
+ st.markdown("### Complaints by Priority Level")
+
+ pri_counts = (
+ df_f["priority_label"]
+ .value_counts()
+ .reset_index()
+ .rename(columns={"priority_label": "Priority", "count": "Count"})
+ )
- bucket_ts = st.selectbox(
- "Time bucket", ["Monthly", "Weekly", "Daily"], index=0, key="complaints_timeseries"
+ col1, col2 = st.columns(2)
+ with col1:
+ fig_pri = px.bar(
+ pri_counts.sort_values("Count", ascending=True),
+ x="Count",
+ y="Priority",
+ orientation="h",
+ title="Total Complaints by Priority Level",
+ color="Priority",
+ color_discrete_map=PRIORITY_COLORS,
+ text="Count",
+ )
+ fig_pri.update_traces(texttemplate="%{text:,}", textposition="outside")
+ st.plotly_chart(apply_chart_theme(fig_pri), use_container_width=True)
+
+ with col2:
+ # line chart per priority over time
+ ts_pri = df_f.groupby(["Period", "priority_label"]).size().reset_index(name="Count")
+ fig_pri_line = px.line(
+ ts_pri,
+ x="Period",
+ y="Count",
+ color="priority_label",
+ markers=True,
+ title="Priority Trend Over Time",
+ color_discrete_map=PRIORITY_COLORS,
+ labels={"priority_label": "Priority"},
+ )
+ fig_pri_line.update_traces(line_width=2)
+ st.plotly_chart(apply_chart_theme(fig_pri_line), use_container_width=True)
+
+ # emergency complaints by borough
+ st.markdown("### Emergency Complaints (Priority A) by Borough")
+ emerg = (
+ df_f[df_f["priority"] == "A"][BOROUGH_COL]
+ .value_counts()
+ .reset_index()
+ .rename(columns={BOROUGH_COL: "Borough", "count": "Emergency Complaints"})
+ )
+ emerg["Borough"] = emerg["Borough"].str.title()
+ fig_emerg = px.bar(
+ emerg,
+ x="Borough",
+ y="Emergency Complaints",
+ title="Emergency (Priority A) Complaints by Borough",
+ color="Emergency Complaints",
+ color_continuous_scale=COLOR_SCALE_RISK,
+ text="Emergency Complaints",
)
- freq_map = {"Monthly": "M", "Weekly": "W-MON", "Daily": "D"}
- freq = freq_map[bucket_ts]
+ fig_emerg.update_traces(texttemplate="%{text:,}", textposition="outside")
+ st.plotly_chart(apply_chart_theme(fig_emerg), use_container_width=True)
+
+ emerg_top = emerg.iloc[0] if not emerg.empty else None
+ if emerg_top is not None:
+ warning_box(
+ f"{emerg_top['Borough']} has the most emergency complaints — "
+ f"{int(emerg_top['Emergency Complaints']):,} Priority A filings."
+ )
- df_ts = df_filtered.copy()
- df_ts["Period"] = df_ts[date_col].dt.to_period(freq).dt.to_timestamp()
- ts = df_ts.groupby(["Period", borough_col]).size().reset_index(name="Complaints")
- fig_ts = px.line(
- ts,
- x="Period",
- y="Complaints",
- color=borough_col,
- markers=True,
- title="Building Complaints by Borough Over Time",
+# ══════════════════════════════════════════════════════════════════════════════
+# TAB 4 — RESPONSE & STATUS
+# ══════════════════════════════════════════════════════════════════════════════
+with tab4:
+ st.markdown("### Complaint Status Breakdown")
+
+ status_counts = (
+ df_f[STATUS_COL]
+ .value_counts()
+ .reset_index()
+ .rename(columns={STATUS_COL: "Status", "count": "Count"})
)
- st.plotly_chart(fig_ts, use_container_width=True)
+
+ col1, col2 = st.columns(2)
+ with col1:
+ fig_status = px.pie(
+ status_counts,
+ names="Status",
+ values="Count",
+ title="Complaint Status Distribution",
+ color_discrete_sequence=COLOR_SEQUENCE,
+ hole=0.45,
+ )
+ fig_status.update_traces(textinfo="label+percent", textfont_size=12)
+ st.plotly_chart(apply_chart_theme(fig_status), use_container_width=True)
+
+ with col2:
+ fig_status_bar = px.bar(
+ status_counts.sort_values("Count", ascending=False),
+ x="Status",
+ y="Count",
+ title="Complaint Status Counts",
+ color="Count",
+ color_continuous_scale=COLOR_SCALE,
+ text="Count",
+ )
+ fig_status_bar.update_traces(texttemplate="%{text:,}", textposition="outside")
+ st.plotly_chart(apply_chart_theme(fig_status_bar), use_container_width=True)
+
+ # response time analysis
+ st.markdown("### Response Time Analysis")
+
+ with st.expander("📋 How is response time calculated?", expanded=False):
+ st.markdown("""
+ **Response time** is calculated as the number of days between when a complaint was
+ filed (`date_entered`) and when it was resolved (`disposition_date`).
+
+ | Priority | DOB Target Response Time |
+ |---|---|
+ | **A — Emergency** | Same day inspection |
+ | **B — Urgent** | Within 5 business days |
+ | **C — Normal** | Within 30 days |
+ | **D — Low** | As resources allow |
+
+ > ⚠️ Complaints without a `disposition_date` are still open and excluded from
+ > response time calculations.
+ """)
+
+ df_resp = df_f.dropna(subset=["resp_days"])
+
+ if df_resp.empty:
+ st.info("No response time data available.")
+ else:
+ avg_resp = df_resp["resp_days"].mean()
+ median_resp = df_resp["resp_days"].median()
+ max_resp = df_resp["resp_days"].max()
+
+ r1, r2, r3 = st.columns(3)
+ r1.metric("Avg Response Time", f"{avg_resp:.1f} days", border=True)
+ r2.metric("Median Response Time", f"{median_resp:.0f} days", border=True)
+ r3.metric("Longest Response", f"{max_resp:.0f} days", border=True)
+
+ # response time by borough
+ resp_boro = (
+ df_resp.groupby(BOROUGH_COL)["resp_days"]
+ .mean()
+ .round(1)
+ .reset_index()
+ .rename(columns={BOROUGH_COL: "Borough", "resp_days": "Avg Days"})
+ .sort_values("Avg Days", ascending=False)
+ )
+ resp_boro["Borough"] = resp_boro["Borough"].str.title()
+
+ fig_resp = px.bar(
+ resp_boro,
+ x="Borough",
+ y="Avg Days",
+ title="Average Response Time by Borough (days)",
+ color="Avg Days",
+ color_continuous_scale=COLOR_SCALE_RISK,
+ text="Avg Days",
+ )
+ fig_resp.update_traces(texttemplate="%{text:.1f}", textposition="outside")
+ st.plotly_chart(apply_chart_theme(fig_resp), use_container_width=True)
+
+ # response time by priority
+ resp_pri = (
+ df_resp.groupby("priority_label")["resp_days"]
+ .mean()
+ .round(1)
+ .reset_index()
+ .rename(columns={"priority_label": "Priority", "resp_days": "Avg Days"})
+ )
+ fig_resp_pri = px.bar(
+ resp_pri.sort_values("Avg Days", ascending=False),
+ x="Priority",
+ y="Avg Days",
+ title="Average Response Time by Priority Level (days)",
+ color="Priority",
+ color_discrete_map=PRIORITY_COLORS,
+ text="Avg Days",
+ )
+ fig_resp_pri.update_traces(texttemplate="%{text:.1f}d", textposition="outside")
+ st.plotly_chart(apply_chart_theme(fig_resp_pri), use_container_width=True)
+
+        # resp_boro is sorted by "Avg Days" descending, so the first row is the slowest borough.
+        slowest_boro = resp_boro.iloc[0]
+        # Flag the slowest borough only when its average exceeds 30 days;
+        # otherwise show the overall average.
+        if slowest_boro["Avg Days"] > 30:  # noqa: PLR2004
+            caution_box(
+                f"{slowest_boro['Borough']} has the slowest average response time — "
+                f"{slowest_boro['Avg Days']:.1f} days."
+            )
+        else:
+            info_box(
+                f"Average response time across all boroughs: {avg_resp:.1f} days"
+            )
+
+ # response time distribution histogram
+ st.markdown("### Response Time Distribution")
+ year = 365
+ fig_hist = px.histogram(
+ df_resp[df_resp["resp_days"] <= year],
+ x="resp_days",
+ nbins=50,
+ title="Distribution of Response Times (days, capped at 1 year)",
+ color_discrete_sequence=[DEEP_BLUE],
+ labels={"resp_days": "Days to Resolution"},
+ )
+ fig_hist.update_layout(bargap=0.05)
+ st.plotly_chart(apply_chart_theme(fig_hist), use_container_width=True)
+
+st.caption(f"⏱ Page loaded in {time.time() - start_time:.2f} seconds")
diff --git a/streamlit_app.py b/streamlit_app.py
index 98afafe..c8c33f8 100644
--- a/streamlit_app.py
+++ b/streamlit_app.py
@@ -1,10 +1,116 @@
-# import necessary libraries
import streamlit as st
-# set title of the app
+from functions.theme import apply_css, info_box, page_header
-st.title("NYC Building Insights: Unraveling the Web of NYC Building Data")
+st.set_page_config(
+ page_title="NYC Building Insights",
+ layout="wide",
+)
+apply_css()
-st.header("Cosmic Spaghetti Team")
+page_header(
+ "NYC Building Insights",
+ "Unraveling the Web of NYC Building Data",
+)
-st.markdown("### Team Members\n- Mery Hotma Situmorang\n- Najihah Ahmad Fikri")
+# ── Team ──────────────────────────────────────────────────────────────────────
+st.markdown("### Cosmic Spaghetti Team")
+st.markdown("""
+**Mery Hotma Situmorang** (mhs2231) and **Najihah Ahmad Fikri** (na3183)
+Advanced Computing for Policy, Spring 2026, Columbia SIPA
+""")
+
+st.divider()
+
+# ── About the dashboard ───────────────────────────────────────────────────────
+st.markdown("### About This Dashboard")
+st.write("""
+This dashboard explores NYC building activity data sourced from the NYC Department of Buildings
+and related city agencies. It combines six datasets into a unified view, allowing users to
+investigate construction trends, eviction patterns, building complaints, and facade safety
+across New York City's five boroughs.
+
+All data is loaded from NYC Open Data APIs into Google BigQuery and refreshed automatically
+every day at 6am UTC via GitHub Actions. The dashboard reads directly from BigQuery for fast,
+cached queries.
+""")
+
+st.divider()
+
+# ── Pages ─────────────────────────────────────────────────────────────────────
+st.markdown("### What You Can Explore")
+
+col1, col2 = st.columns(2)
+
+with col1:
+ with st.container(border=True):
+ st.markdown("**Buildings Overview**")
+ st.write("""
+ Explore NYC's total building stock, new construction activity (2008 to 2020),
+ active construction and renovation jobs (January 2025 onwards), and facade
+ inspection safety data from the Facade Inspection Safety Program (FISP).
+ Includes building density analysis by borough, permit type breakdowns,
+ and unsafe facade rate tracking across inspection cycles.
+ """)
+
+ with st.container(border=True):
+ st.markdown("**Building Evictions**")
+ st.write("""
+ Track eviction trends across the five boroughs from 2017 onwards.
+ Explore residential vs commercial breakdowns, seasonal patterns,
+ year-over-year comparisons, and anomaly detection that flags months
+ with unusually high eviction activity relative to historical averages.
+ """)
+
+with col2:
+ with st.container(border=True):
+ st.markdown("**Building Complaints**")
+ st.write("""
+ Analyze complaints filed with the NYC Department of Buildings by category,
+ priority level, borough, and status. Includes response time analysis
+ showing how long complaints take to resolve by borough and priority,
+ and trend charts tracking complaint volume over time.
+ """)
+
+ with st.container(border=True):
+ st.markdown("**Proposal**")
+ st.write("""
+ Read the original project proposal including research questions, target
+ visualizations, and how the project evolved from its initial scope to the
+ full dashboard you see today.
+ """)
+
+st.divider()
+
+# ── Data sources ──────────────────────────────────────────────────────────────
+st.markdown("### Data Sources")
+
+with st.container(border=True):
+ st.markdown("""
+| Dataset | Source | Coverage |
+|---|---|---|
+| DOB NOW Build: Approved Permits | NYC Open Data (rbx6-tga4) | January 2025 onwards |
+| DOB Permit Issuance | NYC Open Data (ipu4-2q9a) | 2008 to 2020 (New Building jobs) |
+| NYC Evictions | NYC Open Data (6z8x-wfk4) | 2017 onwards |
+| DOB Complaints Received | NYC Open Data (eabe-havv) | Most recent 200,000 records |
+| DOB NOW: Safety Facade Compliance (FISP) | NYC Open Data (xubg-57si) | 2001 onwards |
+| NYC Building Footprints | NYC Open Data (5zhs-2jue) | All buildings up to 2025 |
+""")
+
+st.divider()
+
+# ── Technical notes ───────────────────────────────────────────────────────────
+st.markdown("### Technical Notes")
+st.write("""
+All datasets are stored in Google BigQuery under the project
+sipa-adv-c-cosmic-spaghetti, dataset cosmic_spaghetti.
+Data is refreshed daily using the truncate method (full replace on each run)
+because BigQuery's free tier does not support DML operations.
+The Streamlit app uses st.cache_data with a one-hour TTL to keep page loads fast
+after the initial query.
+""")
+
+info_box(
+ "Data is refreshed daily at 6am UTC via GitHub Actions. "
+ "If charts appear outdated, try clearing the Streamlit cache by pressing C."
+)