diff --git a/complaint_categories.py b/complaint_categories.py new file mode 100644 index 0000000..b26d8c2 --- /dev/null +++ b/complaint_categories.py @@ -0,0 +1,99 @@ +COMPLAINT_CATEGORY_MAP = { + "01": "Accident – Construction/Plumbing", + "03": "Adjacent Buildings - Not Protected", + "04": "After Hours Work – Illegal", + "05": "Permit – None (Building/PA/Demo etc.)", + "06": "Construction – Change Grade/Change Watercourse", + "09": "Debris – Excessive", + "10": "Debris/Building - Falling or In Danger of Falling", + "12": "Demolition - Unsafe/Illegal/Mechanical Demo", + "13": "Elevator In (FDNY) Readiness - None", + "14": "Excavation - Undermining Adjacent Building", + "15": "Fence - None/Inadequate/Illegal", + "16": "Inadequate Support/Shoring", + "18": "Material Storage – Unsafe", + "20": "Landmark Building – Illegal Work", + "21": "Safety Net/Guardrail - Damaged/Inadequate/None (over 6 Story/75 ft.)", + "23": "Sidewalk Shed/Supported Scaffold - Inadequate/Defective/None/No Permit", + "29": "Building – Vacant, Open and Unguarded", + "30": "Building Shaking/Vibrating/Structural Stability Affected", + "31": "Certificate of Occupancy – None/Illegal/Contrary to CO", + "35": "Curb Cut/Driveway/Carport – Illegal", + "37": "Egress – Locked/Blocked/Improper/No Secondary Means", + "45": "Illegal Conversion", + "49": "Storefront or Business Sign/Awning/Marquee/Canopy – Illegal", + "50": "Sign Falling - Danger/Sign Erection or Display In Progress – Illegal", + "52": "Sprinkler System – Inadequate", + "53": "Vent/Exhaust – Illegal/Improper", + "54": "Wall/Retaining Wall – Bulging/Cracked", + "55": "Zoning – Non-conforming", + "56": "Boiler – Fumes/Smoke/Carbon Monoxide", + "58": "Boiler – Defective/Non-operative/No Permit", + "59": "Electrical Wiring – Defective/Exposed, In Progress", + "62": "Elevator - Danger Condition/Shaft Open/Unguarded (Priority A)", + "63": "Elevator - Danger Condition/Shaft Open/Unguarded (Priority B)", + "65": "Gas Hook-Up/Piping – Illegal or Defective", + "66": "Plumbing Work – Illegal/No Permit (Also Sprinkler/Standpipe)", + "67": "Crane – No Permit/License/Cert/Unsafe/Illegal", + "71": "SRO – Illegal Work/No Permit/Change In Occupancy Use", + "73": "Failure to Maintain", + "74": "Illegal Commercial/Manufacturing Use In Residential Zone", + "75": "Adult Establishment", + "76": "Unlicensed/Illegal/Improper Plumbing Work In Progress", + "77": "Contrary To LL58/87 (Handicap Access)", + "78": "Privately Owned Public Space/Non-Compliance", + "79": "Lights from Parking Lot Shining on Building", + "80": "Elevator Not Inspected/Illegal/No Permit", + "81": "Elevator – Accident", + "82": "Boiler – Accident/Explosion", + "83": "Construction – Contrary/Beyond Approved Plans/Permits", + "85": "Failure to Retain Water/Improper Drainage (LL103/89)", + "86": "Work Contrary to Stop Work Order", + "88": "Safety Net/Guard Rail - Damaged/Inadequate/None (6 fl. 75ft. or less)", + "89": "Accident – Cranes/Derricks/Suspension", + "90": "Unlicensed/Illegal Activity", + "91": "Site Conditions Endangering Workers", + "92": "Illegal Conversion of Manufacturing/Industrial Space", + "93": "Request for Retaining Wall Safety Inspection", + "94": "Plumbing - Defective/Leaking/Not Maintained", + "1A": "Illegal Conversion Commercial Building/Space to Dwelling Units", + "1B": "Illegal Tree Removal/Topo. Change In SNAD", + "1D": "Con Edison Referral", + "1E": "Suspended (Hanging) Scaffolds - No Permit/License/Dangerous/Accident", + "1G": "Stalled Construction Site", + "1K": "Bowstring Truss Tracking Complaint", + "1Z": "Enforcement Work Order (DOB)", + "2A": "Posted Notice or Order Removed/Tampered With", + "2B": "Failure to Comply with Vacate Order", + "2C": "Smoking Ban – Smoking on Construction Site", + "2D": "Smoking Signs – No Smoking Signs Not Observed on Construction Site", + "2E": "Demolition Notification Received", + "2F": "Building Under Structural Monitoring", + "2G": "Advertising Sign/Billboard/Posters/Flexible Fabric – Illegal", + "2H": "Second Avenue Subway Construction", + "2J": "Sandy: Building Destroyed", + "2K": "Structurally Compromised Building (LL33/08)", + "2L": "Façade (LL11/98) – Unsafe Notification", + "2M": "Monopole Tracking Complaint", + "3A": "Unlicensed/Illegal/Improper Electrical Work In Progress", + "4A": "Illegal Hotel Rooms In Residential Buildings", + "4B": "SEP – Professional Certification Compliance Audit", + "4C": "Excavation Tracking Complaint", + "4D": "Interior Demo Tracking Complaint", + "4F": "SST Tracking Complaint", + "4G": "Illegal Conversion No Access Follow-Up", + "4J": "M.A.R.C.H. Program (Interagency)", + "4K": "CSC – DM Tracking Complaint", + "4L": "CSC – High-Rise Tracking Complaint", + "4M": "CSC – Low-Rise Tracking Complaint", + "4N": "Retaining Wall Tracking Complaint", + "4P": "Legal/Padlock Tracking Complaint", + "4W": "Woodside Settlement Project", + "5A": "Request for Joint FDNY/DOB Inspection", + "5B": "Non-Compliance with Lightweight Materials", + "5C": "Structural Stability Impacted – New Building Under Construction", + "5E": "Amusement Ride Accident/Incident", + "5F": "Compliance Inspection", + "5G": "Unlicensed/Illegal/Improper Work In Progress", + "6A": "Vesting Inspection", +} diff --git a/pages/1_Building Overview.py b/pages/1_Building Overview.py new file mode 100644 index 0000000..7ac7d06 --- /dev/null +++ b/pages/1_Building Overview.py @@ -0,0 +1,713 @@ +from __future__ import annotations + +import time +from datetime import datetime, timedelta + +import pandas as pd +import pandas_gbq +import plotly.express as px +import requests +import streamlit as st +from google.oauth2 import service_account + +import functions.permit_page as _pp # noqa: F401 +from functions.permit_page import ( + apply_filter, + filter_last_12_months, + first_column, + permit_timeseries_by_borough, +) + +st.set_page_config(page_title="NYC Buildings Overview", layout="wide") +st.title("NYC Buildings Overview") + +start_time = time.time() + +# ── Configuration ───────────────────────────────────────────────────────────── +PROJECT_ID = "sipa-adv-c-cosmic-spaghetti" +DATASET = "cosmic_spaghetti" + + +# ── GeoJSON ─────────────────────────────────────────────────────────────────── +@st.cache_data(ttl=86400, show_spinner=False) +def get_geojson(): + response = requests.get( + "https://raw.githubusercontent.com/dwillis/nyc-maps/master/boroughs.geojson" + ) + return response.json() + + +def get_credentials(): + return service_account.Credentials.from_service_account_info( + st.secrets["gcp_service_account"], + scopes=["https://www.googleapis.com/auth/bigquery"], + ) + + +# ── Data loaders ────────────────────────────────────────────────────────────── +@st.cache_data(ttl=86400, show_spinner=False) +def load_buildings_summary() -> pd.DataFrame: + query = f""" + SELECT borough, cnstrct_yr, total_buildings, avg_height + FROM `{PROJECT_ID}.{DATASET}.buildings_summary` + WHERE borough IS NOT NULL AND cnstrct_yr <= 2025 + """ + return pandas_gbq.read_gbq( + query, + project_id=PROJECT_ID, + credentials=get_credentials(), + progress_bar_type=None, + dtypes={"borough": "str"}, + ) + + +@st.cache_data(ttl=3600, show_spinner=False) +def load_new_buildings() -> pd.DataFrame: + query = f""" + SELECT borough, permit_date, permit_type, permit_type_desc, + status, latitude, longitude + FROM `{PROJECT_ID}.{DATASET}.permits` + WHERE permit_type = 'NB' + AND permit_date >= '2025-01-01' + AND borough IS NOT NULL + """ + df = pandas_gbq.read_gbq( + query, + project_id=PROJECT_ID, + credentials=get_credentials(), + progress_bar_type=None, + dtypes={"borough": "str"}, + ) + df["permit_date"] = pd.to_datetime(df["permit_date"], errors="coerce") + df["latitude"] = pd.to_numeric(df["latitude"], errors="coerce") + df["longitude"] = pd.to_numeric(df["longitude"], errors="coerce") + return df + + +@st.cache_data(ttl=3600, show_spinner=False) +def load_other_permits() -> pd.DataFrame: + one_year_ago = (datetime.now() - timedelta(days=365)).strftime("%Y-%m-%d") + query = f""" + SELECT borough, permit_date, permit_type, permit_type_desc, + status, latitude, longitude, source + FROM `{PROJECT_ID}.{DATASET}.permits` + WHERE permit_type != 'NB' + AND permit_date >= '{one_year_ago}' + AND borough IS NOT NULL + LIMIT 50000 + """ + df = pandas_gbq.read_gbq( + query, + project_id=PROJECT_ID, + credentials=get_credentials(), + progress_bar_type=None, + dtypes={"borough": "str", "permit_type": "str", "status": "str"}, + ) + df["permit_date"] = pd.to_datetime(df["permit_date"], errors="coerce") + df["latitude"] = pd.to_numeric(df["latitude"], errors="coerce") + df["longitude"] = pd.to_numeric(df["longitude"], errors="coerce") + return df + + +@st.cache_data(ttl=86400, show_spinner=False) +def load_facades() -> pd.DataFrame: + query = f""" + SELECT borough, filing_status, current_status, cycle, filing_date + FROM `{PROJECT_ID}.{DATASET}.facades` + WHERE borough IS NOT NULL + """ + df = pandas_gbq.read_gbq( + query, + project_id=PROJECT_ID, + credentials=get_credentials(), + progress_bar_type=None, + dtypes={"borough": "str", "filing_status": "str"}, + ) + df["filing_date"] = pd.to_datetime(df["filing_date"], errors="coerce") + return df + + +# ── Load all data ───────────────────────────────────────────────────────────── +with st.spinner("Loading data..."): + df_summary = load_buildings_summary() + df_new = load_new_buildings() + df_other = load_other_permits() + df_facades = load_facades() + +nyc_geo = get_geojson() +df_other_2025 = df_other[df_other["permit_date"] >= "2025-01-01"] + +# ── Top metrics ─────────────────────────────────────────────────────────────── +total_unsafe = ( + len(df_facades[df_facades["filing_status"].str.contains("UNSAFE", na=False)]) + if not df_facades.empty + else 0 +) + +col1, col2, col3, col4 = st.columns(4) +col1.metric( + label="Total Buildings (end of 2025)", + value=f"{int(df_summary['total_buildings'].sum()):,}", + border=True, +) +col2.metric( + label="New Building Jobs (Jan 2025+)", + value=f"{len(df_new):,}", + border=True, +) +col3.metric( + label="Other Permits (Jan 2025+)", + value=f"{len(df_other_2025):,}", + border=True, +) +col4.metric( + label="Unsafe Facade Filings", + value=f"{total_unsafe:,}", + border=True, +) + +st.divider() + +# ── Tabs ────────────────────────────────────────────────────────────────────── +tab1, tab2, tab3, tab4 = st.tabs( + [ + "🏙️ Total Buildings", + "🏗️ New Building Jobs", + "🔨 Other Building Jobs", + "🔍 Facade Inspection (FISP)", + ] +) + + +# ══════════════════════════════════════════════════════════════════════════════ +# TAB 1 — TOTAL BUILDINGS IN NYC (end of 2025) +# ══════════════════════════════════════════════════════════════════════════════ +with tab1: + st.subheader("Total Buildings in NYC (end of 2025)") + st.caption("Source: NYC Building Footprints (5zhs-2jue) — cnstrct_yr ≤ 2025") + + boro_summary = df_summary.groupby("borough")["total_buildings"].sum().reset_index() + boro_summary["Borough"] = boro_summary["borough"].str.title() + + col1, col2 = st.columns(2) + with col1: + fig_map = px.choropleth_mapbox( + boro_summary, + geojson=nyc_geo, + locations="Borough", + featureidkey="properties.BoroName", + color="total_buildings", + color_continuous_scale="Blues", + mapbox_style="carto-positron", + zoom=9.5, + center={"lat": 40.7128, "lon": -74.0060}, + title="Total Buildings by Borough", + hover_name="Borough", + labels={"total_buildings": "Buildings"}, + ) + fig_map.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0}) + st.plotly_chart(fig_map, use_container_width=True) + with col2: + fig_bar = px.bar( + boro_summary.sort_values("total_buildings", ascending=False), + x="Borough", + y="total_buildings", + title="Total Buildings by Borough", + color="Borough", + color_discrete_sequence=px.colors.qualitative.Dark24_r, + labels={"total_buildings": "Buildings"}, + ) + st.plotly_chart(fig_bar, use_container_width=True) + + min_construct_yr = 1900 + max_construct_yr = 2025 + st.subheader("Buildings Constructed Per Year") + df_yr = ( + df_summary[ + (df_summary["cnstrct_yr"] >= min_construct_yr) + & (df_summary["cnstrct_yr"] <= max_construct_yr) + ] + .groupby(["cnstrct_yr", "borough"])["total_buildings"] + .sum() + .reset_index() + .rename(columns={"cnstrct_yr": "Year", "total_buildings": "Buildings"}) + ) + fig_yr = px.line( + df_yr, + x="Year", + y="Buildings", + color="borough", + title="Buildings Constructed Per Year by Borough", + labels={"borough": "Borough"}, + ) + st.plotly_chart(fig_yr, use_container_width=True) + + +# ══════════════════════════════════════════════════════════════════════════════ +# TAB 2 — NEW BUILDING JOBS (Jan 2025+) +# ══════════════════════════════════════════════════════════════════════════════ +with tab2: + st.subheader("New Building Jobs (January 2025 onwards)") + st.caption("Source: DOB Permit Issuance (ipu4-2q9a) — job_type = NB") + + if df_new.empty: + st.info("No new building permits found from January 2025.") + else: + new_by_boro = ( + df_new["borough"] + .value_counts() + .reset_index() + .rename(columns={"borough": "Borough", "count": "New Buildings"}) + ) + new_by_boro["Borough"] = new_by_boro["Borough"].str.title() + + col1, col2 = st.columns(2) + with col1: + fig_new_map = px.choropleth_mapbox( + new_by_boro, + geojson=nyc_geo, + locations="Borough", + featureidkey="properties.BoroName", + color="New Buildings", + color_continuous_scale="Greens", + mapbox_style="carto-positron", + zoom=9.5, + center={"lat": 40.7128, "lon": -74.0060}, + title="New Building Permits by Borough", + hover_name="Borough", + ) + fig_new_map.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0}) + st.plotly_chart(fig_new_map, use_container_width=True) + with col2: + fig_new_bar = px.bar( + new_by_boro.sort_values("New Buildings", ascending=False), + x="Borough", + y="New Buildings", + title="New Building Permits by Borough", + color="Borough", + color_discrete_sequence=px.colors.qualitative.Dark24_r, + ) + st.plotly_chart(fig_new_bar, use_container_width=True) + + df_new_coords = df_new.dropna(subset=["latitude", "longitude"]) + if not df_new_coords.empty: + st.subheader("Where Are New Buildings Being Built?") + fig_scatter = px.scatter_mapbox( + df_new_coords, + lat="latitude", + lon="longitude", + color="borough", + mapbox_style="carto-positron", + zoom=10, + center={"lat": 40.7128, "lon": -74.0060}, + title="New Building Permit Locations", + opacity=0.6, + hover_data={"borough": True, "status": True, "permit_date": True}, + ) + fig_scatter.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0}) + st.plotly_chart(fig_scatter, use_container_width=True) + + df_new_monthly = ( + df_new.groupby(df_new["permit_date"].dt.to_period("M")).size().reset_index(name="Count") + ) + df_new_monthly["permit_date"] = df_new_monthly["permit_date"].dt.to_timestamp() + fig_trend = px.bar( + df_new_monthly, + x="permit_date", + y="Count", + title="New Building Permits Per Month (Jan 2025+)", + labels={"permit_date": "Month"}, + color_discrete_sequence=["#1D9E75"], + ) + st.plotly_chart(fig_trend, use_container_width=True) + + +# ══════════════════════════════════════════════════════════════════════════════ +# TAB 3 — OTHER BUILDING JOBS +# Overview: Jan 2025+ | Detail: last 12 months (from 1_Bulding_Permit.py) +# ══════════════════════════════════════════════════════════════════════════════ +with tab3: + st.subheader("Other Building Jobs") + st.caption( + "Source: DOB NOW (rbx6-tga4) + DOB Permit Issuance (ipu4-2q9a) — " + "all permit types except New Building" + ) + + if df_other.empty: + st.info("No other permits found.") + else: + # overview Jan 2025+ + st.markdown("#### Overview (January 2025 onwards)") + + other_by_boro_2025 = ( + df_other_2025["borough"] + .value_counts() + .reset_index() + .rename(columns={"borough": "Borough", "count": "Permits"}) + ) + other_by_boro_2025["Borough"] = other_by_boro_2025["Borough"].str.title() + + by_type_2025 = ( + df_other_2025["permit_type_desc"] + .fillna(df_other_2025["permit_type"]) + .value_counts() + .head(10) + .reset_index() + .rename(columns={"permit_type_desc": "Permit Type", "count": "Count"}) + ) + + col1, col2 = st.columns(2) + with col1: + fig_other_map = px.choropleth_mapbox( + other_by_boro_2025, + geojson=nyc_geo, + locations="Borough", + featureidkey="properties.BoroName", + color="Permits", + color_continuous_scale="Oranges", + mapbox_style="carto-positron", + zoom=9.5, + center={"lat": 40.7128, "lon": -74.0060}, + title="Other Permits by Borough (Jan 2025+)", + hover_name="Borough", + ) + fig_other_map.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0}) + st.plotly_chart(fig_other_map, use_container_width=True) + with col2: + fig_type = px.bar( + by_type_2025, + x="Count", + y="Permit Type", + orientation="h", + title="Top 10 Permit Types (Jan 2025+)", + color="Count", + color_continuous_scale="Oranges", + ) + fig_type.update_layout(yaxis={"categoryorder": "total ascending"}) + st.plotly_chart(fig_type, use_container_width=True) + + df_other_coords = df_other_2025.dropna(subset=["latitude", "longitude"]) + if not df_other_coords.empty: + st.subheader("Where Are Permits Being Filed? (Jan 2025+)") + fig_other_scatter = px.scatter_mapbox( + df_other_coords, + lat="latitude", + lon="longitude", + color="permit_type_desc", + mapbox_style="carto-positron", + zoom=10, + center={"lat": 40.7128, "lon": -74.0060}, + title="Permit Locations", + opacity=0.5, + hover_data={"borough": True, "permit_type_desc": True, "status": True}, + ) + fig_other_scatter.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0}) + st.plotly_chart(fig_other_scatter, use_container_width=True) + + st.divider() + + # detail last 12 months + st.markdown("#### Detailed View (Last 12 Months)") + st.success(f"Loaded {len(df_other):,} rows (last 12 months)") + + date_col = first_column(df_other, ["permit_date"]) + borough_col = "borough" if "borough" in df_other.columns else None + type_col = first_column( + df_other, ["permit_type_desc", "permit_type", "work_type", "job_type"] + ) + status_col = first_column(df_other, ["status", "permit_status"]) + + df_detail = filter_last_12_months(df_other, date_col=date_col) + + with st.expander("Set Filters", expanded=False): + if borough_col: + borough_options = sorted( + df_detail[borough_col].dropna().astype(str).unique().tolist() + ) + selected_borough = st.multiselect( + "Borough", + borough_options, + default=borough_options, + key="t3_boro", + ) + else: + selected_borough = None + + if type_col: + type_options = sorted(df_detail[type_col].dropna().astype(str).unique().tolist()) + selected_types = st.multiselect( + type_col.replace("_", " ").title(), + type_options, + default=type_options, + key="t3_type", + ) + else: + selected_types = None + + if status_col: + status_options = sorted( + df_detail[status_col].dropna().astype(str).unique().tolist() + ) + selected_status = st.multiselect( + "Permit Status", + status_options, + default=status_options, + key="t3_status", + ) + else: + selected_status = None + + df_filtered = apply_filter( + df_detail, + borough_col=borough_col, + selected_borough=selected_borough, + type_col=type_col, + selected_types=selected_types, + status_col=status_col, + selected_status=selected_status, + ) + st.caption(f"Filtered rows: {len(df_filtered):,}") + + bucket = st.selectbox( + "Time bucket", + ["Monthly", "Weekly", "Daily"], + index=0, + key="bucket_permits", + ) + freq = {"Monthly": "MS", "Weekly": "W-MON", "Daily": "D"}[bucket] + + if not df_filtered.empty and date_col in df_filtered.columns: + df_filtered[date_col] = pd.to_datetime(df_filtered[date_col], errors="coerce") + end_period = df_filtered[date_col].max() + offsets_map = { + "Monthly": pd.DateOffset(months=1), + "Weekly": pd.DateOffset(weeks=1), + "Daily": pd.DateOffset(days=1), + } + offsets = offsets_map[bucket] + period_label = bucket.lower().rstrip("ly") + start_period = end_period - offsets + start_prev = start_period - offsets + + current = df_filtered[df_filtered[date_col] > start_period] + previous = df_filtered[ + (df_filtered[date_col] > start_prev) & (df_filtered[date_col] <= start_period) + ] + + current_total = len(current) + previous_total = len(previous) + total_delta = ( + f"{((current_total - previous_total) / previous_total * 100):+.1f}%" + if previous_total > 0 + else None + ) + + if borough_col and borough_col in df_filtered.columns: + current_boro = current[borough_col].value_counts() + previous_boro = previous[borough_col].value_counts() + top_boro = current_boro.idxmax() if not current_boro.empty else "N/A" + top_boro_count = int(current_boro.max()) if not current_boro.empty else 0 + prev_boro_count = int(previous_boro.get(top_boro, 0)) + boro_delta = f"{top_boro_count - prev_boro_count:+,}" + else: + top_boro, boro_delta = "N/A", None + + if type_col and type_col in df_filtered.columns: + current_type = current[type_col].value_counts() + previous_type = previous[type_col].value_counts() + top_type = current_type.idxmax() if not current_type.empty else "N/A" + top_type_count = int(current_type.max()) if not current_type.empty else 0 + prev_type_count = int(previous_type.get(top_type, 0)) + type_delta = ( + f"{((top_type_count - prev_type_count) / prev_type_count * 100):+.1f}%" + if prev_type_count > 0 + else None + ) + else: + top_type, type_delta = "N/A", None + + st.subheader(f"Summary — Current {bucket} Period") + col1, col2, col3 = st.columns(3) + col1.metric( + label=f"Total Permits (vs previous {period_label})", + value=f"{current_total:,}", + delta=total_delta, + border=True, + ) + col2.metric( + label=f"Borough with Highest Permits (vs previous {period_label})", + value=top_boro, + delta=boro_delta, + border=True, + ) + col3.metric( + label=f"Most Common Work Type (vs previous {period_label})", + value=top_type, + delta=type_delta, + border=True, + ) + + if not df_filtered.empty and borough_col: + boro_counts = ( + df_filtered[borough_col] + .value_counts() + .reset_index() + .rename(columns={borough_col: "Borough", "count": "Count"}) + ) + boro_counts["Borough"] = boro_counts["Borough"].str.strip().str.title() + fig_detail_map = px.choropleth_mapbox( + boro_counts, + geojson=nyc_geo, + locations="Borough", + featureidkey="properties.BoroName", + color="Count", + color_continuous_scale="Reds", + mapbox_style="carto-positron", + zoom=9.5, + center={"lat": 40.7128, "lon": -74.0060}, + title="Number of Permits by Borough", + hover_name="Borough", + hover_data={"Count": True}, + ) + fig_detail_map.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0}) + st.plotly_chart(fig_detail_map, use_container_width=True) + else: + st.info("No data to display") + + ts = permit_timeseries_by_borough( + df_filtered, + date_col=date_col, + borough_col=borough_col or "borough", + status_col=None, + freq=freq, + ) + if ts.empty: + st.info("No rows to visualize") + else: + fig_ts = px.line( + ts, + x="Period", + y="Count", + color="Borough", + markers=True, + title="Building Job Permits by Borough Over Time", + ) + st.plotly_chart(fig_ts, use_container_width=True) + + +# ══════════════════════════════════════════════════════════════════════════════ +# TAB 4 — FACADE INSPECTION (FISP) +# ══════════════════════════════════════════════════════════════════════════════ +with tab4: + st.subheader("Facade Inspection Safety Program (FISP)") + st.caption( + "Buildings taller than 6 stories must be inspected every 5 years. " + "Current cycle: Cycle 10 (2025–2030). " + "Classifications: Safe · SWARMP (needs repair within 5 years) · Unsafe." + ) + + if df_facades.empty: + st.info("No facade inspection data found.") + else: + unsafe_count = len(df_facades[df_facades["filing_status"].str.contains("UNSAFE", na=False)]) + swarmp_count = len(df_facades[df_facades["filing_status"].str.contains("SWARMP", na=False)]) + safe_count = len(df_facades[df_facades["filing_status"].str.contains("SAFE", na=False)]) + + col1, col2, col3 = st.columns(3) + col1.metric(label="Safe Filings", value=f"{safe_count:,}", border=True) + col2.metric(label="SWARMP Filings", value=f"{swarmp_count:,}", border=True) + col3.metric(label="Unsafe Filings", value=f"{unsafe_count:,}", border=True) + + # pie chart + status_counts = ( + df_facades["filing_status"] + .fillna("UNKNOWN") + .value_counts() + .reset_index() + .rename(columns={"filing_status": "Status", "count": "Count"}) + ) + + col1, col2 = st.columns(2) + with col1: + fig_pie = px.pie( + status_counts, + names="Status", + values="Count", + title="Facade Filing Status (All Cycles)", + color="Status", + color_discrete_map={ + "SAFE": "#639922", + "SWARMP": "#BA7517", + "UNSAFE": "#E24B4A", + "NO REPORT FILED": "#888780", + }, + ) + st.plotly_chart(fig_pie, use_container_width=True) + + with col2: + unsafe_by_boro = ( + df_facades[df_facades["filing_status"].str.contains("UNSAFE", na=False)]["borough"] + .value_counts() + .reset_index() + .rename(columns={"borough": "Borough", "count": "Unsafe Filings"}) + ) + unsafe_by_boro["Borough"] = unsafe_by_boro["Borough"].str.title() + fig_unsafe = px.bar( + unsafe_by_boro.sort_values("Unsafe Filings", ascending=False), + x="Borough", + y="Unsafe Filings", + title="Unsafe Facade Filings by Borough", + color="Borough", + color_discrete_sequence=px.colors.qualitative.Dark24_r, + ) + st.plotly_chart(fig_unsafe, use_container_width=True) + + # at-risk map + fisp_by_boro = ( + df_facades[df_facades["filing_status"].str.contains("UNSAFE|SWARMP", na=False)] + .groupby("borough")["filing_status"] + .count() + .reset_index() + .rename(columns={"borough": "Borough", "filing_status": "At Risk Filings"}) + ) + fisp_by_boro["Borough"] = fisp_by_boro["Borough"].str.title() + + fig_fisp_map = px.choropleth_mapbox( + fisp_by_boro, + geojson=nyc_geo, + locations="Borough", + featureidkey="properties.BoroName", + color="At Risk Filings", + color_continuous_scale="Reds", + mapbox_style="carto-positron", + zoom=9.5, + center={"lat": 40.7128, "lon": -74.0060}, + title="At Risk Facades (SWARMP + Unsafe) by Borough", + hover_name="Borough", + hover_data={"At Risk Filings": True}, + ) + fig_fisp_map.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0}) + st.plotly_chart(fig_fisp_map, use_container_width=True) + + # trend by cycle + if "cycle" in df_facades.columns: + cycle_counts = ( + df_facades.groupby(["cycle", "filing_status"]).size().reset_index(name="Count") + ) + fig_cycle = px.bar( + cycle_counts, + x="cycle", + y="Count", + color="filing_status", + title="Facade Filing Status by Cycle", + barmode="group", + color_discrete_map={ + "SAFE": "#639922", + "SWARMP": "#BA7517", + "UNSAFE": "#E24B4A", + }, + labels={"filing_status": "Status", "cycle": "Cycle"}, + ) + st.plotly_chart(fig_cycle, use_container_width=True) + +st.caption(f"Page loaded in {time.time() - start_time:.2f} seconds") diff --git a/pages/1_Bulding Permit.py b/pages/1_Bulding Permit.py deleted file mode 100644 index 248b208..0000000 --- a/pages/1_Bulding Permit.py +++ /dev/null @@ -1,295 +0,0 @@ -from __future__ import annotations - -import time -from contextlib import contextmanager -from datetime import datetime, timedelta - -import pandas as pd -import pandas_gbq -import plotly.express as px -import requests -import streamlit as st -from google.oauth2 import service_account - -# temporary fix -import functions.permit_page as _pp # noqa: F401 -from functions.permit_page import ( - apply_filter, - first_column, - permit_timeseries_by_borough, -) - -st.set_page_config(page_title="NYC Building Job", layout="wide") -st.title("NYC Building Job (Last 12 Months)") - -project_id = "sipa-adv-c-cosmic-spaghetti" -dataset = "cosmic_spaghetti" -table = "permits" - - -# add page load time -@contextmanager -def display_load_time(): - start_time = time.time() - - try: - yield - finally: - elapsed = time.time() - start_time - st.caption(f"Page loaded in {elapsed:.2f} seconds") - - -# load geojson -@st.cache_data(ttl=86400, show_spinner=False) -def get_geojson(): - geojson_url = "https://raw.githubusercontent.com/dwillis/nyc-maps/master/boroughs.geojson" - response = requests.get(geojson_url) - return response.json() - - -# load fromBQ -@st.cache_data(ttl=3600, show_spinner=False) -def get_permits(): - credentials = service_account.Credentials.from_service_account_info( - st.secrets["gcp_service_account"], scopes=["https://www.googleapis.com/auth/bigquery"] - ) - - one_year_ago = (datetime.now() - timedelta(days=365)).strftime("%Y-%m-%d") - - # sql filter - query = f""" - SELECT borough, - issued_date, - work_type, - permit_status, - community_board - FROM `{project_id}.{dataset}.{table}` - WHERE DATE(issued_date) >= '{one_year_ago}' - AND borough IS NOT NULL - AND issued_date IS NOT NULL - LIMIT 10000""" - - df = pandas_gbq.read_gbq( - query, - project_id=project_id, - credentials=credentials, - dtypes={ - "borough": "str", - "work_type": "str", - "permit_status": "str", - "community_board": "str", - }, - ) - df["issued_date"] = pd.to_datetime(df["issued_date"], errors="coerce") - df["year"] = df["issued_date"].dt.year - return df - - -# main page with load time -with display_load_time(): - with st.spinner("Loading permits from BigQuery..."): - df = get_permits() - - if df.empty: - st.error("No rows returned from BigQuery") - st.stop() - - # finding columns for 12 months filter - date_col = first_column(df, ["issued_date", "approved_date", "expired_date"]) - if not date_col: - st.error("Could not find suitable date columns for filtering") - st.write("Columns found:", df.columns.tolist()) - st.stop() - - # st.success(f"Loaded {len(df):,} rows (last 12 months) using `{date_col}`") - # st.dataframe(df.head(5), use_container_width=True) - # st.write("Loaded DF columns:", df.columns.tolist()) - - borough_col = "borough" if "borough" in df.columns else None - type_col = first_column(df, ["permit_type", "work_type", "job_type", "permit_subtype"]) - status_col = first_column(df, ["permit_status"]) - - # filters and metrics - st.subheader("Filters") - - with st.expander("Set Filters", expanded=False): - if borough_col: - borough_options = sorted(df[borough_col].dropna().astype(str).unique().tolist()) - selected_borough = st.multiselect("Borough", borough_options, default=borough_options) - else: - selected_borough = None - st.info("No borough column found to filter by") - - if type_col: - type_options = sorted(df[type_col].dropna().astype(str).unique().tolist()) - selected_types = st.multiselect( - type_col.replace("_", " ").title(), type_options, default=type_options - ) - else: - selected_types = None - st.info("No type column found to filter by") - - if status_col: - status_options = sorted(df[status_col].dropna().astype(str).unique().tolist()) - selected_status = st.multiselect( - "Permit Status", status_options, default=status_options - ) - else: - selected_status = None - - df_filtered = apply_filter( - df, - borough_col=borough_col, - selected_borough=selected_borough, - type_col=type_col, - selected_types=selected_types, - status_col=status_col, - selected_status=selected_status, - ) - - st.caption(f"Filtered rows: {len(df_filtered):,}") - # st.dataframe(df_filtered.head(5), use_container_width=True) - - st.subheader("Approved and Issued Building Job Permits by Borough Over Time") - - bucket = st.selectbox( - "Time bucket", ["Monthly", "Weekly", "Daily"], index=0, key="bucket_permits" - ) # noqa: E501 - freq = {"Monthly": "MS", "Weekly": "W-MON", "Daily": "D"}[bucket] - - # Metrics - - if not df_filtered.empty and date_col in df_filtered.columns: - df_filtered[date_col] = pd.to_datetime(df_filtered[date_col], errors="coerce") - - end_period = df_filtered[date_col].max() - offsets_map = { - "Monthly": pd.DateOffset(months=1), - "Weekly": pd.DateOffset(weeks=1), - "Daily": pd.DateOffset(days=1), - } - offsets = offsets_map[bucket] - period_label = bucket.lower().rstrip("ly") - - start_period = end_period - offsets - start_prev = start_period - offsets - - current = df_filtered[df_filtered[date_col] > start_period] - previous = df_filtered[ - (df_filtered[date_col] > start_prev) & (df_filtered[date_col] <= start_period) - ] - - # a. total permit metrics - current_total = len(current) - previous_total = len(previous) - if previous_total > 0: - total_percentage = ((current_total - previous_total) / previous_total) * 100 - total_delta = f"{total_percentage:+.1f}%" - else: - total_delta = None - - # b. borough w/highest permt - if borough_col and borough_col in df_filtered.columns: - current_boro = current[borough_col].value_counts() - previous_boro = previous[borough_col].value_counts() - top_boro = current_boro.idxmax() if not current_boro.empty else "N/A" - top_boro_count = int(current_boro.max()) if not current_boro.empty else 0 - previous_boro_count = int(previous_boro.get(top_boro, 0)) - boro_delta = f"{top_boro_count - previous_boro_count:+,}" - else: - top_boro = "N/A" - top_boro_count = 0 - boro_delta = None - - # c. work type with highest permit - if type_col and type_col in df_filtered.columns: - current_type = current[type_col].value_counts() - previous_type = previous[type_col].value_counts() - top_type = current_type.idxmax() if not current_type.empty else "N/A" - top_type_count = int(current_type.max()) if not current_type.empty else 0 - previous_type_count = int(previous_type.get(top_type, 0)) - if previous_type_count > 0: - type_pct = ((top_type_count - previous_type_count) / previous_type_count) * 100 - type_delta = f"{type_pct:+.1f}%" - else: - type_delta = None - else: - top_type = "N/A" - top_type_count = 0 - type_delta = None - - # show metric - st.subheader(f"Summary-Current {bucket} Period") - col1, col2, col3 = st.columns(3) - col1.metric( - label=f"Total Permits (compared to previous {period_label})", - value=f"{current_total:,}", - delta=total_delta, - border=True, - ) - col2.metric( - label=f"Borough with Highest Permits (compared to previous {period_label})", - value=top_boro, - delta=boro_delta, - border=True, - ) - - col3.metric( - label=f"Most Work Type (compared to previous {period_label})", - value=top_type, - delta=type_delta, - border=True, - ) - # st.write(df_filtered[borough_col].unique()) - - st.subheader("Permits by Borough") - if not df_filtered.empty and borough_col: - nyc_geo = get_geojson() - - boro_counts = ( - df_filtered[borough_col] - .value_counts() - .reset_index() - .rename(columns={borough_col: "Borough", "count": "Count"}) - ) - boro_counts["Borough"] = boro_counts["Borough"].str.strip().str.title() - - fig_map = px.choropleth_mapbox( - boro_counts, - geojson=nyc_geo, - locations="Borough", - featureidkey="properties.BoroName", - color="Count", - color_continuous_scale="Reds", - mapbox_style="carto-positron", - zoom=9.5, - center={"lat": 40.7128, "lon": -74.0060}, - title="Number of Permits by Borough", - hover_name="Borough", - hover_data={"Count": True}, - ) - fig_map.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0}) - st.plotly_chart(fig_map, use_container_width=True) - else: - st.info("No data to display") - - ts = permit_timeseries_by_borough( - df_filtered, - date_col=date_col, - borough_col=borough_col or "borough", - status_col=None, - freq=freq, - ) - - if ts.empty: - st.info("No rows to visualize") - else: - fig = px.line( - ts, - x="Period", - y="Count", - color="Borough", - markers=True, - title="Approved and Issued Building Job Permits by Borough Over Time", - ) - st.plotly_chart(fig, use_container_width=True) diff --git a/pages/3_Building_Complaints.py b/pages/3_Building_Complaints.py new file mode 100644 index 0000000..03ae9c2 --- /dev/null +++ b/pages/3_Building_Complaints.py @@ -0,0 +1,277 @@ +from __future__ import annotations + +import time +from contextlib import contextmanager + +import pandas as pd +import pandas_gbq +import plotly.express as px +import requests +import streamlit as st +from google.oauth2 import service_account + +from complaint_categories import COMPLAINT_CATEGORY_MAP + +st.set_page_config(page_title="NYC Building Complaints", layout="wide") +st.title("NYC Building Complaints Dashboard") + +# Configuration +PROJECT_ID = "sipa-adv-c-cosmic-spaghetti" +DATASET = "cosmic_spaghetti" +TABLE = "complaints" + +date_col = "date_entered" +borough_col = "borough" +category_col = "complaint_category" +status_col = "status" + + +# Page load time context manager +@contextmanager +def display_load_time(): + start_time = time.time() + try: + yield + finally: + elapsed = time.time() - start_time + st.caption(f"Page loaded in {elapsed:.2f} seconds") + + +# GeoJSON loader with caching +@st.cache_data(ttl=86400, show_spinner=False) +def get_geojson(): + geojson_url = "https://raw.githubusercontent.com/dwillis/nyc-maps/master/boroughs.geojson" + response = requests.get(geojson_url) + return response.json() + + +# Load from BQ +@st.cache_data(ttl=3600, show_spinner=False) +def load_complaints() -> pd.DataFrame: + credentials = service_account.Credentials.from_service_account_info( + st.secrets["gcp_service_account"], + scopes=["https://www.googleapis.com/auth/bigquery"], + ) + query = f""" + SELECT + community_board, + date_entered, + complaint_category, + status + FROM `{PROJECT_ID}.{DATASET}.complaints` + WHERE date_entered IS NOT NULL + LIMIT 10000 + """ + df = pandas_gbq.read_gbq( + query, + project_id=PROJECT_ID, + credentials=credentials, + progress_bar_type=None, + dtypes={ + "community_board": "str", + "complaint_category": "str", + "status": "str", + }, + ) + df["date_entered"] = pd.to_datetime(df["date_entered"], errors="coerce") + + # extract borough from community_board (first digit = borough code) + borough_map = { + "1": "Manhattan", + "2": "Bronx", + "3": "Brooklyn", + "4": "Queens", + "5": "Staten Island", + } + df["borough"] = df["community_board"].str[0].map(borough_map).fillna("Unknown") + df["complaint_desc"] = ( + df["complaint_category"].map(COMPLAINT_CATEGORY_MAP).fillna(df["complaint_category"]) + ) + return df + + +# Main page +with display_load_time(): + with st.spinner("Loading complaints data from BigQuery..."): + df = load_complaints() + + if df.empty: + st.error("No rows returned from BigQuery") + st.stop() + + st.success(f"Loaded {len(df):,} rows (last 12 months)") + + # ── Filters ─────────────────────────────────────────────────────────────── + st.subheader("Filters") + + with st.expander("Set Filters", expanded=False): + borough_options = sorted(df[borough_col].dropna().astype(str).unique().tolist()) + selected_borough = st.multiselect("Borough", borough_options, default=borough_options) + + category_options = sorted(df[category_col].dropna().astype(str).unique().tolist()) + selected_category = st.multiselect( + "Complaint Category", category_options, default=category_options + ) + + status_options = sorted(df[status_col].dropna().astype(str).unique().tolist()) + selected_status = st.multiselect("Status", status_options, default=status_options) + + df_filtered = df.copy() + if selected_borough: + df_filtered = df_filtered[df_filtered[borough_col].astype(str).isin(selected_borough)] + if selected_category: + df_filtered = df_filtered[df_filtered[category_col].astype(str).isin(selected_category)] + if selected_status: + df_filtered = df_filtered[df_filtered[status_col].astype(str).isin(selected_status)] + + st.caption(f"Filtered rows: {len(df_filtered):,}") + + # summary metrics + bucket = st.selectbox( + "Time bucket", ["Monthly", "Weekly", "Daily"], index=0, key="complaints_bucket" + ) + + st.subheader(f"Summary of Complaints — Current {bucket}") + + if not df_filtered.empty: + end_period = df_filtered[date_col].max() + offsets_map = { + "Monthly": pd.DateOffset(months=1), + "Weekly": pd.DateOffset(weeks=1), + "Daily": pd.DateOffset(days=1), + } + offsets = offsets_map[bucket] + period_label = bucket.lower().rstrip("ly") + + start_period = end_period - offsets + start_prev = start_period - offsets + + current = df_filtered[df_filtered[date_col] > start_period] + previous = df_filtered[ + (df_filtered[date_col] > start_prev) & (df_filtered[date_col] <= start_period) + ] + + # total complaints + current_total = len(current) + previous_total = len(previous) + if previous_total > 0: + total_pct = ((current_total - previous_total) / previous_total) * 100 + total_delta = f"{total_pct:+.1f}%" + else: + total_delta = None + + # borough with most complaints + current_boro = current[borough_col].value_counts() + previous_boro = previous[borough_col].value_counts() + top_boro = current_boro.idxmax() if not current_boro.empty else "N/A" + top_boro_count = int(current_boro.max()) if not current_boro.empty else 0 + prev_boro_count = int(previous_boro.get(top_boro, 0)) + boro_delta = f"{top_boro_count - prev_boro_count:+,}" + + # most common complaint category + current_cat = current[category_col].value_counts() + top_cat = current_cat.idxmax() if not current_cat.empty else "N/A" + + col1, col2, col3 = st.columns(3) + col1.metric( + label=f"Total Complaints (vs previous {period_label})", + value=f"{current_total:,}", + delta=total_delta, + border=True, + ) + col2.metric( + label="Borough with Most Complaints", + value=top_boro, + delta=boro_delta, + border=True, + ) + col3.metric( + label="Most Common Category", + value=top_cat, + border=True, + ) + + # complaints by borough + st.subheader("Complaints by Borough") + + counts = df_filtered[borough_col].fillna("Missing").value_counts().reset_index() + counts.columns = ["Borough", "Total Complaints"] + + fig_bar = px.bar( + counts, + x="Borough", + y="Total Complaints", + title="NYC Building Complaints by Borough", + color="Borough", + color_discrete_sequence=px.colors.qualitative.Dark24_r, + ) + st.plotly_chart(fig_bar, use_container_width=True) + + # complaints by borough (map) + st.subheader("Complaints by Borough (Map)") + + if not df_filtered.empty: + nyc_geo = get_geojson() + borough_counts = df_filtered[borough_col].value_counts().reset_index() + borough_counts.columns = ["Borough", "Complaints"] + borough_counts["Borough"] = borough_counts["Borough"].str.strip().str.title() + + fig_map = px.choropleth_mapbox( + borough_counts, + geojson=nyc_geo, + locations="Borough", + featureidkey="properties.BoroName", + color="Complaints", + color_continuous_scale="Reds", + mapbox_style="carto-positron", + zoom=9.5, + center={"lat": 40.7128, "lon": -74.0060}, + title="Building Complaints by Borough (Map)", + hover_name="Borough", + hover_data={"Complaints": True}, + ) + fig_map.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0}) + st.plotly_chart(fig_map, use_container_width=True) + + # Complaint categories + st.subheader("Top 10 Complaint Categories") + + top_categories = ( + df_filtered["complaint_desc"].fillna("Unknown").value_counts().head(10).reset_index() + ) + top_categories.columns = ["Category", "Count"] + + fig_cat = px.bar( + top_categories, + x="Count", + y="Category", + orientation="h", + title="Top 10 Complaint Categories", + color="Count", + color_continuous_scale="Reds", + ) + fig_cat.update_layout(yaxis={"categoryorder": "total ascending"}) + st.plotly_chart(fig_cat, use_container_width=True) + + # Complaints over time + st.subheader("Complaints Over Time") + + bucket_ts = st.selectbox( + "Time bucket", ["Monthly", "Weekly", "Daily"], index=0, key="complaints_timeseries" + ) + freq_map = {"Monthly": "M", "Weekly": "W-MON", "Daily": "D"} + freq = freq_map[bucket_ts] + + df_ts = df_filtered.copy() + df_ts["Period"] = df_ts[date_col].dt.to_period(freq).dt.to_timestamp() + ts = df_ts.groupby(["Period", borough_col]).size().reset_index(name="Complaints") + + fig_ts = px.line( + ts, + x="Period", + y="Complaints", + color=borough_col, + markers=True, + title="Building Complaints by Borough Over Time", + ) + st.plotly_chart(fig_ts, use_container_width=True) diff --git a/pages/4_Findings.md b/pages/4_Findings.md new file mode 100644 index 0000000..e69de29