Traffic Assignment Validation

Validating Observed vs. Modeled Traffic Volumes

This notebook validates the traffic volumes assigned by the Travel Demand Model against observed traffic counts captured by Continuous Count Stations (CCS).
Author: Pukar Bhandari

Published: December 24, 2025

1 Overview

This document validates the Travel Demand Model’s traffic assignment by comparing modeled volumes against observed counts from Continuous Count Stations (CCS).

The process is divided into modular steps to support a “Minimal Documentation” workflow:

1. Bridge Creation: Mapping CCS stations to network segments (static).
2. Observed Data: Aggregating raw telemetry data (static).
3. Model Data: Loading specific scenario results (dynamic).
4. Validation: Merging the three sources for visualization.

2 Environment Setup

Import Standard Libraries

Show the code
# For Analysis
import numpy as np
import pandas as pd
import geopandas as gpd

# For Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import mapclassify

# misc
import os
import requests
from pathlib import Path
import importlib.util

from dotenv import load_dotenv

load_dotenv()
True

Setup Directories & Variables

Show the code
CRS_UTM = "EPSG:26912"

# Define Output Paths
intermediate_dir = Path("_intermediate")
output_dir = Path("_output")
raw_dir = Path("_data/raw")

# Create directories if they don't exist
intermediate_dir.mkdir(parents=True, exist_ok=True)
output_dir.mkdir(parents=True, exist_ok=True)
raw_dir.mkdir(parents=True, exist_ok=True)

print("Directories validated.")
Directories validated.

3 Helper Functions

Show the code
def fetch_github(
    url: str,
    mode: str = "private",
    token_env_var: str = "GITHUB_TOKEN"
) -> requests.Response:
    """
    Fetch content from GitHub repositories.

    Args:
        url: GitHub raw URL (e.g., https://raw.githubusercontent.com/...)
        mode: "public" for public repos, "private" for private repos requiring authentication
        token_env_var: Name of environment variable containing GitHub token (default: GITHUB_TOKEN)

    Returns:
        requests.Response object

    Raises:
        ValueError: If token is missing for private mode or invalid mode
        requests.HTTPError: If request fails
    """
    # Validate mode
    if mode not in ["public", "private"]:
        raise ValueError(f"mode must be 'public' or 'private', got '{mode}'")

    if mode == "public":
        response = requests.get(url, timeout=30)
    else:
        token = os.getenv(token_env_var)
        if not token:
            raise ValueError(
                f"GitHub token not found in environment variable '{token_env_var}'. "
                f"Check your .env file has: {token_env_var}=your_token_here"
            )

        headers = {
            'Authorization': f'token {token}',
            'Accept': 'application/vnd.github.v3.raw'
        }
        response = requests.get(url, headers=headers, timeout=30)

    response.raise_for_status()
    return response
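As a usage sketch, this is how the private-mode branch assembles its request headers (the token value below is illustrative only; in practice `load_dotenv()` supplies it from the `.env` file):

```python
import os

# Hypothetical token for illustration only -- never hard-code a real token
os.environ["GITHUB_TOKEN"] = "ghp_example"

token = os.getenv("GITHUB_TOKEN")
headers = {
    "Authorization": f"token {token}",
    "Accept": "application/vnd.github.v3.raw",  # ask GitHub for raw file content
}
print(headers["Authorization"])
```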
Show the code
# Create function to read ArcGIS FeatureLayer or Table
def arc_read(url, where="1=1", outFields="*", outSR=4326, **kwargs):
    """
    Read an ArcGIS FeatureLayer or Table to a GeoDataFrame.

    Parameters:
    url (str): The ArcGIS REST service URL (e.g., ending in /FeatureServer/0)
    where (str): SQL WHERE clause for filtering. Default: "1=1"
    outFields (str): Comma-separated field names. Default: "*"
    outSR (int): Output spatial reference EPSG code. Default: 4326
    **kwargs: Additional query parameters passed to the ArcGIS REST API

    Returns:
    geopandas.GeoDataFrame: Spatial data from the service
    """
    # Ensure URL ends with /query
    if not url.endswith("/query"):
        url = url.rstrip("/") + "/query"

    # Build query parameters
    params = {
        "where": where,
        "outFields": outFields,
        "returnGeometry": "true",
        "outSR": outSR,
        "f": "geojson",
    }

    # Add any additional parameters
    params.update(kwargs)

    # Make request
    response = requests.get(url, params=params)
    response.raise_for_status()  # Good practice to check for HTTP errors

    # Read as GeoDataFrame
    # We use io.BytesIO to handle the response content safely for read_file
    import io

    return gpd.read_file(io.BytesIO(response.content), engine="pyogrio")
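A minimal sketch of the URL normalization and query parameters `arc_read` builds before issuing the request (the service URL here is illustrative, not a real endpoint):

```python
# Illustrative service URL; shows the /query suffix normalization
url = "https://example.com/arcgis/rest/services/Demo/FeatureServer/0/"

if not url.endswith("/query"):
    url = url.rstrip("/") + "/query"

params = {
    "where": "1=1",           # no attribute filter
    "outFields": "*",         # return all attributes
    "returnGeometry": "true",
    "outSR": 4326,            # WGS84 output
    "f": "geojson",           # request GeoJSON so geopandas can read it directly
}
print(url)
```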

4 Step 1: Create Station-to-Network Bridge

Objective: Create a static reference file that links Station IDs to Network SEGIDs and attaches static network attributes (County, Functional Class, Truck %).

Load Network (Input Shapefile)

We load the raw input shapefile. We do not filter by model results here, ensuring the bridge is independent of any specific model run.

Show the code
# Load Segments (Input)
# Source: https://github.com/WFRCAnalytics/WF-TDM-Documentation/tree/v10-calib/v10x/v1000/validation/data/5-assignhwy/seg_shp
gdf_segments = gpd.read_file(
    raw_dir / "Segments/WFv1000_Segments.shp",
    engine="pyogrio",
).to_crs(CRS_UTM)

# Ensure SEGID is string for consistent matching
gdf_segments["SEGID"] = gdf_segments["SEGID"].astype(str)

# Parse Route and Milepost from SEGID directly (Independent of Model Results)
temp_route = gdf_segments["SEGID"].str.split("_").str[0]
gdf_segments["MODEL_ROUTE"] = pd.to_numeric(temp_route, errors="coerce")
gdf_segments["MODEL_MP"] = gdf_segments["SEGID"].str.split("_").str[1].astype(float)

# Filter to valid network links for matching
gdf_network_clean = gdf_segments.dropna(subset=["MODEL_ROUTE", "MODEL_MP"]).copy()
gdf_network_clean["MODEL_ROUTE"] = gdf_network_clean["MODEL_ROUTE"].astype(int)

print(f"Loaded Network with {len(gdf_network_clean)} valid segments.")
Loaded Network with 8078 valid segments.
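The parsing above relies on SEGIDs following a `ROUTE_MILEPOST` naming convention. A toy sketch with made-up IDs (the third value deliberately lacks the pattern, so it coerces to NaN and is dropped by the `dropna` filter):

```python
import pandas as pd

# Made-up SEGIDs; "CENTERLINE" has no route/milepost components
seg = pd.Series(["0015_3.2", "0089_12.5", "CENTERLINE"])
route = pd.to_numeric(seg.str.split("_").str[0], errors="coerce")
mp = pd.to_numeric(seg.str.split("_").str[1], errors="coerce")
print(route.tolist())  # [15.0, 89.0, nan]
```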

WF-TDM Region

Show the code
# Configuration
service_url = "https://services1.arcgis.com/taguadKoI1XFwivx/ArcGIS/rest/services/RegionalBoundaryComponents/FeatureServer/0"
gdb_path = raw_dir / "RegionalBoundary.gpkg"

# Download/Load logic
if not gdb_path.exists():
    print("Downloading Regional Boundaries...")
    gdf_download = arc_read(service_url)
    gdf_download.to_file(gdb_path, layer="RegionalBoundaryComponents", driver="GPKG")
else:
    print("Loading Regional Boundaries from cache...")

# Load and Project
gdf_region = (
    gpd.read_file(
        gdb_path,
        layer="RegionalBoundaryComponents",
        where="PlanOrg IN ('MAG MPO', 'WFRC MPO')",
    )
    .dissolve()
    .to_crs(CRS_UTM)
)

# Fix the holes and slivers in the regional boundary
gdf_region["geometry"] = gdf_region.buffer(1).buffer(-1)
Loading Regional Boundaries from cache...
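The `buffer(1).buffer(-1)` trick above is a morphological closing: dilating by 1 m fuses sub-meter gaps between dissolved components, then eroding restores the outline. A toy shapely sketch with two squares separated by a 0.1-unit sliver:

```python
from shapely.geometry import Polygon

# Two squares with a 0.1-unit sliver between them (toy coordinates)
a = Polygon([(0, 0), (10, 0), (10, 10), (0, 10)])
b = Polygon([(10.1, 0), (20, 0), (20, 10), (10.1, 10)])
merged = a.union(b)                   # the sliver keeps the parts separate

closed = merged.buffer(1).buffer(-1)  # dilate, then erode: the gap disappears
print(merged.geom_type, "->", closed.geom_type)
```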

Load CCS Stations

Show the code
# Configuration
service_url = "https://services.arcgis.com/pA2nEVnB6tquxgOW/ArcGIS/rest/services/CCS_Data_v2/FeatureServer/1"
gdb_path = raw_dir / "CCS_Data_v2.gpkg"

# Download/Load logic
if not gdb_path.exists():
    print("Downloading CCS Stations...")
    gdf_download = arc_read(service_url)
    if "SITEID" in gdf_download.columns:
        gdf_download["SITEID"] = gdf_download["SITEID"].astype("int32")
    gdf_download.to_file(gdb_path, layer="CCS_Data_v2", driver="GPKG")
else:
    print("Loading CCS Stations from cache...")

# Load and Project
gdf_ccs_locations = gpd.read_file(
    gdb_path,
    layer="CCS_Data_v2",
    # where="COUNTY IN ('Box Elder', 'Weber', 'Davis', 'Salt Lake', 'Utah')",
    mask=gdf_region,
).to_crs(CRS_UTM)

print(f"Total Stations Loaded (Region): {len(gdf_ccs_locations)}")
Loading CCS Stations from cache...
Total Stations Loaded (Region): 126

Run Spatial Matching Logic

Show the code
# 1. Define FC Mapping (UDOT Station FC -> Model FUNCGROUP)
# Extend or refine these mappings as additional FC values appear in the station data
fc_map = {
    # Freeways
    "Urban Principal Arterial - Interstate": "Freeway",
    "Rural Principal Arterial - Interstate": "Freeway",
    "Urban Principal Arterial - Other Freeways": "Freeway",  # Default mapping
    # Arterials
    "Urban Principal Arterial - Other": "Arterial",
    "Urban Minor Arterial": "Arterial",
    "Rural Principal Arterial - Other": "Arterial",
    "Rural Minor Arterial": "Arterial",
    # Collector/Distributor
    "Urban Major Collector": "CD Road",
    "Urban Minor Collector": "CD Road",
    "Urban Collector": "CD Road",
    "Rural Major Collector": "CD Road",
    "Urban Local System": "CD Road",  # Fallback
}
Show the code
match_results = []
used_segids = set()

print("Starting Route & Group Constrained Spatial Matching...")

for idx, ccs_row in gdf_ccs_locations.iterrows():
    station_id = ccs_row["STATION"]
    target_route = ccs_row["ROUTE"]

    # Identify the Target Group based on Station FC
    # Default to "Total" if FC is missing or unknown
    station_fc = ccs_row.get("FC", None)
    target_group = fc_map.get(station_fc, "Total")

    # --- PASS 1: Route-Constrained Spatial Match ---
    route_segments = gdf_network_clean[gdf_network_clean["MODEL_ROUTE"] == target_route]

    matched_segid = None
    match_type = "No Match"

    if not route_segments.empty:
        # Calculate distance to all segments on this route
        distances = route_segments.distance(ccs_row["geometry"])

        # Pick the absolute closest segment on this route
        best_idx = distances.idxmin()
        best_match = route_segments.loc[best_idx]

        matched_segid = best_match["SEGID"]
        match_type = "Route Match (Spatial)"

    # Add valid match to used set
    if matched_segid:
        used_segids.add(matched_segid)

    match_results.append(
        {
            "STATION": station_id,
            "MATCHED_SEGID": matched_segid,
            "MATCHED_GROUP": target_group,  # <--- NEW: Store the Target Group
            "MATCH_TYPE": match_type,
            "geometry": ccs_row["geometry"],
        }
    )

# --- PASS 2: Global Spatial Fallback (Unrestricted) ---
# Stations left unmatched by Pass 1 are matched to the nearest unused
# segment anywhere in the network; fallback matches always use the "Total" group.
for i, result in enumerate(match_results):
    if result["MATCHED_SEGID"] is None:
        station_geom = result["geometry"]
        available_segments = gdf_network_clean[
            ~gdf_network_clean["SEGID"].isin(used_segids)
        ]

        if not available_segments.empty:
            distances = available_segments.distance(station_geom)
            min_dist_idx = distances.idxmin()
            best_match = available_segments.loc[min_dist_idx]

            match_results[i]["MATCHED_SEGID"] = best_match["SEGID"]
            match_results[i]["MATCHED_GROUP"] = "Total"  # Fallback is always Total
            match_results[i]["MATCH_TYPE"] = "Global Fallback (Nearest)"
            used_segids.add(best_match["SEGID"])

# Create DataFrame
df_matches = pd.DataFrame(match_results).drop(columns=["geometry"])
print("Matching Complete. Summary:")
print(df_matches["MATCH_TYPE"].value_counts())
Starting Route & Group Constrained Spatial Matching...
Matching Complete. Summary:
MATCH_TYPE
Route Match (Spatial)        120
Global Fallback (Nearest)      6
Name: count, dtype: int64
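The Pass 1 pattern (compute the distance to every candidate segment on the route, then keep the `idxmin`) in miniature, with a toy network whose coordinates are entirely made up:

```python
import geopandas as gpd
from shapely.geometry import LineString, Point

# Toy candidates: two segments on the same route (made-up geometry)
segs = gpd.GeoDataFrame(
    {"SEGID": ["0015_1.0", "0015_2.0"]},
    geometry=[LineString([(0, 0), (1, 0)]), LineString([(0, 5), (1, 5)])],
)
station = Point(0.5, 4.0)

distances = segs.distance(station)   # distance from the station to each segment
best = segs.loc[distances.idxmin()]  # keep the closest candidate
print(best["SEGID"])  # 0015_2.0
```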

Enrich Bridge & Export

We attach the static network attributes here.

Show the code
# 1. Define Static Attributes from Network
static_cols = ["SEGID", "CO_FIPS", "SUTRUCKS", "CUTRUCKS"]

# 2. Map County Names
fips_map = {3: "Box Elder", 11: "Davis", 35: "Salt Lake", 49: "Utah", 57: "Weber"}

# Create a non-spatial attribute table
df_network_attrs = pd.DataFrame(gdf_network_clean[static_cols]).copy()
df_network_attrs["COUNTY_NAME"] = (
    df_network_attrs["CO_FIPS"].map(fips_map).fillna("Other")
)

# 3. Rename Truck Columns for standardization
df_network_attrs = df_network_attrs.rename(
    columns={"SUTRUCKS": "PCT_SUT", "CUTRUCKS": "PCT_CUT"}
)

# 4. Merge Matches with Attributes
df_bridge = df_matches.merge(
    df_network_attrs[["SEGID", "COUNTY_NAME", "PCT_SUT", "PCT_CUT"]],
    left_on="MATCHED_SEGID",
    right_on="SEGID",
    how="left",
)

# 5. Export to Intermediate
bridge_path = intermediate_dir / "station_segid_bridge.csv"
df_bridge.to_csv(bridge_path, index=False)
print(f"Step 1 Complete: Bridge exported to {bridge_path}")
Step 1 Complete: Bridge exported to _intermediate\station_segid_bridge.csv

5 Step 2: Load Observed Counts

Objective: Fetch raw count data and aggregate it. This creates the “Fact Table A”.

Setup BigQuery

Show the code
# Setup BigQuery
response = fetch_github(
    'https://raw.githubusercontent.com/WFRCAnalytics/Resources/refs/heads/master/2-Python/global-functions/BigQuery.py',
    mode="public"
)
BigQuery = importlib.util.module_from_spec(importlib.util.spec_from_loader('BigQuery', loader=None))
exec(response.text, BigQuery.__dict__)
client = BigQuery.getBigQueryClient_Confidential2023UtahHTS()
pukar.bhandari
C:/Users/Pukar.Bhandari/.private/confidential-2023-utah-hts-5fd7ddd219a7.json

Query Continuous Count Data

Show the code
# Query
query = f"""
    SELECT
        STATION,
        CAST(ROUND(AVG(AM_SUM)) AS INT64) as AVG_AM_VOL,
        CAST(ROUND(AVG(MD_SUM)) AS INT64) as AVG_MD_VOL,
        CAST(ROUND(AVG(PM_SUM)) AS INT64) as AVG_PM_VOL,
        CAST(ROUND(AVG(EV_SUM)) AS INT64) as AVG_EV_VOL,
        CAST(ROUND(AVG(DAILY_SUM)) AS INT64) as AVG_DY_VOL
    FROM (
        SELECT
            STATION,
            DATE_ONLY,
            SUM(CASE WHEN HOUR BETWEEN 6 AND 8 THEN VOLUME ELSE 0 END) as AM_SUM,
            SUM(CASE WHEN HOUR BETWEEN 9 AND 14 THEN VOLUME ELSE 0 END) as MD_SUM,
            SUM(CASE WHEN HOUR BETWEEN 15 AND 17 THEN VOLUME ELSE 0 END) as PM_SUM,
            SUM(CASE WHEN HOUR >= 18 OR HOUR <= 5 THEN VOLUME ELSE 0 END) as EV_SUM,
            SUM(VOLUME) as DAILY_SUM
        FROM `wfrc-modeling-data.src_udot_continuous_count_2023.data`
        WHERE
            DATE_ONLY BETWEEN '2023-09-01' AND '2023-11-15'
            AND EXTRACT(DAYOFWEEK FROM DATE_ONLY) IN (3, 4, 5)
        GROUP BY STATION, DATE_ONLY
    )
    GROUP BY STATION
    ORDER BY STATION
"""

df_observed_counts = client.query(query).to_dataframe()
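The query's two-stage aggregation (daily period sums, then a cross-day average; BigQuery's `DAYOFWEEK` values 3-5 correspond to Tuesday through Thursday) can be mirrored in pandas. A sketch on synthetic hourly rows with invented volumes:

```python
import pandas as pd

# Synthetic hourly counts for one station over two weekdays (invented values)
df = pd.DataFrame({
    "STATION": [101, 101, 101, 101],
    "DATE_ONLY": ["2023-09-05", "2023-09-05", "2023-09-06", "2023-09-06"],
    "HOUR": [7, 16, 7, 16],
    "VOLUME": [100, 300, 120, 280],
})

def period(h):
    # Same period breaks as the SQL CASE expressions above
    if 6 <= h <= 8:
        return "AM"
    if 9 <= h <= 14:
        return "MD"
    if 15 <= h <= 17:
        return "PM"
    return "EV"

df["PERIOD"] = df["HOUR"].map(period)

# Stage 1: period totals per station-day; Stage 2: average across days
daily = df.groupby(["STATION", "DATE_ONLY", "PERIOD"])["VOLUME"].sum()
avg = daily.groupby(["STATION", "PERIOD"]).mean().round().astype(int)
print(avg.to_dict())
```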

Export Intermediate Data

Show the code
# Export
obs_path = intermediate_dir / "observed_counts_raw.csv"
df_observed_counts.to_csv(obs_path, index=False)
print(f"Step 2 Complete: Observed data exported to {obs_path}")
Step 2 Complete: Observed data exported to _intermediate\observed_counts_raw.csv

6 Step 3: Load Model Results (Dynamic)

Objective: Load the specific scenario results (“Fact Table B”).

Show the code
# Source: https://github.com/WFRCAnalytics/WF-TDM-Documentation/tree/v10-calib/v10x/v1000/validation/data/5-assignhwy
df_segid_detail = pd.read_csv(raw_dir / "WFv1000_BY_2023_Summary_SEGID_Detailed.csv")

# Filter for bi-directional records only.
# Note: ALL FUNCGROUP values (Freeway, Arterial, CD Road, Total) are kept here;
# group-specific filtering happens during the Step 4 join.
df_model_clean = df_segid_detail[(df_segid_detail["DIRECTION"] == "Both")].copy()

# Ensure SEGID match format
df_model_clean["SEGID"] = df_model_clean["SEGID"].astype(str)

print(f"Step 3 Complete: Loaded {len(df_model_clean)} model records.")
Step 3 Complete: Loaded 8924 model records.

7 Step 4: Process & Validation (Integration)

Objective: Combine Bridge + Observed + Modeled into the final Dashboard CSV.

This step is designed to run independently if the CSVs exist.

Export Observed Data

Show the code
# Bridge + Observed (Inner Join = Only Stations with Data)
df_valid_obs = df_bridge.merge(df_observed_counts, on="STATION", how="inner")

# Master Observed File Generated for validation
df_valid_obs.to_csv(output_dir / "observed_traffic_with_crosswalk.csv", index=False)

Merge Data

Show the code
# --- 4.1 Load Intermediates (Resiliency Check) ---
# If this notebook is running top-to-bottom, we have the dfs.
# If running as a standalone script later, we load from CSV.

if "df_valid_obs" not in locals():
    print("Loading Bridge from CSV...")
    df_valid_obs = pd.read_csv(output_dir / "observed_traffic_with_crosswalk.csv")
Show the code
# --- 4.2 Merge Data ---

# B. Add Modeled Volumes AND Context Attributes
# We pull FTCLASS and ATYPENAME from the Model Results now
model_cols_needed = [
    "SEGID",
    "FUNCGROUP",
    "AM_Vol",
    "MD_Vol",
    "PM_Vol",
    "EV_Vol",
    "DY_Vol",
    "FTCLASS",
    "ATYPENAME",
]

# Safety check: ensure these columns actually exist in the model output
available_cols = [c for c in model_cols_needed if c in df_model_clean.columns]
df_model_subset = df_model_clean[available_cols].copy()

# Join 1: Attempt to match exact FUNCGROUP (e.g., Station Freeway -> Model Freeway)
df_join_specific = df_valid_obs.merge(
    df_model_subset,
    left_on=["MATCHED_SEGID", "MATCHED_GROUP"],
    right_on=["SEGID", "FUNCGROUP"],
    how="inner",
)

# Identify which Stations were successfully matched
matched_stations = set(df_join_specific["STATION"])

# Filter remaining unmatched stations
df_remaining = df_valid_obs[~df_valid_obs["STATION"].isin(matched_stations)].copy()

# Join 2: Fallback to "Total" for remaining stations
# (e.g., Station is Arterial, but Model only has a 'Total' row for this simple segment)
df_remaining["FALLBACK_GROUP"] = "Total"
df_join_fallback = df_remaining.merge(
    df_model_subset,
    left_on=["MATCHED_SEGID", "FALLBACK_GROUP"],
    right_on=["SEGID", "FUNCGROUP"],
    how="inner",
)

# C. Combine Results
df_master = pd.concat([df_join_specific, df_join_fallback], ignore_index=True)

# Fill missing model volumes with 0 (assuming no flow if missing from summary)
vol_cols = ["AM_Vol", "MD_Vol", "PM_Vol", "EV_Vol", "DY_Vol"]
df_master[vol_cols] = df_master[vol_cols].fillna(0)
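The specific-then-fallback join logic above in miniature, on toy data (all IDs and volumes invented): the first station finds a group-specific model row, the second only has a "Total" row and is picked up by the fallback pass.

```python
import pandas as pd

# Toy observed bridge and model summary (invented values)
obs = pd.DataFrame({"STATION": [1, 2],
                    "MATCHED_SEGID": ["A", "B"],
                    "MATCHED_GROUP": ["Freeway", "Arterial"]})
mod = pd.DataFrame({"SEGID": ["A", "B"],
                    "FUNCGROUP": ["Freeway", "Total"],
                    "DY_Vol": [50000, 12000]})

# Join 1: exact FUNCGROUP match
j1 = obs.merge(mod, left_on=["MATCHED_SEGID", "MATCHED_GROUP"],
               right_on=["SEGID", "FUNCGROUP"], how="inner")

# Join 2: unmatched stations fall back to the segment's "Total" row
rest = obs[~obs["STATION"].isin(j1["STATION"])].assign(FALLBACK_GROUP="Total")
j2 = rest.merge(mod, left_on=["MATCHED_SEGID", "FALLBACK_GROUP"],
                right_on=["SEGID", "FUNCGROUP"], how="inner")

combined = pd.concat([j1, j2], ignore_index=True)
print(sorted(combined["STATION"].tolist()))  # [1, 2]
```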

Pivot & Calculate Vehicles

Show the code
# --- 4.3 Pivot & Calculate Vehicles ---

# Define ID variables (Dimensions)
id_vars = [
    "STATION",
    "MATCHED_SEGID",
    "FTCLASS",
    "ATYPENAME",
    "COUNTY_NAME",
    "PCT_SUT",
    "PCT_CUT",
]

# A. Melt Observed (Wide to Long)
df_obs_long = df_master.melt(
    id_vars=id_vars,
    value_vars=["AVG_AM_VOL", "AVG_MD_VOL", "AVG_PM_VOL", "AVG_EV_VOL"],
    var_name="PERIOD_KEY",
    value_name="OBSERVED_TOTAL",
)
df_obs_long["PERIOD"] = df_obs_long["PERIOD_KEY"].str.split("_").str[1]

# B. Melt Modeled (Wide to Long)
df_mod_long = df_master.melt(
    id_vars=id_vars,
    value_vars=["AM_Vol", "MD_Vol", "PM_Vol", "EV_Vol"],
    var_name="MOD_KEY",
    value_name="MODELED_TOTAL",
)
df_mod_long["PERIOD"] = df_mod_long["MOD_KEY"].str.split("_").str[0]

# C. Merge Long Forms
df_period = pd.merge(
    df_obs_long.drop(columns=["PERIOD_KEY"]),
    df_mod_long.drop(columns=["MOD_KEY"]),
    on=id_vars + ["PERIOD"],
    how="left",
)

# D. Expand Vehicle Types (Auto vs Truck)
# Calculate Auto %
df_period["PCT_AUTO"] = 1.0 - (
    df_period["PCT_SUT"].fillna(0) + df_period["PCT_CUT"].fillna(0)
)

# Melt Types
df_long = df_period.melt(
    id_vars=[
        "STATION",
        "MATCHED_SEGID",
        "FTCLASS",
        "ATYPENAME",
        "COUNTY_NAME",
        "PERIOD",
        "OBSERVED_TOTAL",
        "MODELED_TOTAL",
    ],
    value_vars=["PCT_AUTO", "PCT_SUT", "PCT_CUT"],
    var_name="TYPE_KEY",
    value_name="SHARE",
)

# Map Names
type_map = {"PCT_AUTO": "Auto", "PCT_SUT": "SUT", "PCT_CUT": "CUT"}
df_long["VEHICLE_TYPE"] = df_long["TYPE_KEY"].map(type_map)

# Calculate Final Volumes
df_long["OBSERVED"] = df_long["OBSERVED_TOTAL"] * df_long["SHARE"]
df_long["MODELED"] = df_long["MODELED_TOTAL"] * df_long["SHARE"]

Final Cleanup & Export

Show the code
# --- 4.4 Final Cleanup & Export ---
cols_final = [
    "STATION", "MATCHED_SEGID", "FTCLASS", "ATYPENAME", "COUNTY_NAME",
    "PERIOD", "VEHICLE_TYPE", "OBSERVED", "MODELED"
]
df_final = df_long[cols_final].copy()

# Add diff stats for CSV readability
df_final["DIFF"] = df_final["MODELED"] - df_final["OBSERVED"]
# Guard against division by zero where a station observed no volume
df_final["PCT_DIFF"] = (
    df_final["DIFF"] / df_final["OBSERVED"].replace(0, np.nan)
).round(3)

8 Step 5: Export Final Output

Show the code
# --- 5.1 Export Final Data for OJS Plots ---
final_path = output_dir / "dashboard_data.csv"
df_final.to_csv(final_path, index=False)
print(f"Step 5 Complete: Dashboard data ready at {final_path}")
Step 5 Complete: Dashboard data ready at _output\dashboard_data.csv
Show the code
# --- 5.2 Export Geometry for OJS Map ---

# 1. Identify Valid IDs from the Final Dashboard Data
valid_stations = df_final["STATION"].unique()
valid_segments = df_final["MATCHED_SEGID"].unique()

# 2. Filter Geometry AND Project to Lat/Lon (EPSG:4326)
# Leaflet requires EPSG:4326. Without this .to_crs(), points will not show up.
gdf_out_stations = gdf_ccs_locations[
    gdf_ccs_locations["STATION"].isin(valid_stations)
].to_crs("EPSG:4326")
gdf_out_segments = gdf_network_clean[
    gdf_network_clean["SEGID"].isin(valid_segments)
].to_crs("EPSG:4326")

# 3. Export to GeoJSON
gdf_out_stations.to_file(output_dir / "stations.geojson", driver="GeoJSON")
gdf_out_segments.to_file(output_dir / "segments.geojson", driver="GeoJSON")

print(f"Step 4 Complete: Map Geometry Exported.")
print(f"  - Stations: {len(gdf_out_stations)}")
print(f"  - Segments: {len(gdf_out_segments)}")
Step 4 Complete: Map Geometry Exported.
  - Stations: 83
  - Segments: 81

9 Interactive Dashboard