---
title: Traffic Assignment Validation
subtitle: Validating the Observed vs Modeled Traffic Volume
description: This notebook validates the traffic volume assigned by the Travel Demand Model against the observed traffic volume data captured by the Continuous Count Stations (CCS).
author:
- name: Pukar Bhandari
email: pukar.bhandari@wfrc.utah.gov
affiliation:
- name: Wasatch Front Regional Council
url: "https://wfrc.utah.gov/"
date: "2025-12-24"
---
## Overview
This document validates the Travel Demand Model's traffic assignment by comparing modeled volumes against observed counts from Continuous Count Stations (CCS).
The process is divided into modular steps to support a "Minimal Documentation" workflow:
1. **Bridge Creation:** Mapping CCS Stations to Network Segments (Static).
2. **Observed Data:** Aggregating raw telemetry data (Static).
3. **Model Data:** Loading specific scenario results (Dynamic).
4. **Validation:** Merging the three sources for visualization.
## Environment Setup
### Import Standard Libraries
```{python}
# For Analysis
import numpy as np
import pandas as pd
import geopandas as gpd
# For Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import mapclassify
# misc
import os
import requests
from pathlib import Path
import importlib.util
from dotenv import load_dotenv
load_dotenv()
```
### Setup Directories & Variables
```{python}
# Projected CRS used for all distance-based spatial matching (NAD83 / UTM zone 12N).
CRS_UTM = "EPSG:26912"

# Working directories: intermediate artifacts, final outputs, raw inputs.
intermediate_dir = Path("_intermediate")
output_dir = Path("_output")
raw_dir = Path("_data/raw")

# Make sure every working directory exists before any later step writes to it.
for _dir in (intermediate_dir, output_dir, raw_dir):
    _dir.mkdir(parents=True, exist_ok=True)
print("Directories validated.")
```
## Helper Functions
```{python}
def fetch_github(
    url: str,
    mode: str = "private",
    token_env_var: str = "GITHUB_TOKEN"
) -> requests.Response:
    """
    Fetch content from GitHub, optionally authenticating for private repos.

    Args:
        url: GitHub raw URL (e.g., https://raw.githubusercontent.com/...)
        mode: "public" for public repos, "private" for repos needing a token
        token_env_var: Environment variable holding the GitHub token
            (default: GITHUB_TOKEN)

    Returns:
        requests.Response object

    Raises:
        ValueError: On an unknown mode, or when no token is set for private mode
        requests.HTTPError: If the HTTP request fails
    """
    # Guard clause: reject unknown modes up front.
    if mode not in ("public", "private"):
        raise ValueError(f"mode must be 'public' or 'private', got '{mode}'")

    # Private mode sends an Authorization header built from the env token;
    # public mode sends no extra headers at all.
    request_headers = None
    if mode == "private":
        token = os.getenv(token_env_var)
        if not token:
            raise ValueError(
                f"GitHub token not found in environment variable '{token_env_var}'. "
                f"Check your .env file has: {token_env_var}=your_token_here"
            )
        request_headers = {
            'Authorization': f'token {token}',
            'Accept': 'application/vnd.github.v3.raw'
        }

    response = requests.get(url, headers=request_headers, timeout=30)
    response.raise_for_status()
    return response
```
```{python}
# Create function to read ArcGIS FeatureLayer or Table
def arc_read(url, where="1=1", outFields="*", outSR=4326, **kwargs):
    """
    Read an ArcGIS FeatureLayer or Table to a GeoDataFrame.

    Parameters:
        url (str): The ArcGIS REST service URL (e.g., ending in /FeatureServer/0)
        where (str): SQL WHERE clause for filtering. Default: "1=1"
        outFields (str): Comma-separated field names. Default: "*"
        outSR (int): Output spatial reference EPSG code. Default: 4326
        **kwargs: Additional query parameters passed to the ArcGIS REST API

    Returns:
        geopandas.GeoDataFrame: Spatial data from the service

    Raises:
        requests.HTTPError: If the REST query fails
    """
    # Local import kept inside the function, but hoisted to the top so the
    # dependency is visible before any work happens.
    import io

    # Ensure URL ends with /query (the REST endpoint that accepts these params)
    if not url.endswith("/query"):
        url = url.rstrip("/") + "/query"
    # Build query parameters; GeoJSON output lets geopandas parse directly
    params = {
        "where": where,
        "outFields": outFields,
        "returnGeometry": "true",
        "outSR": outSR,
        "f": "geojson",
    }
    # Caller-supplied parameters take precedence over the defaults above
    params.update(kwargs)
    # Make request. The explicit timeout (matching fetch_github) prevents an
    # unresponsive service from hanging the notebook forever.
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()  # Good practice to check for HTTP errors
    # Read as GeoDataFrame
    # We use io.BytesIO to handle the response content safely for read_file
    return gpd.read_file(io.BytesIO(response.content), engine="pyogrio")
```
## Step 1: Create Station-to-Network Bridge
**Objective:** Create a static reference file that links Station IDs to Network SEGIDs and attaches static network attributes (County, Functional Class, Truck %).
### Load Network (Input Shapefile)
We load the raw input shapefile. We do **not** filter by model results here, ensuring the bridge is independent of any specific model run.
```{python}
# Load Segments (Input)
# Source: https://github.com/WFRCAnalytics/WF-TDM-Documentation/tree/v10-calib/v10x/v1000/validation/data/5-assignhwy/seg_shp
gdf_segments = gpd.read_file(
    raw_dir / "Segments/WFv1000_Segments.shp",
    engine="pyogrio",
).to_crs(CRS_UTM)
# Ensure SEGID is string for consistent matching
gdf_segments["SEGID"] = gdf_segments["SEGID"].astype(str)
# Parse Route and Milepost from SEGID directly (Independent of Model Results)
# Assumes SEGID looks like "<route>_<milepost>" — TODO confirm against the shapefile.
segid_parts = gdf_segments["SEGID"].str.split("_")
gdf_segments["MODEL_ROUTE"] = pd.to_numeric(segid_parts.str[0], errors="coerce")
# Coerce (rather than .astype(float)) so a malformed SEGID yields NaN and is
# dropped below, instead of raising and aborting the whole load. This also
# makes the milepost parsing consistent with the route parsing above.
gdf_segments["MODEL_MP"] = pd.to_numeric(segid_parts.str[1], errors="coerce")
# Filter to valid network links for matching
gdf_network_clean = gdf_segments.dropna(subset=["MODEL_ROUTE", "MODEL_MP"]).copy()
gdf_network_clean["MODEL_ROUTE"] = gdf_network_clean["MODEL_ROUTE"].astype(int)
print(f"Loaded Network with {len(gdf_network_clean)} valid segments.")
```
### WF-TDM Region
```{python}
# Where the regional boundary service lives and where it is cached locally.
service_url = "https://services1.arcgis.com/taguadKoI1XFwivx/ArcGIS/rest/services/RegionalBoundaryComponents/FeatureServer/0"
gdb_path = raw_dir / "RegionalBoundary.gpkg"

# Download once, then always read from the local GeoPackage cache.
if gdb_path.exists():
    print("Loading Regional Boundaries from cache...")
else:
    print("Downloading Regional Boundaries...")
    gdf_download = arc_read(service_url)
    gdf_download.to_file(gdb_path, layer="RegionalBoundaryComponents", driver="GPKG")

# Read only the MAG/WFRC MPO components, merge them into one shape, and
# project to the analysis CRS.
gdf_region = (
    gpd.read_file(
        gdb_path,
        layer="RegionalBoundaryComponents",
        where="PlanOrg IN ('MAG MPO', 'WFRC MPO')",
    )
    .dissolve()
    .to_crs(CRS_UTM)
)
# Buffer out then back in (1 m) to close small holes and slivers in the boundary.
gdf_region["geometry"] = gdf_region.buffer(1).buffer(-1)
```
### Load CCS Stations
```{python}
# Service endpoint and local cache for the CCS station layer.
service_url = "https://services.arcgis.com/pA2nEVnB6tquxgOW/ArcGIS/rest/services/CCS_Data_v2/FeatureServer/1"
gdb_path = raw_dir / "CCS_Data_v2.gpkg"

# Download once, then always read from the local GeoPackage cache.
if gdb_path.exists():
    print("Loading CCS Stations from cache...")
else:
    print("Downloading CCS Stations...")
    gdf_download = arc_read(service_url)
    # Normalize SITEID to a compact integer dtype before caching.
    if "SITEID" in gdf_download.columns:
        gdf_download["SITEID"] = gdf_download["SITEID"].astype("int32")
    gdf_download.to_file(gdb_path, layer="CCS_Data_v2", driver="GPKG")

# Read only the stations inside the WF-TDM region, then project.
gdf_ccs_locations = gpd.read_file(
    gdb_path,
    layer="CCS_Data_v2",
    # where="COUNTY IN ('Box Elder', 'Weber', 'Davis', 'Salt Lake', 'Utah')",
    mask=gdf_region,
).to_crs(CRS_UTM)
print(f"Total Stations Loaded (Region): {len(gdf_ccs_locations)}")
```
### Run Spatial Matching Logic
```{python}
# 1. Define FC Mapping (UDOT Station FC -> Model FUNCGROUP)
# Add or refine these mappings based on your data.
# Written group-first for readability, then inverted into the flat
# class -> group lookup the matching loop uses.
_fc_groups = {
    "Freeway": [
        "Urban Principal Arterial - Interstate",
        "Rural Principal Arterial - Interstate",
        "Urban Principal Arterial - Other Freeways",  # Default mapping
    ],
    "Arterial": [
        "Urban Principal Arterial - Other",
        "Urban Minor Arterial",
        "Rural Principal Arterial - Other",
        "Rural Minor Arterial",
    ],
    "CD Road": [
        "Urban Major Collector",
        "Urban Minor Collector",
        "Urban Collector",
        "Rural Major Collector",
        "Urban Local System",  # Fallback
    ],
}
fc_map = {fc: group for group, classes in _fc_groups.items() for fc in classes}
```
```{python}
# Accumulates one result dict per station; filled in Pass 1, patched in Pass 2.
match_results = []
# SEGIDs already claimed by some station — keeps the global fallback from
# assigning the same segment twice.
used_segids = set()
print("Starting Route & Group Constrained Spatial Matching...")
for idx, ccs_row in gdf_ccs_locations.iterrows():
    station_id = ccs_row["STATION"]
    target_route = ccs_row["ROUTE"]
    # Identify the Target Group based on Station FC
    # Default to "Total" if FC is missing or unknown
    station_fc = ccs_row.get("FC", None)
    target_group = fc_map.get(station_fc, "Total")
    # --- PASS 1: Route-Constrained Spatial Match ---
    # Candidate segments restricted to the station's route number.
    route_segments = gdf_network_clean[gdf_network_clean["MODEL_ROUTE"] == target_route]
    matched_segid = None
    match_type = "No Match"
    if not route_segments.empty:
        # Calculate distance to all segments on this route
        distances = route_segments.distance(ccs_row["geometry"])
        # Pick the absolute closest segment on this route
        best_idx = distances.idxmin()
        best_match = route_segments.loc[best_idx]
        matched_segid = best_match["SEGID"]
        match_type = "Route Match (Spatial)"
    # Add valid match to used set
    if matched_segid:
        used_segids.add(matched_segid)
    match_results.append(
        {
            "STATION": station_id,
            "MATCHED_SEGID": matched_segid,
            "MATCHED_GROUP": target_group,  # <--- NEW: Store the Target Group
            "MATCH_TYPE": match_type,
            "geometry": ccs_row["geometry"],
        }
    )
# --- PASS 2: Global Spatial Fallback (Unrestricted) ---
# (Same as your original code, but ensure we add MATCHED_GROUP = "Total" for fallbacks)
# Only stations that found no segment on their route in Pass 1 get here.
for i, result in enumerate(match_results):
    if result["MATCHED_SEGID"] is None:
        station_geom = result["geometry"]
        # Exclude segments already claimed in Pass 1 or by earlier fallbacks.
        available_segments = gdf_network_clean[
            ~gdf_network_clean["SEGID"].isin(used_segids)
        ]
        if not available_segments.empty:
            distances = available_segments.distance(station_geom)
            min_dist_idx = distances.idxmin()
            best_match = available_segments.loc[min_dist_idx]
            match_results[i]["MATCHED_SEGID"] = best_match["SEGID"]
            match_results[i]["MATCHED_GROUP"] = "Total"  # Fallback is always Total
            match_results[i]["MATCH_TYPE"] = "Global Fallback (Nearest)"
            used_segids.add(best_match["SEGID"])
# Create DataFrame (geometry is dropped — the bridge is a non-spatial lookup)
df_matches = pd.DataFrame(match_results).drop(columns=["geometry"])
print("Matching Complete. Summary:")
print(df_matches["MATCH_TYPE"].value_counts())
```
### Enrich Bridge & Export
We attach the static network attributes here.
```{python}
# Static network attributes carried onto the bridge: county FIPS and truck shares.
static_cols = ["SEGID", "CO_FIPS", "SUTRUCKS", "CUTRUCKS"]
# County FIPS codes -> county names; anything unmapped becomes "Other".
fips_map = {3: "Box Elder", 11: "Davis", 35: "Salt Lake", 49: "Utah", 57: "Weber"}

# Build a plain (non-spatial) attribute table with standardized column names.
df_network_attrs = (
    pd.DataFrame(gdf_network_clean[static_cols])
    .copy()
    .rename(columns={"SUTRUCKS": "PCT_SUT", "CUTRUCKS": "PCT_CUT"})
)
df_network_attrs["COUNTY_NAME"] = (
    df_network_attrs["CO_FIPS"].map(fips_map).fillna("Other")
)

# Attach the static attributes to each station's matched segment.
df_bridge = df_matches.merge(
    df_network_attrs[["SEGID", "COUNTY_NAME", "PCT_SUT", "PCT_CUT"]],
    left_on="MATCHED_SEGID",
    right_on="SEGID",
    how="left",
)

# Persist the bridge so later steps can run standalone.
bridge_path = intermediate_dir / "station_segid_bridge.csv"
df_bridge.to_csv(bridge_path, index=False)
print(f"Step 1 Complete: Bridge exported to {bridge_path}")
```
## Step 2: Load Observed Counts
**Objective:** Fetch raw count data and aggregate it. This creates the "Fact Table A".
### Setup BigQuery
```{python}
# Setup BigQuery
# Fetch the shared BigQuery helper module from the WFRC Resources repo.
response = fetch_github(
    'https://raw.githubusercontent.com/WFRCAnalytics/Resources/refs/heads/master/2-Python/global-functions/BigQuery.py',
    mode="public"
)
# Materialize the fetched source as an importable module object (no file on disk).
# NOTE(review): exec() of remotely fetched code runs it with full privileges.
# Acceptable only because the source repo is first-party — confirm that
# assumption before reusing this pattern elsewhere.
BigQuery = importlib.util.module_from_spec(importlib.util.spec_from_loader('BigQuery', loader=None))
exec(response.text, BigQuery.__dict__)
# Authenticated client for the confidential 2023 Utah HTS project.
client = BigQuery.getBigQueryClient_Confidential2023UtahHTS()
```
### Query Continuous Count Data
```{python}
# Query: per-station average weekday volumes by period.
# Inner query: sum hourly volumes into AM/MD/PM/EV/daily totals per station-day.
# Outer query: average those daily totals across days.
# Window: 2023-09-01 to 2023-11-15, Tue/Wed/Thu only (BigQuery DAYOFWEEK 3-5).
# Plain string — the previous f-prefix was unnecessary (no placeholders).
query = """
SELECT
    STATION,
    CAST(ROUND(AVG(AM_SUM)) AS INT64) as AVG_AM_VOL,
    CAST(ROUND(AVG(MD_SUM)) AS INT64) as AVG_MD_VOL,
    CAST(ROUND(AVG(PM_SUM)) AS INT64) as AVG_PM_VOL,
    CAST(ROUND(AVG(EV_SUM)) AS INT64) as AVG_EV_VOL,
    CAST(ROUND(AVG(DAILY_SUM)) AS INT64) as AVG_DY_VOL
FROM (
    SELECT
        STATION,
        DATE_ONLY,
        SUM(CASE WHEN HOUR BETWEEN 6 AND 8 THEN VOLUME ELSE 0 END) as AM_SUM,
        SUM(CASE WHEN HOUR BETWEEN 9 AND 14 THEN VOLUME ELSE 0 END) as MD_SUM,
        SUM(CASE WHEN HOUR BETWEEN 15 AND 17 THEN VOLUME ELSE 0 END) as PM_SUM,
        SUM(CASE WHEN HOUR >= 18 OR HOUR <= 5 THEN VOLUME ELSE 0 END) as EV_SUM,
        SUM(VOLUME) as DAILY_SUM
    FROM `wfrc-modeling-data.src_udot_continuous_count_2023.data`
    WHERE
        DATE_ONLY BETWEEN '2023-09-01' AND '2023-11-15'
        AND EXTRACT(DAYOFWEEK FROM DATE_ONLY) IN (3, 4, 5)
    GROUP BY STATION, DATE_ONLY
)
GROUP BY STATION
ORDER BY STATION
"""
df_observed_counts = client.query(query).to_dataframe()
```
### Export Intermediate Data
```{python}
# Persist the aggregated observed counts so later steps can run standalone.
obs_path = intermediate_dir / "observed_counts_raw.csv"
df_observed_counts.to_csv(obs_path, index=False)
print(f"Step 2 Complete: Observed data exported to {obs_path}")
```
## Step 3: Load Model Results (Dynamic)
**Objective:** Load the specific scenario results ("Fact Table B").
```{python}
# Source: https://github.com/WFRCAnalytics/WF-TDM-Documentation/tree/v10-calib/v10x/v1000/validation/data/5-assignhwy
df_segid_detail = pd.read_csv(raw_dir / "WFv1000_BY_2023_Summary_SEGID_Detailed.csv")

# Keep bi-directional records only, but retain every FUNCGROUP
# (Freeway, Arterial, CD Road, Total) for the group-aware join in Step 4.
df_model_clean = df_segid_detail.loc[df_segid_detail["DIRECTION"] == "Both"].copy()

# SEGID as string so it joins cleanly against the bridge table.
df_model_clean["SEGID"] = df_model_clean["SEGID"].astype(str)
print(f"Step 3 Complete: Loaded {len(df_model_clean)} model records.")
```
## Step 4: Process & Validation (Integration)
**Objective:** Combine Bridge + Observed + Modeled into the final Dashboard CSV.
This step is designed to run independently if the CSVs exist.
### Export Observed Data
```{python}
# Inner join keeps only stations present in BOTH the bridge and the observed counts.
df_valid_obs = df_bridge.merge(df_observed_counts, on="STATION", how="inner")
# Master observed file, generated for validation.
df_valid_obs.to_csv(output_dir / "observed_traffic_with_crosswalk.csv", index=False)
```
### Merge Data
```{python}
# --- 4.1 Load Intermediates (Resiliency Check) ---
# If this notebook runs top-to-bottom, df_valid_obs already exists in memory.
# If Step 4 runs standalone later, rebuild it from the exported CSV instead.
if "df_valid_obs" not in locals():
    # Message fixed: the file loaded here is the observed-with-crosswalk
    # export, not the Step 1 bridge.
    print("Loading observed data with crosswalk from CSV...")
    df_valid_obs = pd.read_csv(output_dir / "observed_traffic_with_crosswalk.csv")
```
```{python}
# --- 4.2 Merge Data ---
# B. Add Modeled Volumes AND Context Attributes
# We pull FTCLASS and ATYPENAME from the Model Results now
model_cols_needed = [
    "SEGID",
    "FUNCGROUP",
    "AM_Vol",
    "MD_Vol",
    "PM_Vol",
    "EV_Vol",
    "DY_Vol",
    "FTCLASS",
    "ATYPENAME",
]
# Safety check: ensure these columns actually exist in the model output
# NOTE(review): if FUNCGROUP were ever missing, the merges below would raise —
# this guard only protects the subset step, not the joins.
available_cols = [c for c in model_cols_needed if c in df_model_clean.columns]
df_model_subset = df_model_clean[available_cols].copy()
# Join 1: Attempt to match exact FUNCGROUP (e.g., Station Freeway -> Model Freeway)
df_join_specific = df_valid_obs.merge(
    df_model_subset,
    left_on=["MATCHED_SEGID", "MATCHED_GROUP"],
    right_on=["SEGID", "FUNCGROUP"],
    how="inner",
)
# Identify which Stations were successfully matched
matched_stations = set(df_join_specific["STATION"])
# Filter remaining unmatched stations
df_remaining = df_valid_obs[~df_valid_obs["STATION"].isin(matched_stations)].copy()
# Join 2: Fallback to "Total" for remaining stations
# (e.g., Station is Arterial, but Model only has a 'Total' row for this simple segment)
df_remaining["FALLBACK_GROUP"] = "Total"
df_join_fallback = df_remaining.merge(
    df_model_subset,
    left_on=["MATCHED_SEGID", "FALLBACK_GROUP"],
    right_on=["SEGID", "FUNCGROUP"],
    how="inner",
)
# C. Combine Results (each station appears via exactly one of the two joins)
df_master = pd.concat([df_join_specific, df_join_fallback], ignore_index=True)
# Fill missing model volumes with 0 (assuming no flow if missing from summary)
# NOTE(review): after two inner joins these columns are unlikely to hold NaN;
# this fill is a belt-and-braces guard in case the source CSV has gaps.
vol_cols = ["AM_Vol", "MD_Vol", "PM_Vol", "EV_Vol", "DY_Vol"]
df_master[vol_cols] = df_master[vol_cols].fillna(0)
```
### Pivot & Calculate Vehicles
```{python}
# --- 4.3 Pivot & Calculate Vehicles ---
# Define ID variables (Dimensions) preserved through both melts
id_vars = [
    "STATION",
    "MATCHED_SEGID",
    "FTCLASS",
    "ATYPENAME",
    "COUNTY_NAME",
    "PCT_SUT",
    "PCT_CUT",
]
# A. Melt Observed (Wide to Long)
df_obs_long = df_master.melt(
    id_vars=id_vars,
    value_vars=["AVG_AM_VOL", "AVG_MD_VOL", "AVG_PM_VOL", "AVG_EV_VOL"],
    var_name="PERIOD_KEY",
    value_name="OBSERVED_TOTAL",
)
# "AVG_AM_VOL" -> "AM" (middle token of the column name)
df_obs_long["PERIOD"] = df_obs_long["PERIOD_KEY"].str.split("_").str[1]
# B. Melt Modeled (Wide to Long)
df_mod_long = df_master.melt(
    id_vars=id_vars,
    value_vars=["AM_Vol", "MD_Vol", "PM_Vol", "EV_Vol"],
    var_name="MOD_KEY",
    value_name="MODELED_TOTAL",
)
# "AM_Vol" -> "AM" (leading token of the column name)
df_mod_long["PERIOD"] = df_mod_long["MOD_KEY"].str.split("_").str[0]
# C. Merge Long Forms: pair observed and modeled totals by station/segment/period
df_period = pd.merge(
    df_obs_long.drop(columns=["PERIOD_KEY"]),
    df_mod_long.drop(columns=["MOD_KEY"]),
    on=id_vars + ["PERIOD"],
    how="left",
)
# D. Expand Vehicle Types (Auto vs Truck)
# Calculate Auto % as the remainder after both truck shares
df_period["PCT_AUTO"] = 1.0 - (
    df_period["PCT_SUT"].fillna(0) + df_period["PCT_CUT"].fillna(0)
)
# Melt Types: one row per station/period/vehicle-type share
df_long = df_period.melt(
    id_vars=[
        "STATION",
        "MATCHED_SEGID",
        "FTCLASS",
        "ATYPENAME",
        "COUNTY_NAME",
        "PERIOD",
        "OBSERVED_TOTAL",
        "MODELED_TOTAL",
    ],
    value_vars=["PCT_AUTO", "PCT_SUT", "PCT_CUT"],
    var_name="TYPE_KEY",
    value_name="SHARE",
)
# Map Names
type_map = {"PCT_AUTO": "Auto", "PCT_SUT": "SUT", "PCT_CUT": "CUT"}
df_long["VEHICLE_TYPE"] = df_long["TYPE_KEY"].map(type_map)
# Calculate Final Volumes
# NOTE(review): assumes SUTRUCKS/CUTRUCKS are fractions (0-1), not whole
# percents — confirm against the network attribute documentation.
df_long["OBSERVED"] = df_long["OBSERVED_TOTAL"] * df_long["SHARE"]
df_long["MODELED"] = df_long["MODELED_TOTAL"] * df_long["SHARE"]
```
### Final Cleanup & Export
```{python}
# --- 4.4 Final Cleanup & Export ---
# Dashboard-facing columns only.
cols_final = [
    "STATION", "MATCHED_SEGID", "FTCLASS", "ATYPENAME", "COUNTY_NAME",
    "PERIOD", "VEHICLE_TYPE", "OBSERVED", "MODELED"
]
df_final = df_long[cols_final].copy()
# Add Diff Stats for CSV readability
df_final["DIFF"] = df_final["MODELED"] - df_final["OBSERVED"]
# Guard divide-by-zero: an observed volume of 0 now yields NaN (blank in the
# CSV) rather than +/-inf leaking into the dashboard statistics.
df_final["PCT_DIFF"] = (
    df_final["DIFF"] / df_final["OBSERVED"].replace(0, np.nan)
).round(3)
```
## Step 5: Export Final Output
```{python}
# --- 5.1 Export Final Data for OJS Plots ---
# Export the long-form validation table consumed by the OJS dashboard.
final_path = output_dir / "dashboard_data.csv"
df_final.to_csv(final_path, index=False)
# Label fixed: this is Step 5 per the section heading (was "Step 4").
print(f"Step 5 Complete: Dashboard data ready at {final_path}")
```
```{python}
# --- 5.2 Export Geometry for OJS Map ---
# 1. Identify Valid IDs from the Final Dashboard Data
valid_stations = df_final["STATION"].unique()
valid_segments = df_final["MATCHED_SEGID"].unique()
# 2. Filter Geometry AND Project to Lat/Lon (EPSG:4326)
# Leaflet requires EPSG:4326. Without this .to_crs(), points will not show up.
gdf_out_stations = gdf_ccs_locations[
    gdf_ccs_locations["STATION"].isin(valid_stations)
].to_crs("EPSG:4326")
gdf_out_segments = gdf_network_clean[
    gdf_network_clean["SEGID"].isin(valid_segments)
].to_crs("EPSG:4326")
# 3. Export to GeoJSON
gdf_out_stations.to_file(output_dir / "stations.geojson", driver="GeoJSON")
gdf_out_segments.to_file(output_dir / "segments.geojson", driver="GeoJSON")
# Label fixed to Step 5 (was "Step 4"); dropped the pointless f-prefix on the
# placeholder-free string.
print("Step 5 Complete: Map Geometry Exported.")
print(f" - Stations: {len(gdf_out_stations)}")
print(f" - Segments: {len(gdf_out_segments)}")
```
## Interactive Dashboard
```{python}
# | label: transfer-ojs
# | echo: false
# | eval: false
# Transfer the clean dataframe to OJS
# (eval: false — kept for reference; the dashboard instead reads the exported
# CSV via FileAttachment, which survives standalone renders.)
ojs_define(data_py=df_final)
```
```{ojs}
//| label: ojs-dashboard
//| echo: false
// 1. IMPORT LIBRARIES
import { aq, op } from '@uwdata/arquero'
import { Plot } from '@observablehq/plot'
// 2. LOAD DATA
// typed: true lets the CSV parser infer numeric columns automatically
data_raw = FileAttachment("_output/dashboard_data.csv").csv({ typed: true })
// 3. DASHBOARD CONTROLS
// One reactive form exposing the grouping dimension plus five filters.
viewof dashboard_ctrls = {
  // 1. Define the Form Object (Same as before)
  const form = Inputs.form({
    // Dimension used to color/group every chart and the map below
    group_by: Inputs.select(
      ["FTCLASS", "ATYPENAME", "COUNTY_NAME", "PERIOD", "VEHICLE_TYPE"],
      {label: "Group Vars:", value: "FTCLASS"}
    ),
    // Filters offer "All" plus the distinct values present in the data
    filter_county: Inputs.select(
      ["All"].concat(Array.from(new Set(data_raw.map(d => d.COUNTY_NAME))).sort()),
      {label: "Filter County:", value: "All"}
    ),
    filter_ftclass: Inputs.select(
      ["All"].concat(Array.from(new Set(data_raw.map(d => d.FTCLASS))).sort()),
      {label: "Filter FTClass:", value: "All"}
    ),
    filter_atype: Inputs.select(
      ["All"].concat(Array.from(new Set(data_raw.map(d => d.ATYPENAME))).sort()),
      {label: "Filter AType:", value: "All"}
    ),
    filter_period: Inputs.select(
      ["All", "AM", "MD", "PM", "EV"],
      {label: "Filter Period:", value: "All"}
    ),
    filter_vehicle: Inputs.select(
      ["All", "Auto", "SUT", "CUT"],
      {label: "Filter Vehicle:", value: "All"}
    )
  });
  // 2. Apply Custom CSS Layout
  // This turns the container into a Grid with 3 flexible columns
  form.style.display = "grid";
  form.style.gridTemplateColumns = "repeat(auto-fit, minmax(250px, 1fr))";
  form.style.gap = "10px";
  form.style.alignItems = "flex-end"; // Aligns inputs to the bottom if labels vary in height
  form.style.marginBottom = "20px"; // Adds spacing below the controls
  return form;
}
// 4. DATA PROCESSING
// Re-runs whenever dashboard_ctrls changes; yields { scatter, summary }.
dash_data = {
  const { group_by, filter_county, filter_ftclass, filter_atype, filter_period, filter_vehicle } = dashboard_ctrls;
  // Bind current control values as Arquero params so filter expressions can
  // reference them through `$`.
  let base = aq.from(data_raw)
    .params({
      fc: filter_county,
      ff: filter_ftclass, // <--- Param for FTCLASS
      fa: filter_atype,
      fp: filter_period,
      fv: filter_vehicle,
      gb: group_by
    })
    .filter((d, $) => $.fc === "All" || d.COUNTY_NAME === $.fc)
    .filter((d, $) => $.ff === "All" || d.FTCLASS === $.ff) // <--- Filter Logic
    .filter((d, $) => $.fa === "All" || d.ATYPENAME === $.fa)
    .filter((d, $) => $.fp === "All" || d.PERIOD === $.fp)
    .filter((d, $) => $.fv === "All" || d.VEHICLE_TYPE === $.fv)
  // A. Scatter Data (Station Level): one point per station per group value
  let scatter = base
    .groupby("STATION", "MATCHED_SEGID", group_by)
    .rollup({
      Obs_Total: op.sum('OBSERVED'),
      Mod_Total: op.sum('MODELED')
    })
    .derive({
      Label: (d, $) => d[$.gb],
      Obs_K: d => d.Obs_Total / 1000,
      Mod_K: d => d.Mod_Total / 1000,
      // Zero observed volume reports 0 error rather than dividing by zero
      Pct_Error: d => d.Obs_Total > 0 ? (d.Mod_Total - d.Obs_Total) / d.Obs_Total : 0
    })
  // B. Summary Data (Aggregate Level): one row per group value
  let summary = base
    .groupby(group_by)
    .rollup({
      Stations: op.count(),
      Obs_Total: op.sum('OBSERVED'),
      Mod_Total: op.sum('MODELED'),
      Diff_Sq_Sum: d => op.sum(op.pow(d.MODELED - d.OBSERVED, 2))
    })
    .derive({
      Difference: d => d.Mod_Total - d.Obs_Total,
      Percent_Difference: d => (d.Mod_Total - d.Obs_Total) / d.Obs_Total,
      RMSE: d => Math.sqrt(d.Diff_Sq_Sum / d.Stations),
      // %RMSE normalizes RMSE by the mean observed volume per station
      Percent_RMSE: d => d.Obs_Total > 0 ? Math.sqrt(d.Diff_Sq_Sum / d.Stations) / (d.Obs_Total / d.Stations) : 0,
      Label: (d, $) => d[$.gb]
    })
    .orderby(group_by)
  return { scatter, summary }
}
// 5. HELPER: TRENDLINES DATA
// Two endpoints per reference slope (origin and right edge of the data range),
// flattened so Plot.line can draw them grouped by `slope`.
trendlines = {
  const max_k = dash_data.scatter.rollup({ max: op.max("Mod_K") }).get("max") || 1;
  const lines = [
    { slope: 1.4, label: "+40%" },
    { slope: 1.3, label: "+30%" },
    { slope: 1.2, label: "+20%" },
    { slope: 1.1, label: "+10%" },
    { slope: 1.0, label: "Equal" },
    { slope: 0.9, label: "-10%" },
    { slope: 0.8, label: "-20%" },
    { slope: 0.7, label: "-30%" },
    { slope: 0.6, label: "-40%" }
  ];
  return lines.flatMap(l => [
    { x: 0, y: 0, label: l.label, slope: l.slope },
    { x: max_k, y: max_k * l.slope, label: l.label, slope: l.slope }
  ]);
}
// 6. HELPER: ERROR BOUNDS
// Piecewise acceptable-deviation envelope: the tolerance tightens as observed
// volume grows (x is in thousands). Duplicate x values create the step shape.
error_bounds = {
  const max_k = dash_data.scatter.rollup({ max: op.max("Obs_K") }).get("max") || 1;
  const bounds = [
    { x: 0, y_pos: 2.0, y_neg: -2.0 },
    { x: 1, y_pos: 2.0, y_neg: -2.0 },
    { x: 1, y_pos: 1.0, y_neg: -1.0 },
    { x: 2.5, y_pos: 1.0, y_neg: -1.0 },
    { x: 2.5, y_pos: 0.5, y_neg: -0.5 },
    { x: 5, y_pos: 0.5, y_neg: -0.5 },
    { x: 5, y_pos: 0.25, y_neg: -0.25 },
    { x: 10, y_pos: 0.25, y_neg: -0.25 },
    { x: 10, y_pos: 0.20, y_neg: -0.20 },
    { x: 25, y_pos: 0.20, y_neg: -0.20 },
    { x: 25, y_pos: 0.15, y_neg: -0.15 },
    { x: 50, y_pos: 0.15, y_neg: -0.15 },
    { x: 50, y_pos: 0.10, y_neg: -0.10 },
    { x: Math.max(max_k, 250), y_pos: 0.10, y_neg: -0.10 }
  ];
  return bounds;
}
// 7. VISUALIZATION
md`#### 1. Model vs Observed Volumes (000s)`
// Shared axis limit so the 1:1 line bisects the square plot
max_obs_k = dash_data.scatter.rollup({ max: op.max("Obs_K") }).get("max") || 1
max_mod_k = dash_data.scatter.rollup({ max: op.max("Mod_K") }).get("max") || 1
max_vol_k = Math.max(max_obs_k, max_mod_k)
Plot.plot({
  height: 400,
  aspectRatio: 1,
  grid: true,
  x: { label: "Observed Volume (000s)", domain: [0, max_vol_k * 1.05] },
  y: { label: "Modeled Volume (000s)", domain: [0, max_vol_k * 1.05] },
  style: { backgroundColor: 'transparent' },
  color: { legend: true, label: dashboard_ctrls.group_by },
  marks: [
    // Faint reference slopes from -40% to +40%
    Plot.line(trendlines, {
      x: "x", y: "y", z: "slope",
      stroke: "currentColor", strokeOpacity: 0.2, strokeDasharray: "4"
    }),
    Plot.text(trendlines.filter(d => d.x > 0), {
      x: "x", y: "y", text: "label",
      dx: 5, fill: "currentColor", fillOpacity: 0.5, textAnchor: "start"
    }),
    // Emphasized 1:1 line
    Plot.line(trendlines.filter(d => d.slope === 1.0), {
      x: "x", y: "y", stroke: "currentColor", strokeOpacity: 0.5, strokeWidth: 1.5
    }),
    Plot.dot(dash_data.scatter, {
      x: "Obs_K", y: "Mod_K", fill: "Label",
      stroke: "white", strokeOpacity: 0.5, r: 4,
      title: d => `Station: ${d.STATION}\nGroup: ${d.Label}\nObs: ${d.Obs_Total.toLocaleString()}\nMod: ${d.Mod_Total.toLocaleString()}\nError: ${(d.Pct_Error*100).toFixed(1)}%`
    }),
    // Best-fit line across all plotted stations
    Plot.linearRegressionY(dash_data.scatter, {
      x: "Obs_K", y: "Mod_K", stroke: "rgb(80, 116, 230)",
      strokeDasharray: "4 4", strokeWidth: 2
    })
  ]
})
md`#### 2. Model vs Observed Percent Error`
// Percent error per station vs observed volume, with the step-shaped
// acceptable-deviation envelope drawn in gray.
Plot.plot({
  height: 400,
  grid: true,
  x: { label: "Observed Volume (000s)", domain: [0, max_obs_k * 1.05] },
  y: { label: "Percent Error", tickFormat: "+%", domain: [-2, 2] },
  style: { backgroundColor: 'transparent' },
  color: { legend: true, label: dashboard_ctrls.group_by },
  marks: [
    Plot.ruleY([0], { stroke: "currentColor", strokeWidth: 1.5, strokeOpacity: 0.5 }),
    Plot.line(error_bounds, { x: "x", y: "y_pos", stroke: "gray", strokeWidth: 2, strokeOpacity: 0.4 }),
    Plot.line(error_bounds, { x: "x", y: "y_neg", stroke: "gray", strokeWidth: 2, strokeOpacity: 0.4 }),
    Plot.dot(dash_data.scatter, {
      x: "Obs_K", y: "Pct_Error", fill: "Label",
      stroke: "white", strokeOpacity: 0.5, r: 4,
      title: d => `Station: ${d.STATION}\nObs: ${d.Obs_Total.toLocaleString()}\nError: ${(d.Pct_Error*100).toFixed(1)}%`
    })
  ]
})
md`#### 3. Aggregate Comparison & Statistics`
// Fold the summary's two totals into long form so each group renders as a
// paired Observed/Modeled bar.
bar_data = dash_data.summary.fold(["Obs_Total", "Mod_Total"], { as: ["Metric", "Volume"] })
  .derive({ Metric_Label: d => d.Metric === "Obs_Total" ? "Observed" : "Modeled" })
Plot.plot({
  marginLeft: 80,
  // Height scales with the number of groups so facets never overlap
  height: Math.max(400, dash_data.summary.numRows() * 40),
  color: { domain: ["Observed", "Modeled"], range: ["#77933c", "#376092"], legend: true, label: "Data Source" },
  style: { backgroundColor: 'transparent' },
  x: { label: "Total Volume", grid: true, tickFormat: "s" },
  y: { axis: null },
  fy: { label: null, axis: "left" },
  marks: [
    Plot.barX(bar_data, {
      x: "Volume", y: "Metric_Label", fy: "Label", fill: "Metric_Label",
      title: d => `${d.Metric_Label}: ${d.Volume.toLocaleString()}`,
      sort: { fy: "x", reduce: "max", order: "descending" }
    }),
    Plot.text(bar_data, {
      x: "Volume", y: "Metric_Label", fy: "Label",
      text: d => d.Volume.toLocaleString(),
      dx: 5, textAnchor: "start", fill: "currentColor", fontSize: 10
    })
  ]
})
// Companion statistics table for the same summary rows
Inputs.table(dash_data.summary, {
  columns: ["Label", "Stations", "Obs_Total", "Mod_Total", "Difference", "Percent_Difference", "RMSE", "Percent_RMSE"],
  header: {
    Label: dashboard_ctrls.group_by, Obs_Total: "Observed", Mod_Total: "Modeled",
    Difference: "Diff", Percent_Difference: "Pct Diff", RMSE: "RMSE", Percent_RMSE: "Pct RMSE"
  },
  format: {
    Obs_Total: d => d.toLocaleString(),
    Mod_Total: d => d.toLocaleString(),
    Difference: d => d.toLocaleString(),
    Percent_Difference: d => (d * 100).toFixed(1) + "%",
    RMSE: d => d.toLocaleString(undefined, {maximumFractionDigits: 0}),
    Percent_RMSE: d => (d * 100).toFixed(1) + "%"
  }
})
```
```{ojs}
//| label: ojs-map
//| echo: false
// 1. IMPORT LEAFLET
L = require("leaflet@1.9.4")
// Leaflet's stylesheet must be injected or markers/popups render unstyled
html`<link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css">`
// 2. LOAD GEOMETRY (exported by Step 5.2, already in EPSG:4326)
stations_geo = FileAttachment("_output/stations.geojson").json()
segments_geo = FileAttachment("_output/segments.geojson").json()
// 3. DRAW REACTIVE MAP
// Rebuilds whenever dash_data (and therefore dashboard_ctrls) changes.
map_display = {
  // A. PREPARE DATA
  const scatter_array = dash_data.scatter.objects();
  // Clean IDs (string + trim so GeoJSON properties compare reliably)
  const valid_station_ids = new Set(scatter_array.map(d => String(d.STATION).trim()));
  const valid_seg_ids = new Set(scatter_array.map(d => String(d.MATCHED_SEGID).trim()));
  // Data Maps
  const data_map = new Map(scatter_array.map(d => [String(d.STATION).trim(), d]));
  // Connectivity & Attribute Maps
  const seg_to_stations = new Map();
  const seg_label_map = new Map(); // <--- New: Maps SEGID to its Group Label (e.g., "Arterial")
  scatter_array.forEach(d => {
    const s = String(d.MATCHED_SEGID).trim();
    const st = String(d.STATION).trim();
    // Build Connectivity
    if(!seg_to_stations.has(s)) seg_to_stations.set(s, []);
    seg_to_stations.get(s).push(st);
    // Build Attribute Lookup (If segment has multiple stations, they usually share attributes)
    // NOTE(review): last writer wins if stations on one segment carry
    // different labels — confirm that is acceptable.
    seg_label_map.set(s, d.Label);
  });
  // --- COLOR LOGIC ---
  const current_group = dashboard_ctrls.group_by;
  // Only color by static attributes (skip Period/Vehicle Type)
  const enable_color = ["FTCLASS", "ATYPENAME", "COUNTY_NAME"].includes(current_group);
  const palette = ["#4e79a7","#f28e2c","#e15759","#76b7b2","#59a14f","#edc948","#b07aa1","#ff9da7","#9c755f","#bab0ac"];
  // Generate Color Map (labels sorted so colors are stable between renders)
  const unique_labels = Array.from(new Set(scatter_array.map(d => d.Label))).sort();
  const color_map = new Map(unique_labels.map((l, i) => [l, palette[i % palette.length]]));
  // B. SETUP CONTAINER
  const container = document.createElement("div");
  container.style.height = "650px";
  container.style.width = "100%";
  container.style.borderRadius = "8px";
  container.style.border = "1px solid #ccc";
  container.style.zIndex = "1";
  // C. INITIALIZE MAP (centered on Salt Lake City)
  const map = L.map(container).setView([40.7608, -111.8910], 9);
  L.tileLayer('https://{s}.basemaps.cartocdn.com/rastertiles/voyager/{z}/{x}/{y}{r}.png', {
    attribution: '© OpenStreetMap © CARTO',
    maxZoom: 19
  }).addTo(map);
  // Recompute tile layout once the container is actually in the DOM
  setTimeout(() => { map.invalidateSize(); }, 200);
  // --- HELPER FUNCTIONS ---
  // Emphasize the segment with the given SEGID
  function highlightSegment(segid) {
    if (!segid) return;
    const clean_segid = String(segid).trim();
    segmentLayer.eachLayer(layer => {
      if (layer._segid === clean_segid) {
        layer.setStyle({ color: "#00e5ff", weight: 8, opacity: 1.0 });
        layer.bringToFront();
      }
    });
  }
  // Emphasize every station connected to the given SEGID
  function highlightStations(segid) {
    if (!segid) return;
    const clean_segid = String(segid).trim();
    const target_stations = new Set(seg_to_stations.get(clean_segid) || []);
    stationLayer.eachLayer(layer => {
      if (target_stations.has(layer._station_id)) {
        layer.setStyle({
          radius: 4,
          color: "#00e5ff",
          weight: 3,
          fillColor: "#00e5ff",
          fillOpacity: 1.0
        });
        layer.bringToFront();
      }
    });
  }
  function resetAll() {
    // This restores the original 'style' defined in L.geoJSON options
    segmentLayer.eachLayer(l => segmentLayer.resetStyle(l));
    stationLayer.eachLayer(l => stationLayer.resetStyle(l));
  }
  // D. ADD SEGMENTS (LINES) — only those present in the filtered scatter data
  const filtered_segments = {
    type: "FeatureCollection",
    features: segments_geo.features.filter(f => {
      if (!f.properties.SEGID) return false;
      return valid_seg_ids.has(String(f.properties.SEGID).trim());
    })
  };
  const segmentLayer = L.geoJSON(filtered_segments, {
    // --- DYNAMIC STYLE FUNCTION ---
    style: (feature) => {
      const segid = String(feature.properties.SEGID).trim();
      const label = seg_label_map.get(segid);
      let finalColor = "#376092"; // Default Blue
      if (enable_color && label) {
        finalColor = color_map.get(label) || finalColor;
      }
      return {
        color: finalColor,
        weight: 4,
        opacity: 0.9
      };
    },
    onEachFeature: (feature, layer) => {
      const segid = String(feature.properties.SEGID).trim();
      layer._segid = segid;
      const stations = seg_to_stations.get(segid) || [];
      const st_label = stations.length > 0 ? stations.join(", ") : "None";
      const group_label = seg_label_map.get(segid) || "N/A";
      layer.bindPopup(`
        <div style="font-family:sans-serif; font-size:12px;">
          <b>Segment: ${segid}</b><br>
          Group: ${group_label}<br>
          Connected Stations: ${st_label}
        </div>
      `);
      // Hovering a segment also highlights its connected stations
      layer.on('mouseover', () => {
        layer.setStyle({ color: "#00e5ff", weight: 6, opacity: 1.0 });
        layer.bringToFront();
        highlightStations(segid);
      });
      layer.on('mouseout', () => resetAll());
    }
  }).addTo(map);
  // E. ADD STATIONS (POINTS) — only those present in the filtered scatter data
  const filtered_stations = {
    type: "FeatureCollection",
    features: stations_geo.features.filter(f => {
      if (!f.properties.STATION) return false;
      return valid_station_ids.has(String(f.properties.STATION).trim());
    })
  };
  const stationLayer = L.geoJSON(filtered_stations, {
    pointToLayer: (feature, latlng) => {
      const id = String(feature.properties.STATION).trim();
      const stats = data_map.get(id);
      let finalColor = "#77933c"; // Default Green
      if (enable_color && stats) {
        finalColor = color_map.get(stats.Label) || finalColor;
      }
      return L.circleMarker(latlng, {
        radius: 4,
        fillColor: finalColor,
        color: "#1e1e1e",
        weight: 1,
        opacity: 0.9,
        fillOpacity: 0.9
      });
    },
    onEachFeature: (feature, layer) => {
      const id = String(feature.properties.STATION).trim();
      layer._station_id = id;
      const stats = data_map.get(id);
      // NOTE(review): stats is read before the if-guard below. Safe today
      // because filtered_stations only keeps ids present in data_map, but the
      // mouseover handler would throw if that filter ever changes.
      layer.on('mouseover', () => highlightSegment(stats.MATCHED_SEGID));
      layer.on('mouseout', () => resetAll());
      if (stats) {
        const diffColor = stats.Pct_Error > 0 ? '#d9534f' : '#5cb85c';
        layer.bindPopup(`
          <div style="font-family: sans-serif; min-width: 160px;">
            <strong>Station ${id}</strong><br/>
            <hr style="margin:4px 0; border:0; border-top:1px solid #eee;">
            Group: <b>${stats.Label}</b><br/>
            Observed: <b>${Math.round(stats.Obs_Total).toLocaleString()}</b><br/>
            Modeled: ${Math.round(stats.Mod_Total).toLocaleString()}<br/>
            Diff: <span style="color:${diffColor}; font-weight:bold;">
              ${(stats.Pct_Error * 100).toFixed(1)}%
            </span><br/>
            <span style="color:#999; font-size:11px;">Seg: ${stats.MATCHED_SEGID}</span>
          </div>
        `);
      }
    }
  }).addTo(map);
  // F. FIT BOUNDS (deferred so the container has a size first)
  if (filtered_stations.features.length > 0) {
    setTimeout(() => {
      map.fitBounds(stationLayer.getBounds(), { padding: [50, 50] });
    }, 250);
  }
  return container;
}
```