USGS Study Package From Primitives¶
This notebook demonstrates the composable primitives in ras_commander.usgs for USGS gauge study assembly.
What You'll Learn:
- UsgsObservations — metadata, datasets, peak flow, summaries, gap analysis
- UsgsDrainageAreaComparison — multi-source area comparison with agreement status
- How to assemble a study report from primitives (notebook-local workflow)
Key Principle: Keep study packaging, readiness assessment, and report assembly in notebook or project code. The library exposes only composable primitives.
# Uncomment to install/upgrade ras-commander from pip
#!pip install --upgrade ras-commander
#Import the ras-commander package
from ras_commander import *
Development Mode¶
To test with a local development copy instead of the pip-installed package, convert this cell to code and convert the cell above to markdown:
import sys
from pathlib import Path
# Add repo root to path
notebook_dir = Path.cwd()
repo_root = notebook_dir.parent if notebook_dir.name == 'examples' else notebook_dir
if str(repo_root) not in sys.path:
sys.path.insert(0, str(repo_root))
from ras_commander import *
import json
from pathlib import Path
from ras_commander.usgs import (
UsgsObservations,
UsgsDrainageAreaComparison,
)
Gauge Metadata and Continuous Observations¶
Retrieve metadata and observation datasets for USGS gauge 05577500 (Sangamon River at Springfield, IL).
site_id = "05577500"
start_datetime = "2024-01-01"
end_datetime = "2024-12-31"
metadata = UsgsObservations.get_gauge_metadata(site_id)
print(f"Gauge: {metadata.get('station_name', 'N/A')}")
print(f"Site ID: {site_id}")
print(f"Drainage area: {metadata.get('drainage_area_sqmi', 'N/A')} sq mi")
print(f"Latitude: {metadata.get('latitude', 'N/A')}")
print(f"Longitude: {metadata.get('longitude', 'N/A')}")
assert isinstance(metadata, dict), "Metadata should be a dict"
assert 'drainage_area_sqmi' in metadata, "Should have drainage area"
print(f"\nMetadata keys: {sorted(metadata.keys())}")
continuous_flow = UsgsObservations.get_dataset(
site_id=site_id,
dataset_id="continuous_flow",
start_datetime=start_datetime,
end_datetime=end_datetime,
)
daily_flow = UsgsObservations.get_dataset(
site_id=site_id,
dataset_id="daily_flow",
start_datetime=start_datetime,
end_datetime=end_datetime,
)
print(f"Continuous flow: {len(continuous_flow)} records")
print(f"Daily flow: {len(daily_flow)} records")
assert len(continuous_flow) > 0 or len(daily_flow) > 0, "Should have flow data"
if not continuous_flow.empty:
print(f"\nContinuous flow columns: {list(continuous_flow.columns)}")
print(f"Date range: {continuous_flow.index.min()} to {continuous_flow.index.max()}")
if not daily_flow.empty:
print(f"\nDaily flow columns: {list(daily_flow.columns)}")
print(f"Date range: {daily_flow.index.min()} to {daily_flow.index.max()}")
Dataset Summary and Gap Analysis¶
Use summarize_dataset and analyze_gaps to build report-ready metadata for each dataset.
continuous_flow_summary = UsgsObservations.summarize_dataset(
dataset_id="continuous_flow",
frame=continuous_flow,
status="ok" if not continuous_flow.empty else "empty",
)
continuous_flow_gaps = UsgsObservations.analyze_gaps(
dataset_id="continuous_flow",
frame=continuous_flow,
)
print("=== Continuous Flow Summary ===")
for key, val in continuous_flow_summary.items():
print(f" {key}: {val}")
print("\n=== Continuous Flow Gap Analysis ===")
for key, val in continuous_flow_gaps.items():
print(f" {key}: {val}")
assert isinstance(continuous_flow_summary, dict), "Summary should be dict"
assert isinstance(continuous_flow_gaps, dict), "Gaps should be dict"
Peak Flow Data¶
Retrieve annual peak flow records — essential for flood frequency analysis and model calibration.
peak_flow = UsgsObservations.get_peak_flow_data(site_id)
print(f"Peak flow records: {len(peak_flow)}")
assert len(peak_flow) > 0, "Should have peak flow records for a major gauge"
print(f"Columns: {list(peak_flow.columns)}")
print(f"\nFirst 5 records:")
peak_flow.head()
Drainage Area Comparison¶
Compare up to 4 drainage area sources. Agreement status: - close (<=5%): Areas agree well - moderate (<=10%): Minor discrepancies - review (>10%): Needs investigation
drainage_area_review = UsgsDrainageAreaComparison.compare_areas(
gauge_area_sqmi=metadata.get("drainage_area_sqmi"),
official_basin_area_sqmi=125.8,
taudem_area_sqmi=124.9,
model_area_sqmi=126.4,
)
print(f"Reference: {drainage_area_review['reference_label']} "
f"({drainage_area_review['reference_area_sqmi']} sq mi)")
print(f"Agreement status: {drainage_area_review['agreement_status']}")
print(f"Max percent difference: {drainage_area_review['max_abs_percent_difference']:.2f}%")
assert isinstance(drainage_area_review, dict), "Should return dict"
assert 'agreement_status' in drainage_area_review, "Should have agreement_status"
assert drainage_area_review['provided_area_count'] >= 2, "Should compare multiple areas"
print("\n=== Per-Source Comparisons ===")
for key, comp in drainage_area_review['comparisons'].items():
ref_tag = " (REFERENCE)" if comp['is_reference'] else ""
pct = comp['difference_from_reference_percent']
pct_str = f"{pct:+.2f}%" if pct is not None else "--"
print(f" {comp['label']}: {comp['area_sqmi']:.1f} sq mi [{pct_str}]{ref_tag}")
Notebook-Local Report Assembly¶
This cell demonstrates how to assemble a complete study report from the primitives above. This is intentionally notebook-local workflow code — not part of the library's public API.
report = {
"study": {
"site_id": site_id,
"start_datetime": start_datetime,
"end_datetime": end_datetime,
},
"gauge_metadata": metadata,
"datasets": {
"continuous_flow": continuous_flow_summary,
"daily_flow": UsgsObservations.summarize_dataset(
dataset_id="daily_flow",
frame=daily_flow,
status="ok" if not daily_flow.empty else "empty",
),
"peak_flow": {"record_count": len(peak_flow), "status": "ok" if not peak_flow.empty else "empty"},
},
"data_gaps": {
"continuous_flow": continuous_flow_gaps,
"daily_flow": UsgsObservations.analyze_gaps(
dataset_id="daily_flow",
frame=daily_flow,
),
},
"drainage_area_review": drainage_area_review,
}
def _to_json_safe(value):
"""Recursively convert non-serializable types for JSON output."""
if isinstance(value, dict):
return {key: _to_json_safe(val) for key, val in value.items()}
if isinstance(value, (list, tuple)):
return [_to_json_safe(item) for item in value]
if hasattr(value, "tolist"):
try:
return _to_json_safe(value.tolist())
except Exception:
pass
if hasattr(value, "item"):
try:
return value.item()
except Exception:
pass
if hasattr(value, "isoformat"):
try:
return value.isoformat()
except Exception:
pass
return value
output_dir = Path("out") / f"USGS_{site_id}_study_package"
output_dir.mkdir(parents=True, exist_ok=True)
report_path = output_dir / "report.json"
report_path.write_text(json.dumps(_to_json_safe(report), indent=2), encoding="utf-8")
print(f"Report written to: {report_path}")
print(f"Report size: {report_path.stat().st_size:,} bytes")
print(f"\nDatasets covered: {list(report['datasets'].keys())}")
print(f"Drainage area agreement: {report['drainage_area_review']['agreement_status']}")
assert report_path.exists(), "Report file should exist"
roundtrip = json.loads(report_path.read_text(encoding='utf-8'))
assert roundtrip['study']['site_id'] == site_id, "Report roundtrip should preserve site_id"
print("\nAll assertions passed.")