###################################################### ##### Streamlit app for X-MethaneWet exploration ##### ###################################################### from __future__ import annotations from pathlib import Path from typing import Tuple import matplotlib.pyplot as plt import numpy as np import pandas as pd import streamlit as st import xarray as xr from huggingface_hub import hf_hub_download REPO_ID = "ymsun99/X-MethaneWet" DATA_ROOT = Path("data/x-methanewet") FLUXNET_META_PATH = "FLUXNET-CH4/FLUXNET_CH4_2024.csv" FLUXNET_DAILY_PATH = "FLUXNET-CH4/FLUXNET_T1_DD.csv" FLUXNET_META_FILE = DATA_ROOT / FLUXNET_META_PATH FLUXNET_DAILY_FILE = DATA_ROOT / FLUXNET_DAILY_PATH TEM_YEARS = [1990, 2005, 2018] DEFAULT_SITE = "BW.Gum" @st.cache_data(show_spinner=False) def load_fluxnet_meta() -> pd.DataFrame: """Load FLUXNET site metadata.""" path = FLUXNET_META_FILE if FLUXNET_META_FILE.exists() else download_file( FLUXNET_META_PATH ) return pd.read_csv(path) @st.cache_data(show_spinner=False) def load_fluxnet_daily() -> pd.DataFrame: """Load FLUXNET daily methane data.""" path = FLUXNET_DAILY_FILE if FLUXNET_DAILY_FILE.exists() else download_file( FLUXNET_DAILY_PATH ) return pd.read_csv(path) @st.cache_resource(show_spinner=False) def load_tem_year_ds(year: int) -> xr.Dataset: """Load a TEM-MDM NetCDF file for a given year.""" filename = DATA_ROOT / f"TEM-MDM/CH4_emission_intensity_{year}.nc" if filename.exists(): return xr.open_dataset(filename) path = download_file(f"TEM-MDM/CH4_emission_intensity_{year}.nc") return xr.open_dataset(path) def normalize_site_id(site_id: str) -> str: """Normalize site IDs to match daily data format.""" return site_id.replace("-", ".") def download_file(filename: str) -> Path: """Download a dataset file from the HF Hub (used on Spaces).""" path = hf_hub_download(repo_id=REPO_ID, filename=filename, repo_type="dataset") return Path(path) def get_site_row(site_id: str) -> pd.Series: """Return the metadata row for a site.""" meta = load_fluxnet_meta() meta["SITE_ID_NORM"] = meta["SITE_ID"].astype(str).map(normalize_site_id) matches = meta.loc[meta["SITE_ID_NORM"] == site_id] if matches.empty: raise ValueError(f"Site not found in metadata: {site_id}") return matches.iloc[0] def get_site_latlon(site_id: str) -> Tuple[float, float]: """Return latitude and longitude for a site.""" row = get_site_row(site_id) return float(row["LOCATION_LAT"]), float(row["LOCATION_LONG"]) def get_site_elevation(site_id: str) -> float: """Return elevation (meters) for a site.""" row = get_site_row(site_id) return float(row["LOCATION_ELEV"]) def fluxnet_site_timeseries(site_id: str) -> Tuple[pd.Series, str]: """Return daily methane flux series for a site.""" site_id = normalize_site_id(site_id) daily_df = load_fluxnet_daily() site_col = "Site" if "Site" in daily_df.columns else "SITE_ID" flux_col = "FCH4_F_ANNOPTLM" site_df = daily_df.loc[daily_df[site_col] == site_id, ["TIMESTAMP", flux_col]].dropna() timestamps = pd.to_datetime(site_df["TIMESTAMP"], errors="coerce") site_df = site_df.loc[timestamps.notna()].copy() flux_series = pd.Series(site_df[flux_col].values, index=timestamps[timestamps.notna()]) return flux_series.sort_index(), flux_col def aggregate_series(series: pd.Series, agg: str) -> pd.Series: """Aggregate a time series to daily or monthly mean.""" if agg == "Daily": return series if agg == "Monthly mean": return series.resample("MS").mean() def nearest_grid_timeseries(ds: xr.Dataset, lat: float, lon: float) -> pd.Series: """Extract daily CH4 emission at the nearest grid cell.""" latitudes = ds["latitude"].values longitudes = ds["longitude"].values lon_val = lon if longitudes.max() > 180 and lon < 0: lon_val = lon + 360 lat_idx = int(np.abs(latitudes - lat).argmin()) lon_idx = int(np.abs(longitudes - lon_val).argmin()) emission_da = ds["CH4_emission"].isel(latitude=lat_idx, longitude=lon_idx) series = emission_da.stack(time=("month", "day")).to_series() date_index = pd.to_datetime( { "year": 2000, "month": series.index.get_level_values("month"), "day": series.index.get_level_values("day"), }, errors="coerce", ) series.index = date_index return series.dropna().sort_index() def plot_timeseries(series: pd.Series, title: str, y_label: str) -> plt.Figure: """Plot a single time series.""" figure, axis = plt.subplots(figsize=(10, 4)) axis.plot(series.index, series.values, color="#1e88e5", linewidth=1.2) axis.set_title(title) axis.set_xlabel("Time") axis.set_ylabel(y_label) axis.grid(True, linewidth=0.3, alpha=0.4) figure.tight_layout() return figure def plot_combined_timeseries( observed: pd.Series, simulated: pd.Series, site_id: str, aggregation: str ) -> plt.Figure: """Plot observed and simulated series on the same standardized scale.""" obs_z = (observed - observed.mean()) / observed.std() sim_z = (simulated - simulated.mean()) / simulated.std() figure, axis = plt.subplots(figsize=(10, 4)) axis.plot(obs_z.index, obs_z.values, color="#1e88e5", linewidth=1.2, label="Observed (z-score)") axis.plot(sim_z.index, sim_z.values, color="#e53935", linewidth=1.2, label="Simulated (z-score)") axis.set_title(f"Observed vs. simulated methane flux - {site_id} ({aggregation})") axis.set_xlabel("Time") axis.set_ylabel("Standardized flux (z-score)") axis.grid(True, linewidth=0.3, alpha=0.4) axis.legend(loc="upper right") figure.tight_layout() return figure def plot_spatial_slice(ds: xr.Dataset, month: int, day: int, lat: float, lon: float) -> plt.Figure: """Plot a spatial slice for a single day.""" emission_slice = ds["CH4_emission"].sel(month=month, day=day) latitudes = emission_slice["latitude"].values longitudes = emission_slice["longitude"].values emission_values = emission_slice.values vmin = np.nanpercentile(emission_values, 5) vmax = np.nanpercentile(emission_values, 95) figure, axis = plt.subplots(figsize=(10, 4)) image = axis.imshow( emission_values, origin="lower", extent=[ float(longitudes.min()), float(longitudes.max()), float(latitudes.min()), float(latitudes.max()), ], cmap="viridis", vmin=vmin, vmax=vmax, aspect="auto", ) axis.scatter([lon], [lat], c="#ff3d00", s=90, edgecolors="white", linewidth=1.5, zorder=3) axis.set_title(f"TEM-MDM spatial slice (month {month}, day {day})") axis.set_axis_off() figure.colorbar(image, ax=axis, fraction=0.03, pad=0.02, label="CH4_emission") figure.tight_layout() return figure def main() -> None: """Render the Streamlit app.""" st.set_page_config(page_title="X-MethaneWet Explorer", layout="wide") st.title("X-MethaneWet Explorer") hero_text = ( "Explore cross-scale methane emissions dynamics by comparing sparse site observations " "(FluxNet) with global gridded simulation data (TEM-MDM)." ) st.markdown( """ """, unsafe_allow_html=True, ) st.markdown(f'
{hero_text}
', unsafe_allow_html=True) meta = load_fluxnet_meta() sites = sorted(meta["SITE_ID"].dropna().map(normalize_site_id).unique().tolist()) default_index = sites.index(DEFAULT_SITE) if DEFAULT_SITE in sites else 0 with st.sidebar: st.header("Controls") st.caption("Select a site, choose a simulation year, then explore time-series and spatial context.") site_choice = st.selectbox( "FLUXNET site", sites, index=default_index, help="Observed methane flux site; the nearest simulation grid cell is used for comparison.", ) lat, lon = get_site_latlon(site_choice) elev = get_site_elevation(site_choice) st.markdown(f"**Coordinates:** {lat:.2f}, {lon:.2f} \n**Elevation:** {elev:.1f} m") aggregation = st.radio( "Aggregation", ["Daily", "Monthly mean"], help="Controls temporal smoothing for both observed and simulated series.", ) tem_year = st.selectbox( "TEM-MDM year", TEM_YEARS, index=2, help="Year for the simulated gridded emissions.", ) day_of_year = st.slider( "Day of year (slide to visualize shifts)", 1, 365, 196, help="Single-day spatial slice for the simulation grid (mapped to month/day).", ) flux_series, flux_col = fluxnet_site_timeseries(site_choice) flux_series_agg = aggregate_series(flux_series, aggregation) tem_dataset = load_tem_year_ds(int(tem_year)) simulated_series = nearest_grid_timeseries(tem_dataset, lat, lon) simulated_series.index = simulated_series.index.map( lambda dt: dt.replace(year=int(tem_year)) ) simulated_series_agg = aggregate_series(simulated_series, aggregation) st.markdown("### 1) Aligned time series (standardized)") st.markdown("Observed and simulated signals are standardized to compare seasonal shape and timing.") combined_fig = plot_combined_timeseries( flux_series_agg, simulated_series_agg, site_choice, aggregation ) st.pyplot(combined_fig) st.markdown("### 2) Spatial context (TEM-MDM grid)") date = pd.Timestamp("2001-01-01") + pd.Timedelta(days=int(day_of_year) - 1) spatial_fig = plot_spatial_slice( tem_dataset, int(date.month), int(date.day), float(lat), float(lon) ) st.pyplot(spatial_fig) if __name__ == "__main__": main()