diff --git a/how-to-use.qmd b/how-to-use.qmd index f47e61e..e9d530a 100644 --- a/how-to-use.qmd +++ b/how-to-use.qmd @@ -60,10 +60,20 @@ The two main files carrying the sample records themselves: | File | Size | Shape | Rows | Use when you need… | |---|---:|---|---:|---| -| [`isamples_202601_wide.parquet`](https://data.isamples.org/isamples_202601_wide.parquet) | 278 MB | Wide (one row per entity, nested relationships in `p__*` array columns) | 20 M | General entity queries, UI filtering, description text | +| [`current/wide.parquet`](https://data.isamples.org/current/wide.parquet) ∗ | 292 MB | Wide (one row per entity, nested relationships in `p__*` array columns) | 20 M | General entity queries, UI filtering, description text | | [`isamples_202601_wide_h3.parquet`](https://data.isamples.org/isamples_202601_wide_h3.parquet) | 292 MB | Wide + H3 BIGINT indices (`h3_res4`, `h3_res6`, `h3_res8`) | 20 M | Geospatial queries with H3 clustering at arbitrary zoom | | [`isamples_202512_narrow.parquet`](https://data.isamples.org/isamples_202512_narrow.parquet) | 820 MB | Narrow (graph: nodes + explicit `_edge_` rows, s/p/o/n fields) | 106 M | Graph traversals, relationship-centric analysis, PQG work | +∗ `/current/wide.parquet` is a stable alias that HTTP 302-redirects to the +latest dated file (currently +[`isamples_202604_wide.parquet`](https://data.isamples.org/isamples_202604_wide.parquet), +enriched with ~47 K OpenContext thumbnails). The dated filename is +immutable; the alias rotates atomically when we rebuild. Use the alias for +interactive work, the dated URL when you want a pinned, reproducible +reference. The original +[`isamples_202601_wide.parquet`](https://data.isamples.org/isamples_202601_wide.parquet) +(278 MB, no thumbnails) is kept available for historical pinning. + All three represent the same underlying data (SESAR + OpenContext + GEOME + Smithsonian) with identical semantics — they differ only in serialization strategy. 
See the @@ -123,7 +133,7 @@ import duckdb con = duckdb.connect() con.sql(""" SELECT source, COUNT(*) AS n - FROM read_parquet('https://data.isamples.org/isamples_202601_wide.parquet') + FROM read_parquet('https://data.isamples.org/current/wide.parquet') WHERE otype = 'MaterialSampleRecord' GROUP BY 1 ORDER BY 2 DESC """).df() diff --git a/tutorials/index.qmd b/tutorials/index.qmd index cc39228..a4e5bf8 100644 --- a/tutorials/index.qmd +++ b/tutorials/index.qmd @@ -28,7 +28,7 @@ All data is hosted on [`data.isamples.org`](https://data.isamples.org) with HTTP | File | Size | Description | |------|------|-------------| -| [Wide format](https://data.isamples.org/isamples_202601_wide.parquet) | 278 MB | One row per entity, all sources — primary file for tutorials | +| [Wide format](https://data.isamples.org/current/wide.parquet) | 292 MB | One row per entity, all sources — primary file for tutorials. Stable alias redirects to the current dated build (`isamples_YYYYMM_wide.parquet`). | | [Wide + H3](https://data.isamples.org/isamples_202601_wide_h3.parquet) | 292 MB | Wide format with H3 spatial indices for globe visualizations | | [Facet summaries](https://data.isamples.org/isamples_202601_facet_summaries.parquet) | 2 KB | Pre-computed filter counts — loads instantly | | [H3 clusters (res4)](https://data.isamples.org/isamples_202601_h3_summary_res4.parquet) | 0.6 MB | Zoomed-out globe view | diff --git a/tutorials/isamples_explorer.qmd b/tutorials/isamples_explorer.qmd index ccd5818..a3d1a44 100644 --- a/tutorials/isamples_explorer.qmd +++ b/tutorials/isamples_explorer.qmd @@ -82,8 +82,10 @@ duckdbModule = import("https://cdn.jsdelivr.net/npm/@duckdb/duckdb-wasm@1.28.0/+ // description fetch on click, no ORDER BY RANDOM(), lazy Cesium mount). explorerVersion = new URLSearchParams(location.search).get('v') === '2' ? 'v2' : 'v1' -// Data source configuration -wide_url = "https://data.isamples.org/isamples_202601_wide.parquet" +// Data source configuration. 
+// wide_url uses the /current/ alias so we pick up the latest enriched build +// (with OpenContext thumbnails); the alias 302-redirects to the dated file. +wide_url = "https://data.isamples.org/current/wide.parquet" lite_url = "https://data.isamples.org/isamples_202601_samples_map_lite.parquet" parquet_url = explorerVersion === 'v2' ? lite_url : wide_url diff --git a/tutorials/progressive_globe.qmd b/tutorials/progressive_globe.qmd index d0b0e83..aec008e 100644 --- a/tutorials/progressive_globe.qmd +++ b/tutorials/progressive_globe.qmd @@ -202,7 +202,9 @@ h3_res4_url = `${R2_BASE}/isamples_202601_h3_summary_res4.parquet` h3_res6_url = `${R2_BASE}/isamples_202601_h3_summary_res6.parquet` h3_res8_url = `${R2_BASE}/isamples_202601_h3_summary_res8.parquet` lite_url = `${R2_BASE}/isamples_202601_samples_map_lite.parquet` -wide_url = `${R2_BASE}/isamples_202601_wide.parquet` +// Stable alias that 302-redirects to the current enriched wide parquet +// (isamples_YYYYMM_wide.parquet). This build includes the OpenContext thumbnails. +wide_url = `${R2_BASE}/current/wide.parquet` facets_url = `${R2_BASE}/isamples_202601_sample_facets.parquet` facet_summaries_url = `${R2_BASE}/isamples_202601_facet_summaries.parquet` diff --git a/tutorials/zenodo_isamples_analysis.qmd b/tutorials/zenodo_isamples_analysis.qmd index 650630e..d3a499b 100644 --- a/tutorials/zenodo_isamples_analysis.qmd +++ b/tutorials/zenodo_isamples_analysis.qmd @@ -95,7 +95,7 @@ parquet_urls = [ 'https://data.isamples.org/isamples_202601_wide_h3.parquet', // Fallback: original wide format without H3 - 'https://data.isamples.org/isamples_202601_wide.parquet', + 'https://data.isamples.org/current/wide.parquet', // Fallback: older versions 'https://labs.dataunbound.com/docs/2025/07/isamples_export_2025_04_21_16_23_46_geo.parquet',