From ebda8ff238a051ef9c3489922c57bfd19a04480e Mon Sep 17 00:00:00 2001 From: Eddie A Tejeda <669988+eddietejeda@users.noreply.github.com> Date: Wed, 27 May 2026 16:08:11 -0700 Subject: [PATCH 1/2] chore: remove datasets command Removing the datasets CLI surface for now. Deletes datasets.rs entirely and strips the Datasets variant from Commands, DatasetsCommands enum, the dispatch block in main.rs, and the stale cross-reference in the databases error message. --- src/command.rs | 86 ----------- src/databases.rs | 2 +- src/datasets.rs | 381 ----------------------------------------------- src/main.rs | 71 +-------- 4 files changed, 2 insertions(+), 538 deletions(-) delete mode 100644 src/datasets.rs diff --git a/src/command.rs b/src/command.rs index a8c33ef..8afc01e 100644 --- a/src/command.rs +++ b/src/command.rs @@ -8,23 +8,6 @@ pub enum Commands { command: Option, }, - /// Derived views — virtual SQL tables built from queries over your data - Datasets { - /// Dataset ID to show details - id: Option, - - /// Workspace ID (defaults to first workspace from login) - #[arg(long, short = 'w', global = true)] - workspace_id: Option, - - /// Output format (used with dataset ID) - #[arg(long = "output", short = 'o', default_value = "table", value_parser = ["table", "json", "yaml"])] - output: String, - - #[command(subcommand)] - command: Option, - }, - /// Execute a SQL query, or check status of a running query Query { /// SQL query string (omit when using a subcommand) @@ -444,75 +427,6 @@ pub enum JobsCommands { }, } -#[derive(Subcommand)] -pub enum DatasetsCommands { - /// List all datasets in a workspace - List { - /// Maximum number of results (default: 100, max: 1000) - #[arg(long)] - limit: Option, - - /// Pagination offset - #[arg(long)] - offset: Option, - - /// Output format - #[arg(long = "output", short = 'o', default_value = "table", value_parser = ["table", "json", "yaml"])] - output: String, - }, - - /// Create a derived view from a SQL query or saved query - 
Create { - /// SQL table name the dataset is addressable as (e.g. my_view) - #[arg(long)] - name: String, - - /// Human-readable display label - #[arg(long)] - description: Option, - - /// SQL query to create the dataset from - #[arg(long, conflicts_with = "query_id", required_unless_present = "query_id")] - sql: Option, - - /// Saved query ID to create the dataset from - #[arg(long, conflicts_with = "sql", required_unless_present = "sql")] - query_id: Option, - - /// Output format - #[arg(long = "output", short = 'o', default_value = "table", value_parser = ["table", "json", "yaml"])] - output: String, - }, - - /// Update a dataset's description and/or name - Update { - /// Dataset ID - id: String, - - /// New display label - #[arg(long)] - description: Option, - - /// New SQL table name (must be a valid identifier) - #[arg(long)] - name: Option, - - /// Output format - #[arg(long = "output", short = 'o', default_value = "table", value_parser = ["table", "json", "yaml"])] - output: String, - }, - - /// Refresh a dataset by re-running its source (URL fetch or saved query) and creating a new version - Refresh { - /// Dataset ID - id: String, - - /// Submit as a background job - #[arg(long)] - r#async: bool, - }, -} - #[derive(Subcommand)] pub enum WorkspaceCommands { /// List all workspaces diff --git a/src/databases.rs b/src/databases.rs index 15526dd..d43497b 100644 --- a/src/databases.rs +++ b/src/databases.rs @@ -231,7 +231,7 @@ fn upload_parquet_file(api: &ApiClient, path: &str) -> String { if !is_parquet_path(path) { eprintln!( "error: managed table loads require a parquet file (got '{}'). 
\ - Convert your data to parquet or use `hotdata datasets create` for CSV/JSON.", + Convert your data to parquet before loading.", path ); std::process::exit(1); diff --git a/src/datasets.rs b/src/datasets.rs deleted file mode 100644 index 735031e..0000000 --- a/src/datasets.rs +++ /dev/null @@ -1,381 +0,0 @@ -use crate::api::ApiClient; -use serde::{Deserialize, Serialize}; -use serde_json::json; - -#[derive(Deserialize, Serialize)] -struct Dataset { - id: String, - label: String, - #[serde(default = "default_schema")] - schema_name: String, - table_name: String, - created_at: String, - updated_at: String, -} - -fn default_schema() -> String { - "main".to_string() -} - -#[derive(Deserialize, Serialize)] -struct CreateResponse { - id: String, - label: String, - #[serde(default = "default_schema")] - schema_name: String, - table_name: String, -} - -#[derive(Deserialize)] -struct ListResponse { - datasets: Vec, - count: u64, - has_more: bool, -} - -#[derive(Deserialize, Serialize)] -struct Column { - name: String, - data_type: String, - nullable: bool, -} - -#[derive(Deserialize, Serialize)] -struct DatasetDetail { - id: String, - label: String, - schema_name: String, - table_name: String, - source_type: String, - created_at: String, - updated_at: String, - columns: Vec, -} - -#[derive(Deserialize, Serialize)] -struct UpdateResponse { - id: String, - label: String, - // Not currently in runtimedb's UpdateDatasetResponse; kept Optional so we - // print `full_name` only when the server actually returns the schema. - // Synthesizing "main" is wrong for sandbox-scoped datasets where - // schema_name == sandbox_id. 
- #[serde(default)] - schema_name: Option, - table_name: String, - #[serde(default)] - latest_version: Option, - #[serde(default)] - pinned_version: Option, - updated_at: String, -} - -fn create_dataset( - api: &ApiClient, - description: Option<&str>, - name: &str, - source: serde_json::Value, - format: &str, -) { - let label = description.unwrap_or(name); - let body = json!({ "table_name": name, "label": label, "source": source }); - - let (status, resp_body) = api.post_raw("/datasets", &body); - - if !status.is_success() { - use crossterm::style::Stylize; - eprintln!("{}", crate::util::api_error(resp_body).red()); - std::process::exit(1); - } - - let dataset: CreateResponse = match serde_json::from_str(&resp_body) { - Ok(v) => v, - Err(e) => { - eprintln!("error parsing response: {e}"); - std::process::exit(1); - } - }; - - use crossterm::style::Stylize; - match format { - "json" => println!("{}", serde_json::to_string_pretty(&dataset).unwrap()), - "yaml" => print!("{}", serde_yaml::to_string(&dataset).unwrap()), - "table" => { - eprintln!("{}", "Dataset created".green()); - println!("id: {}", dataset.id); - println!("label: {}", dataset.label); - println!( - "full_name: datasets.{}.{}", - dataset.schema_name, dataset.table_name - ); - } - _ => unreachable!(), - } -} - -pub fn create_from_query(workspace_id: &str, sql: &str, description: Option<&str>, name: &str, format: &str) { - let api = ApiClient::new(Some(workspace_id)); - create_dataset(&api, description, name, json!({ "type": "sql_query", "sql": sql }), format); -} - -pub fn create_from_saved_query( - workspace_id: &str, - query_id: &str, - description: Option<&str>, - name: &str, - format: &str, -) { - let api = ApiClient::new(Some(workspace_id)); - create_dataset(&api, description, name, json!({ "type": "saved_query", "saved_query_id": query_id }), format); -} - -pub fn list(workspace_id: &str, limit: Option, offset: Option, format: &str) { - let api = ApiClient::new(Some(workspace_id)); - - let params = 
[ - ("limit", limit.map(|l| l.to_string())), - ("offset", offset.map(|o| o.to_string())), - ]; - let body: ListResponse = api.get_with_params("/datasets", &params); - - match format { - "json" => println!("{}", serde_json::to_string_pretty(&body.datasets).unwrap()), - "yaml" => print!("{}", serde_yaml::to_string(&body.datasets).unwrap()), - "table" => { - if body.datasets.is_empty() { - use crossterm::style::Stylize; - eprintln!("{}", "No datasets found.".dark_grey()); - } else { - let rows: Vec> = body - .datasets - .iter() - .map(|d| { - vec![ - d.id.clone(), - d.label.clone(), - format!("datasets.{}.{}", d.schema_name, d.table_name), - crate::util::format_date(&d.created_at), - ] - }) - .collect(); - crate::table::print(&["ID", "LABEL", "FULL NAME", "CREATED AT"], &rows); - } - if body.has_more { - let next = offset.unwrap_or(0) + body.count as u32; - use crossterm::style::Stylize; - eprintln!( - "{}", - format!( - "showing {} results — use --offset {next} for more", - body.count - ) - .dark_grey() - ); - } - } - _ => unreachable!(), - } -} - -pub fn get(dataset_id: &str, workspace_id: &str, format: &str) { - let api = ApiClient::new(Some(workspace_id)); - - let d: DatasetDetail = api.get(&format!("/datasets/{dataset_id}")); - - match format { - "json" => println!("{}", serde_json::to_string_pretty(&d).unwrap()), - "yaml" => print!("{}", serde_yaml::to_string(&d).unwrap()), - "table" => { - let created_at = crate::util::format_date(&d.created_at); - let updated_at = crate::util::format_date(&d.updated_at); - println!("id: {}", d.id); - println!("label: {}", d.label); - println!("full_name: datasets.main.{}", d.table_name); - println!("source_type: {}", d.source_type); - println!("created_at: {created_at}"); - println!("updated_at: {updated_at}"); - if !d.columns.is_empty() { - println!(); - let rows: Vec> = d - .columns - .iter() - .map(|col| { - vec![ - col.name.clone(), - col.data_type.clone(), - col.nullable.to_string(), - ] - }) - .collect(); - 
crate::table::print(&["COLUMN", "DATA TYPE", "NULLABLE"], &rows); - } - } - _ => unreachable!(), - } -} - -pub fn update( - dataset_id: &str, - workspace_id: &str, - description: Option<&str>, - name: Option<&str>, - format: &str, -) { - if description.is_none() && name.is_none() { - eprintln!("error: provide at least one of --description or --name."); - std::process::exit(1); - } - - let api = ApiClient::new(Some(workspace_id)); - - let mut body = json!({}); - if let Some(d) = description { - body["label"] = json!(d); - } - if let Some(n) = name { - body["table_name"] = json!(n); - } - - let d: UpdateResponse = api.put(&format!("/datasets/{dataset_id}"), &body); - - use crossterm::style::Stylize; - eprintln!("{}", "Dataset updated".green()); - match format { - "json" => println!("{}", serde_json::to_string_pretty(&d).unwrap()), - "yaml" => print!("{}", serde_yaml::to_string(&d).unwrap()), - "table" => { - println!("id: {}", d.id); - println!("label: {}", d.label); - match &d.schema_name { - Some(schema) => { - println!("full_name: datasets.{}.{}", schema, d.table_name); - } - None => { - println!("table_name: {}", d.table_name); - eprintln!( - "{}", - format!( - "(run `hotdata datasets {}` to see the qualified name)", - d.id - ) - .dark_grey() - ); - } - } - println!("updated_at: {}", crate::util::format_date(&d.updated_at)); - } - _ => unreachable!(), - } -} - -pub fn refresh(workspace_id: &str, dataset_id: &str, async_mode: bool) { - use crossterm::style::Stylize; - - let mut body = json!({ - "dataset_id": dataset_id, - }); - if async_mode { - body["async"] = json!(true); - } - - let api = ApiClient::new(Some(workspace_id)); - let (status, resp_body) = api.post_raw("/refresh", &body); - - if !status.is_success() { - eprintln!("{}", crate::util::api_error(resp_body).red()); - std::process::exit(1); - } - - let parsed: serde_json::Value = serde_json::from_str(&resp_body).unwrap_or_default(); - - if async_mode { - let job_id = 
parsed["id"].as_str().unwrap_or("unknown"); - println!("{}", "Dataset refresh submitted.".green()); - println!("job_id: {}", job_id); - println!( - "{}", - format!("Use 'hotdata jobs {}' to check status.", job_id).dark_grey() - ); - return; - } - - let id = parsed["id"].as_str().unwrap_or("unknown"); - let version = parsed["version"].as_i64().unwrap_or(0); - let dataset_status = parsed["status"].as_str().unwrap_or(""); - println!("{}", "Dataset refresh completed.".green()); - println!( - "{}", - format!(" id: {id}, version: {version}, status: {dataset_status}").dark_grey() - ); -} - -#[cfg(test)] -mod tests { - use super::*; - - /// Mirrors runtimedb's `UpdateDatasetResponse` (see runtimedb/src/http/models.rs). - /// The CLI must deserialize this exact shape — schema_name, source_type, - /// created_at, and columns are NOT in the response. If runtimedb's response - /// gains or loses fields, update this fixture in lockstep. - #[test] - fn update_response_deserializes_runtimedb_payload() { - let body = serde_json::json!({ - "id": "ds_abc123", - "label": "url_test", - "table_name": "url_test", - "latest_version": 3, - "updated_at": "2026-04-28T18:30:00Z", - }); - let resp: UpdateResponse = serde_json::from_value(body).unwrap(); - assert_eq!(resp.id, "ds_abc123"); - assert_eq!(resp.label, "url_test"); - assert_eq!(resp.table_name, "url_test"); - // The server doesn't currently send schema_name, so we don't synthesize - // one — sandbox-scoped datasets live under datasets.., - // not datasets.main.*, and a fabricated "main" would mislead users. - assert!(resp.schema_name.is_none()); - assert_eq!(resp.latest_version, Some(3)); - assert!(resp.pinned_version.is_none()); - } - - #[test] - fn update_response_uses_schema_name_when_server_supplies_it() { - // Forward-compat: if runtimedb later includes schema_name, we use it. 
- let body = serde_json::json!({ - "id": "ds_abc123", - "label": "x", - "schema_name": "sandbox_xyz", - "table_name": "x", - "updated_at": "2026-04-28T18:30:00Z", - }); - let resp: UpdateResponse = serde_json::from_value(body).unwrap(); - assert_eq!(resp.schema_name.as_deref(), Some("sandbox_xyz")); - } - - #[test] - fn update_response_handles_pinned_version() { - let body = serde_json::json!({ - "id": "ds_abc123", - "label": "x", - "table_name": "x", - "latest_version": 5, - "pinned_version": 2, - "updated_at": "2026-04-28T18:30:00Z", - }); - let resp: UpdateResponse = serde_json::from_value(body).unwrap(); - assert_eq!(resp.pinned_version, Some(2)); - } - - #[test] - fn update_response_tolerates_missing_latest_version() { - // Defensive: treat latest_version as optional in case the server omits it. - let body = serde_json::json!({ - "id": "ds_abc123", - "label": "x", - "table_name": "x", - "updated_at": "2026-04-28T18:30:00Z", - }); - let resp: UpdateResponse = serde_json::from_value(body).unwrap(); - assert!(resp.latest_version.is_none()); - } -} diff --git a/src/main.rs b/src/main.rs index e5cb8dc..669deb9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,7 +6,6 @@ mod connections; mod connections_new; mod context; mod databases; -mod datasets; mod embedding_providers; mod indexes; mod jobs; @@ -27,7 +26,7 @@ use anstyle::AnsiColor; use clap::{Parser, builder::Styles}; use command::{ AuthCommands, Commands, ConnectionsCommands, ConnectionsCreateCommands, ContextCommands, - DatabaseTablesCommands, DatabasesCommands, DatasetsCommands, EmbeddingProvidersCommands, + DatabaseTablesCommands, DatabasesCommands, EmbeddingProvidersCommands, IndexesCommands, JobsCommands, QueriesCommands, QueryCommands, ResultsCommands, SandboxCommands, SkillCommands, TablesCommands, WorkspaceCommands, }; @@ -195,74 +194,6 @@ fn main() { Some(AuthCommands::Status) => auth::status("default"), Some(AuthCommands::Logout) => auth::logout("default"), }, - Commands::Datasets { - id, - 
workspace_id, - output, - command, - } => { - let workspace_id = resolve_workspace(workspace_id); - if let Some(id) = id { - datasets::get(&id, &workspace_id, &output) - } else { - match command { - Some(DatasetsCommands::List { - limit, - offset, - output, - }) => datasets::list(&workspace_id, limit, offset, &output), - Some(DatasetsCommands::Create { - name, - description, - sql, - query_id, - output, - }) => { - if let Some(sql) = sql { - datasets::create_from_query( - &workspace_id, - &sql, - description.as_deref(), - &name, - &output, - ) - } else { - datasets::create_from_saved_query( - &workspace_id, - query_id.as_deref().unwrap_or_else(|| unreachable!("clap enforces --sql or --query-id")), - description.as_deref(), - &name, - &output, - ) - } - } - Some(DatasetsCommands::Update { - id, - description, - name, - output, - }) => datasets::update( - &id, - &workspace_id, - description.as_deref(), - name.as_deref(), - &output, - ), - Some(DatasetsCommands::Refresh { id, r#async }) => { - datasets::refresh(&workspace_id, &id, r#async) - } - None => { - use clap::CommandFactory; - let mut cmd = Cli::command(); - cmd.build(); - cmd.find_subcommand_mut("datasets") - .unwrap() - .print_help() - .unwrap(); - } - } - } - } Commands::Query { sql, workspace_id, From 4a45f5df6be3b040aad69654611cccc8c953d1ce Mon Sep 17 00:00:00 2001 From: Eddie A Tejeda <669988+eddietejeda@users.noreply.github.com> Date: Wed, 27 May 2026 16:28:40 -0700 Subject: [PATCH 2/2] feat: rename datasets command to views Renames the `hotdata datasets` CLI command to `hotdata views` with a new `src/views.rs` module. The command and all user-facing terminology (help text, output messages, SQL prefix `views.`, skill docs) now use "view" / "views". Server-side API paths remain unchanged (`/datasets`). 
- Add `src/views.rs` (renamed from deleted `datasets.rs`) - Add `Views` / `ViewsCommands` to `command.rs` - Wire dispatch in `main.rs` - Update README, SKILL.md, WORKFLOWS.md, DATA_MODEL.template.md, MODEL_BUILD.md across hotdata and hotdata-analytics skills --- README.md | 27 +- skills/hotdata-analytics/SKILL.md | 16 +- .../hotdata-analytics/references/WORKFLOWS.md | 24 +- skills/hotdata/SKILL.md | 77 ++-- .../hotdata/references/DATA_MODEL.template.md | 10 +- skills/hotdata/references/MODEL_BUILD.md | 10 +- skills/hotdata/references/WORKFLOWS.md | 69 ++-- src/command.rs | 86 ++++ src/main.rs | 71 +++- src/views.rs | 382 ++++++++++++++++++ 10 files changed, 651 insertions(+), 121 deletions(-) create mode 100644 src/views.rs diff --git a/README.md b/README.md index af968d5..5525399 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ API key priority (lowest to highest): config file → `HOTDATA_API_KEY` env var | `connections` | `list`, `create`, `refresh`, `new` | Manage connections | | `databases` | `list`, `create`, `delete`, `tables` | Managed databases (create and load tables via parquet) | | `tables` | `list` | List tables and columns | -| `datasets` | `list`, `create`, `update` | Manage uploaded datasets | +| `views` | `list`, `create`, `update`, `refresh` | Manage SQL-derived views | | `context` | `list`, `show`, `pull`, `push` | Workspace Markdown context (e.g. data model `DATAMODEL`) via the context API | | `query` | | Execute a SQL query | | `queries` | `list` | Inspect query run history | @@ -146,7 +146,7 @@ hotdata databases tables delete
[--schema public] - `create` registers a managed connection (`source_type: managed`) with no external credentials. Use `--table` to declare tables up front (required before `tables load` on the current API). - `tables load` uploads a **parquet** file (or uses a staged `upload_id` from `POST /v1/files`) and publishes it as the table generation (`replace` mode). -- For CSV/JSON uploads without a managed database, use `hotdata datasets create` instead (`datasets.main.*`). +- For SQL-query materializations without a managed database, use `hotdata views create` instead (`views.main.*`). Example: @@ -167,24 +167,19 @@ hotdata tables list [--workspace-id ] [--connection-id ] [--schema ..
` — use this format in SQL queries. -## Datasets +## Views ```sh -hotdata datasets list [--workspace-id ] [--limit ] [--offset ] [--format table|json|yaml] -hotdata datasets [--workspace-id ] [--format table|json|yaml] -hotdata datasets create --file data.csv [--label "My Dataset"] [--table-name my_dataset] -hotdata datasets create --sql "SELECT ..." --label "My Dataset" -hotdata datasets create --url "https://example.com/data.parquet" --label "My Dataset" -hotdata datasets update [--label "New Label"] [--table-name new_table] -hotdata datasets refresh [--workspace-id ] [--async] +hotdata views list [--workspace-id ] [--limit ] [--offset ] [--output table|json|yaml] +hotdata views [--workspace-id ] [--output table|json|yaml] +hotdata views create --name my_view [--description "My View"] (--sql "SELECT ..." | --query-id ) +hotdata views update [--description "New Label"] [--name new_table] +hotdata views refresh [--workspace-id ] [--async] ``` -- Datasets are queryable as `datasets.main.`. -- `--file`, `--sql`, `--query-id`, and `--url` are mutually exclusive. -- `--url` imports data directly from a URL (supports csv, json, parquet). -- Format is auto-detected from file extension or content. -- Piped stdin is supported: `cat data.csv | hotdata datasets create --label "My Dataset"` -- `refresh` re-runs the dataset's source (URL fetch or saved query) and creates a new version. Not supported for upload-source datasets. +- Views are queryable as `views.main.`. +- `--sql` and `--query-id` are mutually exclusive; exactly one is required for `create`. +- `refresh` re-runs the view's source query and creates a new version. - `--async` submits the refresh as a background job and returns a job ID; poll with `hotdata jobs `. 
## Workspace context diff --git a/skills/hotdata-analytics/SKILL.md b/skills/hotdata-analytics/SKILL.md index 66a22a3..b98a0fb 100644 --- a/skills/hotdata-analytics/SKILL.md +++ b/skills/hotdata-analytics/SKILL.md @@ -1,6 +1,6 @@ --- name: hotdata-analytics -description: Use this skill when the user wants OLAP-style SQL analytics in Hotdata — aggregations, GROUP BY, JOINs, reporting, exploratory queries, query run history, stored results, or materialized follow-up tables (Chain via datasets or managed databases). Activate for "analyze", "aggregate", "rollup", "pivot", "report", "metrics", "GROUP BY", "query history", "past queries", "query runs", "stored results", "materialize", "chain", "intermediate table", or sorted indexes for filters/range scans. Do not load for BM25/vector search or geospatial SQL — use hotdata-search or hotdata-geospatial. Requires the core hotdata skill for connections, tables, datasets, and auth. +description: Use this skill when the user wants OLAP-style SQL analytics in Hotdata — aggregations, GROUP BY, JOINs, reporting, exploratory queries, query run history, stored results, or materialized follow-up tables (Chain via views or managed databases). Activate for "analyze", "aggregate", "rollup", "pivot", "report", "metrics", "GROUP BY", "query history", "past queries", "query runs", "stored results", "materialize", "chain", "intermediate table", or sorted indexes for filters/range scans. Do not load for BM25/vector search or geospatial SQL — use hotdata-search or hotdata-geospatial. Requires the core hotdata skill for connections, tables, views, and auth. version: 0.3.2 --- @@ -8,7 +8,7 @@ version: 0.3.2 **OLAP-style analytics** in Hotdata: PostgreSQL-dialect SQL, query execution, run history, stored results, **Chain** materializations, and **sorted** indexes for filters and joins. -**Prerequisites:** Authenticate, workspace, and catalog discovery via the **`hotdata`** skill (`connections`, `tables`, `datasets`, `databases`). 
+**Prerequisites:** Authenticate, workspace, and catalog discovery via the **`hotdata`** skill (`connections`, `tables`, `views`, `databases`). **Related skills:** **`hotdata-search`** (BM25, vector, retrieval indexes), **`hotdata-geospatial`** (spatial SQL). @@ -23,7 +23,7 @@ hotdata query status [--output table|json|csv] - **PostgreSQL dialect.** Quote mixed-case identifiers: `"CustomerName"`. - Use **`hotdata tables list`** for schema discovery — not `information_schema` via `query`. -- Fully qualified names: `..
`, `datasets..
`, `..
`. +- Fully qualified names: `..
`, `views..
`, `..
`. - Long-running queries may return `query_run_id` → poll with **`query status`** (exit `2` = still running). Do not re-run identical heavy SQL while polling. - For **workspace-wide** joins and naming, load **context:DATAMODEL** when listed (`hotdata context list` → `show DATAMODEL`) — see **`hotdata`** skill. @@ -82,8 +82,8 @@ hotdata results [--workspace-id ] [--output table|json 2. **Materialize** (pick one) ```bash - hotdata datasets create --name chain_slice [--description "chain slice"] --sql "SELECT ..." - hotdata datasets create --name chain_from_saved [--description "from saved"] --query-id + hotdata views create --name chain_slice --description "chain slice" --sql "SELECT ..." + hotdata views create --name chain_from_saved --description "from saved" --query-id ``` Or managed parquet: @@ -94,10 +94,10 @@ hotdata results [--workspace-id ] [--output table|json hotdata databases tables load slice --file ./slice.parquet ``` -3. **Chain query** — use printed **`full_name`** or `datasets list` **FULL NAME** column: +3. **Chain query** — use printed **`full_name`** or `views list` **FULL NAME** column: ```bash - hotdata query "SELECT * FROM datasets.main.chain_slice WHERE ..." + hotdata query "SELECT * FROM views.main.chain_slice WHERE ..." hotdata query "SELECT * FROM analytics.public.slice WHERE ..." ``` @@ -122,4 +122,4 @@ List and delete use the same `hotdata indexes` commands as in the search skill; ## Sandboxes and chains -Sandbox datasets use **`datasets..
`**, not `datasets.main`. Run queries with active sandbox config or `hotdata sandbox run hotdata query "..."`. See **`hotdata`** skill **Sandboxes**. +Sandbox views use **`views..
`**, not `views.main`. Run queries with active sandbox config or `hotdata sandbox run hotdata query "..."`. See **`hotdata`** skill **Sandboxes**. diff --git a/skills/hotdata-analytics/references/WORKFLOWS.md b/skills/hotdata-analytics/references/WORKFLOWS.md index 0a11385..affeffe 100644 --- a/skills/hotdata-analytics/references/WORKFLOWS.md +++ b/skills/hotdata-analytics/references/WORKFLOWS.md @@ -2,7 +2,7 @@ OLAP-style SQL, **History** (query runs and stored results), and **Chain** (materialized follow-ups). Requires **`hotdata`** for auth, workspaces, and catalog commands. -**Related:** **`hotdata-search`** for BM25/vector indexes and `hotdata search`; **`hotdata`** [WORKFLOWS.md](../../hotdata/references/WORKFLOWS.md) for datasets vs managed databases. +**Related:** **`hotdata-search`** for BM25/vector indexes and `hotdata search`; **`hotdata`** [WORKFLOWS.md](../../hotdata/references/WORKFLOWS.md) for views vs managed databases. --- @@ -66,11 +66,11 @@ hotdata query "SELECT ..." Land a smaller table — pick one: -**Datasets** (CSV/JSON/URL/SQL snapshot → `datasets..
`): +**Views** (SQL snapshot → `views..
`): ```bash -hotdata datasets create --label "chain revenue slice" --sql "SELECT ..." [--table-name chain_revenue_slice] -hotdata datasets create --label "from saved" --query-id [--table-name ...] +hotdata views create --name chain_revenue_slice --description "chain revenue slice" --sql "SELECT ..." +hotdata views create --name chain_from_saved --description "from saved" --query-id ``` **Managed database** (parquet → `..
`): @@ -80,17 +80,17 @@ hotdata databases create --name chain_db --table revenue_slice hotdata databases tables load chain_db revenue_slice --file ./revenue_slice.parquet ``` -Note the printed **`full_name`** (e.g. `datasets.main.chain_revenue_slice` or `chain_db.public.revenue_slice`). For datasets, **`FULL NAME`** from `datasets list` is authoritative. +Note the printed **`full_name`** (e.g. `views.main.chain_revenue_slice` or `chain_db.public.revenue_slice`). For views, **`FULL NAME`** from `views list` is authoritative. ### 3. Chain query -Query using that name — do not hardcode `datasets.main` if the schema segment is a sandbox id: +Query using that name — do not hardcode `views.main` if the schema segment is a sandbox id: ```bash -hotdata datasets list -hotdata query "SELECT * FROM datasets.main.chain_revenue_slice WHERE ..." +hotdata views list +hotdata query "SELECT * FROM views.main.chain_revenue_slice WHERE ..." # Sandbox example (use actual full_name from create or list): -# hotdata query "SELECT * FROM datasets.s_ufmblmvq.chain_revenue_slice WHERE ..." +# hotdata query "SELECT * FROM views.s_ufmblmvq.chain_revenue_slice WHERE ..." # Managed database: # hotdata query "SELECT * FROM chain_db.public.revenue_slice WHERE ..." ``` @@ -99,18 +99,18 @@ hotdata query "SELECT * FROM datasets.main.chain_revenue_slice WHERE ..." For **sandbox-scoped** chain tables: -- Qualified name is **`datasets..
`**, not `datasets.main`. +- Qualified name is **`views..
`**, not `views.main`. - Run queries with **active sandbox** in config (`hotdata sandbox set`) **or** inside **`hotdata sandbox run hotdata query "…"`**. - Without sandbox context, you may get **access denied** on sandbox-only tables. ### Naming and documentation - Prefer predictable `--table-name` values: `chain__`. -- Record long-lived chains in **context:DATAMODEL → Derived tables (Chain)** with the **full** SQL name you use (`datasets.…` or `database.schema.table`). +- Record long-lived chains in **context:DATAMODEL → Derived tables (Chain)** with the **full** SQL name you use (`views.…` or `database.schema.table`). - Promote join/grain findings to **context:DATAMODEL** when they should outlive the sandbox (**`hotdata`** skill). ### Guardrails - Materialize when the base scan is large and the follow-up runs many times. - Keep Chain tables focused; avoid wide `SELECT *` materializations when a narrow projection suffices. -- For upload format choice (datasets vs databases), see **`hotdata`** WORKFLOWS — [Datasets vs managed databases](../../hotdata/references/WORKFLOWS.md#datasets-vs-managed-databases). +- For source format choice (views vs databases), see **`hotdata`** WORKFLOWS — [Views vs managed databases](../../hotdata/references/WORKFLOWS.md#views-vs-managed-databases). diff --git a/skills/hotdata/SKILL.md b/skills/hotdata/SKILL.md index ef45914..0bec6df 100644 --- a/skills/hotdata/SKILL.md +++ b/skills/hotdata/SKILL.md @@ -1,6 +1,6 @@ --- name: hotdata -description: Use this skill when the user wants to run core hotdata CLI commands — auth, workspaces, connections, managed databases, datasets, tables, basic SQL query, sandboxes, database context (context:DATAMODEL), jobs, and skill install. 
Activate for "run hotdata", "list workspaces", "list connections", "create a connection", "list databases", "managed database", "load parquet", "list tables", "list datasets", "create a dataset", "execute a query", "list sandboxes", "database context", "context:DATAMODEL", or general Hotdata CLI usage. For full-text/vector search and retrieval indexes use hotdata-search; for OLAP analytics, query history, stored results, and Chain materializations use hotdata-analytics; for geospatial/GIS use hotdata-geospatial. +description: Use this skill when the user wants to run core hotdata CLI commands — auth, workspaces, connections, managed databases, views, tables, basic SQL query, sandboxes, database context (context:DATAMODEL), jobs, and skill install. Activate for "run hotdata", "list workspaces", "list connections", "create a connection", "list databases", "managed database", "load parquet", "list tables", "list views", "create a view", "execute a query", "list sandboxes", "database context", "context:DATAMODEL", or general Hotdata CLI usage. For full-text/vector search and retrieval indexes use hotdata-search; for OLAP analytics, query history, stored results, and Chain materializations use hotdata-analytics; for geospatial/GIS use hotdata-geospatial. version: 0.3.2 --- @@ -20,7 +20,7 @@ Install all skills with **`hotdata skills install`**. 
Load specialized skills on | Skill | Use for | |-------|---------| -| **`hotdata`** (this file) | Auth, workspaces, connections, databases, datasets, tables, basic `query`, context, sandboxes, jobs | +| **`hotdata`** (this file) | Auth, workspaces, connections, databases, views, tables, basic `query`, context, sandboxes, jobs | | **`hotdata-search`** | BM25, vector search, `hotdata search`, bm25/vector indexes, embedding providers | | **`hotdata-analytics`** | OLAP SQL, aggregations, query/results history, Chain materializations, sorted indexes | | **`hotdata-geospatial`** | PostGIS-style `ST_*`, WKB, spatial joins | @@ -82,15 +82,15 @@ Use [references/DATA_MODEL.template.md](references/DATA_MODEL.template.md) and [ These are **patterns** built from the commands below—not separate CLI subcommands: -- **Model (`context:DATAMODEL`)** — The **shared** Markdown semantic map of the active database (entities, keys, joins across connections). **Store and read it only via database context** (`hotdata context list`, then `hotdata context show DATAMODEL` **only when listed**, `context push DATAMODEL`); refresh using `connections`, `connections refresh`, `tables list`, and `datasets list`. For a **deep** pass (connector enrichment, indexes, per-table detail), see [references/MODEL_BUILD.md](references/MODEL_BUILD.md). Contrast **analysis modeling** in sandboxes or chat (see [Analysis modeling vs context:DATAMODEL](#analysis-modeling-vs-contextdatamodel)). +- **Model (`context:DATAMODEL`)** — The **shared** Markdown semantic map of the active database (entities, keys, joins across connections). **Store and read it only via database context** (`hotdata context list`, then `hotdata context show DATAMODEL` **only when listed**, `context push DATAMODEL`); refresh using `connections`, `connections refresh`, `tables list`, and `views list`. For a **deep** pass (connector enrichment, indexes, per-table detail), see [references/MODEL_BUILD.md](references/MODEL_BUILD.md). 
Contrast **analysis modeling** in sandboxes or chat (see [Analysis modeling vs context:DATAMODEL](#analysis-modeling-vs-contextdatamodel)). - **History / Chain / OLAP SQL** — See **`hotdata-analytics`** and [references/WORKFLOWS.md](references/WORKFLOWS.md). - **Search / retrieval indexes** — See **`hotdata-search`**. -Catalog, skill decision tree, epic flows (onboard, chain, retrieval), datasets vs databases, and sandbox procedures: [references/WORKFLOWS.md](references/WORKFLOWS.md). +Catalog, skill decision tree, epic flows (onboard, chain, retrieval), views vs databases, and sandbox procedures: [references/WORKFLOWS.md](references/WORKFLOWS.md). ## Available Commands -Top-level subcommands (each detailed below): **`auth`**, **`datasets`**, **`query`**, **`workspaces`**, **`connections`**, **`databases`**, **`tables`**, **`skills`**, **`results`**, **`jobs`**, **`indexes`**, **`embedding-providers`**, **`search`**, **`queries`**, **`sandbox`**, **`context`**, **`completions`**. Search, indexes (bm25/vector), and embedding providers are documented in **`hotdata-search`**; query history, results, Chain, and OLAP patterns in **`hotdata-analytics`**. +Top-level subcommands (each detailed below): **`auth`**, **`views`**, **`query`**, **`workspaces`**, **`connections`**, **`databases`**, **`tables`**, **`skills`**, **`results`**, **`jobs`**, **`indexes`**, **`embedding-providers`**, **`search`**, **`queries`**, **`sandbox`**, **`context`**, **`completions`**. Search, indexes (bm25/vector), and embedding providers are documented in **`hotdata-search`**; query history, results, Chain, and OLAP patterns in **`hotdata-analytics`**. Global CLI options: **`--api-key`**, **`-v` / `--version`**, **`-h` / `--help`**. Hidden developer flag: **`--debug`** (verbose HTTP logs). @@ -181,7 +181,7 @@ hotdata connections create \ **Managed databases** are Hotdata-owned catalogs you create and populate yourself — no remote source to sync. Query them in SQL as **`..
`**. Prefer **`hotdata databases`** for this workflow. -**Parquet vs datasets:** `databases tables load` accepts **parquet only**. For SQL-query or saved-query materializations, use **`hotdata datasets create`**. +**Parquet vs views:** `databases tables load` accepts **parquet only**. For SQL-query or saved-query materializations, use **`hotdata views create`**. **Active database:** `hotdata databases set ` saves the active database to config. All `databases tables` subcommands and all `context` commands default to the active database; pass **`--database `** to override per-command. @@ -231,63 +231,62 @@ hotdata tables list [--workspace-id ] [--connection-id ] [--limit ] [--offset ] [--output table|json|yaml] +hotdata views list [--workspace-id ] [--limit ] [--offset ] [--output table|json|yaml] ``` - Default format is `table`. -- Returns `id`, `label`, and `created_at`; table output includes a **`FULL NAME`** column (`datasets..
`). +- Returns `id`, `label`, and `created_at`; table output includes a **`FULL NAME`** column (`views..
`). - Results are paginated (default 100). Use `--offset` to fetch further pages. -- **There is no filter for “this sandbox only.”** `datasets list` always returns **all** datasets in the workspace. To tell sandbox-scoped datasets from workspace-wide ones, read **`FULL NAME`**: the middle segment is the sandbox id (e.g. `datasets.s_ufmblmvq.tac_csat`) for sandbox data, and usually **`main`** (e.g. `datasets.main.my_table`) for ordinary uploads. +- **There is no filter for “this sandbox only.”** `views list` always returns **all** views in the workspace. To tell sandbox-scoped views from workspace-wide ones, read **`FULL NAME`**: the middle segment is the sandbox id (e.g. `views.s_ufmblmvq.tac_csat`) for sandbox data, and usually **`main`** (e.g. `views.main.my_table`) for ordinary views. -#### Get dataset details +#### Get view details ``` -hotdata datasets [--workspace-id ] [--output table|json|yaml] +hotdata views [--workspace-id ] [--output table|json|yaml] ``` -- Shows dataset metadata and a full column listing with `name`, `data_type`, `nullable`. +- Shows view metadata and a full column listing with `name`, `data_type`, `nullable`. - Use this to inspect schema before querying. -- For the **qualified SQL name**, prefer **`FULL NAME` from `datasets list`** or the **`full_name` printed by `datasets create`**—especially for sandbox datasets, where the schema is **`datasets.`**, not `datasets.main`. +- For the **qualified SQL name**, prefer **`FULL NAME` from `views list`** or the **`full_name` printed by `views create`**—especially for sandbox views, where the schema is **`views.`**, not `views.main`. -#### Update a dataset +#### Update a view ``` -hotdata datasets update [--description
` and active sandbox or `hotdata sandbox run …` +5. [ ] (Sandbox) Use `views..
` and active sandbox or `hotdata sandbox run …` 6. [ ] Record stable chains in **context:DATAMODEL** when they should outlive the session **Detail:** [hotdata-analytics WORKFLOWS — Chain](../../hotdata-analytics/references/WORKFLOWS.md#chain) @@ -80,38 +80,37 @@ End-to-end checklists. Use the linked sections for command detail and guardrails --- -## Datasets vs managed databases +## Views vs managed databases -Both land queryable tables in the workspace; the path depends on **format** and **how you want to name tables in SQL**. +Both land queryable tables in the workspace; the path depends on **source** and **how you want to name tables in SQL**. -| | **Datasets** | **Managed databases** | -|---|-------------|------------------------| -| **Best for** | CSV, JSON, URL import, stdin, SQL/query snapshot | Parquet files you own; catalog-style `name.schema.table` | -| **SQL prefix** | `datasets..
` (often `datasets.main.*`) | `..
` (database = connection name) | -| **CLI** | `hotdata datasets create` | `hotdata databases create` + `databases tables load` | +| | **Views** | **Managed databases** | +|---|-----------|------------------------| +| **Best for** | SQL/query snapshot | Parquet files you own; catalog-style `name.schema.table` | +| **SQL prefix** | `views..
` (often `views.main.*`) | `..
` (database = connection name) | +| **CLI** | `hotdata views create` | `hotdata databases create` + `databases tables load` | | **Declare schema up front** | No | Yes — `--table` on create (required before load on current API) | -| **Parquet** | Yes (`--file`, `--url`, `--upload-id`) | **Only** parquet on `tables load` | -| **Refresh upstream** | `datasets refresh` (URL/query sources) | Replace via `tables load` again | +| **Parquet** | No | **Only** parquet on `tables load` | +| **Refresh upstream** | `views refresh` (query sources) | Replace via `tables load` again | -**Rule of thumb:** CSV/JSON or “upload a file from a URL” → **datasets**. Parquet catalog you control as **`mydb.public.orders`** → **databases**. +**Rule of thumb:** SQL-query snapshot → **views**. Parquet catalog you control as **`mydb.public.orders`** → **databases**. -### Workflow: dataset upload and query +### Workflow: view creation and query 1. Authenticate and set workspace (`hotdata auth`, `hotdata workspaces set` if needed). -2. Create the dataset (one source): +2. Create the view: ```bash - hotdata datasets create --label "Orders" --file ./orders.csv - # or: --url "https://example.com/orders.parquet" - # or: --sql "SELECT ..." # materialize from a query + hotdata views create --name orders --sql "SELECT ..." + # or: --query-id # materialize from a saved query ``` -3. Note the printed **`full_name`** (e.g. `datasets.main.orders`) — do not assume `datasets.main`. -4. Inspect if needed: `hotdata datasets list`, `hotdata datasets `. +3. Note the printed **`full_name`** (e.g. `views.main.orders`) — do not assume `views.main`. +4. Inspect if needed: `hotdata views list`, `hotdata views `. 5. 
Query: ```bash - hotdata query "SELECT count(*) FROM datasets.main.orders" + hotdata query "SELECT count(*) FROM views.main.orders" ``` ### Workflow: managed database (parquet) @@ -137,7 +136,7 @@ Both land queryable tables in the workspace; the path depends on **format** and hotdata query "SELECT count(*) FROM sales.public.orders" ``` -For **Chain** materializations into datasets or databases, see **`hotdata-analytics`**. +For **Chain** materializations into views or databases, see **`hotdata-analytics`**. --- @@ -165,8 +164,8 @@ hotdata connections list hotdata connections refresh # after DDL / stale remote metadata hotdata tables list hotdata tables list --connection-id -hotdata datasets list -hotdata datasets +hotdata views list +hotdata views hotdata databases list ``` @@ -174,24 +173,24 @@ Use `hotdata tables list` for discovery; do not query `information_schema` for t --- -## Sandboxes and datasets +## Sandboxes and views -Use this when work is isolated in a **sandbox** (exploratory runs, ephemeral datasets). +Use this when work is isolated in a **sandbox** (exploratory runs, ephemeral views). -**Active sandbox vs `sandbox run`:** After `sandbox new` or `sandbox set`, run **`datasets create`**, **`query`**, etc. **directly**. **`sandbox run `** (no id before `run`) **always creates a new sandbox**. +**Active sandbox vs `sandbox run`:** After `sandbox new` or `sandbox set`, run **`views create`**, **`query`**, etc. **directly**. **`sandbox run `** (no id before `run`) **always creates a new sandbox**. -**Qualified names:** Workspace datasets → **`datasets.main.
`**. Sandbox datasets → **`datasets..
`**. Use **`full_name`** from create or **FULL NAME** from `datasets list`. +**Qualified names:** Workspace views → **`views.main.
`**. Sandbox views → **`views..
`**. Use **`full_name`** from create or **FULL NAME** from `views list`. **Access:** Sandbox-only tables need active sandbox config or **`hotdata sandbox run …`**. **SQL:** Quote mixed-case columns with double quotes. -**Listing:** `datasets list` returns all workspace datasets; use **FULL NAME** to spot sandbox vs `main` rows. +**Listing:** `views list` returns all workspace views; use **FULL NAME** to spot sandbox vs `main` rows. --- ## Cross-cutting - **Workspace:** Active workspace or `--workspace-id`. **`hotdata queries`** uses the active workspace only (no `--workspace-id`). -- **Jobs:** `hotdata jobs list` / `jobs ` for async refreshes, dataset refresh, and index builds. +- **Jobs:** `hotdata jobs list` / `jobs ` for async refreshes, view refresh, and index builds. - **Discovery:** `hotdata tables list` — not `query` on `information_schema`. diff --git a/src/command.rs b/src/command.rs index 8afc01e..97b7971 100644 --- a/src/command.rs +++ b/src/command.rs @@ -56,6 +56,23 @@ pub enum Commands { command: Option, }, + /// SQL-derived views materialized from queries or saved queries + Views { + /// View ID to show details + id: Option, + + /// Workspace ID (defaults to first workspace from login) + #[arg(long, short = 'w', global = true)] + workspace_id: Option, + + /// Output format (used with view ID) + #[arg(long = "output", short = 'o', default_value = "table", value_parser = ["table", "json", "yaml"])] + output: String, + + #[command(subcommand)] + command: Option, + }, + /// Managed databases you create and populate with tables (parquet uploads) Databases { /// Database id or description (omit to use a subcommand) @@ -456,6 +473,75 @@ pub enum ConnectionsCreateCommands { }, } +#[derive(Subcommand)] +pub enum ViewsCommands { + /// List all views in a workspace + List { + /// Maximum number of results (default: 100, max: 1000) + #[arg(long)] + limit: Option, + + /// Pagination offset + #[arg(long)] + offset: Option, + + /// Output format + #[arg(long = 
"output", short = 'o', default_value = "table", value_parser = ["table", "json", "yaml"])] + output: String, + }, + + /// Create a view from a SQL query or saved query + Create { + /// SQL table name the view is addressable as (e.g. my_view) + #[arg(long)] + name: String, + + /// Human-readable display label + #[arg(long)] + description: Option, + + /// SQL query to create the view from + #[arg(long, conflicts_with = "query_id", required_unless_present = "query_id")] + sql: Option, + + /// Saved query ID to create the view from + #[arg(long, conflicts_with = "sql", required_unless_present = "sql")] + query_id: Option, + + /// Output format + #[arg(long = "output", short = 'o', default_value = "table", value_parser = ["table", "json", "yaml"])] + output: String, + }, + + /// Update a view's description and/or name + Update { + /// View ID + id: String, + + /// New display label + #[arg(long)] + description: Option, + + /// New SQL table name (must be a valid identifier) + #[arg(long)] + name: Option, + + /// Output format + #[arg(long = "output", short = 'o', default_value = "table", value_parser = ["table", "json", "yaml"])] + output: String, + }, + + /// Refresh a view by re-running its source query and creating a new version + Refresh { + /// View ID + id: String, + + /// Submit as a background job + #[arg(long)] + r#async: bool, + }, +} + #[derive(Subcommand)] pub enum DatabasesCommands { /// List managed databases in the workspace diff --git a/src/main.rs b/src/main.rs index 669deb9..aca7472 100644 --- a/src/main.rs +++ b/src/main.rs @@ -20,6 +20,7 @@ mod table; mod tables; mod update; mod util; +mod views; mod workspace; use anstyle::AnsiColor; @@ -28,7 +29,7 @@ use command::{ AuthCommands, Commands, ConnectionsCommands, ConnectionsCreateCommands, ContextCommands, DatabaseTablesCommands, DatabasesCommands, EmbeddingProvidersCommands, IndexesCommands, JobsCommands, QueriesCommands, QueryCommands, ResultsCommands, - SandboxCommands, SkillCommands, 
TablesCommands, WorkspaceCommands, + SandboxCommands, SkillCommands, TablesCommands, ViewsCommands, WorkspaceCommands, }; #[derive(Parser)] @@ -314,6 +315,74 @@ fn main() { } } } + Commands::Views { + id, + workspace_id, + output, + command, + } => { + let workspace_id = resolve_workspace(workspace_id); + if let Some(id) = id { + views::get(&id, &workspace_id, &output) + } else { + match command { + Some(ViewsCommands::List { + limit, + offset, + output, + }) => views::list(&workspace_id, limit, offset, &output), + Some(ViewsCommands::Create { + name, + description, + sql, + query_id, + output, + }) => { + if let Some(sql) = sql { + views::create_from_query( + &workspace_id, + &sql, + description.as_deref(), + &name, + &output, + ) + } else { + views::create_from_saved_query( + &workspace_id, + query_id.as_deref().unwrap_or_else(|| unreachable!("clap enforces --sql or --query-id")), + description.as_deref(), + &name, + &output, + ) + } + } + Some(ViewsCommands::Update { + id, + description, + name, + output, + }) => views::update( + &id, + &workspace_id, + description.as_deref(), + name.as_deref(), + &output, + ), + Some(ViewsCommands::Refresh { id, r#async }) => { + views::refresh(&workspace_id, &id, r#async) + } + None => { + use clap::CommandFactory; + let mut cmd = Cli::command(); + cmd.build(); + cmd.find_subcommand_mut("views") + .unwrap() + .print_help() + .unwrap(); + } + } + } + } Commands::Databases { name_or_id, workspace_id, diff --git a/src/views.rs b/src/views.rs new file mode 100644 index 0000000..e1e28b0 --- /dev/null +++ b/src/views.rs @@ -0,0 +1,382 @@ +use crate::api::ApiClient; +use serde::{Deserialize, Serialize}; +use serde_json::json; + +#[derive(Deserialize, Serialize)] +struct View { + id: String, + label: String, + #[serde(default = "default_schema")] + schema_name: String, + table_name: String, + created_at: String, + updated_at: String, +} + +fn default_schema() -> String { + "main".to_string() +} + +#[derive(Deserialize, Serialize)] 
+struct CreateResponse { + id: String, + label: String, + #[serde(default = "default_schema")] + schema_name: String, + table_name: String, +} + +#[derive(Deserialize)] +struct ListResponse { + #[serde(rename = "datasets")] + views: Vec, + count: u64, + has_more: bool, +} + +#[derive(Deserialize, Serialize)] +struct Column { + name: String, + data_type: String, + nullable: bool, +} + +#[derive(Deserialize, Serialize)] +struct ViewDetail { + id: String, + label: String, + schema_name: String, + table_name: String, + source_type: String, + created_at: String, + updated_at: String, + columns: Vec, +} + +#[derive(Deserialize, Serialize)] +struct UpdateResponse { + id: String, + label: String, + // Not currently in runtimedb's UpdateDatasetResponse (see runtimedb/src/http/models.rs). + // Kept Optional so we print `full_name` only when the server actually returns the schema. + // Synthesizing "main" is wrong for sandbox-scoped views where + // schema_name == sandbox_id. + #[serde(default)] + schema_name: Option, + table_name: String, + #[serde(default)] + latest_version: Option, + #[serde(default)] + pinned_version: Option, + updated_at: String, +} + +fn create_view( + api: &ApiClient, + description: Option<&str>, + name: &str, + source: serde_json::Value, + format: &str, +) { + let label = description.unwrap_or(name); + let body = json!({ "table_name": name, "label": label, "source": source }); + + let (status, resp_body) = api.post_raw("/datasets", &body); + + if !status.is_success() { + use crossterm::style::Stylize; + eprintln!("{}", crate::util::api_error(resp_body).red()); + std::process::exit(1); + } + + let view: CreateResponse = match serde_json::from_str(&resp_body) { + Ok(v) => v, + Err(e) => { + eprintln!("error parsing response: {e}"); + std::process::exit(1); + } + }; + + use crossterm::style::Stylize; + match format { + "json" => println!("{}", serde_json::to_string_pretty(&view).unwrap()), + "yaml" => print!("{}", serde_yaml::to_string(&view).unwrap()), 
+ "table" => { + eprintln!("{}", "View created".green()); + println!("id: {}", view.id); + println!("label: {}", view.label); + println!( + "full_name: views.{}.{}", + view.schema_name, view.table_name + ); + } + _ => unreachable!(), + } +} + +pub fn create_from_query(workspace_id: &str, sql: &str, description: Option<&str>, name: &str, format: &str) { + let api = ApiClient::new(Some(workspace_id)); + create_view(&api, description, name, json!({ "type": "sql_query", "sql": sql }), format); +} + +pub fn create_from_saved_query( + workspace_id: &str, + query_id: &str, + description: Option<&str>, + name: &str, + format: &str, +) { + let api = ApiClient::new(Some(workspace_id)); + create_view(&api, description, name, json!({ "type": "saved_query", "saved_query_id": query_id }), format); +} + +pub fn list(workspace_id: &str, limit: Option<u32>, offset: Option<u32>, format: &str) { + let api = ApiClient::new(Some(workspace_id)); + + let params = [ + ("limit", limit.map(|l| l.to_string())), + ("offset", offset.map(|o| o.to_string())), + ]; + let body: ListResponse = api.get_with_params("/datasets", &params); + + match format { + "json" => println!("{}", serde_json::to_string_pretty(&body.views).unwrap()), + "yaml" => print!("{}", serde_yaml::to_string(&body.views).unwrap()), + "table" => { + if body.views.is_empty() { + use crossterm::style::Stylize; + eprintln!("{}", "No views found.".dark_grey()); + } else { + let rows: Vec<Vec<String>> = body + .views + .iter() + .map(|v| { + vec![ + v.id.clone(), + v.label.clone(), + format!("views.{}.{}", v.schema_name, v.table_name), + crate::util::format_date(&v.created_at), + ] + }) + .collect(); + crate::table::print(&["ID", "LABEL", "FULL NAME", "CREATED AT"], &rows); + } + if body.has_more { + let next = u64::from(offset.unwrap_or(0)) + body.count; // widen to u64: `count as u32` could truncate and the u32 sum could overflow + use crossterm::style::Stylize; + eprintln!( + "{}", + format!( + "showing {} results — use --offset {next} for more", + body.count + ) + .dark_grey() + ); + } + } + _ => unreachable!(), + } +} + +pub fn 
get(view_id: &str, workspace_id: &str, format: &str) { + let api = ApiClient::new(Some(workspace_id)); + + let v: ViewDetail = api.get(&format!("/datasets/{view_id}")); + + match format { + "json" => println!("{}", serde_json::to_string_pretty(&v).unwrap()), + "yaml" => print!("{}", serde_yaml::to_string(&v).unwrap()), + "table" => { + let created_at = crate::util::format_date(&v.created_at); + let updated_at = crate::util::format_date(&v.updated_at); + println!("id: {}", v.id); + println!("label: {}", v.label); + println!("full_name: views.{}.{}", v.schema_name, v.table_name); // use the server-reported schema; sandbox-scoped views are not under "main" + println!("source_type: {}", v.source_type); + println!("created_at: {created_at}"); + println!("updated_at: {updated_at}"); + if !v.columns.is_empty() { + println!(); + let rows: Vec<Vec<String>> = v + .columns + .iter() + .map(|col| { + vec![ + col.name.clone(), + col.data_type.clone(), + col.nullable.to_string(), + ] + }) + .collect(); + crate::table::print(&["COLUMN", "DATA TYPE", "NULLABLE"], &rows); + } + } + _ => unreachable!(), + } +} + +pub fn update( + view_id: &str, + workspace_id: &str, + description: Option<&str>, + name: Option<&str>, + format: &str, +) { + if description.is_none() && name.is_none() { + eprintln!("error: provide at least one of --description or --name."); + std::process::exit(1); + } + + let api = ApiClient::new(Some(workspace_id)); + + let mut body = json!({}); + if let Some(d) = description { + body["label"] = json!(d); + } + if let Some(n) = name { + body["table_name"] = json!(n); + } + + let v: UpdateResponse = api.put(&format!("/datasets/{view_id}"), &body); + + use crossterm::style::Stylize; + eprintln!("{}", "View updated".green()); + match format { + "json" => println!("{}", serde_json::to_string_pretty(&v).unwrap()), + "yaml" => print!("{}", serde_yaml::to_string(&v).unwrap()), + "table" => { + println!("id: {}", v.id); + println!("label: {}", v.label); + match &v.schema_name { + Some(schema) => { + println!("full_name: views.{}.{}", schema, v.table_name); + } + None => { + 
println!("table_name: {}", v.table_name); + eprintln!( + "{}", + format!( + "(run `hotdata views {}` to see the qualified name)", + v.id + ) + .dark_grey() + ); + } + } + println!("updated_at: {}", crate::util::format_date(&v.updated_at)); + } + _ => unreachable!(), + } +} + +pub fn refresh(workspace_id: &str, view_id: &str, async_mode: bool) { + use crossterm::style::Stylize; + + let mut body = json!({ + "dataset_id": view_id, + }); + if async_mode { + body["async"] = json!(true); + } + + let api = ApiClient::new(Some(workspace_id)); + let (status, resp_body) = api.post_raw("/refresh", &body); + + if !status.is_success() { + eprintln!("{}", crate::util::api_error(resp_body).red()); + std::process::exit(1); + } + + let parsed: serde_json::Value = serde_json::from_str(&resp_body).unwrap_or_default(); + + if async_mode { + let job_id = parsed["id"].as_str().unwrap_or("unknown"); + println!("{}", "View refresh submitted.".green()); + println!("job_id: {}", job_id); + println!( + "{}", + format!("Use 'hotdata jobs {}' to check status.", job_id).dark_grey() + ); + return; + } + + let id = parsed["id"].as_str().unwrap_or("unknown"); + let version = parsed["version"].as_i64().unwrap_or(0); + let view_status = parsed["status"].as_str().unwrap_or(""); + println!("{}", "View refresh completed.".green()); + println!( + "{}", + format!(" id: {id}, version: {version}, status: {view_status}").dark_grey() + ); +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Mirrors runtimedb's `UpdateDatasetResponse` (see runtimedb/src/http/models.rs). + /// The CLI must deserialize this exact shape — schema_name, source_type, + /// created_at, and columns are NOT in the response. If runtimedb's response + /// gains or loses fields, update this fixture in lockstep. 
+ #[test] + fn update_response_deserializes_runtimedb_payload() { + let body = serde_json::json!({ + "id": "ds_abc123", + "label": "url_test", + "table_name": "url_test", + "latest_version": 3, + "updated_at": "2026-04-28T18:30:00Z", + }); + let resp: UpdateResponse = serde_json::from_value(body).unwrap(); + assert_eq!(resp.id, "ds_abc123"); + assert_eq!(resp.label, "url_test"); + assert_eq!(resp.table_name, "url_test"); + // The server doesn't currently send schema_name, so we don't synthesize + // one — sandbox-scoped views live under views..
, + // not views.main.*, and a fabricated "main" would mislead users. + assert!(resp.schema_name.is_none()); + assert_eq!(resp.latest_version, Some(3)); + assert!(resp.pinned_version.is_none()); + } + + #[test] + fn update_response_uses_schema_name_when_server_supplies_it() { + // Forward-compat: if runtimedb later includes schema_name, we use it. + let body = serde_json::json!({ + "id": "ds_abc123", + "label": "x", + "schema_name": "sandbox_xyz", + "table_name": "x", + "updated_at": "2026-04-28T18:30:00Z", + }); + let resp: UpdateResponse = serde_json::from_value(body).unwrap(); + assert_eq!(resp.schema_name.as_deref(), Some("sandbox_xyz")); + } + + #[test] + fn update_response_handles_pinned_version() { + let body = serde_json::json!({ + "id": "ds_abc123", + "label": "x", + "table_name": "x", + "latest_version": 5, + "pinned_version": 2, + "updated_at": "2026-04-28T18:30:00Z", + }); + let resp: UpdateResponse = serde_json::from_value(body).unwrap(); + assert_eq!(resp.pinned_version, Some(2)); + } + + #[test] + fn update_response_tolerates_missing_latest_version() { + // Defensive: treat latest_version as optional in case the server omits it. + let body = serde_json::json!({ + "id": "ds_abc123", + "label": "x", + "table_name": "x", + "updated_at": "2026-04-28T18:30:00Z", + }); + let resp: UpdateResponse = serde_json::from_value(body).unwrap(); + assert!(resp.latest_version.is_none()); + } +}