|
| 1 | +use anyhow::Result; |
| 2 | +use serde_yaml::Value; |
| 3 | +use std::collections::HashSet; |
| 4 | +use std::fs::File; |
| 5 | +use std::io::BufReader; |
| 6 | + |
| 7 | +use crate::aggregation_reference::AggregationReference; |
| 8 | +use crate::enums::{CleanupPolicy, QueryLanguage}; |
| 9 | +use crate::promql_schema::PromQLSchema; |
| 10 | +use crate::query_config::QueryConfig; |
| 11 | +use promql_utilities::data_model::KeyByLabelNames; |
| 12 | +use sql_utilities::sqlhelper::{SQLSchema, Table}; |
| 13 | + |
| 14 | +/// Schema configuration that can be either PromQL or SQL format |
| 15 | +#[derive(Debug, Clone)] |
| 16 | +pub enum SchemaConfig { |
| 17 | + PromQL(PromQLSchema), |
| 18 | + SQL(SQLSchema), |
| 19 | + ElasticQueryDSL, |
| 20 | + ElasticSQL, |
| 21 | +} |
| 22 | + |
| 23 | +#[derive(Debug, Clone)] |
| 24 | +pub struct InferenceConfig { |
| 25 | + pub schema: SchemaConfig, |
| 26 | + pub query_configs: Vec<QueryConfig>, |
| 27 | + pub cleanup_policy: CleanupPolicy, |
| 28 | +} |
| 29 | + |
| 30 | +impl InferenceConfig { |
| 31 | + pub fn new(query_language: QueryLanguage, cleanup_policy: CleanupPolicy) -> Self { |
| 32 | + let schema = match query_language { |
| 33 | + QueryLanguage::promql => SchemaConfig::PromQL(PromQLSchema::new()), |
| 34 | + QueryLanguage::sql => SchemaConfig::SQL(SQLSchema::new(Vec::new())), |
| 35 | + QueryLanguage::elastic_querydsl => SchemaConfig::ElasticQueryDSL, |
| 36 | + QueryLanguage::elastic_sql => SchemaConfig::ElasticSQL, |
| 37 | + }; |
| 38 | + Self { |
| 39 | + schema, |
| 40 | + query_configs: Vec::new(), |
| 41 | + cleanup_policy, |
| 42 | + } |
| 43 | + } |
| 44 | + |
| 45 | + pub fn from_yaml_file(yaml_file: &str, query_language: QueryLanguage) -> Result<Self> { |
| 46 | + let file = File::open(yaml_file)?; |
| 47 | + let reader = BufReader::new(file); |
| 48 | + let data: Value = serde_yaml::from_reader(reader)?; |
| 49 | + |
| 50 | + Self::from_yaml_data(&data, query_language) |
| 51 | + } |
| 52 | + |
| 53 | + pub fn from_yaml_data(data: &Value, query_language: QueryLanguage) -> Result<Self> { |
| 54 | + let schema = match query_language { |
| 55 | + QueryLanguage::promql => { |
| 56 | + let promql_schema = Self::parse_promql_schema(data)?; |
| 57 | + SchemaConfig::PromQL(promql_schema) |
| 58 | + } |
| 59 | + QueryLanguage::sql => { |
| 60 | + let sql_schema = Self::parse_sql_schema(data)?; |
| 61 | + SchemaConfig::SQL(sql_schema) |
| 62 | + } |
| 63 | + QueryLanguage::elastic_querydsl => SchemaConfig::ElasticQueryDSL, |
| 64 | + QueryLanguage::elastic_sql => SchemaConfig::ElasticSQL, |
| 65 | + }; |
| 66 | + |
| 67 | + let cleanup_policy = Self::parse_cleanup_policy(data)?; |
| 68 | + let query_configs = Self::parse_query_configs(data, cleanup_policy)?; |
| 69 | + |
| 70 | + Ok(Self { |
| 71 | + schema, |
| 72 | + query_configs, |
| 73 | + cleanup_policy, |
| 74 | + }) |
| 75 | + } |
| 76 | + |
| 77 | + /// Parse PromQL schema from YAML data (metrics: key) |
| 78 | + fn parse_promql_schema(data: &Value) -> Result<PromQLSchema> { |
| 79 | + let mut promql_schema = PromQLSchema::new(); |
| 80 | + if let Some(metrics) = data.get("metrics") { |
| 81 | + if let Some(metrics_map) = metrics.as_mapping() { |
| 82 | + for (metric_name_val, labels_val) in metrics_map { |
| 83 | + if let (Some(metric_name), Some(labels_seq)) = |
| 84 | + (metric_name_val.as_str(), labels_val.as_sequence()) |
| 85 | + { |
| 86 | + let labels: Vec<String> = labels_seq |
| 87 | + .iter() |
| 88 | + .filter_map(|v| v.as_str()) |
| 89 | + .map(|s| s.to_string()) |
| 90 | + .collect(); |
| 91 | + let key_by_label_names = KeyByLabelNames::new(labels); |
| 92 | + promql_schema = |
| 93 | + promql_schema.add_metric(metric_name.to_string(), key_by_label_names); |
| 94 | + } |
| 95 | + } |
| 96 | + } |
| 97 | + } |
| 98 | + Ok(promql_schema) |
| 99 | + } |
| 100 | + |
| 101 | + /// Parse SQL schema from YAML data (tables: key at top level, matching ArroyoSketch format) |
| 102 | + fn parse_sql_schema(data: &Value) -> Result<SQLSchema> { |
| 103 | + let tables_data = data |
| 104 | + .get("tables") |
| 105 | + .and_then(|v| v.as_sequence()) |
| 106 | + .ok_or_else(|| { |
| 107 | + anyhow::anyhow!("Missing or invalid tables field for SQL query language") |
| 108 | + })?; |
| 109 | + |
| 110 | + let mut tables = Vec::new(); |
| 111 | + for table_data in tables_data { |
| 112 | + let name = table_data |
| 113 | + .get("name") |
| 114 | + .and_then(|v| v.as_str()) |
| 115 | + .ok_or_else(|| anyhow::anyhow!("Missing name field in table"))? |
| 116 | + .to_string(); |
| 117 | + |
| 118 | + let time_column = table_data |
| 119 | + .get("time_column") |
| 120 | + .and_then(|v| v.as_str()) |
| 121 | + .ok_or_else(|| anyhow::anyhow!("Missing time_column field in table {}", name))? |
| 122 | + .to_string(); |
| 123 | + |
| 124 | + let value_columns: HashSet<String> = table_data |
| 125 | + .get("value_columns") |
| 126 | + .and_then(|v| v.as_sequence()) |
| 127 | + .ok_or_else(|| anyhow::anyhow!("Missing value_columns field in table {}", name))? |
| 128 | + .iter() |
| 129 | + .filter_map(|v| v.as_str()) |
| 130 | + .map(|s| s.to_string()) |
| 131 | + .collect(); |
| 132 | + |
| 133 | + let metadata_columns: HashSet<String> = table_data |
| 134 | + .get("metadata_columns") |
| 135 | + .and_then(|v| v.as_sequence()) |
| 136 | + .ok_or_else(|| anyhow::anyhow!("Missing metadata_columns field in table {}", name))? |
| 137 | + .iter() |
| 138 | + .filter_map(|v| v.as_str()) |
| 139 | + .map(|s| s.to_string()) |
| 140 | + .collect(); |
| 141 | + |
| 142 | + tables.push(Table::new( |
| 143 | + name, |
| 144 | + time_column, |
| 145 | + value_columns, |
| 146 | + metadata_columns, |
| 147 | + )); |
| 148 | + } |
| 149 | + |
| 150 | + Ok(SQLSchema::new(tables)) |
| 151 | + } |
| 152 | + |
| 153 | + /// Parse cleanup policy from YAML data. Errors if not specified. |
| 154 | + fn parse_cleanup_policy(data: &Value) -> Result<CleanupPolicy> { |
| 155 | + let cleanup_policy_data = data.get("cleanup_policy").ok_or_else(|| { |
| 156 | + anyhow::anyhow!( |
| 157 | + "Missing cleanup_policy section in inference_config.yaml. \ |
| 158 | + Must specify cleanup_policy.name as one of: circular_buffer, read_based, no_cleanup" |
| 159 | + ) |
| 160 | + })?; |
| 161 | + |
| 162 | + let name = cleanup_policy_data |
| 163 | + .get("name") |
| 164 | + .and_then(|v| v.as_str()) |
| 165 | + .ok_or_else(|| { |
| 166 | + anyhow::anyhow!( |
| 167 | + "Missing cleanup_policy.name in inference_config.yaml. \ |
| 168 | + Must be one of: circular_buffer, read_based, no_cleanup" |
| 169 | + ) |
| 170 | + })?; |
| 171 | + |
| 172 | + match name { |
| 173 | + "circular_buffer" => Ok(CleanupPolicy::CircularBuffer), |
| 174 | + "read_based" => Ok(CleanupPolicy::ReadBased), |
| 175 | + "no_cleanup" => Ok(CleanupPolicy::NoCleanup), |
| 176 | + _ => Err(anyhow::anyhow!( |
| 177 | + "Invalid cleanup policy: '{}'. Valid options: circular_buffer, read_based, no_cleanup", |
| 178 | + name |
| 179 | + )), |
| 180 | + } |
| 181 | + } |
| 182 | + |
| 183 | + fn parse_query_configs( |
| 184 | + data: &Value, |
| 185 | + cleanup_policy: CleanupPolicy, |
| 186 | + ) -> Result<Vec<QueryConfig>> { |
| 187 | + let query_configs = if let Some(queries) = data.get("queries").and_then(|v| v.as_sequence()) |
| 188 | + { |
| 189 | + let mut configs = Vec::new(); |
| 190 | + for query_data in queries { |
| 191 | + let query = query_data |
| 192 | + .get("query") |
| 193 | + .and_then(|v| v.as_str()) |
| 194 | + .ok_or_else(|| anyhow::anyhow!("Missing query field"))? |
| 195 | + .to_string(); |
| 196 | + |
| 197 | + let aggregations = if let Some(aggregations_data) = |
| 198 | + query_data.get("aggregations").and_then(|v| v.as_sequence()) |
| 199 | + { |
| 200 | + let mut agg_refs = Vec::new(); |
| 201 | + for agg_data in aggregations_data { |
| 202 | + let aggregation_id = agg_data |
| 203 | + .get("aggregation_id") |
| 204 | + .and_then(|v| v.as_u64()) |
| 205 | + .ok_or_else(|| { |
| 206 | + anyhow::anyhow!("Missing aggregation_id in aggregation") |
| 207 | + })?; |
| 208 | + |
| 209 | + let agg_ref = match cleanup_policy { |
| 210 | + CleanupPolicy::CircularBuffer => { |
| 211 | + let num_aggregates_to_retain = agg_data |
| 212 | + .get("num_aggregates_to_retain") |
| 213 | + .and_then(|v| v.as_u64()); |
| 214 | + AggregationReference::new(aggregation_id, num_aggregates_to_retain) |
| 215 | + } |
| 216 | + CleanupPolicy::ReadBased => { |
| 217 | + let read_count_threshold = agg_data |
| 218 | + .get("read_count_threshold") |
| 219 | + .and_then(|v| v.as_u64()); |
| 220 | + AggregationReference::with_read_count_threshold( |
| 221 | + aggregation_id, |
| 222 | + read_count_threshold, |
| 223 | + ) |
| 224 | + } |
| 225 | + CleanupPolicy::NoCleanup => { |
| 226 | + AggregationReference::new(aggregation_id, None) |
| 227 | + } |
| 228 | + }; |
| 229 | + agg_refs.push(agg_ref); |
| 230 | + } |
| 231 | + agg_refs |
| 232 | + } else { |
| 233 | + Vec::new() |
| 234 | + }; |
| 235 | + |
| 236 | + let config = QueryConfig::new(query).with_aggregations(aggregations); |
| 237 | + configs.push(config); |
| 238 | + } |
| 239 | + configs |
| 240 | + } else { |
| 241 | + Vec::new() |
| 242 | + }; |
| 243 | + Ok(query_configs) |
| 244 | + } |
| 245 | +} |
0 commit comments