|
| 1 | +// extract_k8s.c — K8s manifest and Kustomize file extractor. |
| 2 | +// |
| 3 | +// For CBM_LANG_KUSTOMIZE: walks top-level block_mapping_pair nodes whose key |
| 4 | +// matches "resources", "bases", "patches", "components", or |
| 5 | +// "patchesStrategicMerge", then emits one CBMImport per block_sequence item. |
| 6 | +// |
// For CBM_LANG_K8S: reads the kind and metadata.name scalars from the
// top-level block_mapping of the first document that provides both, and emits
// one CBMDefinition with label "Resource" and name "Kind/metadata-name".
| 10 | + |
| 11 | +#include "cbm.h" |
| 12 | +#include "arena.h" |
| 13 | +#include "helpers.h" |
| 14 | +#include "tree_sitter/api.h" |
| 15 | +#include <stdint.h> |
| 16 | +#include <stdio.h> |
| 17 | +#include <string.h> |
| 18 | + |
| 19 | +// --------------------------------------------------------------------------- |
| 20 | +// Internal helpers |
| 21 | +// --------------------------------------------------------------------------- |
| 22 | + |
| 23 | +// Return the raw source text for a scalar node (plain, single-quoted, or |
| 24 | +// double-quoted). Surrounding quote characters are stripped for quoted forms. |
| 25 | +// Returns NULL for non-scalar node types. |
| 26 | +static const char *get_scalar_text(CBMArena *a, TSNode node, const char *source) { |
| 27 | + const char *type = ts_node_type(node); |
| 28 | + if (strcmp(type, "plain_scalar") == 0) { |
| 29 | + return cbm_node_text(a, node, source); |
| 30 | + } |
| 31 | + if (strcmp(type, "double_quote_scalar") == 0 || strcmp(type, "single_quote_scalar") == 0) { |
| 32 | + const char *raw = cbm_node_text(a, node, source); |
| 33 | + if (!raw) { |
| 34 | + return NULL; |
| 35 | + } |
| 36 | + size_t len = strlen(raw); |
| 37 | + if (len >= 2) { |
| 38 | + return cbm_arena_strndup(a, raw + 1, len - 2); |
| 39 | + } |
| 40 | + return raw; |
| 41 | + } |
| 42 | + return NULL; |
| 43 | +} |
| 44 | + |
// Report whether `key` is one of the Kustomize field names whose value is a
// list this extractor walks. Returns 1 on an exact (case-sensitive) match and
// 0 otherwise. `key` must not be NULL.
static int is_kustomize_list_key(const char *key) {
    static const char *const list_keys[] = {
        "resources", "bases", "patches", "components", "patchesStrategicMerge",
    };
    const size_t key_count = sizeof(list_keys) / sizeof(list_keys[0]);

    for (size_t idx = 0; idx < key_count; idx++) {
        if (strcmp(key, list_keys[idx]) == 0) {
            return 1;
        }
    }
    return 0;
}
| 52 | + |
| 53 | +// --------------------------------------------------------------------------- |
| 54 | +// Kustomize extraction |
| 55 | +// --------------------------------------------------------------------------- |
| 56 | + |
| 57 | +// Walk a block_sequence node and emit one CBMImport per block_sequence_item |
| 58 | +// scalar child, using key_name as the local_name. |
| 59 | +static void emit_kustomize_sequence(CBMExtractCtx *ctx, TSNode seq_node, |
| 60 | + const char *key_name) { |
| 61 | + CBMArena *a = ctx->arena; |
| 62 | + uint32_t n = ts_node_child_count(seq_node); |
| 63 | + for (uint32_t i = 0; i < n; i++) { |
| 64 | + TSNode item = ts_node_child(seq_node, i); |
| 65 | + if (strcmp(ts_node_type(item), "block_sequence_item") != 0) { |
| 66 | + continue; |
| 67 | + } |
| 68 | + // block_sequence_item has one named child: the value |
| 69 | + uint32_t ic = ts_node_child_count(item); |
| 70 | + for (uint32_t j = 0; j < ic; j++) { |
| 71 | + TSNode val = ts_node_child(item, j); |
| 72 | + const char *scalar = get_scalar_text(a, val, ctx->source); |
| 73 | + if (!scalar) { |
| 74 | + continue; |
| 75 | + } |
| 76 | + CBMImport imp = { |
| 77 | + .local_name = cbm_arena_strdup(a, key_name), |
| 78 | + .module_path = cbm_arena_strdup(a, scalar), |
| 79 | + }; |
| 80 | + cbm_imports_push(&ctx->result->imports, a, imp); |
| 81 | + } |
| 82 | + } |
| 83 | +} |
| 84 | + |
| 85 | +static void extract_kustomize(CBMExtractCtx *ctx) { |
| 86 | + CBMArena *a = ctx->arena; |
| 87 | + |
| 88 | + // Traverse: stream -> document -> block_node -> block_mapping -> block_mapping_pair |
| 89 | + TSNode root = ctx->root; |
| 90 | + uint32_t root_n = ts_node_child_count(root); |
| 91 | + for (uint32_t si = 0; si < root_n; si++) { |
| 92 | + TSNode stream_child = ts_node_child(root, si); |
| 93 | + if (strcmp(ts_node_type(stream_child), "document") != 0) { |
| 94 | + continue; |
| 95 | + } |
| 96 | + // Find block_mapping inside the document (may be wrapped in block_node) |
| 97 | + TSNode mapping = ts_node_named_child(stream_child, 0); |
| 98 | + if (ts_node_is_null(mapping)) { |
| 99 | + continue; |
| 100 | + } |
| 101 | + // Some grammars wrap in block_node |
| 102 | + if (strcmp(ts_node_type(mapping), "block_node") == 0) { |
| 103 | + mapping = ts_node_named_child(mapping, 0); |
| 104 | + } |
| 105 | + if (ts_node_is_null(mapping) || strcmp(ts_node_type(mapping), "block_mapping") != 0) { |
| 106 | + continue; |
| 107 | + } |
| 108 | + |
| 109 | + uint32_t pair_n = ts_node_child_count(mapping); |
| 110 | + for (uint32_t pi = 0; pi < pair_n; pi++) { |
| 111 | + TSNode pair = ts_node_child(mapping, pi); |
| 112 | + if (strcmp(ts_node_type(pair), "block_mapping_pair") != 0) { |
| 113 | + continue; |
| 114 | + } |
| 115 | + |
| 116 | + // First named child = key |
| 117 | + TSNode key_node = ts_node_named_child(pair, 0); |
| 118 | + if (ts_node_is_null(key_node)) { |
| 119 | + continue; |
| 120 | + } |
| 121 | + const char *key_text = get_scalar_text(a, key_node, ctx->source); |
| 122 | + if (!key_text || !is_kustomize_list_key(key_text)) { |
| 123 | + continue; |
| 124 | + } |
| 125 | + |
| 126 | + // Second named child = value (should be a block_sequence or block_node wrapping one) |
| 127 | + TSNode val_node = ts_node_named_child(pair, 1); |
| 128 | + if (ts_node_is_null(val_node)) { |
| 129 | + continue; |
| 130 | + } |
| 131 | + if (strcmp(ts_node_type(val_node), "block_node") == 0) { |
| 132 | + val_node = ts_node_named_child(val_node, 0); |
| 133 | + } |
| 134 | + if (ts_node_is_null(val_node) || |
| 135 | + strcmp(ts_node_type(val_node), "block_sequence") != 0) { |
| 136 | + continue; |
| 137 | + } |
| 138 | + |
| 139 | + emit_kustomize_sequence(ctx, val_node, key_text); |
| 140 | + } |
| 141 | + } |
| 142 | +} |
| 143 | + |
| 144 | +// --------------------------------------------------------------------------- |
| 145 | +// K8s manifest extraction |
| 146 | +// --------------------------------------------------------------------------- |
| 147 | + |
| 148 | +// Descend into the first block_mapping of a document and extract apiVersion, |
| 149 | +// kind, and metadata.name. Returns void; fills kind_buf and meta_name_buf. |
| 150 | +static void extract_k8s_scalars(CBMExtractCtx *ctx, TSNode mapping, |
| 151 | + char *kind_buf, size_t kind_sz, |
| 152 | + char *meta_name_buf, size_t meta_sz) { |
| 153 | + CBMArena *a = ctx->arena; |
| 154 | + kind_buf[0] = '\0'; |
| 155 | + meta_name_buf[0] = '\0'; |
| 156 | + |
| 157 | + uint32_t n = ts_node_child_count(mapping); |
| 158 | + for (uint32_t i = 0; i < n; i++) { |
| 159 | + TSNode pair = ts_node_child(mapping, i); |
| 160 | + if (strcmp(ts_node_type(pair), "block_mapping_pair") != 0) { |
| 161 | + continue; |
| 162 | + } |
| 163 | + TSNode key_node = ts_node_named_child(pair, 0); |
| 164 | + if (ts_node_is_null(key_node)) { |
| 165 | + continue; |
| 166 | + } |
| 167 | + const char *key = get_scalar_text(a, key_node, ctx->source); |
| 168 | + if (!key) { |
| 169 | + continue; |
| 170 | + } |
| 171 | + |
| 172 | + TSNode val_node = ts_node_named_child(pair, 1); |
| 173 | + if (ts_node_is_null(val_node)) { |
| 174 | + continue; |
| 175 | + } |
| 176 | + // Unwrap block_node if present |
| 177 | + if (strcmp(ts_node_type(val_node), "block_node") == 0) { |
| 178 | + val_node = ts_node_named_child(val_node, 0); |
| 179 | + } |
| 180 | + if (ts_node_is_null(val_node)) { |
| 181 | + continue; |
| 182 | + } |
| 183 | + |
| 184 | + if (strcmp(key, "kind") == 0) { |
| 185 | + const char *v = get_scalar_text(a, val_node, ctx->source); |
| 186 | + if (v) { |
| 187 | + snprintf(kind_buf, kind_sz, "%s", v); |
| 188 | + } |
| 189 | + } else if (strcmp(key, "metadata") == 0) { |
| 190 | + // Descend into metadata block_mapping to find "name" |
| 191 | + TSNode meta_mapping = val_node; |
| 192 | + if (strcmp(ts_node_type(meta_mapping), "block_node") == 0) { |
| 193 | + meta_mapping = ts_node_named_child(meta_mapping, 0); |
| 194 | + } |
| 195 | + if (ts_node_is_null(meta_mapping) || |
| 196 | + strcmp(ts_node_type(meta_mapping), "block_mapping") != 0) { |
| 197 | + continue; |
| 198 | + } |
| 199 | + uint32_t mn = ts_node_child_count(meta_mapping); |
| 200 | + for (uint32_t mi = 0; mi < mn; mi++) { |
| 201 | + TSNode mpair = ts_node_child(meta_mapping, mi); |
| 202 | + if (strcmp(ts_node_type(mpair), "block_mapping_pair") != 0) { |
| 203 | + continue; |
| 204 | + } |
| 205 | + TSNode mkey = ts_node_named_child(mpair, 0); |
| 206 | + if (ts_node_is_null(mkey)) { |
| 207 | + continue; |
| 208 | + } |
| 209 | + const char *mkey_text = get_scalar_text(a, mkey, ctx->source); |
| 210 | + if (!mkey_text || strcmp(mkey_text, "name") != 0) { |
| 211 | + continue; |
| 212 | + } |
| 213 | + TSNode mval = ts_node_named_child(mpair, 1); |
| 214 | + if (ts_node_is_null(mval)) { |
| 215 | + continue; |
| 216 | + } |
| 217 | + const char *meta_name = get_scalar_text(a, mval, ctx->source); |
| 218 | + if (meta_name) { |
| 219 | + snprintf(meta_name_buf, meta_sz, "%s", meta_name); |
| 220 | + } |
| 221 | + } |
| 222 | + } |
| 223 | + } |
| 224 | +} |
| 225 | + |
| 226 | +static void extract_k8s_manifest(CBMExtractCtx *ctx) { |
| 227 | + CBMArena *a = ctx->arena; |
| 228 | + |
| 229 | + TSNode root = ctx->root; |
| 230 | + uint32_t root_n = ts_node_child_count(root); |
| 231 | + for (uint32_t si = 0; si < root_n; si++) { |
| 232 | + TSNode stream_child = ts_node_child(root, si); |
| 233 | + if (strcmp(ts_node_type(stream_child), "document") != 0) { |
| 234 | + continue; |
| 235 | + } |
| 236 | + |
| 237 | + TSNode mapping = ts_node_named_child(stream_child, 0); |
| 238 | + if (ts_node_is_null(mapping)) { |
| 239 | + continue; |
| 240 | + } |
| 241 | + if (strcmp(ts_node_type(mapping), "block_node") == 0) { |
| 242 | + mapping = ts_node_named_child(mapping, 0); |
| 243 | + } |
| 244 | + if (ts_node_is_null(mapping) || strcmp(ts_node_type(mapping), "block_mapping") != 0) { |
| 245 | + continue; |
| 246 | + } |
| 247 | + |
| 248 | + char kind_buf[256] = {0}; |
| 249 | + char meta_name_buf[256] = {0}; |
| 250 | + extract_k8s_scalars(ctx, mapping, kind_buf, sizeof(kind_buf), meta_name_buf, |
| 251 | + sizeof(meta_name_buf)); |
| 252 | + |
| 253 | + // Skip malformed manifests (no kind or no metadata.name) |
| 254 | + if (kind_buf[0] == '\0' || meta_name_buf[0] == '\0') { |
| 255 | + continue; |
| 256 | + } |
| 257 | + |
| 258 | + char def_name[512]; |
| 259 | + snprintf(def_name, sizeof(def_name), "%s/%s", kind_buf, meta_name_buf); |
| 260 | + |
| 261 | + CBMDefinition def = {0}; |
| 262 | + def.name = cbm_arena_strdup(a, def_name); |
| 263 | + def.qualified_name = cbm_arena_sprintf(a, "%s.%s", ctx->module_qn, def_name); |
| 264 | + def.label = "Resource"; |
| 265 | + def.file_path = ctx->rel_path; |
| 266 | + def.start_line = ts_node_start_point(mapping).row + 1; |
| 267 | + def.end_line = ts_node_end_point(mapping).row + 1; |
| 268 | + cbm_defs_push(&ctx->result->defs, a, def); |
| 269 | + |
| 270 | + break; // Only the first document per file |
| 271 | + } |
| 272 | +} |
| 273 | + |
| 274 | +// --------------------------------------------------------------------------- |
| 275 | +// Public entry point |
| 276 | +// --------------------------------------------------------------------------- |
| 277 | + |
| 278 | +void cbm_extract_k8s(CBMExtractCtx *ctx) { |
| 279 | + if (ctx->language == CBM_LANG_KUSTOMIZE) { |
| 280 | + extract_kustomize(ctx); |
| 281 | + } else if (ctx->language == CBM_LANG_K8S) { |
| 282 | + extract_k8s_manifest(ctx); |
| 283 | + } |
| 284 | +} |
0 commit comments