Skip to content

Commit b2b48f8

Browse files
author
test
committed
Add 89 new tree-sitter grammars (66 to 155 languages)
Vendor, wire up, and compile 89 new tree-sitter grammars, expanding language support from 66 to 155 languages. All grammars pass the security audit (no dangerous patterns in scanner.c files).

New programming languages (38): Solidity, Typst, GDScript, Gleam, PowerShell, Pascal, D, Nim, Scheme, Fennel, Fish, AWK, Zsh, Tcl, Ada, Agda, Racket, Odin, ReScript, PureScript, Nickel, Crystal, Teal, Hare, Pony, Luau, Janet, Sway, NASM, Assembly, TLA+, Pkl, Cairo, Move, Squirrel, ISPC, FunC, Smali

New config/data/IDL formats (51): Just, Astro, Blade, Go Template, Templ, Liquid, Jinja2, Prisma, Hyprlang, DotEnv, Diff, WGSL, KDL, JSON5, Jsonnet, RON, Thrift, Cap'n Proto, Properties, SSH Config, BibTeX, Starlark, Bicep, CSV, Requirements, HLSL, VHDL, SystemVerilog, DeviceTree, Linker Script, GN, Kconfig, BitBake, TableGen, Slang, LLVM IR, Smithy, WIT, Go Mod, Mermaid, RST, Beancount, Puppet, PO, Regex, JSDoc, gitattributes, gitignore, Apex, SOQL, SOSL

Infrastructure:
- scripts/new-languages.json: manifest for all new languages
- scripts/generate-lang-code.py: generates boilerplate from manifest
- scripts/audit-grammar-security.sh: pre-vendoring security scanner
- Fixed angle-bracket includes in 18 grammars
- Fixed PureScript scanner const mismatch
- Fixed VHDL scanner void* API signatures
- Fixed RST tree_sitter_rst/ subdirectory include paths
- Copied crystal unicode.c (extra scanner dependency)

All new languages start with minimal lang specs (module_types only). Function/class/call extraction specs will be refined incrementally.
1 parent c6fc68f commit b2b48f8

584 files changed

Lines changed: 13424608 additions & 2 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

internal/cbm/cbm.h

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,95 @@ typedef enum {
7575
CBM_LANG_FORM,
7676
CBM_LANG_MAGMA,
7777
CBM_LANG_WOLFRAM,
78+
CBM_LANG_SOLIDITY,
79+
CBM_LANG_TYPST,
80+
CBM_LANG_GDSCRIPT,
81+
CBM_LANG_GLEAM,
82+
CBM_LANG_POWERSHELL,
83+
CBM_LANG_PASCAL,
84+
CBM_LANG_DLANG,
85+
CBM_LANG_NIM,
86+
CBM_LANG_SCHEME,
87+
CBM_LANG_FENNEL,
88+
CBM_LANG_FISH,
89+
CBM_LANG_AWK,
90+
CBM_LANG_ZSH,
91+
CBM_LANG_TCL,
92+
CBM_LANG_ADA,
93+
CBM_LANG_AGDA,
94+
CBM_LANG_RACKET,
95+
CBM_LANG_ODIN,
96+
CBM_LANG_RESCRIPT,
97+
CBM_LANG_PURESCRIPT,
98+
CBM_LANG_NICKEL,
99+
CBM_LANG_CRYSTAL,
100+
CBM_LANG_TEAL,
101+
CBM_LANG_HARE,
102+
CBM_LANG_PONY,
103+
CBM_LANG_LUAU,
104+
CBM_LANG_JANET,
105+
CBM_LANG_SWAY,
106+
CBM_LANG_NASM,
107+
CBM_LANG_ASSEMBLY,
108+
CBM_LANG_ASTRO,
109+
CBM_LANG_BLADE,
110+
CBM_LANG_JUST,
111+
CBM_LANG_GOTEMPLATE,
112+
CBM_LANG_TEMPL,
113+
CBM_LANG_LIQUID,
114+
CBM_LANG_JINJA2,
115+
CBM_LANG_PRISMA,
116+
CBM_LANG_HYPRLANG,
117+
CBM_LANG_DOTENV,
118+
CBM_LANG_DIFF,
119+
CBM_LANG_WGSL,
120+
CBM_LANG_KDL,
121+
CBM_LANG_JSON5,
122+
CBM_LANG_JSONNET,
123+
CBM_LANG_RON,
124+
CBM_LANG_THRIFT,
125+
CBM_LANG_CAPNP,
126+
CBM_LANG_PROPERTIES,
127+
CBM_LANG_SSHCONFIG,
128+
CBM_LANG_BIBTEX,
129+
CBM_LANG_STARLARK,
130+
CBM_LANG_BICEP,
131+
CBM_LANG_CSV,
132+
CBM_LANG_REQUIREMENTS,
133+
CBM_LANG_HLSL,
134+
CBM_LANG_VHDL,
135+
CBM_LANG_SYSTEMVERILOG,
136+
CBM_LANG_DEVICETREE,
137+
CBM_LANG_LINKERSCRIPT,
138+
CBM_LANG_GN,
139+
CBM_LANG_KCONFIG,
140+
CBM_LANG_BITBAKE,
141+
CBM_LANG_SMALI,
142+
CBM_LANG_TABLEGEN,
143+
CBM_LANG_ISPC,
144+
CBM_LANG_CAIRO,
145+
CBM_LANG_MOVE,
146+
CBM_LANG_SQUIRREL,
147+
CBM_LANG_FUNC,
148+
CBM_LANG_REGEX,
149+
CBM_LANG_JSDOC,
150+
CBM_LANG_RST,
151+
CBM_LANG_BEANCOUNT,
152+
CBM_LANG_MERMAID,
153+
CBM_LANG_PUPPET,
154+
CBM_LANG_PO,
155+
CBM_LANG_GITATTRIBUTES,
156+
CBM_LANG_GITIGNORE,
157+
CBM_LANG_SLANG,
158+
CBM_LANG_LLVM_IR,
159+
CBM_LANG_SMITHY,
160+
CBM_LANG_WIT,
161+
CBM_LANG_TLAPLUS,
162+
CBM_LANG_PKL,
163+
CBM_LANG_GOMOD,
164+
CBM_LANG_APEX,
165+
CBM_LANG_SOQL,
166+
CBM_LANG_SOSL,
78167
CBM_LANG_KUSTOMIZE, // kustomization.yaml — Kubernetes overlay tool
79168
CBM_LANG_K8S, // Generic Kubernetes manifest (apiVersion: detected)
80169
CBM_LANG_COUNT

internal/cbm/grammar_ada.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
// Vendored tree-sitter grammar: ada
2+
// Each grammar compiled as separate unit (conflicting static symbols).
3+
#include "vendored/grammars/ada/parser.c"

internal/cbm/grammar_agda.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
// Vendored tree-sitter grammar: agda
2+
// Each grammar compiled as separate unit (conflicting static symbols).
3+
#include "vendored/grammars/agda/parser.c"
4+
#include "vendored/grammars/agda/scanner.c"

internal/cbm/grammar_apex.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
// Vendored tree-sitter grammar: apex
2+
// Each grammar compiled as separate unit (conflicting static symbols).
3+
#include "vendored/grammars/apex/parser.c"

internal/cbm/grammar_assembly.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
// Vendored tree-sitter grammar: assembly
2+
// Each grammar compiled as separate unit (conflicting static symbols).
3+
#include "vendored/grammars/assembly/parser.c"

internal/cbm/grammar_astro.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
// Vendored tree-sitter grammar: astro
2+
// Each grammar compiled as separate unit (conflicting static symbols).
3+
#include "vendored/grammars/astro/parser.c"
4+
#include "vendored/grammars/astro/scanner.c"

internal/cbm/grammar_awk.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
// Vendored tree-sitter grammar: awk
2+
// Each grammar compiled as separate unit (conflicting static symbols).
3+
#include "vendored/grammars/awk/parser.c"
4+
#include "vendored/grammars/awk/scanner.c"

internal/cbm/grammar_beancount.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
// Vendored tree-sitter grammar: beancount
2+
// Each grammar compiled as separate unit (conflicting static symbols).
3+
#include "vendored/grammars/beancount/parser.c"
4+
#include "vendored/grammars/beancount/scanner.c"

internal/cbm/grammar_bibtex.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
// Vendored tree-sitter grammar: bibtex
2+
// Each grammar compiled as separate unit (conflicting static symbols).
3+
#include "vendored/grammars/bibtex/parser.c"

internal/cbm/grammar_bicep.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
// Vendored tree-sitter grammar: bicep
2+
// Each grammar compiled as separate unit (conflicting static symbols).
3+
#include "vendored/grammars/bicep/parser.c"
4+
#include "vendored/grammars/bicep/scanner.c"

0 commit comments

Comments
 (0)