Skip to content

Commit 680c63b

Browse files
fix(sanitize): preserve angle brackets inside code blocks and inline code
bluemonday's StrictPolicy treats angle brackets inside markdown code blocks and inline code spans as HTML tags and strips them. This causes content like `mut_raw_ptr<int>` to become `mut_raw_ptr` when read through MCP issue/PR endpoints. The fix protects angle brackets inside fenced code blocks (```) and inline code spans (`) with sentinels before HTML sanitization, then restores them after. Angle brackets outside code are still sanitized normally, preserving XSS protection. Fixes #2202 Signed-off-by: Dayna Blackwell <dayna@blackwell-systems.com>
1 parent 4bded57 commit 680c63b

2 files changed

Lines changed: 205 additions & 1 deletion

File tree

pkg/sanitize/sanitize.go

Lines changed: 159 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,14 @@ var policy *bluemonday.Policy
1212
var policyOnce sync.Once
1313

1414
func Sanitize(input string) string {
15-
return FilterHTMLTags(FilterCodeFenceMetadata(FilterInvisibleCharacters(input)))
15+
cleaned := FilterCodeFenceMetadata(FilterInvisibleCharacters(input))
16+
// Protect angle brackets inside code blocks and inline code spans
17+
// from being stripped by the HTML sanitizer. bluemonday treats <int>,
18+
// <T>, etc. as unknown HTML tags and removes them.
19+
// See https://github.com/github/github-mcp-server/issues/2202
20+
protected := protectCodeAngleBrackets(cleaned)
21+
sanitized := FilterHTMLTags(protected)
22+
return restoreCodeAngleBrackets(sanitized)
1623
}
1724

1825
// FilterInvisibleCharacters removes invisible or control characters that should not appear
@@ -145,6 +152,157 @@ func isSafeCodeFenceToken(token string) bool {
145152
return true
146153
}
147154

155+
// Sentinels that stand in for angle brackets inside code while the HTML
// sanitizer runs. Both are delimited by NUL bytes. protectCodeAngleBrackets
// replaces every NUL already present in its input, so a sentinel that is
// still present after sanitization can only have been inserted by this
// package and never forged by the author of the content.
// NOTE(review): this relies on the HTML sanitizer passing NUL bytes in text
// through unchanged — confirm if the sanitizer is upgraded or replaced.
const (
	ltSentinel = "\x00LT\x00"
	gtSentinel = "\x00GT\x00"
)

// protectCodeAngleBrackets replaces < and > inside fenced code blocks and
// inline code spans with sentinels so the HTML sanitizer does not strip them
// as tags. Text outside code is returned unchanged apart from NUL bytes,
// which are replaced by U+FFFD.
//
// Code is recognised with a conservative subset of the CommonMark rules:
//
//   - A fenced block opens on a line whose first non-prefix characters are
//     three or more backticks and whose info string contains no backtick. It
//     closes on a line holding only a backtick run at least as long as the
//     opening one. An unclosed fence extends to the end of the input.
//   - Spaces, tabs and block quote markers (>) may precede a fence.
//   - An inline code span is delimited by two backtick runs of exactly the
//     same length and never crosses a blank line or a fence line.
//   - A backtick preceded by an odd number of backslashes is a literal
//     character, not a delimiter.
//
// Anything that does not match these rules is left for the HTML sanitizer.
func protectCodeAngleBrackets(input string) string {
	// NUL has no legitimate use in Markdown (CommonMark replaces it with
	// U+FFFD). Replacing it here guarantees the input cannot already contain
	// a sentinel that restoreCodeAngleBrackets would turn into a bracket.
	input = strings.ReplaceAll(input, "\x00", "\uFFFD")

	var b strings.Builder
	b.Grow(len(input))

	fenceLen := 0  // length of the opening fence while inside a fenced block, else 0
	textStart := 0 // start of the text outside fences that has not been written yet
	for pos := 0; pos < len(input); {
		next := len(input)
		if nl := strings.IndexByte(input[pos:], '\n'); nl >= 0 {
			next = pos + nl + 1
		}
		line := input[pos:next]
		run, rest := splitFenceLine(line)

		switch {
		case fenceLen > 0:
			if run >= fenceLen && isBlankLine(rest) {
				// Closing fence: nothing but backticks, copy verbatim.
				b.WriteString(line)
				fenceLen = 0
				textStart = next
			} else {
				writeWithSentinels(&b, line)
			}
		case run >= 3 && !strings.Contains(rest, "`"):
			// Opening fence: flush the pending text, then copy the fence
			// line (including its info string) verbatim.
			protectInlineCodeSpans(&b, input[textStart:pos])
			b.WriteString(line)
			fenceLen = run
		case isBlankLine(line):
			// A blank line ends the paragraph, and with it any code span.
			protectInlineCodeSpans(&b, input[textStart:next])
			textStart = next
		}
		pos = next
	}
	if fenceLen == 0 {
		protectInlineCodeSpans(&b, input[textStart:])
	}

	return b.String()
}

// protectInlineCodeSpans writes text to b, replacing angle brackets with
// sentinels inside every inline code span. Backtick runs without a matching
// closing run are written as literal text.
func protectInlineCodeSpans(b *strings.Builder, text string) {
	for len(text) > 0 {
		open := strings.IndexByte(text, '`')
		if open < 0 {
			b.WriteString(text)
			return
		}
		// An odd number of backslashes escapes the first backtick of the
		// run, so that backtick is literal and the run starts one later.
		if trailingBackslashCount(text[:open])%2 == 1 {
			open++
		}
		b.WriteString(text[:open])
		text = text[open:]

		n := backtickRunLen(text)
		if n == 0 {
			continue // the only backtick in the run was escaped
		}
		delim, body := text[:n], text[n:]
		b.WriteString(delim)

		end := indexBacktickRun(body, n)
		if end < 0 {
			// No closing run of the same length: the backticks are literal.
			text = body
			continue
		}
		writeWithSentinels(b, body[:end])
		b.WriteString(delim)
		text = body[end+n:]
	}
}

// writeWithSentinels writes code to b with every < and > replaced by its
// sentinel.
func writeWithSentinels(b *strings.Builder, code string) {
	for {
		i := strings.IndexAny(code, "<>")
		if i < 0 {
			b.WriteString(code)
			return
		}
		b.WriteString(code[:i])
		if code[i] == '<' {
			b.WriteString(ltSentinel)
		} else {
			b.WriteString(gtSentinel)
		}
		code = code[i+1:]
	}
}

// splitFenceLine skips the spaces, tabs and block quote markers at the start
// of line and returns the length of the backtick run that follows together
// with the remainder of the line after that run.
func splitFenceLine(line string) (int, string) {
	marker := strings.TrimLeft(line, " \t>")
	run := backtickRunLen(marker)
	return run, marker[run:]
}

// indexBacktickRun returns the index in s of the first maximal run of
// backticks whose length is exactly n, or -1 if there is none.
func indexBacktickRun(s string, n int) int {
	for i := 0; i < len(s); {
		tick := strings.IndexByte(s[i:], '`')
		if tick < 0 {
			return -1
		}
		start := i + tick
		runLen := backtickRunLen(s[start:])
		if runLen == n {
			return start
		}
		i = start + runLen
	}
	return -1
}

// backtickRunLen returns the number of backticks at the start of s.
func backtickRunLen(s string) int {
	n := 0
	for n < len(s) && s[n] == '`' {
		n++
	}
	return n
}

// trailingBackslashCount returns the number of backslashes at the end of s.
func trailingBackslashCount(s string) int {
	n := 0
	for n < len(s) && s[len(s)-1-n] == '\\' {
		n++
	}
	return n
}

// isBlankLine reports whether s contains nothing but spaces, tabs and line
// terminators.
func isBlankLine(s string) bool {
	return strings.Trim(s, " \t\r\n") == ""
}

// restoreCodeAngleBrackets converts the sentinels written by
// protectCodeAngleBrackets back to angle brackets.
func restoreCodeAngleBrackets(input string) string {
	s := strings.ReplaceAll(input, ltSentinel, "<")
	return strings.ReplaceAll(s, gtSentinel, ">")
}
305+
148306
func getPolicy() *bluemonday.Policy {
149307
policyOnce.Do(func() {
150308
p := bluemonday.StrictPolicy()

pkg/sanitize/sanitize_test.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,3 +300,49 @@ func TestSanitizeRemovesInvisibleCodeFenceMetadata(t *testing.T) {
300300
result := Sanitize(input)
301301
assert.Equal(t, expected, result)
302302
}
303+
304+
func TestSanitizePreservesAngleBracketsInCodeBlocks(t *testing.T) {
305+
tests := []struct {
306+
name string
307+
input string
308+
expected string
309+
}{
310+
{
311+
name: "fenced code block with angle brackets",
312+
input: "```\nlet ptr: mut_raw_ptr<int> = raw_new int;\n```",
313+
expected: "```\nlet ptr: mut_raw_ptr<int> = raw_new int;\n```",
314+
},
315+
{
316+
name: "inline code with angle brackets",
317+
input: "Use `Vec<String>` for collections.",
318+
expected: "Use `Vec<String>` for collections.",
319+
},
320+
{
321+
name: "angle brackets outside code are sanitized",
322+
input: "This has <script>alert('xss')</script> in it.",
323+
expected: "This has in it.",
324+
},
325+
{
326+
name: "fenced code block with generic types",
327+
input: "Example:\n```go\nfunc Foo[T comparable](x T) {}\n```\nDone.",
328+
expected: "Example:\n```go\nfunc Foo[T comparable](x T) {}\n```\nDone.",
329+
},
330+
{
331+
name: "multiple inline code spans with angle brackets",
332+
input: "Compare `Map<K, V>` and `Set<T>`.",
333+
expected: "Compare `Map<K, V>` and `Set<T>`.",
334+
},
335+
{
336+
name: "no code blocks passes through",
337+
input: "No code here, just text.",
338+
expected: "No code here, just text.",
339+
},
340+
}
341+
342+
for _, tt := range tests {
343+
t.Run(tt.name, func(t *testing.T) {
344+
result := Sanitize(tt.input)
345+
assert.Equal(t, tt.expected, result)
346+
})
347+
}
348+
}

0 commit comments

Comments
 (0)