Skip to content

Commit 3722fe4

Browse files
fix(sanitize): preserve angle brackets inside code blocks and inline code
bluemonday's StrictPolicy treats angle brackets inside markdown code blocks and inline code spans as HTML tags and strips them. This causes content like `mut_raw_ptr<int>` to become `mut_raw_ptr` when read through MCP issue/PR endpoints. The fix protects angle brackets inside fenced code blocks (```) and inline code spans (`) with sentinels before HTML sanitization, then restores them after. Angle brackets outside code are still sanitized normally, preserving XSS protection. Fixes #2202 Signed-off-by: Dayna Blackwell <dayna@blackwell-systems.com>
1 parent 4bded57 commit 3722fe4

2 files changed

Lines changed: 203 additions & 1 deletion

File tree

pkg/sanitize/sanitize.go

Lines changed: 157 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,14 @@ var policy *bluemonday.Policy
1212
var policyOnce sync.Once
1313

1414
func Sanitize(input string) string {
15-
return FilterHTMLTags(FilterCodeFenceMetadata(FilterInvisibleCharacters(input)))
15+
cleaned := FilterCodeFenceMetadata(FilterInvisibleCharacters(input))
16+
// Protect angle brackets inside code blocks and inline code spans
17+
// from being stripped by the HTML sanitizer. bluemonday treats <int>,
18+
// <T>, etc. as unknown HTML tags and removes them.
19+
// See https://github.com/github/github-mcp-server/issues/2202
20+
protected := protectCodeAngleBrackets(cleaned)
21+
sanitized := FilterHTMLTags(protected)
22+
return restoreCodeAngleBrackets(sanitized)
1623
}
1724

1825
// FilterInvisibleCharacters removes invisible or control characters that should not appear
@@ -145,6 +152,155 @@ func isSafeCodeFenceToken(token string) bool {
145152
return true
146153
}
147154

155+
// Sentinels that stand in for angle brackets inside code while the HTML
// sanitizer runs. They are delimited by NUL bytes; protectCodeAngleBrackets
// removes every NUL byte from its input before inserting them, so untrusted
// content can never carry a pre-made sentinel through the sanitizer.
const (
	ltSentinel = "\x00LT\x00"
	gtSentinel = "\x00GT\x00"
)

// protectCodeAngleBrackets replaces < and > inside fenced code blocks and
// inline code spans with sentinels so the HTML sanitizer does not strip them
// as tags. Text outside code is copied through unchanged (apart from NUL
// bytes, which are dropped) and is therefore still sanitized normally.
//
// Code is recognised with a lightweight approximation of the CommonMark
// rules, not a full Markdown parser:
//
//   - A fenced block opens with a run of three or more backticks that is
//     preceded on its line only by indentation or blockquote markers and whose
//     info string contains no backtick. It closes at a line holding nothing
//     but a backtick run at least as long as the opener, or at end of input.
//   - Any other backtick run opens an inline span, which closes at the next
//     run of exactly the same length. A run with no closer is literal text.
//
// The result must be passed through restoreCodeAngleBrackets after
// sanitization to turn the sentinels back into angle brackets.
func protectCodeAngleBrackets(input string) string {
	// Sentinels are NUL-delimited. Removing NULs up front guarantees that every
	// sentinel present later was produced here, for a bracket inside code.
	input = strings.ReplaceAll(input, "\x00", "")

	var b strings.Builder
	b.Grow(len(input))

	// Every delimiter of interest is ASCII, so scanning bytes is safe on
	// UTF-8 input and leaves all other bytes exactly as they were.
	n := len(input)
	for i := 0; i < n; {
		if input[i] != '`' {
			// Copy everything up to the next backtick in one go.
			next := strings.IndexByte(input[i:], '`')
			if next < 0 {
				b.WriteString(input[i:])
				break
			}
			b.WriteString(input[i : i+next])
			i += next
			continue
		}

		runLen := backtickRunLen(input, i)
		if isFenceOpener(input, i, runLen) {
			i = protectFencedBlock(&b, input, i, runLen)
			continue
		}

		// Inline code span: the opening run is always written verbatim.
		bodyStart := i + runLen
		b.WriteString(input[i:bodyStart])
		closeStart := findInlineCloser(input, bodyStart, runLen)
		if closeStart < 0 {
			// No matching closer: the backticks are literal text and whatever
			// follows them is still subject to sanitization.
			i = bodyStart
			continue
		}
		writeProtected(&b, input[bodyStart:closeStart])
		b.WriteString(input[closeStart : closeStart+runLen])
		i = closeStart + runLen
	}

	return b.String()
}

// backtickRunLen returns the number of consecutive backticks in s starting at
// index start (zero if s[start] is not a backtick or start is out of range).
func backtickRunLen(s string, start int) int {
	end := start
	for end < len(s) && s[end] == '`' {
		end++
	}
	return end - start
}

// lineEnd returns the index just past the newline that terminates the line
// containing pos, or len(s) when that line is the last one.
func lineEnd(s string, pos int) int {
	if nl := strings.IndexByte(s[pos:], '\n'); nl >= 0 {
		return pos + nl + 1
	}
	return len(s)
}

// isFenceOpener reports whether the backtick run of length runLen starting at
// runStart opens a fenced code block: it must be at least three backticks
// long, be preceded on its line only by spaces, tabs or '>' markers, and be
// followed by an info string that contains no backtick.
func isFenceOpener(s string, runStart, runLen int) bool {
	if runLen < 3 {
		return false
	}
	for k := runStart - 1; k >= 0 && s[k] != '\n'; k-- {
		switch s[k] {
		case ' ', '\t', '>':
		default:
			return false
		}
	}
	info := s[runStart+runLen : lineEnd(s, runStart)]
	return strings.IndexByte(info, '`') < 0
}

// isClosingFence reports whether line (including any trailing newline) is a
// closing fence for a block opened with fenceLen backticks: optional
// indentation or '>' markers, a run of at least fenceLen backticks, and then
// nothing but whitespace.
func isClosingFence(line string, fenceLen int) bool {
	k := 0
	for k < len(line) && (line[k] == ' ' || line[k] == '\t' || line[k] == '>') {
		k++
	}
	run := backtickRunLen(line, k)
	if run < fenceLen {
		return false
	}
	return strings.TrimRight(line[k+run:], " \t\r\n") == ""
}

// protectFencedBlock writes the fenced block whose opening backtick run starts
// at runStart to b and returns the index of the first byte after the block.
// The opening line (fence and info string) is written verbatim; every
// following line up to and including the closing fence has its angle brackets
// replaced by sentinels. An unclosed fence extends to the end of s.
func protectFencedBlock(b *strings.Builder, s string, runStart, fenceLen int) int {
	pos := lineEnd(s, runStart)
	b.WriteString(s[runStart:pos])

	for pos < len(s) {
		next := lineEnd(s, pos)
		line := s[pos:next]
		// The closing line is protected as well so that a blockquote marker
		// in front of the fence is treated like the ones on the code lines.
		writeProtected(b, line)
		pos = next
		if isClosingFence(line, fenceLen) {
			break
		}
	}
	return pos
}

// findInlineCloser returns the index of the first backtick run at or after
// from whose length is exactly openLen, or -1 if there is none. Runs are
// compared as a whole, so the tail of a longer run never counts as a closer.
// The search also gives up when it reaches the opening line of a fenced code
// block, because an inline span cannot extend across one.
func findInlineCloser(s string, from, openLen int) int {
	for k := from; k < len(s); {
		off := strings.IndexByte(s[k:], '`')
		if off < 0 {
			return -1
		}
		k += off

		run := backtickRunLen(s, k)
		if run == openLen {
			return k
		}
		if isFenceOpener(s, k, run) {
			return -1
		}
		k += run
	}
	return -1
}

// writeProtected appends code to b with every '<' and '>' replaced by the
// corresponding sentinel.
func writeProtected(b *strings.Builder, code string) {
	for k := 0; k < len(code); k++ {
		switch code[k] {
		case '<':
			b.WriteString(ltSentinel)
		case '>':
			b.WriteString(gtSentinel)
		default:
			b.WriteByte(code[k])
		}
	}
}

// restoreCodeAngleBrackets converts the sentinels inserted by
// protectCodeAngleBrackets back into angle brackets.
func restoreCodeAngleBrackets(input string) string {
	s := strings.ReplaceAll(input, ltSentinel, "<")
	return strings.ReplaceAll(s, gtSentinel, ">")
}
303+
148304
func getPolicy() *bluemonday.Policy {
149305
policyOnce.Do(func() {
150306
p := bluemonday.StrictPolicy()

pkg/sanitize/sanitize_test.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,3 +300,49 @@ func TestSanitizeRemovesInvisibleCodeFenceMetadata(t *testing.T) {
300300
result := Sanitize(input)
301301
assert.Equal(t, expected, result)
302302
}
303+
304+
func TestSanitizePreservesAngleBracketsInCodeBlocks(t *testing.T) {
305+
tests := []struct {
306+
name string
307+
input string
308+
expected string
309+
}{
310+
{
311+
name: "fenced code block with angle brackets",
312+
input: "```\nlet ptr: mut_raw_ptr<int> = raw_new int;\n```",
313+
expected: "```\nlet ptr: mut_raw_ptr<int> = raw_new int;\n```",
314+
},
315+
{
316+
name: "inline code with angle brackets",
317+
input: "Use `Vec<String>` for collections.",
318+
expected: "Use `Vec<String>` for collections.",
319+
},
320+
{
321+
name: "angle brackets outside code are sanitized",
322+
input: "This has <script>alert('xss')</script> in it.",
323+
expected: "This has in it.",
324+
},
325+
{
326+
name: "fenced code block with generic types",
327+
input: "Example:\n```go\nfunc Foo[T comparable](x T) {}\n```\nDone.",
328+
expected: "Example:\n```go\nfunc Foo[T comparable](x T) {}\n```\nDone.",
329+
},
330+
{
331+
name: "multiple inline code spans with angle brackets",
332+
input: "Compare `Map<K, V>` and `Set<T>`.",
333+
expected: "Compare `Map<K, V>` and `Set<T>`.",
334+
},
335+
{
336+
name: "no code blocks passes through",
337+
input: "No code here, just text.",
338+
expected: "No code here, just text.",
339+
},
340+
}
341+
342+
for _, tt := range tests {
343+
t.Run(tt.name, func(t *testing.T) {
344+
result := Sanitize(tt.input)
345+
assert.Equal(t, tt.expected, result)
346+
})
347+
}
348+
}

0 commit comments

Comments
 (0)