diff --git a/pkg/sanitize/sanitize.go b/pkg/sanitize/sanitize.go
index e6401e4fb3..504e4fa6bb 100644
--- a/pkg/sanitize/sanitize.go
+++ b/pkg/sanitize/sanitize.go
@@ -12,7 +12,14 @@ var policy *bluemonday.Policy
 var policyOnce sync.Once
 
 func Sanitize(input string) string {
-	return FilterHTMLTags(FilterCodeFenceMetadata(FilterInvisibleCharacters(input)))
+	cleaned := FilterCodeFenceMetadata(FilterInvisibleCharacters(input))
+	// Protect angle brackets inside code blocks and inline code spans
+	// from being stripped by the HTML sanitizer. bluemonday treats bracketed
+	// tokens such as generic type parameters as unknown HTML tags and removes them.
+	// See https://github.com/github/github-mcp-server/issues/2202
+	protected := protectCodeAngleBrackets(cleaned)
+	sanitized := FilterHTMLTags(protected)
+	return restoreCodeAngleBrackets(sanitized)
 }
 
 // FilterInvisibleCharacters removes invisible or control characters that should not appear
@@ -145,6 +152,133 @@ func isSafeCodeFenceToken(token string) bool {
 	return true
 }
 
+// Sentinels used to protect angle brackets inside code from HTML sanitization.
+// They are delimited by NUL bytes, which protectCodeAngleBrackets strips from
+// its input, so input text cannot contain a ready-made sentinel.
+const (
+	ltSentinel = "\x00LT\x00"
+	gtSentinel = "\x00GT\x00"
+)
+
+// protectCodeAngleBrackets replaces < and > inside fenced code blocks and
+// inline code spans with sentinels so bluemonday does not strip them as HTML.
+//
+// A run of three or more backticks opens a fenced block: the rest of that line
+// (the info string) is copied unchanged, and the block ends at the next run of
+// at least as many backticks or at the end of input. A run of one or two
+// backticks opens an inline code span, which is closed by the next run of
+// exactly the same length; an opener without such a closer is copied as
+// literal text. Everything outside code is copied unchanged.
+func protectCodeAngleBrackets(input string) string {
+	// Drop NUL bytes so untrusted input cannot smuggle in a sentinel that
+	// restoreCodeAngleBrackets would later turn into a raw angle bracket.
+	// NOTE(review): FilterInvisibleCharacters may already remove NUL; confirm.
+	input = strings.ReplaceAll(input, "\x00", "")
+
+	var b strings.Builder
+	b.Grow(len(input))
+
+	runes := []rune(input)
+	n := len(runes)
+	i := 0
+
+	for i < n {
+		if runes[i] != '`' {
+			b.WriteRune(runes[i])
+			i++
+			continue
+		}
+
+		// Copy the opening backtick run; its length decides fence vs. inline.
+		openLen := backtickRunLen(runes, i)
+		b.WriteString(strings.Repeat("`", openLen))
+		i += openLen
+
+		if openLen >= 3 {
+			// Fenced code block: copy the info string through the newline.
+			for i < n {
+				r := runes[i]
+				b.WriteRune(r)
+				i++
+				if r == '\n' {
+					break
+				}
+			}
+			// Protect the body until a closing run of at least openLen backticks.
+			for i < n {
+				if runes[i] != '`' {
+					writeCodeRune(&b, runes[i])
+					i++
+					continue
+				}
+				run := backtickRunLen(runes, i)
+				b.WriteString(strings.Repeat("`", run))
+				i += run
+				if run >= openLen {
+					break
+				}
+			}
+			continue
+		}
+
+		// Inline code span: look for a closing run of exactly openLen backticks.
+		// Runs of any other length are skipped whole, so the tail of a longer
+		// run is never mistaken for the closer.
+		closeStart := -1
+		for k := i; k < n; {
+			if runes[k] != '`' {
+				k++
+				continue
+			}
+			run := backtickRunLen(runes, k)
+			if run == openLen {
+				closeStart = k
+				break
+			}
+			k += run
+		}
+		if closeStart == -1 {
+			// No closer: the backticks already written are literal text.
+			continue
+		}
+		for ; i < closeStart; i++ {
+			writeCodeRune(&b, runes[i])
+		}
+		b.WriteString(strings.Repeat("`", openLen))
+		i = closeStart + openLen
+	}
+
+	return b.String()
+}
+
+// backtickRunLen returns the number of consecutive backticks in runes
+// starting at index start.
+func backtickRunLen(runes []rune, start int) int {
+	end := start
+	for end < len(runes) && runes[end] == '`' {
+		end++
+	}
+	return end - start
+}
+
+// writeCodeRune appends r to b, substituting the matching sentinel for < or >.
+func writeCodeRune(b *strings.Builder, r rune) {
+	switch r {
+	case '<':
+		b.WriteString(ltSentinel)
+	case '>':
+		b.WriteString(gtSentinel)
+	default:
+		b.WriteRune(r)
+	}
+}
+
+// restoreCodeAngleBrackets converts sentinels back to angle brackets.
+func restoreCodeAngleBrackets(input string) string {
+	s := strings.ReplaceAll(input, ltSentinel, "<")
+	return strings.ReplaceAll(s, gtSentinel, ">")
+}
+
 func getPolicy() *bluemonday.Policy {
 	policyOnce.Do(func() {
 		p := bluemonday.StrictPolicy()
diff --git a/pkg/sanitize/sanitize_test.go b/pkg/sanitize/sanitize_test.go
index 35b23e6abe..05db3785db 100644
--- a/pkg/sanitize/sanitize_test.go
+++ b/pkg/sanitize/sanitize_test.go
@@ -300,3 +300,49 @@ func TestSanitizeRemovesInvisibleCodeFenceMetadata(t *testing.T) {
 	result := Sanitize(input)
 	assert.Equal(t, expected, result)
 }
+
+func TestSanitizePreservesAngleBracketsInCodeBlocks(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected string
+	}{
+		{
+			name:     "fenced code block with angle brackets",
+			input:    "```\nlet ptr: mut_raw_ptr = raw_new int;\n```",
+			expected: "```\nlet ptr: mut_raw_ptr = raw_new int;\n```",
+		},
+		{
+			name:     "inline code with angle brackets",
+			input:    "Use `Vec` for collections.",
+			expected: "Use `Vec` for collections.",
+		},
+		{
+			name:     "angle brackets outside code are sanitized",
+			input:    "This has in it.",
+			expected: "This has in it.",
+		},
+		{
+			name:     "fenced code block with generic types",
+			input:    "Example:\n```go\nfunc Foo[T comparable](x T) {}\n```\nDone.",
+			expected: "Example:\n```go\nfunc Foo[T comparable](x T) {}\n```\nDone.",
+		},
+		{
+			name:     "multiple inline code spans with angle brackets",
+			input:    "Compare `Map` and `Set`.",
+			expected: "Compare `Map` and `Set`.",
+		},
+		{
+			name:     "no code blocks passes through",
+			input:    "No code here, just text.",
+			expected: "No code here, just text.",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := Sanitize(tt.input)
+			assert.Equal(t, tt.expected, result)
+		})
+	}
+}