Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 159 additions & 1 deletion pkg/sanitize/sanitize.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,14 @@ var policy *bluemonday.Policy
var policyOnce sync.Once

func Sanitize(input string) string {
return FilterHTMLTags(FilterCodeFenceMetadata(FilterInvisibleCharacters(input)))
cleaned := FilterCodeFenceMetadata(FilterInvisibleCharacters(input))
// Protect angle brackets inside code blocks and inline code spans
// from being stripped by the HTML sanitizer. bluemonday treats <int>,
// <T>, etc. as unknown HTML tags and removes them.
// See https://github.com/github/github-mcp-server/issues/2202
protected := protectCodeAngleBrackets(cleaned)
sanitized := FilterHTMLTags(protected)
return restoreCodeAngleBrackets(sanitized)
}

// FilterInvisibleCharacters removes invisible or control characters that should not appear
Expand Down Expand Up @@ -145,6 +152,157 @@ func isSafeCodeFenceToken(token string) bool {
return true
}

// Sentinels substituted for angle brackets inside code so the HTML sanitizer
// leaves them alone. Each sentinel is delimited by NUL runes, and
// protectCodeAngleBrackets drops every NUL already present in its input, so a
// sentinel in its output can only have been produced by the protection pass
// itself.
const (
	ltSentinel = "\x00LT\x00"
	gtSentinel = "\x00GT\x00"
)

// protectCodeAngleBrackets replaces < and > inside fenced code blocks and
// inline code spans with sentinels so bluemonday does not strip them as HTML.
// Text outside code is copied through unchanged, except that NUL runes are
// removed everywhere so the input cannot smuggle in ready-made sentinels.
//
// Recognition rules:
//   - A fenced block opens with a run of three or more backticks that is
//     preceded on its line only by spaces/tabs and whose info string contains
//     no backtick. It closes on a line consisting solely of an equally long
//     or longer backtick run (surrounding spaces/tabs allowed); without such
//     a line the block extends to the end of the input. The opening line,
//     including the info string, is not protected.
//   - Any other backtick run opens an inline code span that is closed by the
//     next backtick run of exactly the same length. A run with no matching
//     closer is copied as literal text.
//
// Tilde (~~~) fences are not recognized.
func protectCodeAngleBrackets(input string) string {
	var b strings.Builder
	b.Grow(len(input))

	runes := []rune(input)
	n := len(runes)

	for i := 0; i < n; {
		if runes[i] != '`' {
			// Copy plain text up to the next backtick in one go.
			next := i
			for next < n && runes[next] != '`' {
				next++
			}
			appendGuarded(&b, runes[i:next], false)
			i = next
			continue
		}

		runEnd := backtickRunEnd(runes, i)
		runLen := runEnd - i

		if runLen >= 3 && opensBacktickFence(runes, i, runEnd) {
			i = guardFencedBlock(&b, runes, i, runEnd)
			continue
		}

		// Opening delimiter is literal whether or not the span is closed.
		appendGuarded(&b, runes[i:runEnd], false)

		closeStart := findClosingBacktickRun(runes, runEnd, runLen)
		if closeStart < 0 {
			// No matching closer: the backticks were plain text.
			i = runEnd
			continue
		}

		appendGuarded(&b, runes[runEnd:closeStart], true)
		appendGuarded(&b, runes[closeStart:closeStart+runLen], false)
		i = closeStart + runLen
	}

	return b.String()
}

// appendGuarded writes rs to b, dropping NUL runes. When inCode is true,
// '<' and '>' are written as ltSentinel and gtSentinel.
func appendGuarded(b *strings.Builder, rs []rune, inCode bool) {
	for _, r := range rs {
		switch {
		case r == 0:
			// Dropped: NUL is reserved for delimiting sentinels.
		case inCode && r == '<':
			b.WriteString(ltSentinel)
		case inCode && r == '>':
			b.WriteString(gtSentinel)
		default:
			b.WriteRune(r)
		}
	}
}

// backtickRunEnd returns the index just past the run of backticks that
// starts at start (start itself when rs[start] is not a backtick).
func backtickRunEnd(rs []rune, start int) int {
	end := start
	for end < len(rs) && rs[end] == '`' {
		end++
	}
	return end
}

// lineEndIndex returns the index of the first '\n' at or after start, or
// len(rs) when the line runs to the end of the input.
func lineEndIndex(rs []rune, start int) int {
	end := start
	for end < len(rs) && rs[end] != '\n' {
		end++
	}
	return end
}

// isBlankRune reports whether r is a space, tab, or carriage return.
func isBlankRune(r rune) bool {
	return r == ' ' || r == '\t' || r == '\r'
}

// opensBacktickFence reports whether the backtick run rs[runStart:runEnd]
// can open a fenced block: only blanks precede it on its line and the rest
// of the line (the info string) contains no backtick.
func opensBacktickFence(rs []rune, runStart, runEnd int) bool {
	// Walk back only across blanks, so the scan stops at the first
	// non-blank rune instead of rescanning the whole line.
	for p := runStart - 1; p >= 0 && rs[p] != '\n'; p-- {
		if !isBlankRune(rs[p]) {
			return false
		}
	}
	for _, r := range rs[runEnd:lineEndIndex(rs, runEnd)] {
		if r == '`' {
			return false
		}
	}
	return true
}

// closesBacktickFence reports whether line (without its newline) is a
// closing fence for an opening fence of fenceLen backticks: optional blanks,
// a run of at least fenceLen backticks, then only blanks.
func closesBacktickFence(line []rune, fenceLen int) bool {
	start := 0
	for start < len(line) && isBlankRune(line[start]) {
		start++
	}
	end := backtickRunEnd(line, start)
	if end-start < fenceLen {
		return false
	}
	for _, r := range line[end:] {
		if !isBlankRune(r) {
			return false
		}
	}
	return true
}

// guardFencedBlock writes the fenced block whose opening backtick run is
// rs[runStart:runEnd]. The opening line and the closing fence line are
// written as-is; every line in between has its angle brackets protected.
// It returns the index at which the caller should resume: the end of the
// closing fence line, or len(rs) when the fence is never closed.
func guardFencedBlock(b *strings.Builder, rs []rune, runStart, runEnd int) int {
	n := len(rs)
	fenceLen := runEnd - runStart

	// Opening fence, info string, and the newline that ends the line.
	pos := lineEndIndex(rs, runEnd)
	if pos < n {
		pos++
	}
	appendGuarded(b, rs[runStart:pos], false)

	for pos < n {
		eol := lineEndIndex(rs, pos)
		if closesBacktickFence(rs[pos:eol], fenceLen) {
			appendGuarded(b, rs[pos:eol], false)
			return eol
		}
		next := eol
		if next < n {
			next++ // keep the newline with its line
		}
		appendGuarded(b, rs[pos:next], true)
		pos = next
	}
	return n
}

// findClosingBacktickRun returns the start index of the first backtick run
// at or after from whose length is exactly want, or -1 if there is none.
// Runs are compared whole, so the tail of a longer run never matches.
func findClosingBacktickRun(rs []rune, from, want int) int {
	for k := from; k < len(rs); {
		if rs[k] != '`' {
			k++
			continue
		}
		end := backtickRunEnd(rs, k)
		if end-k == want {
			return k
		}
		k = end
	}
	return -1
}

// restoreCodeAngleBrackets converts sentinels back to angle brackets.
func restoreCodeAngleBrackets(input string) string {
	s := strings.ReplaceAll(input, ltSentinel, "<")
	return strings.ReplaceAll(s, gtSentinel, ">")
}

func getPolicy() *bluemonday.Policy {
policyOnce.Do(func() {
p := bluemonday.StrictPolicy()
Expand Down
46 changes: 46 additions & 0 deletions pkg/sanitize/sanitize_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -300,3 +300,49 @@ func TestSanitizeRemovesInvisibleCodeFenceMetadata(t *testing.T) {
result := Sanitize(input)
assert.Equal(t, expected, result)
}

// TestSanitizePreservesAngleBracketsInCodeBlocks checks that Sanitize keeps
// angle brackets that appear inside fenced code blocks and inline code spans,
// while markup outside of code is still removed.
func TestSanitizePreservesAngleBracketsInCodeBlocks(t *testing.T) {
	type sanitizeCase struct {
		name string
		in   string
		want string
	}

	// unchanged builds a case whose input must come back exactly as given.
	unchanged := func(name, text string) sanitizeCase {
		return sanitizeCase{name: name, in: text, want: text}
	}

	cases := []sanitizeCase{
		unchanged(
			"fenced code block with angle brackets",
			"```\nlet ptr: mut_raw_ptr<int> = raw_new int;\n```",
		),
		unchanged(
			"inline code with angle brackets",
			"Use `Vec<String>` for collections.",
		),
		{
			// NOTE(review): the expected text has a single space where the
			// element was removed; confirm this matches the sanitizer's
			// actual whitespace handling.
			name: "angle brackets outside code are sanitized",
			in:   "This has <script>alert('xss')</script> in it.",
			want: "This has in it.",
		},
		// This input contains no angle brackets; it only checks that a
		// language-tagged fence passes through untouched.
		unchanged(
			"fenced code block with generic types",
			"Example:\n```go\nfunc Foo[T comparable](x T) {}\n```\nDone.",
		),
		unchanged(
			"multiple inline code spans with angle brackets",
			"Compare `Map<K, V>` and `Set<T>`.",
		),
		unchanged(
			"no code blocks passes through",
			"No code here, just text.",
		),
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			assert.Equal(t, tc.want, Sanitize(tc.in))
		})
	}
}