Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions src/tools/fetch/web-fetch-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,23 @@
.trim();
}

/**
 * Matches one complete `<script>`, `<style>` or `<noscript>` block, including
 * its content.
 *
 * - The lookahead after the tag name requires a real tag-name delimiter
 *   (whitespace, `/` or `>`), so look-alike elements such as `<styled-box>`
 *   are not mistaken for `<style>`.
 * - The end tag may carry whitespace or junk before `>` (for example
 *   `</script >` or `</script foo="bar">`); HTML parsers accept those as end
 *   tags, so the filter must as well.
 * - If no end tag exists, the block extends to the end of the input, which is
 *   how an HTML parser treats unterminated raw-text elements.
 * - `\1` ties the end tag to the tag name that opened the block; with the `i`
 *   flag the comparison is case-insensitive.
 */
const UNSAFE_BLOCK_PATTERN =
  /<(script|style|noscript)(?=[\s\/>])[\s\S]*?(?:<\/\1(?=[\s\/>])[^>]*>|$)/gi;

/**
 * Removes `<script>`, `<style>` and `<noscript>` blocks (tags and content)
 * from an HTML string.
 *
 * Removal is repeated until the string stops changing, because deleting one
 * block can splice the surrounding text into a new block, e.g.
 * `<scr<script>x</script>ipt>` collapses to `<script>` after a single pass.
 *
 * @param value - Raw HTML to filter.
 * @returns The HTML with every unsafe block removed.
 */
function removeUnsafeBlocks(value: string): string {
  let current = value;
  let previous: string;
  do {
    previous = current;
    // String.prototype.replace resets lastIndex for global patterns, so the
    // shared module-level regex is safe to reuse across passes and calls.
    current = current.replace(UNSAFE_BLOCK_PATTERN, "");
  } while (current !== previous);
  return current;
}

export function htmlToMarkdown(html: string): { text: string; title?: string } {
const titleMatch = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
const title = titleMatch ? normalizeWhitespace(stripTags(titleMatch[1])) : undefined;
let text = html
.replace(/<script[\s\S]*?<\/script>/gi, "")
.replace(/<style[\s\S]*?<\/style>/gi, "")
.replace(/<noscript[\s\S]*?<\/noscript>/gi, "");
let text = removeUnsafeBlocks(html);
text = text.replace(/<a\s+[^>]*href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>/gi, (_, href, body) => {
const label = normalizeWhitespace(stripTags(body));
if (!label) {
Expand Down
Loading