|
| 1 | +import dataclasses |
| 2 | +import re |
| 3 | +import sys |
| 4 | +from typing import List |
| 5 | + |
| 6 | + |
# ASCII (plus en/em dash) punctuation that is stripped from a slug.
_SLUG_ASCII_PUNCTUATION = re.compile(r'[`~!@#$%^&*()+=<>?,./:;"\'|{}\[\]\\–—]')

# CJK / fullwidth punctuation that is stripped from a slug.
# The class is spelled with code points so that no member can be mistaken for
# regex syntax (a bare "[]" pair closes the class early, and an ASCII "-"
# would either form a range or delete the hyphens that replace spaces).
# NOTE(review): the fullwidth members are assumed to be the intended
# characters behind their ASCII look-alikes -- confirm against the upstream file.
_SLUG_CJK_PUNCTUATION = re.compile(
    '['
    '\u3000\u3001\u3002'              # ideographic space, ideographic comma, full stop
    '\uff1f\uff01\uff0c\uff1b\uff1a'  # fullwidth ? ! , ; :
    '\u201c\u201d\u2018\u2019'        # curly double and single quotes
    '\u3010\u3011\uff08\uff09'        # lenticular brackets, fullwidth parentheses
    '\u3014\u3015\uff3b\uff3d'        # tortoise-shell brackets, fullwidth square brackets
    '\ufe43\ufe44\ufe41\ufe42'        # vertical corner brackets
    '\u2014\u2026\uff0d\uff5e'        # em dash, ellipsis, fullwidth hyphen, fullwidth tilde
    '\u300a\u300b\u3008\u3009'        # double and single angle brackets
    '\u300c\u300d'                    # corner brackets
    ']'
)


@dataclasses.dataclass()
class Header:
    """A single Markdown header: its visible text and its nesting level."""

    # Header text without the leading '#' characters.
    name: str
    # Number of leading '#' characters (1 for a top-level header).
    level: int

    @property
    def slug(self):
        """Return the anchor fragment used to link to this header.

        Spaces become hyphens, then ASCII and CJK punctuation is removed.
        Letter case is left untouched.
        """
        text = self.name.replace(' ', '-')
        # single chars that are removed
        text = _SLUG_ASCII_PUNCTUATION.sub('', text)
        # CJK punctuations that are removed
        text = _SLUG_CJK_PUNCTUATION.sub('', text)
        return text
| 20 | + |
| 21 | + |
class TOCMaker:
    """Build a Markdown bullet-list table of contents from '#' headers.

    All options are keyword-only:

    max_depth: deepest header level (number of '#') that is included.
    link_prefix: text inserted before the '#slug' part of every link.
    indentation_size: spaces of indentation added per header level.
    list_bullets: bullet character per level; levels beyond the end of the
        sequence reuse its last element.
    header_class: callable invoked as ``header_class(name=..., level=...)``;
        the result must expose ``name``, ``level`` and ``slug``.
    """

    def __init__(
        self,
        *,
        max_depth=6,
        link_prefix='',
        indentation_size=2,
        list_bullets=('-', '*', '+', '-'),
        header_class=Header,
    ):
        self.max_depth = max_depth
        self.link_prefix = link_prefix
        self.indentation_size = indentation_size
        self.list_bullets = list_bullets
        self.header_class = header_class

    def make(self, text):
        """Return the table of contents for the Markdown source ``text``."""
        headers = self._collect_headers(text)
        return self._make_toc(headers)

    def make_from_file(self, fp):
        """Return the table of contents for the content read from ``fp``."""
        return self.make(fp.read())

    def _collect_headers(self, text):
        """Return the headers found in ``text`` outside fenced code blocks."""
        headers = []

        fence_count = 0
        for line in text.splitlines():
            line = line.strip()
            # A line holding an odd number of ``` markers opens or closes a
            # fence; an even number (inline use) leaves the state unchanged.
            fence_count += line.count('```') % 2
            if fence_count % 2 != 0 or not line.startswith('#'):
                continue

            # Parse once and reuse the result.
            header = self._parse_header_from_line(line)
            # A line made only of '#' has no text to link to, so it is skipped.
            if header.name and header.level <= self.max_depth:
                headers.append(header)

        return headers

    def _make_toc(self, headers: List[Header]):
        """Render ``headers`` as one indented Markdown link per line."""
        return '\n'.join(
            '{indent}{bullet} [{name}]({prefix}#{slug})'.format(
                indent=' ' * ((header.level - 1) * self.indentation_size),
                bullet=self._get_bullet(header.level),
                name=header.name,
                prefix=self.link_prefix,
                slug=header.slug,
            )
            for header in headers
        )

    def _get_bullet(self, level):
        """Return the bullet for ``level``, reusing the last one when exhausted."""
        if level > len(self.list_bullets):
            return self.list_bullets[-1]
        return self.list_bullets[level - 1]

    def _parse_header_from_line(self, line):
        """Split a stripped header line into its level and its text.

        The level is the number of leading '#' characters. The name is
        everything after them with surrounding whitespace removed, so a
        missing space after the hashes no longer costs the first letter.
        """
        level = len(line) - len(line.lstrip('#'))
        name = line[level:].strip()

        return self.header_class(
            name=name,
            level=level
        )
| 86 | + |
| 87 | + |
def paste_after(delimiter, content, text):
    """Return ``text`` cut at the delimiter line with ``content`` appended.

    The first line that equals ``delimiter`` once stripped marks the cut.
    Lines before it are kept, the delimiter is re-emitted followed by a blank
    line and ``content``, and everything that originally followed the
    delimiter is dropped.

    Raises:
        ValueError: if no line matches ``delimiter``.
    """
    lines = text.splitlines()
    position = next(
        (index for index, candidate in enumerate(lines)
         if candidate.strip() == delimiter),
        None,
    )
    if position is None:
        raise ValueError(f"Can't find delimiter '{delimiter}'")

    kept = lines[:position]
    kept.extend((f'{delimiter}\n', f'{content}\n'))
    return '\n'.join(kept)
| 99 | + |
| 100 | + |
if __name__ == '__main__':
    # Regenerate (or, with --check, verify) the table of contents that
    # README.md carries after its '<!-- toc -->' marker, built from the
    # headers of questions.md.
    #
    # Files are opened as UTF-8 explicitly: the headers may contain CJK text,
    # and the platform default encoding is not UTF-8 everywhere.
    with open('questions.md', encoding='utf-8') as fp:
        maker = TOCMaker(link_prefix='questions.md/')
        toc = maker.make_from_file(fp)

    with open('README.md', 'r', encoding='utf-8') as fp:
        original = fp.read()
    changed = paste_after('<!-- toc -->', toc, original)

    if '--check' in sys.argv:
        # Check mode never writes; a stale README yields exit status 1.
        if original != changed:
            print('Error')
            sys.exit(1)
    else:
        with open('README.md', 'w', encoding='utf-8') as fp:
            fp.write(changed)

    print('Done')
0 commit comments