44Converts Markdown source files in docs/src/ into a static HTML site in docs/site/.
55
66Requirements: pip install markdown zstandard
7- Usage: python docs/build.py
7+ Usage: python docs/build.py [--production]
88"""
99
10+ import argparse
1011import concurrent .futures
1112from pathlib import Path
1213import subprocess
1314import tempfile
14- import base64
1515import shutil
1616import html
1717import os
18+ import posixpath
1819import re
1920import threading
21+ from urllib .parse import urljoin , urlsplit
2022
2123try : # Import markdown
2224 import markdown
# Directory that contains this build script.
DOCS_DIR = os.path.dirname(os.path.abspath(__file__))
# Markdown sources live in <DOCS_DIR>/src.
SRC_DIR = os.path.join(DOCS_DIR, "src")
# Generated site is written to <DOCS_DIR>/site.
OUT_DIR = os.path.join(DOCS_DIR, "site")
# File name (inside OUT_DIR) of the zstd-compressed search index.
SEARCH_INDEX_FILENAME = "search_index.zst"
# File name (inside OUT_DIR) of the git metadata JSON.
GIT_META_FILENAME = "git_meta.json"
3842
3943# Minify built HTML using `html-minifier-next` (via `npx`).
4044# Set `MINIFY_HTML = False` to disable. If `npx` or the package is
@@ -257,6 +261,10 @@ def parse_frontmatter(text):
257261
# Precompiled pre/code block matcher
# group(1) is the language tag, group(2) the code body (DOTALL so it may span lines).
PRE_CODE_RE = re.compile(r'<pre><code class="language-(\w*)">(.*?)</code></pre>', flags=re.DOTALL)
# Matches an href= or src= attribute (case-insensitive) whose value is
# double-quoted, single-quoted or unquoted. Named groups:
#   attr  - the attribute name as written
#   quote - the opening quote character, or "" for an unquoted value
#   url   - the value itself (no quotes, whitespace or ">" allowed inside)
URL_ATTR_RE = re.compile(
    r'(?P<attr>\b(?:href|src))\s*=\s*(?P<quote>["\']?)(?P<url>[^"\'\s>]+)(?P=quote)',
    flags=re.IGNORECASE,
)
260268
261269
262270def highlight_python (code ):
@@ -306,9 +314,108 @@ def convert_markdown(text):
306314 md .reset ()
307315 return html_out , toc_tokens
308316
309- def rewrite_md_links (html_text ):
310- """Rewrite .md links to .html links."""
311- return re .sub (r'href="([^"#]+)\.md(#[^"]*)?"' , lambda m : f'href="{ m .group (1 )} .html{ m .group (2 ) or "" } "' , html_text )
def _resolve_md_target_to_output(md_target, current_slug):
    """Translate a markdown link target into the HTML target to emit.

    Args:
        md_target: Link target as written in the page (rewrite_md_links passes
            it with the trailing ``.md`` already removed).
        current_slug: Slug of the page containing the link; used to resolve
            relative targets. May be empty/None.

    Returns:
        The output name from ``slug_output_name`` when the target maps to a
        known slug, otherwise the target (backslashes turned into ``/``) with
        ``.html`` appended. An empty/blank target yields ``""``.
    """
    target = str(md_target or "").strip()
    if not target:
        return target

    # Treat Windows-style separators the same as forward slashes.
    target = target.replace("\\", "/")

    if target.startswith("/"):
        # Root-style link: look it up relative to the source root only.
        lookup_paths = [posixpath.normpath(target.lstrip("/"))]
    else:
        # Directory of the current page ("" when the page sits at the root).
        parent_dir = ""
        if current_slug:
            own_slug = _normalize_slug(current_slug)
            if "/" in own_slug:
                parent_dir = own_slug.rsplit("/", 1)[0]
        lookup_paths = [
            # Path exactly as written (many links are already source-root style)...
            posixpath.normpath(target),
            # ...then the same path resolved against the current page's directory.
            posixpath.normpath(posixpath.join(parent_dir, target)),
        ]

    # Known slugs win, so flattened output names are honoured.
    for path in lookup_paths:
        slug = _normalize_slug(path)
        if slug in SLUG_PAGE_KEYS:
            return slug_output_name(slug)

    # Unknown markdown file: keep the path shape and only swap the extension.
    return target + ".html"
345+
def rewrite_md_links(html_text, current_slug):
    """Rewrite site-internal ``.md`` href targets to their output HTML targets.

    Links whose target carries a URL scheme (``https:``, ``mailto:`` ...) or is
    protocol-relative (``//host/...``) point outside the docs sources, so they
    are returned untouched; swapping their ``.md`` for ``.html`` would break them.

    Args:
        html_text: HTML to scan for double-quoted ``href="....md"`` attributes.
        current_slug: Slug of the page being built, passed on so relative
            targets can be resolved against it.

    Returns:
        The HTML with every internal ``.md`` link rewritten; a trailing
        ``#anchor`` is preserved.
    """
    def _repl(m):
        target = m.group(1)
        probe = target.strip()
        # External link (scheme-qualified or protocol-relative): leave as written.
        if probe.startswith("//") or re.match(r'^[a-zA-Z][a-zA-Z0-9+.-]*:', probe):
            return m.group(0)
        anchor = m.group(2) or ""
        resolved = _resolve_md_target_to_output(target, current_slug)
        return f'href="{resolved}{anchor}"'

    return re.sub(r'href="([^"#]+)\.md(#[^"]*)?"', _repl, html_text)
355+
356+ SITE_PATH_PREFIX = "/PyJavaBridge/"
357+ PRODUCTION = False
358+
359+ def _normalized_site_prefix ():
360+ prefix = SITE_PATH_PREFIX .strip ()
361+ if not prefix :
362+ return "/"
363+ if not prefix .startswith ("/" ):
364+ prefix = "/" + prefix
365+ if not prefix .endswith ("/" ):
366+ prefix += "/"
367+ return prefix
368+
def _output_site_prefix():
    """Return the prefix to emit into generated links/scripts.

    Production builds get the normalized site prefix; otherwise the prefix is
    the empty string.
    """
    if PRODUCTION:
        return _normalized_site_prefix()
    return ""
372+
def _output_href(path):
    """Build an internal href for the current output mode.

    Args:
        path: Site-internal path; leading slashes are dropped and a falsy
            value is treated as ``""``.

    Returns:
        The output site prefix followed by the cleaned path. With an empty
        prefix this is just the cleaned, relative path.
    """
    relative = str(path or "").lstrip("/")
    # An empty prefix concatenates to the bare relative path.
    return _output_site_prefix() + relative
380+
381+ def _should_absolutize_url (raw_url ):
382+ """Whether a URL should be rewritten to an absolute site URL."""
383+ if not raw_url :
384+ return False
385+ value = raw_url .strip ()
386+ if not value or value .startswith ("#" ) or value .startswith ("//" ):
387+ return False
388+ if re .match (r'^[a-zA-Z][a-zA-Z0-9+.-]*:' , value ):
389+ return False
390+ return True
391+
def absolutize_links(html_text, page_url):
    """Rewrite internal href/src values in an HTML document to site-absolute paths.

    Args:
        html_text: HTML document text.
        page_url: URL of the page itself; relative links are resolved against it.

    Returns:
        The HTML with every eligible href/src replaced by the path (plus any
        query and fragment) of its resolved URL, placed under the site prefix.
        Returned unchanged when either argument is empty.
    """
    if not (html_text and page_url):
        return html_text

    # Site prefix without its trailing slash; "/" when the site lives at the root.
    site_root = _normalized_site_prefix().rstrip("/") or "/"

    def _rewrite(match):
        url = match.group("url")
        if not _should_absolutize_url(url):
            return match.group(0)

        split = urlsplit(urljoin(page_url, url))
        path = split.path or "/"

        # Root-absolute paths that are not already under the prefix get it prepended.
        under_prefix = path == site_root or path.startswith(site_root + "/")
        if site_root != "/" and path.startswith("/") and not under_prefix:
            path = site_root + path

        pieces = [path]
        if split.query:
            pieces.append("?" + split.query)
        if split.fragment:
            pieces.append("#" + split.fragment)

        # quote is "" for unquoted attribute values, so one template covers both forms.
        attr = match.group("attr")
        quote = match.group("quote")
        return f"{attr}={quote}{''.join(pieces)}{quote}"

    return URL_ATTR_RE.sub(_rewrite, html_text)
312419
313420def process_blockquotes (html_text ):
314421 """Convert blockquotes starting with bold markers into styled callouts."""
@@ -496,7 +603,7 @@ def build_toc_sidebar(toc_tokens, current_slug):
496603
497604TEMPLATE = """\
498605 <!DOCTYPE html>
499- <html lang="en">
606+ <html lang="en" data-site-prefix="{site_prefix}" >
500607<head>
501608 <meta charset="UTF-8">
502609 <meta name="viewport" content="width=device-width, initial-scale=1.0">
@@ -581,8 +688,6 @@ def build_toc_sidebar(toc_tokens, current_slug):
581688
582689 </button>
583690
584- <script id="git-meta" type="application/json">{git_meta_json}</script>
585- <script id="zstd-data" type="text/plain">{search_index_zstd_b64}</script>
586691 <script src="https://cdn.jsdelivr.net/npm/fzstd@0.1.1/umd/index.js" async></script>
587692 <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
588693 <script src="script.js"></script>
@@ -622,7 +727,7 @@ def build_page(slug):
622727 body_html , toc_tokens = convert_markdown (body_md )
623728
624729 # Post-processing
625- body_html = rewrite_md_links (body_html )
730+ body_html = rewrite_md_links (body_html , slug )
626731 body_html = highlight_code_blocks (body_html )
627732 body_html = process_blockquotes (body_html )
628733 body_html = format_ext_tags (body_html )
@@ -646,12 +751,11 @@ def _safe(s):
646751 page_title = page_title ,
647752 og_title = og_title ,
648753 og_description = og_description ,
754+ site_prefix = _safe (_output_site_prefix ()),
649755 subtitle_html = _safe (subtitle_html ),
650756 body = _safe (body_html ),
651757 sidebar = _safe (sidebar ),
652- search_index_zstd_b64 = _safe (_search_index_zstd_b64 ),
653758 version_options = _safe (VERSION_OPTIONS ),
654- git_meta_json = _git_meta_json ,
655759 )
656760
657761 # Optimize the html
@@ -667,6 +771,11 @@ def _safe(s):
667771 print (f'Failed to minify HTML: { e } ' )
668772
669773 out_name = slug_output_name (slug )
774+ if PRODUCTION :
775+ resolver_base = urljoin ("https://docs.local" , _normalized_site_prefix ())
776+ page_url = urljoin (resolver_base , out_name )
777+ out_html = absolutize_links (out_html , page_url )
778+
670779 out_path = os .path .join (OUT_DIR , out_name )
671780 with open (out_path , "w" , encoding = "utf-8" ) as f :
672781 f .write (out_html )
@@ -772,20 +881,33 @@ def get_all_slugs():
772881
773882 return slugs
774883
775- _search_index_zstd_b64 = ""
776884SEARCH_MAP = {}
777885VERSION_OPTIONS = ""
778- _git_meta_json = '{}'
779886SLUG_PAGE_KEYS = {}
780887
781888WORKERS = 18
782889
783- def main ():
def parse_args(argv=None):
    """Parse the build script's command-line arguments.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        The parsed namespace; ``production`` is True when ``--production`` was given.
    """
    cli = argparse.ArgumentParser(description="Build the PyJavaBridge docs site.")
    production_flag = {
        "action": "store_true",
        "help": "Rewrite internal links to site-root absolute paths for deployed docs.",
    }
    cli.add_argument("--production", **production_flag)
    return cli.parse_args(argv)
898+
899+ def main (argv = None ):
784900 """Build the static documentation site from markdown sources."""
785- global _search_index_zstd_b64 , SLUG_PAGE_KEYS
901+ global SLUG_PAGE_KEYS , PRODUCTION
902+ args = parse_args (argv )
903+ PRODUCTION = bool (args .production )
904+
786905 print ("📖 Building PyJavaBridge docs..." )
787906 print (f" Source: { SRC_DIR } " )
788907 print (f" Output: { OUT_DIR } " )
908+ print (f" Mode: { 'production' if PRODUCTION else 'development' } " )
909+ if PRODUCTION :
910+ print (f" Site prefix: { _normalized_site_prefix ()} " )
789911 print ()
790912
791913 # Copy static assets into output directory. Try DOCS_DIR first, fall back
@@ -816,7 +938,7 @@ def main():
816938 built = 0
817939 search_index = []
818940
819- # Build search index first (needed for inlining into pages)
941+ # Build search index first so we can emit a shared compressed asset.
820942 for slug in slugs :
821943 src = os .path .join (SRC_DIR , f"{ slug } .md" )
822944
@@ -885,7 +1007,7 @@ def main():
8851007 sections .append ({"heading" : current_heading , "text" : ", " .join (table_first_cols )})
8861008
8871009 page_key = slug_page_key (slug )
888- url = slug_output_name (slug )
1010+ url = _output_href ( slug_output_name (slug ) )
8891011 search_index .append ({
8901012 "slug" : page_key ,
8911013 "source_slug" : _normalize_slug (slug ),
@@ -985,10 +1107,7 @@ def words(s):
9851107 item ['backlinks' ] = slug_to_backlinks .get (item ['slug' ], [])
9861108 item ['related' ] = related_map .get (item ['slug' ], [])
9871109
988- # Emit git metadata (repo, commits, tags) for client-side versioning support
989- # Default inline git meta (fallback when git access fails)
990- global _git_meta_json
991- _git_meta_json = '{}'
1110+ # Emit git metadata (repo, commits, tags) for client-side versioning support.
9921111 try :
9931112 repo_root = os .path .dirname (DOCS_DIR )
9941113 # remote URL
@@ -1086,14 +1205,11 @@ def words(s):
10861205
10871206 git_meta = {'repo' : repo_name , 'commits' : commits , 'tags' : tags , 'versions' : versions , 'src_map' : src_map , 'pages_by_commit' : pages_by_commit }
10881207 try :
1089- with open (os .path .join (OUT_DIR , 'git_meta.json' ), 'w' , encoding = 'utf-8' ) as gf :
1208+ with open (os .path .join (OUT_DIR , GIT_META_FILENAME ), 'w' , encoding = 'utf-8' ) as gf :
10901209 json .dump (git_meta , gf , separators = (',' , ':' ))
10911210 except Exception :
10921211 pass
10931212
1094- # Inline JSON for file:// usage (client falls back to this if fetch fails)
1095- _git_meta_json = json .dumps (git_meta , separators = (',' , ':' ))
1096-
10971213 # Pre-render version selector options so the <select> isn't empty before JS runs.
10981214 try :
10991215 global VERSION_OPTIONS
@@ -1133,15 +1249,19 @@ def words(s):
11331249 cctx = zstandard .ZstdCompressor (level = 22 )
11341250
11351251 compressed = cctx .compress (search_json .encode ('utf-8' ))
1136- _search_index_zstd_b64 = base64 .b64encode (compressed ).decode ('ascii' )
1252+ search_index_path = os .path .join (OUT_DIR , SEARCH_INDEX_FILENAME )
1253+ with open (search_index_path , "wb" ) as sf :
1254+ sf .write (compressed )
11371255
11381256 raw_size = len (search_json .encode ('utf-8' ))
11391257 compressed_size = len (compressed )
1140- b64_size = len (_search_index_zstd_b64 )
11411258
1142- print (f" Search index: { raw_size :,} bytes → { compressed_size :,} zstd → { b64_size :,} base64 ({ 100 * b64_size / raw_size :.1f} %)" )
1259+ print (
1260+ f" Search index: { raw_size :,} bytes → { compressed_size :,} zstd "
1261+ f"({ 100 * compressed_size / raw_size :.1f} %) → { SEARCH_INDEX_FILENAME } "
1262+ )
11431263
1144- # Build pages (with search index inlined) — parallelized
1264+ # Build pages — parallelized
11451265 slugs_to_build = []
11461266 for slug in slugs :
11471267 src = os .path .join (SRC_DIR , f"{ slug } .md" )
@@ -1167,4 +1287,3 @@ def words(s):
11671287
11681288if __name__ == "__main__" :
11691289 main ()
1170-
0 commit comments