Skip to content

Commit fe63c82

Browse files
committed
4B - Client mod improvements, robustness fixes, docs SEO and load time improvements
1 parent d567290 commit fe63c82

98 files changed

Lines changed: 508 additions & 2072 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/deploy-pages.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ jobs:
4343
- name: Build docs
4444
run: |
4545
pip install markdown==3.10.2 zstandard==0.25.0
46-
python docs/build.py
46+
python docs/build.py --production
4747
4848
- name: Setup Pages
4949
uses: actions/configure-pages@v5

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ Though you do not need to await any methods unless you want to wait for the call
1717

1818
## Documentation
1919

20-
See [The Docs](https://omena0.github.io/PyJavaBridge/index.html)
20+
See [The Docs](https://omena0.github.io/PyJavaBridge/)
2121

2222
For quick searches use the pjb cli script.
2323

changelog.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ Client-mod API consolidation release — high-level Python interface, cleaner ex
5151
- Pinned `.github/workflows/deploy-pages.yml` tool dependencies for reproducible docs deployments.
5252
- Added missing extension files to `src/main/resources/python/bridge/MANIFEST` (`loot_table.py`, `placeholder.py`, `scheduler.py`, `schematic.py`, `state_machine.py`, `tab_list.py`).
5353
- Updated stubs to reflect API/type reality in `bridge/__init__.pyi` and `bridge/extensions/__init__.pyi`.
54+
- Moved search index and git metadata out of the page files into separate files, for faster loads and better SEO.
55+
- Added `--production` flag, which converts all links to absolute links (`/PyJavaBridge/world.html` instead of `/world.html`) for better SEO.
5456

5557
## 4A
5658

docs/build.py

Lines changed: 149 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,21 @@
44
Converts Markdown source files in docs/src/ into a static HTML site in docs/.
55
66
Requirements: pip install markdown zstandard
7-
Usage: python docs/build.py
7+
Usage: python docs/build.py [--production]
88
"""
99

10+
import argparse
1011
import concurrent.futures
1112
from pathlib import Path
1213
import subprocess
1314
import tempfile
14-
import base64
1515
import shutil
1616
import html
1717
import os
18+
import posixpath
1819
import re
1920
import threading
21+
from urllib.parse import urljoin, urlsplit
2022

2123
try: # Import markdown
2224
import markdown
@@ -35,6 +37,8 @@
3537
DOCS_DIR = os.path.dirname(os.path.abspath(__file__))
3638
SRC_DIR = os.path.join(DOCS_DIR, "src")
3739
OUT_DIR = os.path.join(DOCS_DIR, "site")
40+
SEARCH_INDEX_FILENAME = "search_index.zst"
41+
GIT_META_FILENAME = "git_meta.json"
3842

3943
# Minify built HTML using `html-minifier-next` (via `npx`).
4044
# Set `MINIFY_HTML = False` to disable. If `npx` or the package is
@@ -257,6 +261,10 @@ def parse_frontmatter(text):
257261

258262
# Precompiled pre/code block matcher
259263
PRE_CODE_RE = re.compile(r'<pre><code class="language-(\w*)">(.*?)</code></pre>', flags=re.DOTALL)
264+
URL_ATTR_RE = re.compile(
265+
r'(?P<attr>\b(?:href|src))\s*=\s*(?P<quote>["\']?)(?P<url>[^"\'\s>]+)(?P=quote)',
266+
flags=re.IGNORECASE,
267+
)
260268

261269

262270
def highlight_python(code):
@@ -306,9 +314,108 @@ def convert_markdown(text):
306314
md.reset()
307315
return html_out, toc_tokens
308316

309-
def rewrite_md_links(html_text):
310-
"""Rewrite .md links to .html links."""
311-
return re.sub(r'href="([^"#]+)\.md(#[^"]*)?"', lambda m: f'href="{m.group(1)}.html{m.group(2) or ""}"', html_text)
317+
def _resolve_md_target_to_output(md_target, current_slug):
318+
"""Resolve a markdown href target to an output HTML filename."""
319+
raw = str(md_target or "").strip()
320+
if not raw:
321+
return raw
322+
323+
raw = raw.replace("\\", "/")
324+
candidates = []
325+
if raw.startswith("/"):
326+
candidates.append(posixpath.normpath(raw.lstrip("/")))
327+
else:
328+
# First try path-as-written (many docs links are already source-root style).
329+
candidates.append(posixpath.normpath(raw))
330+
# Then try resolving relative to the current source slug directory.
331+
base_dir = ""
332+
if current_slug:
333+
normalized_current = _normalize_slug(current_slug)
334+
base_dir = normalized_current.rsplit("/", 1)[0] if "/" in normalized_current else ""
335+
candidates.append(posixpath.normpath(posixpath.join(base_dir, raw)))
336+
337+
# Map through known slugs first (handles flattened output names).
338+
for candidate in candidates:
339+
normalized = _normalize_slug(candidate)
340+
if normalized in SLUG_PAGE_KEYS:
341+
return slug_output_name(normalized)
342+
343+
# Fallback for unknown markdown files: preserve path shape and just switch extension.
344+
return f"{raw}.html"
345+
346+
def rewrite_md_links(html_text, current_slug):
    """Rewrite site-internal ``.md`` href targets to their output HTML targets.

    Only double-quoted ``href`` attributes whose target ends in ``.md``
    (optionally followed by a ``#fragment``) are considered. Targets that
    carry a URI scheme (``https:``, ``mailto:`` ...) or are protocol-relative
    (``//host/...``) point outside the generated site and are left untouched;
    rewriting them to ``.html`` would break links such as a GitHub
    ``README.md`` URL.

    Args:
        html_text: Rendered HTML of a page body.
        current_slug: Slug of the page being rendered, passed through to
            ``_resolve_md_target_to_output`` for relative resolution.

    Returns:
        The HTML with internal markdown links pointing at built pages.
    """
    def _repl(m):
        target = m.group(1)
        probe = target.strip()
        # External URL: not one of our source files, keep the link verbatim.
        if probe.startswith("//") or re.match(r'^[a-zA-Z][a-zA-Z0-9+.-]*:', probe):
            return m.group(0)
        anchor = m.group(2) or ""
        resolved = _resolve_md_target_to_output(target, current_slug)
        return f'href="{resolved}{anchor}"'

    return re.sub(r'href="([^"#]+)\.md(#[^"]*)?"', _repl, html_text)
355+
356+
SITE_PATH_PREFIX = "/PyJavaBridge/"
357+
PRODUCTION = False
358+
359+
def _normalized_site_prefix():
360+
prefix = SITE_PATH_PREFIX.strip()
361+
if not prefix:
362+
return "/"
363+
if not prefix.startswith("/"):
364+
prefix = "/" + prefix
365+
if not prefix.endswith("/"):
366+
prefix += "/"
367+
return prefix
368+
369+
def _output_site_prefix():
    """Return the prefix to emit into generated links and scripts.

    This is the normalized site prefix when ``PRODUCTION`` is set and the
    empty string otherwise, so development builds keep relative links.
    """
    if PRODUCTION:
        return _normalized_site_prefix()
    return ""
372+
373+
def _output_href(path):
    """Build an internal href for the current output mode.

    Leading slashes are dropped from ``path`` (``None`` is treated as ``""``).
    The result is the bare relative path when no output prefix is in effect,
    and the output prefix followed by that path otherwise.
    """
    relative = str(path or "").lstrip("/")
    site_prefix = _output_site_prefix()
    return f"{site_prefix}{relative}" if site_prefix else relative
380+
381+
def _should_absolutize_url(raw_url):
382+
"""Whether a URL should be rewritten to an absolute site URL."""
383+
if not raw_url:
384+
return False
385+
value = raw_url.strip()
386+
if not value or value.startswith("#") or value.startswith("//"):
387+
return False
388+
if re.match(r'^[a-zA-Z][a-zA-Z0-9+.-]*:', value):
389+
return False
390+
return True
391+
392+
def absolutize_links(html_text, page_url):
    """Rewrite internal ``href``/``src`` values in an HTML document to site-absolute paths.

    Every attribute matched by ``URL_ATTR_RE`` whose URL passes
    ``_should_absolutize_url`` is resolved against ``page_url``; only the
    path, query and fragment of the result are kept. Paths that do not
    already sit under the normalized site prefix get that prefix prepended.

    Args:
        html_text: Full HTML of the page.
        page_url: URL of the page itself, used as the base for resolution.

    Returns:
        The rewritten HTML, or ``html_text`` unchanged when either argument
        is empty.
    """
    if not (html_text and page_url):
        return html_text

    site_root = _normalized_site_prefix().rstrip("/") or "/"

    def _rewrite_attr(match):
        url = match.group("url")
        if not _should_absolutize_url(url):
            return match.group(0)

        target = urlsplit(urljoin(page_url, url))
        path = target.path or "/"
        outside_prefix = (
            site_root != "/"
            and path.startswith("/")
            and path != site_root
            and not path.startswith(site_root + "/")
        )
        if outside_prefix:
            path = site_root + path

        pieces = [path]
        if target.query:
            pieces.append(f"?{target.query}")
        if target.fragment:
            pieces.append(f"#{target.fragment}")
        rebuilt = "".join(pieces)

        # Re-emit with the original quoting (an empty quote means unquoted).
        name = match.group("attr")
        quote = match.group("quote")
        return f"{name}={quote}{rebuilt}{quote}"

    return URL_ATTR_RE.sub(_rewrite_attr, html_text)
312419

313420
def process_blockquotes(html_text):
314421
"""Convert blockquotes starting with bold markers into styled callouts."""
@@ -496,7 +603,7 @@ def build_toc_sidebar(toc_tokens, current_slug):
496603

497604
TEMPLATE = """\
498605
<!DOCTYPE html>
499-
<html lang="en">
606+
<html lang="en" data-site-prefix="{site_prefix}">
500607
<head>
501608
<meta charset="UTF-8">
502609
<meta name="viewport" content="width=device-width, initial-scale=1.0">
@@ -581,8 +688,6 @@ def build_toc_sidebar(toc_tokens, current_slug):
581688
582689
</button>
583690
584-
<script id="git-meta" type="application/json">{git_meta_json}</script>
585-
<script id="zstd-data" type="text/plain">{search_index_zstd_b64}</script>
586691
<script src="https://cdn.jsdelivr.net/npm/fzstd@0.1.1/umd/index.js" async></script>
587692
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
588693
<script src="script.js"></script>
@@ -622,7 +727,7 @@ def build_page(slug):
622727
body_html, toc_tokens = convert_markdown(body_md)
623728

624729
# Post-processing
625-
body_html = rewrite_md_links(body_html)
730+
body_html = rewrite_md_links(body_html, slug)
626731
body_html = highlight_code_blocks(body_html)
627732
body_html = process_blockquotes(body_html)
628733
body_html = format_ext_tags(body_html)
@@ -646,12 +751,11 @@ def _safe(s):
646751
page_title=page_title,
647752
og_title=og_title,
648753
og_description=og_description,
754+
site_prefix=_safe(_output_site_prefix()),
649755
subtitle_html=_safe(subtitle_html),
650756
body=_safe(body_html),
651757
sidebar=_safe(sidebar),
652-
search_index_zstd_b64=_safe(_search_index_zstd_b64),
653758
version_options=_safe(VERSION_OPTIONS),
654-
git_meta_json=_git_meta_json,
655759
)
656760

657761
# Optimize the html
@@ -667,6 +771,11 @@ def _safe(s):
667771
print(f'Failed to minify HTML: {e}')
668772

669773
out_name = slug_output_name(slug)
774+
if PRODUCTION:
775+
resolver_base = urljoin("https://docs.local", _normalized_site_prefix())
776+
page_url = urljoin(resolver_base, out_name)
777+
out_html = absolutize_links(out_html, page_url)
778+
670779
out_path = os.path.join(OUT_DIR, out_name)
671780
with open(out_path, "w", encoding="utf-8") as f:
672781
f.write(out_html)
@@ -772,20 +881,33 @@ def get_all_slugs():
772881

773882
return slugs
774883

775-
_search_index_zstd_b64 = ""
776884
SEARCH_MAP = {}
777885
VERSION_OPTIONS = ""
778-
_git_meta_json = '{}'
779886
SLUG_PAGE_KEYS = {}
780887

781888
WORKERS = 18
782889

783-
def main():
890+
def parse_args(argv=None):
    """Parse the docs build command line.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        Namespace with a boolean ``production`` attribute.
    """
    cli = argparse.ArgumentParser(description="Build the PyJavaBridge docs site.")
    cli.add_argument(
        "--production",
        help="Rewrite internal links to site-root absolute paths for deployed docs.",
        action="store_true",
    )
    return cli.parse_args(argv)
898+
899+
def main(argv=None):
784900
"""Build the static documentation site from markdown sources."""
785-
global _search_index_zstd_b64, SLUG_PAGE_KEYS
901+
global SLUG_PAGE_KEYS, PRODUCTION
902+
args = parse_args(argv)
903+
PRODUCTION = bool(args.production)
904+
786905
print("📖 Building PyJavaBridge docs...")
787906
print(f" Source: {SRC_DIR}")
788907
print(f" Output: {OUT_DIR}")
908+
print(f" Mode: {'production' if PRODUCTION else 'development'}")
909+
if PRODUCTION:
910+
print(f" Site prefix: {_normalized_site_prefix()}")
789911
print()
790912

791913
# Copy static assets into output directory. Try DOCS_DIR first, fall back
@@ -816,7 +938,7 @@ def main():
816938
built = 0
817939
search_index = []
818940

819-
# Build search index first (needed for inlining into pages)
941+
# Build search index first so we can emit a shared compressed asset.
820942
for slug in slugs:
821943
src = os.path.join(SRC_DIR, f"{slug}.md")
822944

@@ -885,7 +1007,7 @@ def main():
8851007
sections.append({"heading": current_heading, "text": ", ".join(table_first_cols)})
8861008

8871009
page_key = slug_page_key(slug)
888-
url = slug_output_name(slug)
1010+
url = _output_href(slug_output_name(slug))
8891011
search_index.append({
8901012
"slug": page_key,
8911013
"source_slug": _normalize_slug(slug),
@@ -985,10 +1107,7 @@ def words(s):
9851107
item['backlinks'] = slug_to_backlinks.get(item['slug'], [])
9861108
item['related'] = related_map.get(item['slug'], [])
9871109

988-
# Emit git metadata (repo, commits, tags) for client-side versioning support
989-
# Default inline git meta (fallback when git access fails)
990-
global _git_meta_json
991-
_git_meta_json = '{}'
1110+
# Emit git metadata (repo, commits, tags) for client-side versioning support.
9921111
try:
9931112
repo_root = os.path.dirname(DOCS_DIR)
9941113
# remote URL
@@ -1086,14 +1205,11 @@ def words(s):
10861205

10871206
git_meta = {'repo': repo_name, 'commits': commits, 'tags': tags, 'versions': versions, 'src_map': src_map, 'pages_by_commit': pages_by_commit}
10881207
try:
1089-
with open(os.path.join(OUT_DIR, 'git_meta.json'), 'w', encoding='utf-8') as gf:
1208+
with open(os.path.join(OUT_DIR, GIT_META_FILENAME), 'w', encoding='utf-8') as gf:
10901209
json.dump(git_meta, gf, separators=(',', ':'))
10911210
except Exception:
10921211
pass
10931212

1094-
# Inline JSON for file:// usage (client falls back to this if fetch fails)
1095-
_git_meta_json = json.dumps(git_meta, separators=(',', ':'))
1096-
10971213
# Pre-render version selector options so the <select> isn't empty before JS runs.
10981214
try:
10991215
global VERSION_OPTIONS
@@ -1133,15 +1249,19 @@ def words(s):
11331249
cctx = zstandard.ZstdCompressor(level=22)
11341250

11351251
compressed = cctx.compress(search_json.encode('utf-8'))
1136-
_search_index_zstd_b64 = base64.b64encode(compressed).decode('ascii')
1252+
search_index_path = os.path.join(OUT_DIR, SEARCH_INDEX_FILENAME)
1253+
with open(search_index_path, "wb") as sf:
1254+
sf.write(compressed)
11371255

11381256
raw_size = len(search_json.encode('utf-8'))
11391257
compressed_size = len(compressed)
1140-
b64_size = len(_search_index_zstd_b64)
11411258

1142-
print(f" Search index: {raw_size:,} bytes → {compressed_size:,} zstd → {b64_size:,} base64 ({100*b64_size/raw_size:.1f}%)")
1259+
print(
1260+
f" Search index: {raw_size:,} bytes → {compressed_size:,} zstd "
1261+
f"({100*compressed_size/raw_size:.1f}%) → {SEARCH_INDEX_FILENAME}"
1262+
)
11431263

1144-
# Build pages (with search index inlined) — parallelized
1264+
# Build pages — parallelized
11451265
slugs_to_build = []
11461266
for slug in slugs:
11471267
src = os.path.join(SRC_DIR, f"{slug}.md")
@@ -1167,4 +1287,3 @@ def words(s):
11671287

11681288
if __name__ == "__main__":
11691289
main()
1170-

0 commit comments

Comments
 (0)