Skip to content

Commit d725654

Browse files
authored
Create render_notebooks.py
1 parent e621ca0 commit d725654

1 file changed

Lines changed: 203 additions & 0 deletions

File tree

Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Notebook to GitHub-Compatible Format Converter
4+
5+
This script renders XML-format notebooks to standard Jupyter JSON format
6+
with the required widget state metadata for GitHub rendering.
7+
"""
8+
9+
import os
10+
import json
11+
import re
12+
import nbformat
13+
from nbformat.validator import validate
14+
15+
def process_notebooks(directory="."):
    """Convert every ``.ipynb`` file found under *directory*.

    The ``.git`` and ``.github`` directories are not descended into. Each
    notebook path is handed to ``convert_notebook``; the number of calls that
    returned a truthy value is printed and returned.
    """
    skipped_dirs = ('.git', '.github')
    targets = []
    for current, subdirs, names in os.walk(directory):
        # Slice-assign so os.walk sees the pruned list and skips those trees.
        subdirs[:] = [d for d in subdirs if d not in skipped_dirs]
        targets.extend(
            os.path.join(current, name)
            for name in names
            if name.endswith('.ipynb')
        )

    print(f"Found {len(targets)} notebooks to process")

    converted = sum(1 for path in targets if convert_notebook(path))

    print(f"Successfully rendered {converted} out of {len(targets)} notebooks")
    return converted
36+
37+
# Matches one complete <VSCode.Cell ...>...</VSCode.Cell> element. The attribute
# part is confined to a single opening tag ([^>]*) so that a cell lacking a
# language attribute cannot swallow the opening tag of the cell after it.
_CELL_PATTERN = re.compile(r'<VSCode\.Cell\b([^>]*)>(.*?)</VSCode\.Cell>', re.DOTALL)
_LANGUAGE_ATTR = re.compile(r'\blanguage="(.*?)"')


def _empty_widget_state():
    """Return a fresh, empty value for the notebook's ``widgets`` metadata key."""
    return {
        "application/vnd.jupyter.widget-state+json": {
            "state": {},
            "version_major": 2,
            "version_minor": 0,
        }
    }


def _convert_xml_notebook(filepath, content):
    """Rebuild *filepath* as a JSON notebook from its ``<VSCode.Cell>`` elements."""
    print(" Converting from XML format...")

    # Extract (language, source) pairs first, so that nothing is written when
    # no complete cell can be recovered from the file.
    extracted = []
    for match in _CELL_PATTERN.finditer(content):
        attrs, body = match.groups()
        language = _LANGUAGE_ATTR.search(attrs)
        extracted.append((language.group(1) if language else None, body.strip()))

    if not extracted:
        # Overwriting here would replace the file with an empty notebook.
        print(f" ERROR: no complete cells found in {filepath}; file left unchanged")
        return False

    # Only "markdown" becomes a markdown cell; every other language (or a
    # missing language attribute) is kept as a code cell.
    cells = [
        nbformat.v4.new_markdown_cell(source=source)
        if language == "markdown"
        else nbformat.v4.new_code_cell(source=source)
        for language, source in extracted
    ]

    nb = nbformat.v4.new_notebook()
    nb.cells = cells
    nb.metadata = {
        "kernelspec": {
            "display_name": "Python 3",
            "language": "python",
            "name": "python3",
        },
        "language_info": {
            "codemirror_mode": {
                "name": "ipython",
                "version": 3,
            },
            "file_extension": ".py",
            "mimetype": "text/x-python",
            "name": "python",
            "nbconvert_exporter": "python",
            "pygments_lexer": "ipython3",
            "version": "3.8.10",
        },
        "widgets": _empty_widget_state(),
    }

    # Validate before opening the file for writing, so a failure here leaves
    # the original content on disk.
    validate(nb)
    with open(filepath, 'w', encoding='utf-8') as f:
        nbformat.write(nb, f)

    print(f" Successfully rendered {filepath} for GitHub compatibility")
    return True


def _ensure_widget_state(filepath, content, nb_dict):
    """Add empty widget-state metadata to a JSON notebook that lacks it."""
    if "widgets" in nb_dict.get("metadata", {}):
        print(" Notebook already in correct format for GitHub, no changes needed")
        return True

    print(" Adding widget state metadata to JSON notebook...")
    nb = nbformat.reads(content, as_version=4)
    if "metadata" not in nb:
        nb.metadata = {}
    nb.metadata["widgets"] = _empty_widget_state()

    # Validate before opening the file for writing (see _convert_xml_notebook).
    validate(nb)
    with open(filepath, 'w', encoding='utf-8') as f:
        nbformat.write(nb, f)

    print(f" Successfully added widget state to {filepath}")
    return True


def convert_notebook(filepath):
    """Convert one notebook file in place to GitHub-renderable Jupyter JSON.

    The file is first parsed as JSON. If that succeeds it is treated as a
    regular notebook and only gains empty widget-state metadata when the
    ``widgets`` key is missing. Only when the content is *not* valid JSON is it
    checked for ``<VSCode.Cell`` markup and converted from XML. Parsing JSON
    first keeps a valid notebook that merely mentions ``<VSCode.Cell`` in a
    cell from being mistaken for XML and overwritten.

    Args:
        filepath: Path of the notebook file to read and possibly rewrite.

    Returns:
        True if the notebook was converted, updated, or already had widget
        metadata; False if it is neither valid JSON nor XML cell markup, if no
        cells could be extracted, or if any exception was raised while
        processing. Exceptions are reported on stdout, not propagated.
    """
    print(f"\nProcessing {filepath}")

    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()

        try:
            nb_dict = json.loads(content)
        except json.JSONDecodeError:
            if '<VSCode.Cell' in content:
                return _convert_xml_notebook(filepath, content)
            print(f" ERROR: {filepath} is not in valid JSON format or XML format")
            return False

        return _ensure_widget_state(filepath, content, nb_dict)

    except Exception as e:
        # Per-file boundary: report and carry on so one bad notebook does not
        # abort processing of the others.
        print(f" ERROR processing {filepath}: {str(e)}")
        return False
141+
142+
def verify_notebooks(directory="."):
    """Check that every ``.ipynb`` file under *directory* is a valid notebook.

    The ``.git`` and ``.github`` directories are not descended into. Each file
    is parsed as JSON first; only when parsing fails is the content checked for
    ``<VSCode.Cell`` markup, to choose between the "still in XML format" and
    "not valid JSON" messages. Parsing first keeps a valid JSON notebook that
    merely mentions ``<VSCode.Cell`` in a cell from being reported as XML.
    Missing widget-state metadata is only a warning and is not counted.

    Args:
        directory: Root directory to search.

    Returns:
        The number of notebooks that were unreadable, not JSON, or failed
        ``validate``.
    """
    notebook_files = []
    for root, dirs, files in os.walk(directory):
        # Slice-assign so os.walk sees the pruned list and skips those trees.
        dirs[:] = [d for d in dirs if d not in ('.git', '.github')]
        notebook_files.extend(
            os.path.join(root, file) for file in files if file.endswith('.ipynb')
        )

    print(f"\nVerifying {len(notebook_files)} notebooks for GitHub compatibility")

    errors = 0
    for nb_path in notebook_files:
        print(f"Checking {nb_path}")
        try:
            with open(nb_path, 'r', encoding='utf-8') as f:
                content = f.read()

            try:
                nb_dict = json.loads(content)
            except json.JSONDecodeError as e:
                # Not JSON: say whether it is unconverted XML or just broken.
                if '<VSCode.Cell' in content:
                    print(f" ERROR: {nb_path} is still in XML format")
                else:
                    print(f" ERROR: {nb_path} is not valid JSON: {str(e)}")
                errors += 1
                continue

            # Check for widget state (warning only).
            if "widgets" not in nb_dict.get("metadata", {}):
                print(f" WARNING: {nb_path} is missing widget state metadata")

            # Validate with nbformat.
            try:
                validate(nb_dict)
            except Exception as e:
                print(f" ERROR: {nb_path} validation failed: {str(e)}")
                errors += 1
            else:
                print(f" SUCCESS: {nb_path} is valid for GitHub rendering")

        except Exception as e:
            # Per-file boundary: count the failure and keep checking the rest.
            print(f" ERROR checking {nb_path}: {str(e)}")
            errors += 1

    if errors > 0:
        print(f"\n{errors} notebooks may have issues with GitHub rendering")
    else:
        print("\nAll notebooks are properly formatted for GitHub rendering")

    return errors
199+
200+
def main():
    """Convert all notebooks under the current directory, then verify them."""
    print("Rendering notebooks for GitHub compatibility...")
    process_notebooks()
    verify_notebooks()


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)