jfdev001 · jfdev001 · Jun 7, 2026 · Jun 7, 2026 · Jun 7, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -21,6 +21,9 @@ jobs:
   style:
     name: "style"
     runs-on: ubuntu-latest
+    # NOTE(review): gh-pages is published by deploy-docs; verify style needs this
+    permissions:
+      contents: write
     steps:
       - uses: actions/checkout@v4
         with:
@@ -70,3 +73,38 @@ jobs:
         if: steps.changes.outputs.python == 'true'
         run: |
           uv run pytest -v tests/
+
+  deploy-docs:
+    name: "deploy docs"
+    if: github.ref == 'refs/heads/main'
+    runs-on: ubuntu-latest
+    # peaceiris/actions-gh-pages pushes the built site to the gh-pages branch
+    # using GITHUB_TOKEN, so this job needs write access to repo contents.
+    permissions:
+      contents: write
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          persist-credentials: false
+
+      - name: "Install uv"
+        uses: astral-sh/setup-uv@v7
+        with:
+          enable-cache: true
+
+      - name: "Install project with docs extras"
+        run: |
+          uv sync --group docs
+
+      - name: "Build Sphinx docs"
+        run: |
+          uv run sphinx-build -b html docs docs/_build
+
+      # Publishes docs/_build directly; no Pages artifact upload is needed
+      # because that artifact is only consumed by actions/deploy-pages.
+      - name: "Deploy to GitHub Pages"
+        uses: peaceiris/actions-gh-pages@v3
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          publish_dir: docs/_build/
diff --git a/.gitignore b/.gitignore
@@ -30,3 +30,4 @@ uv.lock
 
 # dirs
 tmp
+docs/_build/
diff --git a/docs/_static/.gitkeep b/docs/_static/.gitkeep
diff --git a/docs/api.rst b/docs/api.rst
@@ -0,0 +1,10 @@
+API Layer
+=========
+
+api.routers.transcript
+----------------------
+
+.. automodule:: lingua_loop.api.routers.transcript
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/conf.py b/docs/conf.py
@@ -0,0 +1,23 @@
+"""Sphinx config for lingua-loop; puts ``src/`` on sys.path for autodoc."""
+
+import sys
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "src"))
+
+project = "lingua-loop"
+copyright = "2024, Jared Frazier"
+author = "Jared Frazier"
+release = "0.1.4"  # NOTE(review): keep in sync with pyproject.toml
+
+extensions = [
+    "sphinx.ext.autodoc",  # renders the automodule directives in *.rst
+    "sphinx.ext.napoleon",  # parses Google/NumPy-style docstrings
+    "sphinx.ext.viewcode",  # adds links to highlighted source
+]
+
+templates_path = ["_templates"]
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
+
+html_theme = "sphinx_rtd_theme"
+html_static_path = ["_static"]
diff --git a/docs/db.rst b/docs/db.rst
@@ -0,0 +1,27 @@
+Database Layer
+==============
+
+db.models
+---------
+
+.. automodule:: lingua_loop.db.models
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :exclude-members: Base
+
+db.session
+----------
+
+.. automodule:: lingua_loop.db.session
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+db.transcript
+-------------
+
+.. automodule:: lingua_loop.db.transcript
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/exceptions.rst b/docs/exceptions.rst
@@ -0,0 +1,7 @@
+Exceptions
+==========
+
+.. automodule:: lingua_loop.exceptions
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/index.rst b/docs/index.rst
@@ -0,0 +1,23 @@
+Welcome to lingua-loop's documentation!
+=========================================
+
+A web application to train your listening skills by transcribing real speech
+from YouTube videos.
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Contents:
+
+   api
+   schemas
+   services
+   db
+   exceptions
+   integrations
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
diff --git a/docs/integrations.rst b/docs/integrations.rst
@@ -0,0 +1,18 @@
+Integrations
+============
+
+integrations.youtube.types
+--------------------------
+
+.. automodule:: lingua_loop.integrations.youtube.types
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+integrations.youtube.wrapper
+----------------------------
+
+.. automodule:: lingua_loop.integrations.youtube.wrapper
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/schemas.rst b/docs/schemas.rst
@@ -0,0 +1,7 @@
+Schemas
+=======
+
+.. automodule:: lingua_loop.schemas.transcript
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/services.rst b/docs/services.rst
@@ -0,0 +1,18 @@
+Services Layer
+==============
+
+services.text_normalization
+---------------------------
+
+.. automodule:: lingua_loop.services.text_normalization
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+services.transcript
+-------------------
+
+.. automodule:: lingua_loop.services.transcript
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/pyproject.toml b/pyproject.toml
@@ -53,3 +53,8 @@ dev = [
     "pytest-skip-slow>=0.0.5",
     "twine>=6.2.0",
 ]
+
+docs = [
+    "sphinx>=8.2.1",
+    "sphinx-rtd-theme>=3.0.2",
+]
diff --git a/src/lingua_loop/api/routers/transcript.py b/src/lingua_loop/api/routers/transcript.py
@@ -1,3 +1,5 @@
+"""API routes for transcript operations."""
+
 from typing import List
 
 from fastapi import APIRouter
@@ -29,6 +31,7 @@ async def get_transcript(
     language_code: SupportedLanguageCodes,
     session=Depends(get_async_session),
 ):
+    """Get a transcript for the given video ID and language code."""
     transcript = await get_or_create_transcript_with_segments(
         video_id=video_id, language_code=language_code, session=session
     )
@@ -44,6 +47,7 @@ async def get_transcript(
 
 
 def _segments_to_schema(segments: List[Segment]) -> List[SegmentSchema]:
+    """Convert Segment ORM models to SegmentSchema instances."""
     segments_as_schema = [
         SegmentSchema(
             start=segment.start, duration=segment.duration, text=segment.text
@@ -58,9 +62,9 @@ async def score_transcription(
     request: ScoreRequest,
     session: AsyncSession = Depends(get_async_session),
 ):
+    """Score a user's transcription against the reference text."""
     await _validate_score_request(request=request, session=session)
 
-    # Score the request
     score, reference_text = await compute_score(
         video_id=request.video_id,
         segment_indices=request.segment_indices,
@@ -75,6 +79,7 @@ async def score_transcription(
 async def _validate_score_request(
     request: ScoreRequest, session: AsyncSession
 ) -> None:
+    """Validate the score request against available segments."""
     transcript = await get_or_create_transcript_with_segments(
         video_id=request.video_id,
         session=session,

diff --git a/src/lingua_loop/constants.py b/src/lingua_loop/constants.py
@@ -1,3 +1,5 @@
+"""Constants for Lingua Loop."""
+
 from pathlib import Path
 
 # Directories

diff --git a/src/lingua_loop/db/models.py b/src/lingua_loop/db/models.py
@@ -1,3 +1,5 @@
+"""SQLAlchemy ORM models for Lingua Loop."""
+
 from datetime import UTC
 from datetime import datetime
 from typing import List
@@ -14,7 +16,7 @@
 
 
 class Base(DeclarativeBase):
-    pass
+    """Base class for all ORM models."""
 
 
 class Transcript(Base):
@@ -40,6 +42,8 @@ class Transcript(Base):
 
 
 class Segment(Base):
+    """Represents a segment within a transcript."""
+
     __tablename__ = "segment"
     id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
     start: Mapped[float]

diff --git a/src/lingua_loop/db/session.py b/src/lingua_loop/db/session.py
@@ -1,3 +1,5 @@
+"""Database session management utilities."""
+
 from collections.abc import AsyncGenerator
 from pathlib import Path
 from typing import Tuple
@@ -17,6 +19,7 @@ def get_engine_and_session_maker(
     db_driver: str = DEFAULT_DB_DRIVER,
     database_path: Path | str = DEFAULT_DATABASE_PATH,
 ) -> Tuple[AsyncEngine, async_sessionmaker[AsyncSession]]:
+    """Create and return the async engine and session maker."""
     sqlalchemy_database_url = f"{db_driver}:///{database_path}"
     async_engine = create_async_engine(sqlalchemy_database_url)
     async_session_maker = async_sessionmaker(
@@ -26,18 +29,22 @@ def get_engine_and_session_maker(
 
 
 async def create_db_and_tables(async_engine: AsyncEngine):
+    """Create all database tables defined in models."""
     async with async_engine.begin() as conn:
         await conn.run_sync(Base.metadata.create_all)
 
 
 async def shutdown(async_engine: AsyncEngine):
+    """Dispose of the async engine."""
     await async_engine.dispose()
 
 
 async def get_async_session(
     request: Request,
 ) -> AsyncGenerator[AsyncSession, None]:
-    # `request` and state populated by lifespan(app)
+    """Provide an async database session for dependency injection."""
+
+    # `state` property of `request` gets populated by main.py::lifespan
     async_session_maker = request.state.async_session_maker
 
     async with async_session_maker() as session:

diff --git a/src/lingua_loop/db/transcript.py b/src/lingua_loop/db/transcript.py
@@ -1,3 +1,5 @@
+"""CRUD operations for transcripts."""
+
 from typing import List
 
 from sqlalchemy import select
@@ -19,6 +21,7 @@
 async def read_or_create_transcript_with_segments(
     video_id: str, language_code: SupportedLanguageCodes, session: AsyncSession
 ) -> Transcript:
+    """Get or create a transcript with all segments for the given video."""
     transcript = await _read_transcript_with_segments(
         video_id=video_id, session=session
     )
@@ -49,7 +52,7 @@ async def _read_transcript_with_segments(
 async def _create_transcript(
     video_id: str, language_code: SupportedLanguageCodes, session: AsyncSession
 ) -> Transcript:
-
+    """Create a new transcript record with segments from YouTube."""
     transcript_list = list_transcripts(video_id=video_id)
     has_transcript = video_has_transcript_in_language(
         transcript_list=transcript_list, language_code=language_code
@@ -78,6 +81,7 @@ async def _create_transcript(
 
 
 def _get_segments(fetched_transcript: FetchedTranscript) -> List[Segment]:
+    """Convert a FetchedTranscript to a list of Segment ORM objects."""
     segments: List[Segment] = []
     snippets = fetched_transcript.snippets
     for snippet in snippets:

diff --git a/src/lingua_loop/exceptions.py b/src/lingua_loop/exceptions.py
@@ -1,15 +1,23 @@
+"""Custom exceptions for Lingua Loop."""
+
 from fastapi import status
 from fastapi.exceptions import HTTPException
 
 
 class TranscriptNotFoundError(Exception):
+    """Raised when a transcript is not found for a given video ID."""
+
     def __init__(self, video_id: str):
+        """Store ``video_id`` on the instance and build the error message."""
         self.video_id = video_id
         super().__init__(f"Transcript not found for video_id={video_id}")
 
 
 class SegmentIndicesError(HTTPException):
+    """HTTP 400 error raised when invalid segment indices are provided."""
+
     def __init__(self, segment_indices: list[int]):
+        """Build a 400 Bad Request whose detail lists ``segment_indices``."""
         status_code = status.HTTP_400_BAD_REQUEST
         detail = f"Invalid segment indices, got {segment_indices}"
         super().__init__(status_code=status_code, detail=detail)
diff --git a/src/lingua_loop/integrations/youtube/types.py b/src/lingua_loop/integrations/youtube/types.py
@@ -1,15 +1,21 @@
+"""YouTube integration type definitions."""
+
 from enum import Enum
 from typing import Dict
 
 
 class SupportedLanguageCodes(str, Enum):
+    """Supported YouTube transcript language codes (members are ``str``)."""
+
     DUTCH = "nl"
     ENGLISH = "en"
     GERMAN = "de"
     ITALIAN = "it"
 
 
 class SupportedLanguages(str, Enum):
+    """Human-readable language names."""
+
     DUTCH = "Dutch"
     ENGLISH = "English"
     GERMAN = "German"