Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ jobs:
style:
name: "style"
runs-on: ubuntu-latest
# needed for gh-pages
permissions:
contents: write
steps:
- uses: actions/checkout@v4
with:
Expand Down Expand Up @@ -70,3 +73,38 @@ jobs:
if: steps.changes.outputs.python == 'true'
run: |
uv run pytest -v tests/

deploy-docs:
  name: "deploy docs"
  # Publish only from main; runs for any other ref skip this job entirely.
  if: github.ref == 'refs/heads/main'
  runs-on: ubuntu-latest
  # peaceiris/actions-gh-pages pushes the built site to the gh-pages branch
  # using GITHUB_TOKEN, so this job needs write access to repository contents.
  permissions:
    contents: write
  steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0
        persist-credentials: false

    - name: "Install uv"
      uses: astral-sh/setup-uv@v7
      with:
        enable-cache: true

    - name: "Install project with docs extras"
      run: |
        uv sync --group docs

    - name: "Build Sphinx docs"
      run: |
        uv run sphinx-build -b html docs docs/_build

    # No Pages artifact upload: that artifact is only consumed by
    # actions/deploy-pages, which this branch-based deployment does not use.
    # The job-level `if` already restricts this step to main.
    - name: "Deploy to GitHub Pages"
      uses: peaceiris/actions-gh-pages@v3
      with:
        github_token: ${{ secrets.GITHUB_TOKEN }}
        publish_dir: docs/_build/
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,4 @@ uv.lock

# dirs
tmp
docs/_build/
Empty file added docs/_static/.gitkeep
Empty file.
10 changes: 10 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
API Layer
=========

api.routers.transcript
----------------------

.. automodule:: lingua_loop.api.routers.transcript
   :members:
   :undoc-members:
   :show-inheritance:
23 changes: 23 additions & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""Sphinx configuration for lingua-loop."""

import sys
from importlib.metadata import PackageNotFoundError
from importlib.metadata import version
from pathlib import Path

# Put the repository's ``src`` directory first on the import path so autodoc
# can import ``lingua_loop`` from the working tree.
sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "src"))

project = "lingua-loop"
copyright = "2024, Jared Frazier"
author = "Jared Frazier"

# Read the version from the installed distribution so the docs cannot drift
# from the packaged release; fall back to the last known version when the
# project is not installed in the build environment.
try:
    release = version(project)
except PackageNotFoundError:
    release = "0.1.4"

extensions = [
    "sphinx.ext.autodoc",
    "sphinx.ext.napoleon",
    "sphinx.ext.viewcode",
]

templates_path = ["_templates"]
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

html_theme = "sphinx_rtd_theme"
html_static_path = ["_static"]
27 changes: 27 additions & 0 deletions docs/db.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
Database Layer
==============

db.models
---------

.. automodule:: lingua_loop.db.models
   :members:
   :undoc-members:
   :show-inheritance:
   :exclude-members: Base

db.session
----------

.. automodule:: lingua_loop.db.session
   :members:
   :undoc-members:
   :show-inheritance:

db.transcript
-------------

.. automodule:: lingua_loop.db.transcript
   :members:
   :undoc-members:
   :show-inheritance:
7 changes: 7 additions & 0 deletions docs/exceptions.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Exceptions
==========

.. automodule:: lingua_loop.exceptions
   :members:
   :undoc-members:
   :show-inheritance:
23 changes: 23 additions & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
Welcome to lingua-loop's documentation!
=========================================

A web application to train your listening skills by transcribing real speech
from YouTube videos.

.. toctree::
   :maxdepth: 1
   :caption: Contents:

   api
   schemas
   services
   db
   exceptions
   integrations

Indices and tables
==================

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
18 changes: 18 additions & 0 deletions docs/integrations.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
Integrations
============

integrations.youtube.types
--------------------------

.. automodule:: lingua_loop.integrations.youtube.types
   :members:
   :undoc-members:
   :show-inheritance:

integrations.youtube.wrapper
----------------------------

.. automodule:: lingua_loop.integrations.youtube.wrapper
   :members:
   :undoc-members:
   :show-inheritance:
7 changes: 7 additions & 0 deletions docs/schemas.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Schemas
=======

.. automodule:: lingua_loop.schemas.transcript
   :members:
   :undoc-members:
   :show-inheritance:
18 changes: 18 additions & 0 deletions docs/services.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
Services Layer
==============

services.text_normalization
---------------------------

.. automodule:: lingua_loop.services.text_normalization
   :members:
   :undoc-members:
   :show-inheritance:

services.transcript
-------------------

.. automodule:: lingua_loop.services.transcript
   :members:
   :undoc-members:
   :show-inheritance:
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,8 @@ dev = [
"pytest-skip-slow>=0.0.5",
"twine>=6.2.0",
]

docs = [
"sphinx>=8.2.1",
"sphinx-rtd-theme>=3.0.2",
]
7 changes: 6 additions & 1 deletion src/lingua_loop/api/routers/transcript.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""API routes for transcript operations."""

from typing import List

from fastapi import APIRouter
Expand Down Expand Up @@ -29,6 +31,7 @@ async def get_transcript(
language_code: SupportedLanguageCodes,
session=Depends(get_async_session),
):
"""Get a transcript for the given video ID and language code."""
transcript = await get_or_create_transcript_with_segments(
video_id=video_id, language_code=language_code, session=session
)
Expand All @@ -44,6 +47,7 @@ async def get_transcript(


def _segments_to_schema(segments: List[Segment]) -> List[SegmentSchema]:
"""Convert Segment ORM models to SegmentSchema instances."""
segments_as_schema = [
SegmentSchema(
start=segment.start, duration=segment.duration, text=segment.text
Expand All @@ -58,9 +62,9 @@ async def score_transcription(
request: ScoreRequest,
session: AsyncSession = Depends(get_async_session),
):
"""Score a user's transcription against the reference text."""
await _validate_score_request(request=request, session=session)

# Score the request
score, reference_text = await compute_score(
video_id=request.video_id,
segment_indices=request.segment_indices,
Expand All @@ -75,6 +79,7 @@ async def score_transcription(
async def _validate_score_request(
request: ScoreRequest, session: AsyncSession
) -> None:
"""Validate the score request against available segments."""
transcript = await get_or_create_transcript_with_segments(
video_id=request.video_id,
session=session,
Expand Down
2 changes: 2 additions & 0 deletions src/lingua_loop/constants.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Constants for Lingua Loop."""

from pathlib import Path

# Directories
Expand Down
6 changes: 5 additions & 1 deletion src/lingua_loop/db/models.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""SQLAlchemy ORM models for Lingua Loop."""

from datetime import UTC
from datetime import datetime
from typing import List
Expand All @@ -14,7 +16,7 @@


class Base(DeclarativeBase):
    """Base class for all ORM models."""


class Transcript(Base):
Expand All @@ -40,6 +42,8 @@ class Transcript(Base):


class Segment(Base):
"""Represents a segment within a transcript."""

__tablename__ = "segment"
id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
start: Mapped[float]
Expand Down
9 changes: 8 additions & 1 deletion src/lingua_loop/db/session.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Database session management utilities."""

from collections.abc import AsyncGenerator
from pathlib import Path
from typing import Tuple
Expand All @@ -17,6 +19,7 @@ def get_engine_and_session_maker(
db_driver: str = DEFAULT_DB_DRIVER,
database_path: Path | str = DEFAULT_DATABASE_PATH,
) -> Tuple[AsyncEngine, async_sessionmaker[AsyncSession]]:
"""Create and return the async engine and session maker."""
sqlalchemy_database_url = f"{db_driver}:///{database_path}"
async_engine = create_async_engine(sqlalchemy_database_url)
async_session_maker = async_sessionmaker(
Expand All @@ -26,18 +29,22 @@ def get_engine_and_session_maker(


async def create_db_and_tables(async_engine: AsyncEngine):
    """Create the tables registered on ``Base.metadata``.

    Opens a connection context with ``async_engine.begin()`` and runs the
    synchronous ``create_all`` through ``run_sync`` inside it.
    """
    create_all = Base.metadata.create_all
    async with async_engine.begin() as connection:
        await connection.run_sync(create_all)


async def shutdown(async_engine: AsyncEngine) -> None:
    """Dispose of the async engine.

    Awaits ``async_engine.dispose()`` and returns nothing.
    """
    await async_engine.dispose()


async def get_async_session(
request: Request,
) -> AsyncGenerator[AsyncSession, None]:
# `request` and state populated by lifespan(app)
"""Provide an async database session for dependency injection."""

# `state` property of `request` gets populated by main.py::lifespan
async_session_maker = request.state.async_session_maker

async with async_session_maker() as session:
Expand Down
6 changes: 5 additions & 1 deletion src/lingua_loop/db/transcript.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""CRUD operations for transcripts."""

from typing import List

from sqlalchemy import select
Expand All @@ -19,6 +21,7 @@
async def read_or_create_transcript_with_segments(
video_id: str, language_code: SupportedLanguageCodes, session: AsyncSession
) -> Transcript:
"""Get or create a transcript with all segments for the given video."""
transcript = await _read_transcript_with_segments(
video_id=video_id, session=session
)
Expand Down Expand Up @@ -49,7 +52,7 @@ async def _read_transcript_with_segments(
async def _create_transcript(
video_id: str, language_code: SupportedLanguageCodes, session: AsyncSession
) -> Transcript:

"""Create a new transcript record with segments from YouTube."""
transcript_list = list_transcripts(video_id=video_id)
has_transcript = video_has_transcript_in_language(
transcript_list=transcript_list, language_code=language_code
Expand Down Expand Up @@ -78,6 +81,7 @@ async def _create_transcript(


def _get_segments(fetched_transcript: FetchedTranscript) -> List[Segment]:
"""Convert a FetchedTranscript to a list of Segment ORM objects."""
segments: List[Segment] = []
snippets = fetched_transcript.snippets
for snippet in snippets:
Expand Down
8 changes: 8 additions & 0 deletions src/lingua_loop/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,23 @@
"""Custom exceptions for Lingua Loop."""

from fastapi import status
from fastapi.exceptions import HTTPException


class TranscriptNotFoundError(Exception):
    """Signal that no transcript was found for a video ID.

    The ID is kept on the ``video_id`` attribute and is also embedded in the
    exception message.
    """

    def __init__(self, video_id: str):
        """Build the message for ``video_id`` and remember the ID."""
        message = f"Transcript not found for video_id={video_id}"
        super().__init__(message)
        self.video_id = video_id


class SegmentIndicesError(HTTPException):
    """HTTP 400 error raised for invalid segment indices."""

    def __init__(self, segment_indices: list[int]):
        """Report the rejected ``segment_indices`` in the error detail."""
        super().__init__(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Invalid segment indices, got {segment_indices}",
        )
6 changes: 6 additions & 0 deletions src/lingua_loop/integrations/youtube/types.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,21 @@
"""YouTube integration type definitions."""

from enum import Enum
from typing import Dict


class SupportedLanguageCodes(str, Enum):
    """Supported language codes for YouTube transcripts.

    Members also subclass ``str``, so each one compares equal to its
    two-letter code (for example ``SupportedLanguageCodes.DUTCH == "nl"``).
    """

    DUTCH = "nl"
    ENGLISH = "en"
    GERMAN = "de"
    ITALIAN = "it"


class SupportedLanguages(str, Enum):
"""Human-readable language names."""

DUTCH = "Dutch"
ENGLISH = "English"
GERMAN = "German"
Expand Down
Loading
Loading