diff --git a/README.md b/README.md index a50d289..db2c0d5 100644 --- a/README.md +++ b/README.md @@ -78,6 +78,7 @@ Command mode: ```bash canvas-github-agent list-courses canvas-github-agent list-assignments --course-id 12345 +canvas-github-agent list-modules --course-id 12345 canvas-github-agent create-repo --course-id 12345 canvas-github-agent create-repo --course-id 12345 --assignment-id 67890 canvas-github-agent create-repo --course-id 12345 --language r @@ -86,6 +87,7 @@ canvas-github-agent create-repo --course-id 12345 --confirm-type canvas-github-agent ingest-pdf --course-id 12345 --file-path "docs/AAI6660_Spring_2026 (1).pdf" canvas-github-agent list-documents --course-id 12345 canvas-github-agent search-context --course-id 12345 --query "Bayes theorem posterior update" +canvas-github-agent search-modules --course-id 12345 --query "Bayes theorem posterior update" ``` ## API Endpoints @@ -94,6 +96,8 @@ canvas-github-agent search-context --course-id 12345 --query "Bayes theorem post - GET /capabilities - GET /courses - GET /courses/{course_id}/assignments +- GET /courses/{course_id}/modules +- POST /courses/{course_id}/modules/search - POST /courses/{course_id}/documents/ingest - GET /courses/{course_id}/documents - POST /courses/{course_id}/context/search @@ -106,7 +110,7 @@ The `/create` endpoint returns a stable `task_result_v1` payload with service, r The `/tasks` endpoints expose an asynchronous `task_status_v1` lifecycle with `queued`, `running`, `completed`, and `failed` states. -Course PDFs can be ingested with Docling and indexed into a local Chroma store. During assignment creation, the app will search that indexed course context and attach the most relevant excerpts to generated outputs. +Course PDFs can be ingested with Docling and indexed into a local Chroma store. During assignment creation, the app will search both indexed course documents and live Canvas module content, then attach the most relevant excerpts to generated outputs. 
## MCP Server @@ -122,6 +126,8 @@ Primary MCP tools: - `list_courses` - `list_assignments` +- `list_course_modules` +- `search_course_modules` - `get_capabilities` - `get_oasf_record` - `ingest_course_document` @@ -150,11 +156,15 @@ Claude Desktop expects absolute paths. Update the template paths and env values, ## Course Context -The repository now supports a local Chroma-backed retrieval store for course reference material such as slide decks. +The repository now supports two deterministic course-context sources: -- Use Docling to parse the PDF into markdown-like text chunks +- live Canvas module content retrieved through the Canvas API +- a local Chroma-backed retrieval store for reference material such as slide decks + +- Read module pages, assignments, and discussion topics from Canvas and rank the most relevant excerpts against the assignment text +- Use Docling to parse PDFs into markdown-like text chunks - Store those chunks in Chroma with course-scoped metadata -- Retrieve relevant excerpts during assignment creation so generated repos and pages can reference the slide deck +- Retrieve relevant excerpts during assignment creation so generated repos and pages can reference both Canvas modules and uploaded course materials Local Chroma data is stored under `.chroma/` by default and is ignored by git. diff --git a/TODO.md b/TODO.md index 4640904..7934c76 100644 --- a/TODO.md +++ b/TODO.md @@ -1 +1,3 @@ # TODO + +- Fix the CLI argument validation in `app/agent.py` so `search-context --course-id 0` is accepted instead of being treated as missing. 
diff --git a/api.py b/api.py index 60f6ecf..913a762 100644 --- a/api.py +++ b/api.py @@ -73,6 +73,18 @@ def build_capabilities_payload() -> dict[str, Any]: "path": "/courses/{course_id}/assignments", "description": "List assignments for a Canvas course.", }, + { + "name": "list_course_modules", + "method": "GET", + "path": "/courses/{course_id}/modules", + "description": "List Canvas modules and module items for a course.", + }, + { + "name": "search_course_modules", + "method": "POST", + "path": "/courses/{course_id}/modules/search", + "description": "Search Canvas course module content for assignment-relevant context.", + }, { "name": "get_oasf_record", "method": "GET", @@ -148,6 +160,7 @@ def build_capabilities_payload() -> dict[str, Any]: "supported_languages": ["python", "r"], "course_context_backend": "chroma", "course_context_parser": "docling", + "course_context_sources": ["canvas_modules", "chroma_documents"], }, "result_schema": { "name": TASK_RESULT_SCHEMA, @@ -433,6 +446,39 @@ async def get_assignments(course_id: int): raise HTTPException(status_code=500, detail="Failed to fetch assignments.") +@app.get("/courses/{course_id}/modules") +async def get_modules(course_id: int): + """Return Canvas modules for a given course.""" + try: + canvas = CanvasTools() + modules = await canvas.get_course_modules(course_id) + return {"modules": modules} + except HTTPException: + raise + except Exception: + logger.exception("Failed to list modules for course_id=%s", course_id) + raise HTTPException(status_code=500, detail="Failed to fetch modules.") + + +@app.post("/courses/{course_id}/modules/search") +async def search_course_modules(course_id: int, req: CourseContextSearchRequest): + """Search Canvas course modules for assignment-relevant context.""" + try: + canvas = CanvasTools() + results = await canvas.search_course_module_context(course_id, req.query, req.limit) + return { + "course_id": course_id, + "query": req.query, + "limit": req.limit, + "results": results, 
@staticmethod
def merge_course_context_sources(
    document_context: Sequence[dict],
    module_context: Sequence[dict],
    limit: int = 5,
) -> list[dict[str, Any]]:
    """Interleave document and module context so both sources can contribute.

    Entries are taken round-robin, starting with the document source:
    document[0], module[0], document[1], module[1], ... When one source runs
    out, the remaining entries of the other follow in their original order.

    Args:
        document_context: Ranked matches from the indexed course documents.
        module_context: Ranked matches from Canvas module content.
        limit: Maximum number of entries to return. Values below 1 are
            treated as 1.

    Returns:
        A new list holding at most ``max(limit, 1)`` of the input entries.
        The inputs themselves are not modified.
    """
    # Clamp exactly once. Guarding a loop with the clamped value while
    # comparing appends against the raw value never terminates for limit <= 0.
    effective_limit = max(limit, 1)

    # Copy so that any iterable (not only indexable sequences) is accepted and
    # the caller's collections are left untouched.
    sources = (list(document_context), list(module_context))

    merged: list[dict[str, Any]] = []
    for index in range(max(len(source) for source in sources)):
        for source in sources:
            if index < len(source):
                merged.append(source[index])
                if len(merged) == effective_limit:
                    return merged
    return merged
module_context: + print(f"\n📚 Retrieved {len(module_context)} course module matches from Canvas.") + + results = self.merge_course_context_sources(document_context, module_context, limit) if results: - print(f"\n📎 Retrieved {len(results)} course context matches from your course documents.") + print(f"\n🧩 Using {len(results)} combined course context matches to support this assignment.") return results async def run( @@ -411,6 +455,33 @@ async def list_course_documents(course_id: int): ) +async def list_course_modules(course_id: int): + """List Canvas modules available for a course.""" + tools = CanvasTools() + modules = await tools.get_course_modules(course_id) + + print(f"\n📚 Canvas modules for course {course_id}") + print("-" * 80) + for module in modules: + print(f"{module['name']} | items={len(module.get('items', []))}") + + +async def search_course_modules(course_id: int, query: str, limit: int = 5): + """Search Canvas module content for assignment-relevant context.""" + tools = CanvasTools() + results = await tools.search_course_module_context(course_id, query, limit) + + print(f"\n🔎 Retrieved {len(results)} module context matches") + print("-" * 80) + for index, item in enumerate(results, start=1): + print(f"{index}. 
{item.get('section_title') or item.get('module_name') or 'Match'}") + print(f" Source: {item.get('document_name', 'Canvas Module')}") + print(f" Type: {item.get('item_type', 'Unknown')}") + if item.get("distance") is not None: + print(f" Distance: {item['distance']:.4f}") + print(f" {item.get('text', '')[:400]}\n") + + async def list_courses(): """Helper function to list available courses.""" canvas_tools = CanvasTools() @@ -463,10 +534,12 @@ async def main(): choices=[ "list-courses", "list-assignments", + "list-modules", "create-repo", "ingest-pdf", "list-documents", "search-context", + "search-modules", ], help="Command to execute", ) @@ -506,6 +579,11 @@ async def main(): print("Error: --course-id is required for list-assignments") return await list_course_assignments(args.course_id) + elif args.command == "list-modules": + if args.course_id is None: + print("Error: --course-id is required for list-modules") + return + await list_course_modules(args.course_id) elif args.command == "create-repo": if not args.course_id: print("Error: --course-id is required for create-repo") @@ -534,6 +612,11 @@ async def main(): print("Error: --course-id and --query are required for search-context") return await search_course_context(args.course_id, args.query, args.limit) + elif args.command == "search-modules": + if args.course_id is None or not args.query: + print("Error: --course-id and --query are required for search-modules") + return + await search_course_modules(args.course_id, args.query, args.limit) def run() -> None: diff --git a/app/mcp_server.py b/app/mcp_server.py index d3f232b..834fac5 100644 --- a/app/mcp_server.py +++ b/app/mcp_server.py @@ -63,6 +63,27 @@ async def list_assignments(course_id: int) -> dict[str, Any]: _raise_tool_error(exc) +@server.tool(description="List Canvas modules and module items for a course.") +async def list_course_modules(course_id: int) -> dict[str, Any]: + """Return Canvas modules for a course.""" + try: + return await 
api.get_modules(course_id) + except HTTPException as exc: + _raise_tool_error(exc) + + +@server.tool(description="Search Canvas course module content for assignment-relevant context.") +async def search_course_modules(course_id: int, query: str, limit: int = 5) -> dict[str, Any]: + """Search Canvas module content for relevant context.""" + try: + return await api.search_course_modules( + course_id, + api.CourseContextSearchRequest(query=query, limit=limit), + ) + except HTTPException as exc: + _raise_tool_error(exc) + + @server.tool(description="Return the service capabilities payload used for discovery.") async def get_capabilities() -> dict[str, Any]: """Return service capabilities and transport metadata.""" diff --git a/metadata/agent-fact-cards/service.canvas-assignment-workflow.fact-card.json b/metadata/agent-fact-cards/service.canvas-assignment-workflow.fact-card.json index 04ad5a7..9338cbc 100644 --- a/metadata/agent-fact-cards/service.canvas-assignment-workflow.fact-card.json +++ b/metadata/agent-fact-cards/service.canvas-assignment-workflow.fact-card.json @@ -9,6 +9,8 @@ "list_assignments", "fetch_assignment", "infer_assignment_type", + "list_course_modules", + "search_course_modules", "ingest_course_document", "search_course_context", "create_github_repository", @@ -59,6 +61,10 @@ ], "course_context_backend": "chroma", "course_context_parser": "docling", + "course_context_sources": [ + "canvas_modules", + "chroma_documents" + ], "supported_course_document_formats": [ "pdf" ], diff --git a/metadata/oasf-records/service.canvas-assignment-workflow.record.json b/metadata/oasf-records/service.canvas-assignment-workflow.record.json index ca11b6a..ff1d7c1 100644 --- a/metadata/oasf-records/service.canvas-assignment-workflow.record.json +++ b/metadata/oasf-records/service.canvas-assignment-workflow.record.json @@ -53,6 +53,9 @@ "course_context_ingest_endpoint": "http://localhost:8000/courses/{course_id}/documents/ingest", "course_context_parser": "docling", 
"course_context_search_endpoint": "http://localhost:8000/courses/{course_id}/context/search", + "course_context_sources": "canvas_modules,chroma_documents", + "course_module_listing_endpoint": "http://localhost:8000/courses/{course_id}/modules", + "course_module_search_endpoint": "http://localhost:8000/courses/{course_id}/modules/search", "entrypoint": "app/agent.py", "health_endpoint": "http://localhost:8000/health", "invocation_mode": "supports synchronous request-response and asynchronous task polling", diff --git a/scaffolding/templates.py b/scaffolding/templates.py index 6979c0e..8ae190b 100644 --- a/scaffolding/templates.py +++ b/scaffolding/templates.py @@ -396,6 +396,8 @@ def build_service_fact_card() -> Dict[str, Any]: "list_assignments", "fetch_assignment", "infer_assignment_type", + "list_course_modules", + "search_course_modules", "ingest_course_document", "search_course_context", "create_github_repository", @@ -437,6 +439,7 @@ def build_service_fact_card() -> Dict[str, Any]: "supported_languages": ["python", "r"], "course_context_backend": "chroma", "course_context_parser": "docling", + "course_context_sources": ["canvas_modules", "chroma_documents"], "supported_course_document_formats": ["pdf"], "notebook_support": "python notebook scaffolding for assignments that explicitly require Jupyter notebook submission", }, @@ -509,6 +512,9 @@ def build_service_oasf_record(service_base_url: Optional[str] = None) -> Dict[st "course_context_ingest_endpoint": f"{resolved_service_base_url}/courses/{{course_id}}/documents/ingest", "course_context_parser": "docling", "course_context_search_endpoint": f"{resolved_service_base_url}/courses/{{course_id}}/context/search", + "course_context_sources": "canvas_modules,chroma_documents", + "course_module_listing_endpoint": f"{resolved_service_base_url}/courses/{{course_id}}/modules", + "course_module_search_endpoint": f"{resolved_service_base_url}/courses/{{course_id}}/modules/search", "entrypoint": "app/agent.py", 
"health_endpoint": f"{resolved_service_base_url}/health", "invocation_mode": "supports synchronous request-response and asynchronous task polling", diff --git a/tests/test_agent.py b/tests/test_agent.py index 10d2c9c..9fd9eb8 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -146,6 +146,7 @@ def test_build_service_fact_card_matches_checked_in_json(self): assert checked_in["metadata"]["architecture"] == "deterministic workflow orchestrator" assert checked_in["interoperability"]["mcp"] == "canvas-github-agent-mcp" assert checked_in["metadata"]["course_context_backend"] == "chroma" + assert "canvas_modules" in checked_in["metadata"]["course_context_sources"] def test_build_service_oasf_record_matches_checked_in_json(self): """The checked-in OASF record should match the generated payload.""" @@ -167,6 +168,7 @@ def test_build_service_oasf_record_matches_checked_in_json(self): assert checked_in["locators"][0]["type"] == "source_code" assert checked_in["locators"][1]["type"] == "url" assert checked_in["annotations"]["course_context_backend"] == "chroma" + assert checked_in["annotations"]["course_module_search_endpoint"] == "http://localhost:8000/courses/{course_id}/modules/search" assert checked_in["annotations"]["mcp_stdio_command"] == "canvas-github-agent-mcp" assert checked_in["annotations"]["task_submission_endpoint"] == "http://localhost:8000/tasks" assert checked_in["annotations"]["task_status_schema"] == "task_status_v1" @@ -521,6 +523,69 @@ def test_normalize_assignment_marks_completed_from_submission(self): assert assignment['workflow_state'] == 'submitted' assert assignment['submitted_at'] == '2026-03-18T11:00:00Z' + def test_get_course_modules_uses_direct_api(self): + """Module listing should use the direct Canvas REST path.""" + from tools.canvas_tools import CanvasTools + + with patch.dict('os.environ', { + 'CANVAS_API_URL': 'https://test.canvas.com', + 'CANVAS_API_TOKEN': 'test_token', + 'CANVAS_USE_MCP': 'true' + }): + tools = CanvasTools() + + 
with patch.object( + CanvasTools, + '_direct_get_course_modules', + return_value=[{'id': 9, 'name': 'Week 1', 'items': []}], + ) as direct_mock: + result = asyncio.run(tools.get_course_modules(123)) + + assert result == [{'id': 9, 'name': 'Week 1', 'items': []}] + direct_mock.assert_called_once_with(123) + + def test_search_course_module_context_ranks_relevant_items(self): + """Module context search should return only relevant ranked items.""" + from tools.canvas_tools import CanvasTools + + with patch.dict('os.environ', { + 'CANVAS_API_URL': 'https://test.canvas.com', + 'CANVAS_API_TOKEN': 'test_token', + }): + tools = CanvasTools() + modules = [ + { + 'id': 9, + 'name': 'Week 4', + 'items': [ + {'id': 11, 'title': 'Bayes page', 'type': 'Page'}, + {'id': 12, 'title': 'Decision trees', 'type': 'Page'}, + ], + } + ] + contexts = [ + { + 'document_name': 'Canvas Module: Week 4', + 'section_title': 'Bayes theorem', + 'text': 'Posterior update uses Bayes theorem.', + }, + { + 'document_name': 'Canvas Module: Week 4', + 'section_title': 'Decision trees', + 'text': 'Entropy and information gain.', + }, + ] + + with patch.object(CanvasTools, '_direct_get_course_modules', return_value=modules), patch.object( + CanvasTools, + '_module_item_to_context', + side_effect=contexts, + ): + result = asyncio.run(tools.search_course_module_context(123, 'posterior update', limit=2)) + + assert len(result) == 1 + assert result[0]['section_title'] == 'Bayes theorem' + class TestGitHubTools: """Test GitHub tools (mock tests).""" @@ -667,6 +732,51 @@ def test_run_passes_course_context_to_github_generation(self): ) assert result["course_context"] == context + def test_fetch_course_context_merges_document_and_module_sources(self): + from app.agent import CanvasGitHubAgent + + with patch.dict('os.environ', { + 'CANVAS_API_TOKEN': 'test_token', + 'GITHUB_TOKEN': 'test_gh_token', + 'GITHUB_USERNAME': 'testuser' + }): + agent = CanvasGitHubAgent() + assignment = { + "name": "Posterior 
Homework", + "description": "Implement Bayesian updating.", + } + document_context = [ + { + "document_name": "slides.pdf", + "section_title": "Bayes Review", + "text": "Posterior is proportional to prior times likelihood.", + }, + { + "document_name": "slides.pdf", + "section_title": "Law of Total Probability", + "text": "Marginalization over hypotheses.", + }, + ] + module_context = [ + { + "document_name": "Canvas Module: Week 4", + "section_title": "Bayes theorem page", + "text": "Module walkthrough of posterior updates.", + "item_type": "Page", + } + ] + + agent.course_context_tools.search_context = Mock(return_value=document_context) + agent.canvas_tools.search_course_module_context = AsyncMock(return_value=module_context) + + result = asyncio.run(agent.fetch_course_context(123, assignment, limit=3)) + + assert result == [ + document_context[0], + module_context[0], + document_context[1], + ] + def test_run_routes_writing_to_notion(self): """Run routes writing assignments to Notion page creation path.""" from app.agent import CanvasGitHubAgent diff --git a/tests/test_api.py b/tests/test_api.py index 901694b..195a127 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -19,6 +19,23 @@ async def list_courses(self): async def get_course_assignments(self, course_id): return [{"id": 42, "name": f"Assignment for {course_id}"}] + async def get_course_modules(self, course_id): + return [{"id": 7, "name": f"Module for {course_id}", "items": [{"id": 11, "title": "Bayes Review"}]}] + + async def search_course_module_context(self, course_id, query, limit): + return [ + { + "id": "module:123:7:11", + "course_id": course_id, + "document_name": "Canvas Module: Module for 123", + "module_name": f"Module for {course_id}", + "section_title": "Bayes Review", + "item_type": "Page", + "distance": 0.5, + "text": "Module: Bayes Review\n\nPosterior update explanation.", + } + ][:limit] + class StubAgentSuccess: async def run(self, **kwargs): @@ -64,6 +81,12 @@ async def 
list_courses(self): async def get_course_assignments(self, course_id): raise RuntimeError("sensitive assignment query failure") + async def get_course_modules(self, course_id): + raise RuntimeError("sensitive module listing failure") + + async def search_course_module_context(self, course_id, query, limit): + raise RuntimeError("sensitive module search failure") + class StubAgentError: async def run(self, **kwargs): @@ -338,6 +361,30 @@ def test_get_assignments_success(monkeypatch): } +def test_get_modules_success(monkeypatch): + monkeypatch.setattr(api, "CanvasTools", StubCanvasTools) + response = asyncio.run(_request("GET", "/courses/123/modules")) + + assert response.status_code == 200 + assert response.json()["modules"][0]["name"] == "Module for 123" + + +def test_search_course_modules_success(monkeypatch): + monkeypatch.setattr(api, "CanvasTools", StubCanvasTools) + response = asyncio.run( + _request( + "POST", + "/courses/123/modules/search", + {"query": "posterior update", "limit": 1}, + ) + ) + + assert response.status_code == 200 + payload = response.json() + assert payload["course_id"] == 123 + assert payload["results"][0]["section_title"] == "Bayes Review" + + def test_create_success(monkeypatch): monkeypatch.setattr(api, "CanvasGitHubAgent", StubAgentSuccess) response = asyncio.run( @@ -412,6 +459,28 @@ def test_get_assignments_sanitizes_internal_errors(monkeypatch): assert response.json()["detail"] == "Failed to fetch assignments." +def test_get_modules_sanitizes_internal_errors(monkeypatch): + monkeypatch.setattr(api, "CanvasTools", StubCanvasToolsError) + response = asyncio.run(_request("GET", "/courses/123/modules")) + + assert response.status_code == 500 + assert response.json()["detail"] == "Failed to fetch modules." 
+ + +def test_search_course_modules_sanitizes_internal_errors(monkeypatch): + monkeypatch.setattr(api, "CanvasTools", StubCanvasToolsError) + response = asyncio.run( + _request( + "POST", + "/courses/123/modules/search", + {"query": "posterior update", "limit": 1}, + ) + ) + + assert response.status_code == 500 + assert response.json()["detail"] == "Failed to search course modules." + + def test_create_sanitizes_internal_errors(monkeypatch): monkeypatch.setattr(api, "CanvasGitHubAgent", StubAgentError) response = asyncio.run(_request("POST", "/create", {"course_id": 123, "language": "python"})) diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 77ca673..771909f 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -28,6 +28,21 @@ async def list_courses(self): async def get_course_assignments(self, course_id): return [{"id": 42, "name": f"Assignment for {course_id}"}] + async def get_course_modules(self, course_id): + return [{"id": 7, "name": f"Module for {course_id}", "items": [{"id": 11, "title": "Bayes Review"}]}] + + async def search_course_module_context(self, course_id, query, limit): + return [ + { + "id": "module:123:7:11", + "course_id": course_id, + "document_name": "Canvas Module: Module for 123", + "section_title": "Bayes Review", + "item_type": "Page", + "text": "Module: Bayes Review\n\nPosterior update explanation.", + } + ][:limit] + class StubAgentSuccess: async def run(self, **kwargs): @@ -84,6 +99,8 @@ async def _exercise_registry(): assert [tool.name for tool in tools] == [ "list_courses", "list_assignments", + "list_course_modules", + "search_course_modules", "get_capabilities", "get_oasf_record", "ingest_course_document", @@ -109,6 +126,29 @@ def test_list_courses_tool(monkeypatch): } +def test_list_course_modules_tool(monkeypatch): + monkeypatch.setattr(api, "CanvasTools", StubCanvasTools) + + result = asyncio.run(mcp_server.server.call_tool("list_course_modules", {"course_id": 123})) + + payload = 
_decode_tool_payload(result) + assert payload["modules"][0]["name"] == "Module for 123" + + +def test_search_course_modules_tool(monkeypatch): + monkeypatch.setattr(api, "CanvasTools", StubCanvasTools) + + result = asyncio.run( + mcp_server.server.call_tool( + "search_course_modules", + {"course_id": 123, "query": "posterior update", "limit": 1}, + ) + ) + + payload = _decode_tool_payload(result) + assert payload["results"][0]["section_title"] == "Bayes Review" + + def test_create_destination_tool(monkeypatch): monkeypatch.setattr(api, "CanvasGitHubAgent", StubAgentSuccess) diff --git a/tools/canvas_tools.py b/tools/canvas_tools.py index 238c788..c937000 100644 --- a/tools/canvas_tools.py +++ b/tools/canvas_tools.py @@ -4,12 +4,15 @@ """ import os import json +import re import requests from typing import Dict, List, Optional, Any from mcp import ClientSession, StdioServerParameters from mcp.client.stdio import stdio_client from contextlib import asynccontextmanager +from scaffolding.templates import html_to_markdown + class CanvasTools: """Tools for interacting with Canvas LMS via the Smithery Canvas MCP server.""" @@ -69,6 +72,202 @@ def _direct_get_course_assignments(self, course_id: int) -> List[Dict[str, Any]] response.raise_for_status() assignments = response.json() return [self._normalize_assignment(assignment) for assignment in assignments] + + def _normalize_module_item(self, item: Dict[str, Any]) -> Dict[str, Any]: + return { + "id": item.get("id"), + "title": item.get("title") or item.get("page_url") or item.get("type", "Module Item"), + "type": item.get("type", "Unknown"), + "content_id": item.get("content_id"), + "page_url": item.get("page_url"), + "html_url": item.get("html_url"), + "url": item.get("url"), + "external_url": item.get("external_url"), + } + + def _normalize_module(self, module: Dict[str, Any]) -> Dict[str, Any]: + return { + "id": module.get("id"), + "name": module.get("name") or f"Module {module.get('id', '')}".strip(), + "position": 
def _direct_get_course_modules(self, course_id: int) -> List[Dict[str, Any]]:
    """Fetch all modules of a course, with their items, from the Canvas REST API.

    Canvas paginates list endpoints and documents that it may omit the inline
    ``items`` array for a module it considers too large, even when
    ``include[]=items`` is requested. This method therefore follows the
    ``Link: rel="next"`` header until it is exhausted, and loads the items of
    any module returned without an ``items`` key from the module-items
    endpoint.

    Args:
        course_id: Canvas course identifier.

    Returns:
        One normalized module dict (see ``_normalize_module``) per module.

    Raises:
        requests.HTTPError: If Canvas answers any request with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    modules_url = f"{self.canvas_url.rstrip('/')}/api/v1/courses/{course_id}/modules"
    headers = self._canvas_headers()

    def fetch_all_pages(url, params):
        """Collect the JSON records of every page, starting at ``url``."""
        records: List[Dict[str, Any]] = []
        while url:
            response = requests.get(url, headers=headers, params=params, timeout=30)
            response.raise_for_status()
            records.extend(response.json())
            # The "next" link already carries the full query string, so the
            # original params must not be sent again.
            url = response.links.get("next", {}).get("url")
            params = None
        return records

    modules = fetch_all_pages(modules_url, [("per_page", 100), ("include[]", "items")])

    for module in modules:
        # A missing key means Canvas declined to inline the items; an empty
        # list means the module really has none and needs no extra request.
        if "items" not in module and module.get("id") is not None:
            module["items"] = fetch_all_pages(
                f"{modules_url}/{module['id']}/items",
                [("per_page", 100)],
            )

    return [self._normalize_module(module) for module in modules]
Dict[str, Any], + item: Dict[str, Any], + text: str, + ) -> Dict[str, Any]: + section_title = item.get("title") or item.get("page_url") or item.get("type", "Module Item") + document_name = f"Canvas Module: {module.get('name', 'Course Module')}" + return { + "id": f"module:{course_id}:{module.get('id')}:{item.get('id')}", + "course_id": course_id, + "document_id": f"module-{module.get('id')}", + "document_name": document_name, + "module_id": module.get("id"), + "module_name": module.get("name"), + "item_id": item.get("id"), + "item_type": item.get("type"), + "section_title": section_title, + "source_kind": "canvas_module", + "source_path": item.get("html_url") or item.get("external_url") or item.get("url"), + "text": text.strip(), + } + + def _module_item_to_context( + self, + course_id: int, + module: Dict[str, Any], + item: Dict[str, Any], + ) -> Optional[Dict[str, Any]]: + title = item.get("title") or item.get("page_url") or item.get("type", "Module Item") + item_type = item.get("type", "Unknown") + text_parts = [ + f"Module: {module.get('name', 'Course Module')}", + f"Item: {title}", + f"Type: {item_type}", + ] + + if item_type == "Page" and item.get("page_url"): + page = self._direct_get_page(course_id, item["page_url"]) + title = page.get("title") or title + body = html_to_markdown(page.get("body", "")) + if body: + text_parts.append(body) + elif item_type == "Assignment" and item.get("content_id"): + assignment = self._direct_get_assignment(course_id, int(item["content_id"])) + title = assignment.get("name") or title + description = html_to_markdown(assignment.get("description", "")) + if description: + text_parts.append(description) + if assignment.get("due_at"): + text_parts.append(f"Due: {assignment['due_at']}") + elif item_type == "Discussion" and item.get("content_id"): + discussion = self._direct_get_discussion_topic(course_id, int(item["content_id"])) + title = discussion.get("title") or title + message = html_to_markdown(discussion.get("message", "")) 
+ if message: + text_parts.append(message) + elif item_type == "ExternalUrl" and item.get("external_url"): + text_parts.append(f"External URL: {item['external_url']}") + elif item_type == "File": + file_url = item.get("html_url") or item.get("url") + if file_url: + text_parts.append(f"File URL: {file_url}") + + item_with_title = dict(item) + item_with_title["title"] = title + text = "\n\n".join(part.strip() for part in text_parts if part and part.strip()) + if not text.strip(): + return None + return self._build_module_context_entry(course_id=course_id, module=module, item=item_with_title, text=text) + + def _score_module_context(self, query_terms: list[str], entry: Dict[str, Any]) -> int: + haystack = " ".join( + [ + entry.get("document_name", ""), + entry.get("section_title", ""), + entry.get("text", ""), + ] + ).lower() + return sum(1 for term in query_terms if term in haystack) + + def _direct_search_course_module_context( + self, + course_id: int, + query: str, + limit: int = 5, + ) -> List[Dict[str, Any]]: + query_terms = self._query_terms(query) + if not query_terms: + return [] + + modules = self._direct_get_course_modules(course_id) + scored: list[tuple[int, Dict[str, Any]]] = [] + + for module in modules: + for item in module.get("items", []): + try: + entry = self._module_item_to_context(course_id, module, item) + except requests.RequestException: + entry = self._build_module_context_entry( + course_id=course_id, + module=module, + item=item, + text="\n\n".join( + part for part in [ + f"Module: {module.get('name', 'Course Module')}", + f"Item: {item.get('title') or item.get('type', 'Module Item')}", + f"Type: {item.get('type', 'Unknown')}", + ] + if part + ), + ) + + if not entry: + continue + + score = self._score_module_context(query_terms, entry) + if score <= 0: + continue + + entry["distance"] = round(1 / score, 4) + scored.append((score, entry)) + + scored.sort(key=lambda pair: (-pair[0], pair[1].get("document_name", ""), 
async def get_course_modules(self, course_id: int) -> List[Dict[str, Any]]:
    """Return Canvas modules and normalized module items for a course.

    The underlying fetch is synchronous, so it runs in a worker thread to
    keep the event loop free while the HTTP request is in flight.

    Args:
        course_id: Canvas course identifier.

    Returns:
        Whatever ``_direct_get_course_modules`` returns for the course.
    """
    import asyncio  # local import: this block does not rely on a module-level asyncio import

    return await asyncio.to_thread(self._direct_get_course_modules, course_id)


async def search_course_module_context(
    self,
    course_id: int,
    query: str,
    limit: int = 5,
) -> List[Dict[str, Any]]:
    """Search Canvas course module content for assignment-relevant context.

    The synchronous search issues blocking HTTP requests, so it is offloaded
    to a worker thread. Awaiting it then no longer stalls other coroutines
    sharing the event loop.

    Args:
        course_id: Canvas course identifier.
        query: Free-text query, typically assignment name plus description.
        limit: Maximum number of matches requested.

    Returns:
        Whatever ``_direct_search_course_module_context`` returns.
    """
    import asyncio  # local import: this block does not rely on a module-level asyncio import

    return await asyncio.to_thread(
        self._direct_search_course_module_context,
        course_id,
        query,
        limit,
    )