Open-Source-Legal
diff --git a/‎CHANGELOG.md‎
Lines changed: 18 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 18 additions & 0 deletions
diff --git a/‎docs/assets/images/screenshots/auto/corpus--agent-management--agent-row.png‎
55.3 KB b/‎docs/assets/images/screenshots/auto/corpus--agent-management--agent-row.png‎
55.3 KB
diff --git a/‎docs/assets/images/screenshots/auto/corpus--chat--conversation-list.png‎
2.36 KB b/‎docs/assets/images/screenshots/auto/corpus--chat--conversation-list.png‎
2.36 KB
diff --git a/‎docs/assets/images/screenshots/auto/corpus--chat--empty.png‎
3.59 KB b/‎docs/assets/images/screenshots/auto/corpus--chat--empty.png‎
3.59 KB
diff --git a/‎docs/assets/images/screenshots/auto/corpus--chat--new-chat.png‎
-114 Bytes b/‎docs/assets/images/screenshots/auto/corpus--chat--new-chat.png‎
-114 Bytes
diff --git a/‎docs/assets/images/screenshots/auto/corpus--create-action-modal--fieldset-default.png‎
48.1 KB b/‎docs/assets/images/screenshots/auto/corpus--create-action-modal--fieldset-default.png‎
48.1 KB
diff --git a/‎docs/assets/images/screenshots/auto/corpus--description-editor--loaded.png‎
53.5 KB b/‎docs/assets/images/screenshots/auto/corpus--description-editor--loaded.png‎
53.5 KB
diff --git a/‎frontend/package.json‎
Lines changed: 1 addition & 1 deletion b/‎frontend/package.json‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎frontend/src/assets/configurations/constants.ts‎
Lines changed: 15 additions & 0 deletions b/‎frontend/src/assets/configurations/constants.ts‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎frontend/src/components/corpuses/CamlArticleEditor.tsx‎
Lines changed: 40 additions & 17 deletions b/‎frontend/src/components/corpuses/CamlArticleEditor.tsx‎
Lines changed: 40 additions & 17 deletions
@@ -7,6 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Added
+
+- **Mypy: graduated `opencontractserver/users/tasks.py` out of the baseline** (Issue #1333 follow-up): `tasks.py` was the last `opencontractserver.users` module still suppressed in `mypy.ini`. PR #1370 left it untyped because the file is only loaded when `settings.USE_AUTH0=True`, so it never failed at runtime under the test settings; the typing gap kept the package short of the issue's "all four packages at ≥80% return-annotation coverage" Done-When criterion. Added return + parameter annotations to all five Auth0 sync tasks (`get_new_auth0_token`, `apply_data_to_user`, `sync_remote_user`, `ensure_valid_auth0_token`, `get_user_details_async`), introduced a module-level docstring documenting the `USE_AUTH0` gating, and removed the `[mypy-opencontractserver.users.tasks] ignore_errors = True` section. The local `data` variable, which was rebound from the request body (`dict[str, str]`) to the response payload (`dict[str, Any]`), was split into two distinctly named variables (`request_data` / `payload`) so the types are unambiguous; behavior is unchanged. No callers needed updating — `config/graphql_auth0_auth/utils.py` still consumes `sync_remote_user.delay(...)` exactly as before.
+
+### Fixed
+
+- **`test_superuser_sees_all_queryset` miscounts personal corpuses by 1** (Issue #1394, `opencontractserver/tests/test_visibility_managers.py`, `opencontractserver/tests/test_resolvers.py`): Two `VisibleToUserTests.test_superuser_sees_all_queryset` cases asserted that `Corpus.objects.visible_to_user(superuser).count() == 4` (public + private + 2 personal), but the actual count is 5 because the test DB starts with a pre-existing personal corpus owned by django-guardian's `AnonymousUser` (created during fixture setup before/around the username-based skip in `opencontractserver/users/signals.py::user_created_signal`). The assertion is now scoped to corpuses created by the test's two users (`creator__in=[self.user, self.superuser]`), making it resilient to any fixture-level corpuses that exist at test DB init time. Production code is unchanged.
+- **Merged `frontend` Codecov flag drops to ~33% on every commit where Frontend CI's CT job fails** (`frontend/package.json` `test:coverage:ct`): the script chained `playwright test ... && mkdir -p ... && nyc report ...`, so a failing CT run short-circuited before `nyc report` could turn the per-test JSON files in `.nyc_output` into an `lcov.info`. The downstream `Upload CT Coverage to Codecov` step (`if: success() || failure()`) then errored with "No coverage reports found" and `frontend-component` did not upload for that SHA. Codecov's server-side aggregation of the `frontend` flag was left with only `frontend-unit` (~23%) and `frontend-e2e` (~24%), pulling the merged number down to ~33% even though the previous commit was at ~67% — observed on six consecutive main commits 2026-04-26T01:02..02:58Z (`2d7033f8`..`be5bcfc8`) before recovering on `30298391`. Mirrored the existing `test:e2e:coverage` pattern (`; CT_EXIT=$?; nyc report ... || echo "No coverage data to report"; exit $CT_EXIT`) so `nyc report` runs regardless of test outcome and the lcov ships even on red CT runs. `frontend-component` will still report a slightly lower number when tests fail (failed tests register fewer hits), but it will report — keeping the merged `frontend` flag's denominator stable.
+- **`User.__init__` shared-state mutation re-introduced by branch merge** (`opencontractserver/users/models.py:172-180` removed): PR #1374 (commit `50ed6740`) deleted the `User.__init__` override that mutated `Field.validators[0]` on every instantiation, but a subsequent merge (`b68c1cb4 → 6d2cddbf`) resurrected the override along with its mypy-narrowing changes. The current main on commit `6d2cddbf` therefore reproduced the original `#1358` bug: `User(...)` rebound `username_field.validators[0]` and clobbered any third-party validator prepended to the list. Removed the `__init__` override entirely; the class-body declaration `validators=[UserUnicodeUsernameValidator()]` on the `username` field (still present from PR #1374) is the canonical and only declaration. Also dropped the now-unused `Field` import. Regression coverage from PR #1374 (`opencontractserver/tests/test_user_username_validator.py`) was already on main and is what surfaced the regression in CI.
+
 ### Security
 
 - **Cross-corpus structural-annotation leak in `CoreAnnotationVectorStore`** (`opencontractserver/llms/vector_stores/core_vector_stores.py:296-326,371-413`): The corpus-wide retrieval path (`corpus_id` set, `document_id=None`) returned every structural annotation in the database regardless of corpus. Two collaborating defects caused the leak:
@@ -36,6 +46,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   - **Known remaining bugs surfaced by mypy** (filed as separate issues per the scope rules of #1332):
     - #1359 — `RemoveLabelsFromLabelsetMutation` calls non-existent `labelset.documents`. Silent runtime failure (swallowed by a broad `except Exception`). Blocks `config.graphql.label_mutations` graduation; one-line fix + test needed.
     - #1360 — `DRFMutation.IOSettings` declares `model: django.db.models.Model = None` and `serializer = None`. Non-trivial refactor of the base mutation class; blocks `config.graphql.base` graduation.
+- **Coverage: raise Corpus Chat & Agent Management component tests** (Issue #1276): added 36 new Playwright CT tests across the four lowest-ROI corpus components to drive coverage toward the ≥60% target. Breakdown:
+  - `frontend/tests/CorpusChat.ct.tsx` (+13 tests): `initialQuery` auto-send, tool-call timeline entries (ASYNC_THOUGHT), ASYNC_SOURCES merge, SYNC_CONTENT rendering, ASYNC_RESUME, ask_document sub-tool approval remapping, unknown-type default branch, back-to-list navigation, server-message-with-sources rendering, title-filter debounce, and additional navigation-header coverage. Extended the shared `StubSocket` in `beforeEach` with new query-triggered frame sequences.
+  - `frontend/tests/CreateCorpusActionModal.ct.tsx` (+8 tests): analyzer-path validation, inline-agent validation (empty name / empty instructions), existing-agent-selection validation, successful inline-agent mutation, backend error toast, analyzer edit-mode pre-population, and legacy trigger-casing normalization fallback.
+  - `frontend/tests/CorpusAgentManagement.ct.tsx` (+8 tests): query loading state, query error state, multi-tool badge overflow, inactive-status badge, update-mutation happy path, create-mutation backend-error toast, tool deselection, and edit-modal cancel.
+  - `frontend/tests/CorpusDescriptionEditor.ct.tsx` (+7 tests): save failure (`ok: false`), save network-error path, reapply of snapshot-less version, twice-click collapse, Cancel Version Edit reset, fetch-md URL failure, and version-count pluralization.
+  - **Follow-up review polish**: moved `DEFAULT_DOCUMENT_AGENT_INSTRUCTIONS` from `CreateCorpusActionModal.tsx` into `frontend/src/assets/configurations/constants.ts` so both default-instruction strings (moderator + document agent) live in the single constants module per the project's no-magic-strings rule.
 - **Return-type annotations across core models and import/export pipeline** (Issue #1334, follow-up to #1331): The mypy gate wired in by #1331 recorded a 7208-error baseline frozen across 357 files. This PR pays down the annotation deficit on the core domain models and the bulk import/export tasks without touching runtime behavior or adding validators. Coverage jumped from the pre-issue numbers to:
   - `opencontractserver/corpuses/` 61.5% → 88.4% (target ≥80%)
   - `opencontractserver/annotations/` 48.1% → 93.8% (target ≥80%)
@@ -86,6 +102,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Fixed
 
+- **`CorpusChat` dropped `SYNC_CONTENT` messages from the visible chat** (Issue #1276, `frontend/src/components/corpuses/CorpusChat.tsx:468-505`): The `SYNC_CONTENT` WebSocket frame is a standalone, non-streaming assistant reply used for synchronous server responses. `ChatTray` (document chat) appends these directly to its `chat` state; the corpus-level chat only forwarded the content to `handleCompleteMessage`, which stores sources in `ChatSourceAtom` but never pushes a message to the visible list. As a result, any `SYNC_CONTENT` the backend sent over the corpus socket rendered nothing. Fixed by mirroring the `ChatTray` pattern — push a new complete assistant message into `chat` before persisting sources/timeline. The fallback `crypto.randomUUID()` is also now captured in a single local variable so the visible chat entry and the `ChatSourceAtom` record share the same id when the server omits `message_id`. New regression test in `frontend/tests/CorpusChat.ct.tsx` ("SYNC_CONTENT renders a complete message immediately") pins the behavior.
+- **CAML article preview crashed when inserting an extract grid embed** (`frontend/src/utils/camlComponents.ts`, `frontend/src/hooks/useCamlComponentRenderer.tsx`, `frontend/src/components/corpuses/CamlArticleEditor.tsx`, `frontend/src/components/corpuses/caml/CamlDirectiveRenderer.tsx`): the editor wrapped each newly-inserted `[component:TYPE ...]` marker in a `::: prose` fence, but `@os-legal/caml`'s parser has no `case "prose"` in `parseBlock`, so the resulting block carried `body` instead of `content`. `ProseBlock` then crashed inside `splitPullquotes(undefined)`, which unmounted the entire editor modal and made the "ArrowDown then Enter inserts the extract-grid component marker" CT test fail. Switched the fence to a project-specific `::: oc-component` block and routed it through `CamlArticle`'s `customBlocks` slot, where the marker text is handed back to the existing `[component:...]` resolver. The keyboard handler in `CamlArticleEditor` was also tightened to read the active picker index from a `useRef` mirror so back-to-back ArrowDown/Enter keystrokes don't observe a stale closure value of `-1` and bail out before insertion.
 - **PR #1177 follow-up: CAML extract embed polish** (Issue #1227):
   - **`fullDatacellList` payload now bounded server-side**: `ExtractType.full_datacell_list` accepts optional `limit` / `offset` arguments and the resolver clamps `limit` to `MAX_FULL_DATACELL_LIST_LIMIT` (`opencontractserver/constants/extracts.py`, currently `500`) after permission filtering (`config/graphql/extract_types.py`). `GET_EXTRACT_GRID_EMBED` passes `limit: EXTRACT_GRID_EMBED_CELL_LIMIT` (mirrored at `500` in `frontend/src/assets/configurations/constants.ts`) so pathological extracts no longer transmit thousands of cells just to trigger the too-many-rows guard (`frontend/src/graphql/queries.ts`, `frontend/src/components/extracts/ExtractGridEmbed.tsx`). Full server-side pagination is still tracked in #1204.
   - **`resolveComponentMarker` now receives a stable React key from both call sites**: `useCamlComponentRenderer` and `CamlDirectiveRenderer` pass the marker string as the `key` argument so multiple `[component:...]` blocks in a single article reconcile correctly without React's "missing key prop" warnings (`frontend/src/hooks/useCamlComponentRenderer.tsx`, `frontend/src/components/corpuses/caml/CamlDirectiveRenderer.tsx`). Added regression tests in `frontend/src/utils/__tests__/camlComponents.test.ts`.
 
@@ -100,7 +100,7 @@
     "test:e2e": "playwright test",
     "test:e2e:coverage": "COVERAGE=true playwright test; E2E_EXIT=$?; mkdir -p coverage/e2e/.nyc_output && nyc report --all --reporter=lcov --reporter=text --report-dir=coverage/e2e --temp-dir=coverage/e2e/.nyc_output || echo 'No coverage data to report'; exit $E2E_EXIT",
     "test:coverage:unit": "vitest run --coverage --watch=false",
-    "test:coverage:ct": "COVERAGE=true playwright test -c playwright-ct.config.ts --reporter=list && mkdir -p coverage/ct/.nyc_output && nyc report --all --reporter=lcov --reporter=text --report-dir=coverage/ct --temp-dir=coverage/ct/.nyc_output",
+    "test:coverage:ct": "COVERAGE=true playwright test -c playwright-ct.config.ts --reporter=list; CT_EXIT=$?; mkdir -p coverage/ct/.nyc_output && nyc report --all --reporter=lcov --reporter=text --report-dir=coverage/ct --temp-dir=coverage/ct/.nyc_output || echo 'No coverage data to report'; exit $CT_EXIT",
     "lint": "prettier . --check --ignore-unknown",
     "fix-styles": "prettier . --check --write --ignore-unknown",
     "prepare": "cd .. && husky install frontend/.husky",
 
@@ -529,3 +529,18 @@ export const TRIGGER_LABELS: Record<string, string> = {
   new_thread: "On Thread",
   new_message: "On Message",
 } as const;
+
+// Default agent task instructions used when creating a thread-moderation
+// CorpusAction (rendered as the placeholder/initial value in the modal).
+export const DEFAULT_MODERATOR_INSTRUCTIONS = `You are a thread moderator for this corpus. Your role is to:
+1. Monitor discussion threads and messages for policy compliance
+2. Take appropriate moderation actions when needed
+3. Respond helpfully to user questions when appropriate
+
+You have access to thread context, messages, and moderation tools. Use them judiciously.`;
+
+// Default agent task instructions used when creating a document-processing
+// CorpusAction (rendered as the initial value when an add/edit document
+// trigger is selected).
+export const DEFAULT_DOCUMENT_AGENT_INSTRUCTIONS =
+  "You are a document processing agent for this corpus.";
@@ -373,6 +373,11 @@ export const CamlArticleEditor: React.FC<CamlArticleEditorProps> = ({
   // Index of the keyboard-focused option within the extract picker dropdown.
   // `-1` means no option is focused (initial state when the dropdown opens).
   const [activeExtractIndex, setActiveExtractIndex] = useState<number>(-1);
+  // Mirror of `activeExtractIndex` in a ref so that the keyboard handler can
+  // read the latest value when consecutive keystrokes (e.g. ArrowDown then
+  // Enter) arrive faster than React schedules a re-render. Without this, the
+  // Enter handler would observe a stale closure value of `-1` and bail out.
+  const activeExtractIndexRef = useRef<number>(-1);
   const textareaRef = useRef<HTMLTextAreaElement>(null);
   const extractPickerRef = useRef<HTMLDivElement>(null);
   const extractPickerTriggerRef = useRef<HTMLButtonElement>(null);
@@ -545,9 +550,11 @@ export const CamlArticleEditor: React.FC<CamlArticleEditorProps> = ({
   }, [showExtractPicker]);
 
   // Reset the keyboard-focused option whenever the picker closes so the next
-  // open starts in a clean state.
+  // open starts in a clean state. The ref mirror is reset alongside state so
+  // the next ArrowDown reads the fresh `-1` value synchronously.
   useEffect(() => {
     if (!showExtractPicker) {
+      activeExtractIndexRef.current = -1;
       setActiveExtractIndex(-1);
     }
   }, [showExtractPicker]);
@@ -600,43 +607,57 @@ export const CamlArticleEditor: React.FC<CamlArticleEditorProps> = ({
       if (!showExtractPicker) return;
       const count = corpusExtracts.length;
 
+      // Helper: update both state (drives render) and ref (drives next
+      // synchronous keystroke). The ref read in the Enter case below would
+      // otherwise observe a stale `-1` if Enter arrives in the same tick as
+      // a preceding ArrowDown.
+      const updateActiveIndex = (next: number) => {
+        activeExtractIndexRef.current = next;
+        setActiveExtractIndex(next);
+      };
+
       switch (event.key) {
         case "Escape":
           event.preventDefault();
           setShowExtractPicker(false);
           extractPickerTriggerRef.current?.focus();
           break;
-        case "ArrowDown":
+        case "ArrowDown": {
           if (count === 0) return;
           event.preventDefault();
-          setActiveExtractIndex((prev) => (prev + 1 >= count ? 0 : prev + 1));
+          const prev = activeExtractIndexRef.current;
+          updateActiveIndex(prev + 1 >= count ? 0 : prev + 1);
           break;
-        case "ArrowUp":
+        }
+        case "ArrowUp": {
           if (count === 0) return;
           event.preventDefault();
           // `prev <= 0` covers both `0` (first item → wrap to last) and `-1`
           // (no item focused → jump to last). This is intentional per WAI-ARIA
           // Authoring Practices for listbox keyboard interaction.
-          setActiveExtractIndex((prev) => (prev <= 0 ? count - 1 : prev - 1));
+          const prev = activeExtractIndexRef.current;
+          updateActiveIndex(prev <= 0 ? count - 1 : prev - 1);
           break;
+        }
         case "Home":
           if (count === 0) return;
           event.preventDefault();
-          setActiveExtractIndex(0);
+          updateActiveIndex(0);
           break;
         case "End":
           if (count === 0) return;
           event.preventDefault();
-          setActiveExtractIndex(count - 1);
+          updateActiveIndex(count - 1);
           break;
         case "Enter": {
           if (count === 0) return;
           // Only act when a menu option is focused — otherwise let the
           // default button behaviour on the trigger toggle the picker.
-          if (activeExtractIndex < 0 || activeExtractIndex >= count) return;
+          const current = activeExtractIndexRef.current;
+          if (current < 0 || current >= count) return;
           event.preventDefault();
           event.stopPropagation();
-          const selected = corpusExtracts[activeExtractIndex];
+          const selected = corpusExtracts[current];
           if (selected) {
             // handleInsertComponent already calls setShowExtractPicker(false)
             // internally, so the picker is closed as part of the insertion.
@@ -649,16 +670,14 @@ export const CamlArticleEditor: React.FC<CamlArticleEditorProps> = ({
           break;
       }
     },
-    [
-      showExtractPicker,
-      corpusExtracts,
-      activeExtractIndex,
-      handleInsertComponent,
-    ]
+    [showExtractPicker, corpusExtracts, handleInsertComponent]
   );
 
   // Markdown renderer with generic component marker interception
-  const renderMarkdownPreview = useCamlComponentRenderer(CAML_COMPONENTS);
+  const {
+    renderMarkdown: renderMarkdownPreview,
+    customBlocks: previewCustomBlocks,
+  } = useCamlComponentRenderer(CAML_COMPONENTS);
 
   const handleClose = () => {
     if (hasChanges) {
@@ -744,7 +763,10 @@ export const CamlArticleEditor: React.FC<CamlArticleEditorProps> = ({
                           key={ext.id}
                           $active={index === activeExtractIndex}
                           aria-selected={false}
-                          onMouseEnter={() => setActiveExtractIndex(index)}
+                          onMouseEnter={() => {
+                            activeExtractIndexRef.current = index;
+                            setActiveExtractIndex(index);
+                          }}
                           onClick={() =>
                             handleInsertComponent("extract-grid", {
                               extractId: ext.id,
@@ -778,6 +800,7 @@ export const CamlArticleEditor: React.FC<CamlArticleEditorProps> = ({
                 <CamlArticle
                   document={parsedDocument}
                   renderMarkdown={renderMarkdownPreview}
+                  customBlocks={previewCustomBlocks}
                 />
               </CamlThemeProvider>
             )}