From a8a964e572b1a3013df5b345d5b7ed108a1acda1 Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Sun, 3 May 2026 16:40:17 +0000 Subject: [PATCH] fix(ui): render document content + entity graph; nil-safe embedder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three fixes for issues surfaced when running through the indexed-corpus flow end-to-end. Backend - Add `GET /api/documents/{id}/chunks` returning the ordered chunks for a document (id, chunk_index, content, token_count). The store already exposes `ListChunksByDoc`; this is a thin handler around it. - Add `GET /api/graph?project=...&type=...&limit=...` returning the full entity graph as `{nodes, edges}` matching the UI's existing RawGraphResponse shape, so the same GraphCanvas can render it without a parallel transform. - Skip the embedding phase in `pipeline.indexFile` when `p.embedder` is nil (provider=none / graph-only flow). Previously every CLI index run with `DOCSIQ_LLM_PROVIDER=none` panicked with a nil-pointer in `(*Embedder).EmbedTexts`, contradicting the CLAUDE.md guarantee that the embedder is nil-safe in this mode. Chunks are still persisted; downstream extraction works off raw text rather than vectors. Frontend - New `useDocChunks` hook fetching the chunks endpoint above. - `DocumentView` now renders chunk content. For markdown documents we pipe through `markdown-it` (already a dep) for HTML; otherwise we fall back to a `<pre>` of the raw text. The previous view rendered
  only `title + doc_type · v<version>`, leaving the document body blank — the
  symptom users were hitting after `docsiq index`.
- New `useEntityGraph` hook fetching `/api/graph` and adapting it to
  the existing `GraphData` interface.
- `Graph.tsx` now loads both the entity graph (from the indexing
  pipeline) and the notes graph (wikilinks between authored notes),
  defaults to the entity graph when it has nodes, and exposes a small
  toggle so users can flip between the two. The previous route was
  hard-wired to `useNotesGraph` only, so an indexed corpus with no
  hand-authored notes always showed the empty state.

Adjacent issues surfaced but **not** fixed here (separate PRs):
- `./docsiq index --force` collides on `documents.file_hash UNIQUE`
  because the supersede path bumps version but doesn't release the old
  hash.
- `/api/graph/neighborhood` requires the entity *name* (case-sensitive
  via `GetEntityByName`); UUIDs return 404. Worth accepting either.
---
 internal/api/handlers.go                 | 82 +++++++++++++++++++++++
 internal/api/router.go                   |  2 +
 internal/pipeline/pipeline.go            | 22 ++++---
 ui/src/hooks/api/keys.ts                 |  2 +
 ui/src/hooks/api/useDocs.ts              | 20 ++++++
 ui/src/hooks/api/useGraph.ts             | 26 ++++++++
 ui/src/routes/Graph.tsx                  | 83 +++++++++++++++++++-----
 ui/src/routes/documents/DocumentView.tsx | 57 +++++++++++++---
 8 files changed, 260 insertions(+), 34 deletions(-)

diff --git a/internal/api/handlers.go b/internal/api/handlers.go
index 0cb6046..a7c1bdc 100644
--- a/internal/api/handlers.go
+++ b/internal/api/handlers.go
@@ -191,6 +191,88 @@ func (h *handlers) getDocument(w http.ResponseWriter, r *http.Request) {
 	writeJSON(w, 200, doc)
 }
 
+func (h *handlers) getDocumentChunks(w http.ResponseWriter, r *http.Request) { // GET /api/documents/{id}/chunks: writes the document's chunks as a JSON array.
+	st, ok := h.resolveStore(w, r)
+	if !ok {
+		return // presumably resolveStore has already written the error response - confirm
+	}
+	id := r.PathValue("id") // the {id} wildcard of the route pattern
+	doc, err := st.GetDocument(r.Context(), id) // looked up first so an unknown id yields 404 instead of an empty list
+	if err != nil {
+		writeError(w, r, 500, err.Error(), err)
+		return
+	}
+	if doc == nil {
+		writeError(w, r, 404, "document not found", nil)
+		return
+	}
+	chunks, err := st.ListChunksByDoc(r.Context(), id)
+	if err != nil {
+		writeError(w, r, 500, err.Error(), err)
+		return
+	}
+	out := make([]map[string]any, 0, len(chunks)) // non-nil even when empty; encodes as [] rather than null, assuming writeJSON uses encoding/json
+	for _, c := range chunks {
+		out = append(out, map[string]any{ // only these four chunk fields are exposed
+			"id":          c.ID,
+			"chunk_index": c.ChunkIndex,
+			"content":     c.Content,
+			"token_count": c.TokenCount,
+		})
+	}
+	writeJSON(w, 200, out)
+}
+
+func (h *handlers) entityGraph(w http.ResponseWriter, r *http.Request) { // GET /api/graph: writes the entity graph as {"nodes": [...], "edges": [...]}.
+	st, ok := h.resolveStore(w, r)
+	if !ok {
+		return // presumably resolveStore has already written the error response - confirm
+	}
+	q := r.URL.Query()
+	limit := intQuery(q.Get("limit"), 500) // 500 is the fallback handed to intQuery; presumably used when "limit" is absent or invalid - confirm
+	typ := q.Get("type") // entity type filter, forwarded to ListEntities unchanged ("" when absent)
+
+	entities, err := st.ListEntities(r.Context(), typ, limit, 0) // trailing 0 is presumably the offset - confirm against the store API
+	if err != nil {
+		writeError(w, r, 500, err.Error(), err)
+		return
+	}
+	rels, err := st.AllRelationships(r.Context()) // relationships are loaded in full; "type" and "limit" only restrict entities
+	if err != nil {
+		writeError(w, r, 500, err.Error(), err)
+		return
+	}
+
+	nodes := make([]map[string]any, 0, len(entities))
+	keep := make(map[string]bool, len(entities)) // IDs of the returned entities; used to filter edges below
+	for _, e := range entities {
+		keep[e.ID] = true
+		nodes = append(nodes, map[string]any{
+			"id":          e.ID,
+			"label":       e.Name,
+			"kind":        "entity", // constant tag on every node
+			"type":        e.Type,
+			"description": e.Description,
+			"rank":        e.Rank,
+			"community":   e.CommunityID,
+		})
+	}
+	edges := make([]map[string]any, 0) // non-nil so an edgeless graph encodes as [] rather than null, assuming writeJSON uses encoding/json
+	for _, rel := range rels {
+		if !keep[rel.SourceID] || !keep[rel.TargetID] { // drop edges with an endpoint outside the returned node set
+			continue
+		}
+		edges = append(edges, map[string]any{
+			"id":     rel.ID,
+			"source": rel.SourceID,
+			"target": rel.TargetID,
+			"label":  rel.Predicate,
+			"weight": rel.Weight,
+		})
+	}
+	writeJSON(w, 200, map[string]any{"nodes": nodes, "edges": edges})
+}
+
 type searchRequest struct {
 	Query          string `json:"query"`
 	Mode           string `json:"mode"` // local | global
diff --git a/internal/api/router.go b/internal/api/router.go
index d3efad8..c052afe 100644
--- a/internal/api/router.go
+++ b/internal/api/router.go
@@ -169,8 +169,10 @@ func NewRouter(prov llm.Provider, emb *embedder.Embedder, cfg *config.Config, re
 	mux.HandleFunc("GET /api/stats", h.getStats)
 	mux.HandleFunc("GET /api/documents", h.listDocuments)
 	mux.HandleFunc("GET /api/documents/{id}", h.getDocument)
+	mux.HandleFunc("GET /api/documents/{id}/chunks", h.getDocumentChunks)
 	mux.HandleFunc("GET /api/documents/{id}/versions", h.getDocumentVersions)
 	mux.HandleFunc("POST /api/search", h.search)
+	mux.HandleFunc("GET /api/graph", h.entityGraph)
 	mux.HandleFunc("GET /api/graph/neighborhood", h.graphNeighborhood)
 	mux.HandleFunc("GET /api/entities", h.listEntities)
 	mux.HandleFunc("GET /api/communities", h.listCommunities)
diff --git a/internal/pipeline/pipeline.go b/internal/pipeline/pipeline.go
index 75243b0..65e6fd5 100644
--- a/internal/pipeline/pipeline.go
+++ b/internal/pipeline/pipeline.go
@@ -313,15 +313,21 @@ func (p *Pipeline) indexFile(ctx context.Context, path string, opts IndexOptions
 		return fmt.Errorf("batch insert chunks: %w", err)
 	}
 
-	// Phase 1c: Embed chunks
-	vecs, err := p.embedder.EmbedTexts(ctx, texts)
-	if err != nil {
-		return fmt.Errorf("embed: %w", err)
-	}
-	slog.Debug("📊 chunks embedded", "path", path, "chunks", len(vecs))
+	// Phase 1c: Embed chunks. Skip when the embedder is nil (provider=none /
+	// graph-only flow); chunks are still persisted, downstream extraction
+	// uses raw text rather than vectors. CLAUDE.md guarantees this no-op path.
+	if p.embedder != nil {
+		vecs, err := p.embedder.EmbedTexts(ctx, texts)
+		if err != nil {
+			return fmt.Errorf("embed: %w", err)
+		}
+		slog.Debug("📊 chunks embedded", "path", path, "chunks", len(vecs))
 
-	if err := p.store.BatchUpsertEmbeddings(ctx, p.provider.ModelID(), chunkIDs, vecs); err != nil {
-		return fmt.Errorf("batch store embeddings: %w", err)
+		if err := p.store.BatchUpsertEmbeddings(ctx, p.provider.ModelID(), chunkIDs, vecs); err != nil {
+			return fmt.Errorf("batch store embeddings: %w", err)
+		}
+	} else {
+		slog.Debug("⏭️ skipping embedding (provider=none)", "path", path, "chunks", len(texts))
 	}
 
 	// Phase 2: Run graph extraction, claims, and structured doc in parallel
diff --git a/ui/src/hooks/api/keys.ts b/ui/src/hooks/api/keys.ts
index d3f7b50..16b27e4 100644
--- a/ui/src/hooks/api/keys.ts
+++ b/ui/src/hooks/api/keys.ts
@@ -8,6 +8,8 @@ export const qk = {
   notesSearch: (project: string, q: string) => ["notes-search", project, q] as const,
   docs: (project: string) => ["docs", project] as const,
   doc: (project: string, id: string) => ["doc", project, id] as const,
+  docChunks: (project: string, id: string) => ["doc-chunks", project, id] as const,
+  entityGraph: (project: string) => ["entity-graph", project] as const,
   search: (project: string, q: string, mode: string) => ["search", project, q, mode] as const,
   entities: (project: string) => ["entities", project] as const,
   communities: (project: string) => ["communities", project] as const,
diff --git a/ui/src/hooks/api/useDocs.ts b/ui/src/hooks/api/useDocs.ts
index 62927aa..f835966 100644
--- a/ui/src/hooks/api/useDocs.ts
+++ b/ui/src/hooks/api/useDocs.ts
@@ -3,6 +3,13 @@ import { apiFetch } from "@/lib/api-client";
 import { qk } from "./keys";
 import type { Document } from "@/types/api";
 
+export interface DocChunk { // One element of the GET /api/documents/{id}/chunks response array.
+  id: string; // JSON key "id"
+  chunk_index: number; // JSON key "chunk_index"; presumably the chunk's position within the document - confirm
+  content: string; // JSON key "content"; the chunk text
+  token_count: number; // JSON key "token_count"
+}
+
 export function useDocs(project: string) {
   return useQuery({
     queryKey: qk.docs(project),
@@ -22,3 +29,16 @@ export function useDoc(project: string, id: string | undefined) {
     queryFn: () => apiFetch(`/api/documents/${encodeURIComponent(id!)}?project=${encodeURIComponent(project)}`),
   });
 }
+
+export function useDocChunks(project: string, id: string | undefined) { // Query hook for GET /api/documents/{id}/chunks within the given project.
+  return useQuery({
+    queryKey: qk.docChunks(project, id ?? ""), // "" stands in for an undefined id so the key stays well-formed
+    enabled: !!id, // the query stays disabled while id is undefined or empty
+    queryFn: async () => {
+      const res = await apiFetch( // id! below is safe: queryFn only runs once enabled is true
+        `/api/documents/${encodeURIComponent(id!)}/chunks?project=${encodeURIComponent(project)}`,
+      );
+      return Array.isArray(res) ? res : []; // any non-array payload is normalised to an empty list
+    },
+  });
+}
diff --git a/ui/src/hooks/api/useGraph.ts b/ui/src/hooks/api/useGraph.ts
index 7e70ac9..aa1cc52 100644
--- a/ui/src/hooks/api/useGraph.ts
+++ b/ui/src/hooks/api/useGraph.ts
@@ -32,3 +32,29 @@ export function useNotesGraph(project: string) {
     },
   });
 }
+
+// Entity graph from the indexing pipeline (entities + relationships extracted
+// by the LLM). Distinct from useNotesGraph, which surfaces wikilinks between
+// hand-authored notes.
+export function useEntityGraph(project: string) { // Query hook for GET /api/graph, adapted to { nodes, edges }.
+  return useQuery({
+    queryKey: qk.entityGraph(project),
+    queryFn: async (): Promise<GraphData> => { // resolves to the { nodes, edges } object built below
+      const res = await apiFetch( // NOTE(review): a type argument may have been stripped here as well (RawGraphResponse?) - confirm
+        `/api/graph?project=${encodeURIComponent(project)}`,
+      );
+      const rawNodes = res?.nodes ?? []; // a missing nodes/edges field is treated as an empty list
+      const rawEdges = res?.edges ?? [];
+      const nodes: GraphNode[] = rawNodes.map((n) => ({
+        id: n.id ?? "",
+        label: n.label ?? n.title ?? n.id ?? "", // first non-nullish of label, title, id
+        kind: (n.kind as GraphNode["kind"]) ?? "entity", // "entity" when the payload omits kind
+      }));
+      const ids = new Set(nodes.map((n) => n.id));
+      const edges: GraphEdge[] = rawEdges
+        .filter((e) => ids.has(e.source) && ids.has(e.target)) // keep only edges whose endpoints are both in nodes
+        .map((e) => ({ source: e.source, target: e.target })); // only source and target are carried over
+      return { nodes, edges };
+    },
+  });
+}
diff --git a/ui/src/routes/Graph.tsx b/ui/src/routes/Graph.tsx
index b7b4ab8..da56748 100644
--- a/ui/src/routes/Graph.tsx
+++ b/ui/src/routes/Graph.tsx
@@ -1,43 +1,92 @@
+import { useState } from "react";
 import { GraphCanvas } from "@/components/graph/GraphCanvas";
-import { useNotesGraph } from "@/hooks/api/useGraph";
+import { useEntityGraph, useNotesGraph } from "@/hooks/api/useGraph";
 import { useProjectStore } from "@/stores/project";
 import { EmptyState, ErrorState, LoadingSkeleton } from "@/components/empty";
 
+type View = "entity" | "notes";
+
 export default function Graph() {
   const project = useProjectStore((s) => s.slug);
-  const { data, isLoading, error, refetch } = useNotesGraph(project);
-  const err = error as Error | null | undefined;
+  const entity = useEntityGraph(project);
+  const notes = useNotesGraph(project);
+
+  // Default view: entity graph if it has nodes, else notes graph. Honour
+  // an explicit user toggle once made.
+  const [override, setOverride] = useState(null);
+  const entityHasNodes = (entity.data?.nodes.length ?? 0) > 0;
+  const view: View = override ?? (entityHasNodes ? "entity" : "notes");
+  const active = view === "entity" ? entity : notes;
+  const data = active.data;
+  const err = active.error as Error | null | undefined;
+
+  const Toggle = () => (
+    
+ + +
+ ); - if (isLoading) { + if (active.isLoading) { return ( -
- +
+ +
+ +
); } if (err) { return ( -
- refetch()} - /> +
+ +
+ active.refetch()} + /> +
); } if (!data || data.nodes.length === 0) { return ( -
- +
+ +
+ ` followed by `docsiq index --finalize` to extract entities and relationships." + : "Add markdown notes with [[wikilinks]] under this project to build the notes graph." + } + /> +
); } return (
+
); diff --git a/ui/src/routes/documents/DocumentView.tsx b/ui/src/routes/documents/DocumentView.tsx index 5ef22d0..ed789e9 100644 --- a/ui/src/routes/documents/DocumentView.tsx +++ b/ui/src/routes/documents/DocumentView.tsx @@ -1,28 +1,45 @@ +import { useMemo } from "react"; import { useParams } from "react-router-dom"; -import { useDoc } from "@/hooks/api/useDocs"; +import MarkdownIt from "markdown-it"; +import { useDoc, useDocChunks } from "@/hooks/api/useDocs"; import { useProjectStore } from "@/stores/project"; import { EmptyState, ErrorState, LoadingSkeleton } from "@/components/empty"; import { useDocumentTitle } from "@/hooks/useDocumentTitle"; +const md = new MarkdownIt({ html: false, linkify: true, breaks: false }); + export default function DocumentView() { const { id } = useParams(); const project = useProjectStore((s) => s.slug); const { data, isLoading, error, refetch } = useDoc(project, id); + const { data: chunks, isLoading: chunksLoading } = useDocChunks(project, id); const err = error as Error | null | undefined; const docLabel = data?.title || data?.path; useDocumentTitle(docLabel ? [docLabel, "Documents"] : undefined); + const orderedChunks = useMemo( + () => (chunks ? [...chunks].sort((a, b) => a.chunk_index - b.chunk_index) : []), + [chunks], + ); + + const renderedHTML = useMemo(() => { + if (orderedChunks.length === 0) return ""; + const text = orderedChunks.map((c) => c.content).join("\n\n"); + const isMarkdown = data?.doc_type === "md" || data?.doc_type === "markdown"; + return isMarkdown ? md.render(text) : ""; + }, [orderedChunks, data?.doc_type]); + if (isLoading) { return ( -
+
); } if (err) { return ( -
+
+
); } + return ( -
-

{data.title || data.path}

-
- {data.doc_type} · v{data.version} -
+
+
+

{data.title || data.path}

+
+ {data.doc_type} · v{data.version} + {orderedChunks.length > 0 && ` · ${orderedChunks.length} chunk${orderedChunks.length === 1 ? "" : "s"}`} +
+
+ + {chunksLoading ? ( + + ) : orderedChunks.length === 0 ? ( + + ) : renderedHTML ? ( +
+ ) : ( +
+          {orderedChunks.map((c) => c.content).join("\n\n")}
+        
+ )}
); }