diff --git a/scripts/seed.ts b/scripts/seed.ts index 39894a0..32a06aa 100644 --- a/scripts/seed.ts +++ b/scripts/seed.ts @@ -5,7 +5,7 @@ import path from "path"; import { embedMany } from "ai"; import { adminDb } from "../src/middleware/firebase"; -import { embeddingModel } from "../src/middleware/genai"; +import { embeddingModel, googleOptions } from "../src/middleware/genai"; async function deleteCollection(collectionPath: string) { if (!adminDb) throw new Error("adminDb is null"); @@ -43,23 +43,26 @@ function stringify(obj: any, indent = ""): string { if (typeof obj === "string") return obj; if (typeof obj === "number" || typeof obj === "boolean") return String(obj); + // Skip React elements + if (obj && typeof obj === "object" && obj.$$typeof) return ""; + if (Array.isArray(obj)) { if (obj.every((i) => typeof i === "string")) { return obj.join(", "); } - return obj.map((i) => stringify(i, indent + " ")).join("\n" + indent); + return obj + .map((i) => stringify(i, indent + " ")) + .filter((s) => s !== "") + .join("\n" + indent); } if (typeof obj === "object") { return Object.entries(obj) - .filter( - ([, v]) => - v !== null && - v !== undefined && - v !== "" && - (Array.isArray(v) ? v.length > 0 : true), - ) - .map(([k, v]) => `${indent}${k}: ${stringify(v, indent + " ")}`) + .map(([k, v]) => { + const s = stringify(v, indent + " "); + return s ? `${indent}${k}: ${s}` : ""; + }) + .filter((s) => s !== "") .join("\n"); } @@ -79,7 +82,13 @@ function chunkText(text: string, maxLength = 2000): string[] { async function main() { const reset = process.argv.includes("--reset"); - if (reset) { + const dryRun = process.argv.includes("--dry-run"); + + if (dryRun) { + console.log("Dry run enabled. No changes will be made to the database."); + } + + if (reset && !dryRun) { console.log("Resetting knowledge collection..."); await deleteCollection("knowledge"); } @@ -108,12 +117,16 @@ async function main() { data = dataModule.default || Object.values(dataModule)[0]; } - if (!Array.isArray(data)) { - console.log(`Skipping ${file}: not an array.`); + let items: any[]; + if (Array.isArray(data)) { + items = data; + } else if (data && typeof data === "object") { + items = Object.entries(data); + } else { + console.log(`Skipping ${file}: not an array or object.`); continue; } - const items = data; totalItems += items.length; for (let i = 0; i < items.length; i++) { @@ -124,9 +137,18 @@ async function main() { for (let j = 0; j < chunks.length; j += 10) { const batchChunks = chunks.slice(j, j + 10); + if (dryRun) { + totalChunks += batchChunks.length; + console.log( + `[DRY RUN] Would process ${batchChunks.length} chunks for item ${i} from ${file}`, + ); + continue; + } + const { embeddings } = await embedMany({ model: embeddingModel, values: batchChunks, + providerOptions: googleOptions, }); if (!adminDb) throw new Error("adminDb is null"); diff --git a/src/app/api/chat/route.ts b/src/app/api/chat/route.ts index db1d300..3835e68 100644 --- a/src/app/api/chat/route.ts +++ b/src/app/api/chat/route.ts @@ -1,9 +1,7 @@ import { convertToModelMessages, embed, streamText } from "ai"; -import { GoogleEmbeddingModelOptions } from "@ai-sdk/google"; - import { findSimilarChunks } from "@/middleware/firebase"; -import { chatModel, embeddingModel } from "@/middleware/genai"; +import { chatModel, embeddingModel, googleOptions } from "@/middleware/genai"; import { ratelimit } from "@/middleware/upstash"; export async function POST(req: Request) { @@ -43,12 +41,7 @@ export async function POST(req: Request) { const { embedding } = await embed({ model: embeddingModel, value: userQuery, - providerOptions: { - google: { - outputDimensionality: 1536, - taskType: "SEMANTIC_SIMILARITY", - } satisfies GoogleEmbeddingModelOptions, - }, + providerOptions: googleOptions, }); const chunks = await findSimilarChunks(embedding, 4); diff --git a/src/middleware/genai.ts b/src/middleware/genai.ts index 53acee8..fb6f95d 100644 --- a/src/middleware/genai.ts +++ b/src/middleware/genai.ts @@ -1,10 +1,20 @@ -import { createGoogleGenerativeAI } from "@ai-sdk/google"; +import { + createGoogleGenerativeAI, + GoogleEmbeddingModelOptions, +} from "@ai-sdk/google"; const google = createGoogleGenerativeAI({ apiKey: process.env.GOOGLE_GENERATIVE_AI_API_KEY!, }); const embeddingModel = google.embeddingModel("gemini-embedding-001"); -const chatModel = google("gemini-3.1-flash-lite"); +const chatModel = google("gemini-flash-lite-latest"); -export { google, embeddingModel, chatModel }; +const googleOptions = { + google: { + outputDimensionality: 1536, + taskType: "SEMANTIC_SIMILARITY", + } satisfies GoogleEmbeddingModelOptions, +}; + +export { google, googleOptions, embeddingModel, chatModel };