fix: indexer not saving vectorized items properly

This commit is contained in:
SethBurkart123
2025-05-04 12:01:03 +10:00
parent 40924b5b33
commit 955213d577
7 changed files with 24 additions and 10 deletions
+1 -1
View File
@@ -78,7 +78,7 @@
"codemirror": "^6.0.1", "codemirror": "^6.0.1",
"color": "^5.0.0", "color": "^5.0.0",
"dompurify": "^3.2.4", "dompurify": "^3.2.4",
"embeddia": "^1.1.0", "embeddia": "^1.1.3",
"embla-carousel-autoplay": "^8.5.2", "embla-carousel-autoplay": "^8.5.2",
"embla-carousel-svelte": "^8.5.2", "embla-carousel-svelte": "^8.5.2",
"events": "^3.3.0", "events": "^3.3.0",
@@ -11,6 +11,7 @@ import { waitForElm } from "@/seqta/utils/waitForElm";
import { runIndexing } from "../indexing/indexer"; import { runIndexing } from "../indexing/indexer";
import { initVectorSearch } from "../search/vector/vectorSearch"; import { initVectorSearch } from "../search/vector/vectorSearch";
import { cleanupSearchBar, mountSearchBar } from "./mountSearchBar"; import { cleanupSearchBar, mountSearchBar } from "./mountSearchBar";
import { IndexedDbManager } from 'embeddia';
const settings = defineSettings({ const settings = defineSettings({
searchHotkey: stringSetting({ searchHotkey: stringSetting({
@@ -64,6 +65,12 @@ const globalSearchPlugin: Plugin<typeof settings> = {
run: async (api) => { run: async (api) => {
const appRef = { current: null }; const appRef = { current: null };
await IndexedDbManager.create(
'embeddiaDB',
'embeddiaObjectStore',
{ primaryKey: 'id', autoIncrement: false }
);
initVectorSearch(); initVectorSearch();
if (api.settings.runIndexingOnLoad) { if (api.settings.runIndexingOnLoad) {
@@ -170,7 +170,10 @@ export async function runIndexing(): Promise<void> {
let merged = mergeItems(stored, newItemsRaw); let merged = mergeItems(stored, newItemsRaw);
if (job.purge) merged = job.purge(merged); if (job.purge) merged = job.purge(merged);
await setStoredItems(merged); // Store merged non-vector data console.log(merged);
console.log(merged.length);
await setStoredItems(merged);
await updateLastRunMeta(jobId); await updateLastRunMeta(jobId);
// Hydrate items for vector processing // Hydrate items for vector processing
@@ -192,11 +195,11 @@ export async function runIndexing(): Promise<void> {
allItemsFromJobs.push(...hydratedItems); allItemsFromJobs.push(...hydratedItems);
console.debug( console.debug(
`%c[Indexer] ${job.label}: ${newItemsRaw.length} new items fetched, ${merged.length} total stored (non-vector).`, `%c[Indexer] ${job.label}: ${newItemsRaw.length} new items fetched, ${merged.length} total stored (non-vector).`,
"color: #00c46f", "color: #00c46f",
); );
} catch (err) { } catch (err) {
console.debug(`%c[Indexer] ${job.label} failed:`, "color: red"); console.debug(`%c[Indexer] ${job.label} failed:`, "color: red");
console.error(err); console.error(err);
} }
@@ -155,7 +155,7 @@ export const jobs: Record<string, Job> = {
id: "messages", id: "messages",
label: "Messages", label: "Messages",
renderComponentId: "message", renderComponentId: "message",
frequency: { type: "expiry", afterMs: 1000 * 60 * 5 }, // every 5 minutes frequency: { type: "expiry", afterMs: 1000 }, // every second
run: async (ctx) => { run: async (ctx) => {
// Get existing items first // Get existing items first
@@ -258,8 +258,7 @@ export const jobs: Record<string, Job> = {
}, },
purge: (items) => { purge: (items) => {
// Keep messages from the last 30 days const cutoff = Date.now() - 4 * 12 * 30 * 24 * 60 * 60 * 1000;
const cutoff = Date.now() - 30 * 24 * 60 * 60 * 1000;
return items.filter((i) => i.dateAdded >= cutoff); return items.filter((i) => i.dateAdded >= cutoff);
}, },
}, },
@@ -4,7 +4,7 @@ import AssessmentComponent from "../components/AssessmentItem.svelte";
export const renderComponentMap: Record<string, typeof SvelteComponent> = { export const renderComponentMap: Record<string, typeof SvelteComponent> = {
assessment: AssessmentComponent as unknown as typeof SvelteComponent, assessment: AssessmentComponent as unknown as typeof SvelteComponent,
// messages: MessageComponent, message: AssessmentComponent as unknown as typeof SvelteComponent,
// subject: SubjectComponent, // subject: SubjectComponent,
// etc... // etc...
}; };
@@ -86,7 +86,6 @@ async function processItems(items: HydratedIndexItem[], signal: AbortSignal) {
const unprocessedItems = items.filter((item) => { const unprocessedItems = items.filter((item) => {
if (signal.aborted) return false; // Check cancellation during filtering if (signal.aborted) return false; // Check cancellation during filtering
try { try {
// Check if the item ID already exists in the index (loaded or added)
return !vectorIndex!.get({ id: item.id }); return !vectorIndex!.get({ id: item.id });
} catch (e) { } catch (e) {
// If get throws (e.g., item not found), it means it's unprocessed // If get throws (e.g., item not found), it means it's unprocessed
@@ -42,6 +42,8 @@ export class VectorWorkerManager {
// Create the worker // Create the worker
this.worker = vectorWorker(); this.worker = vectorWorker();
console.log('Worker initialized', this.worker);
const timeout = setTimeout(() => { const timeout = setTimeout(() => {
console.error('Vector worker initialization timed out'); console.error('Vector worker initialization timed out');
this.worker?.terminate(); // Clean up worker if it exists this.worker?.terminate(); // Clean up worker if it exists
@@ -140,9 +142,12 @@ export class VectorWorkerManager {
this.cancelAllSearches("Processing started"); this.cancelAllSearches("Processing started");
console.debug(`Sending ${items.length} items to worker for processing.`); console.debug(`Sending ${items.length} items to worker for processing.`);
const serialisableItems = items.map(({ renderComponent, ...rest }) => rest);
this.worker!.postMessage({ this.worker!.postMessage({
type: 'process', type: 'process',
data: { items } data: { items: serialisableItems }
}); });
} }
@@ -173,6 +178,7 @@ export class VectorWorkerManager {
this.searchPromises.set(messageId, { resolve: currentParams.resolve, reject: currentParams.reject, timer: searchTimer }); this.searchPromises.set(messageId, { resolve: currentParams.resolve, reject: currentParams.reject, timer: searchTimer });
console.debug(`Sending search request (ID: ${messageId}) to worker: "${currentParams.query}"`); console.debug(`Sending search request (ID: ${messageId}) to worker: "${currentParams.query}"`);
console.log(this.worker);
this.worker.postMessage({ this.worker.postMessage({
type: "search", type: "search",
data: { query: currentParams.query, topK: currentParams.topK }, data: { query: currentParams.query, topK: currentParams.topK },