fix: indexer not saving vectorized items properly

This commit is contained in:
SethBurkart123
2025-05-04 12:01:03 +10:00
parent 40924b5b33
commit 955213d577
7 changed files with 24 additions and 10 deletions
@@ -11,6 +11,7 @@ import { waitForElm } from "@/seqta/utils/waitForElm";
import { runIndexing } from "../indexing/indexer";
import { initVectorSearch } from "../search/vector/vectorSearch";
import { cleanupSearchBar, mountSearchBar } from "./mountSearchBar";
import { IndexedDbManager } from 'embeddia';
const settings = defineSettings({
searchHotkey: stringSetting({
@@ -64,6 +65,12 @@ const globalSearchPlugin: Plugin<typeof settings> = {
run: async (api) => {
const appRef = { current: null };
await IndexedDbManager.create(
'embeddiaDB',
'embeddiaObjectStore',
{ primaryKey: 'id', autoIncrement: false }
);
initVectorSearch();
if (api.settings.runIndexingOnLoad) {
@@ -170,7 +170,10 @@ export async function runIndexing(): Promise<void> {
let merged = mergeItems(stored, newItemsRaw);
if (job.purge) merged = job.purge(merged);
await setStoredItems(merged); // Store merged non-vector data
console.log(merged);
console.log(merged.length);
await setStoredItems(merged);
await updateLastRunMeta(jobId);
// Hydrate items for vector processing
@@ -192,11 +195,11 @@ export async function runIndexing(): Promise<void> {
allItemsFromJobs.push(...hydratedItems);
console.debug(
`%c[Indexer] ${job.label}: ${newItemsRaw.length} new items fetched, ${merged.length} total stored (non-vector).`,
`%c[Indexer] ${job.label}: ${newItemsRaw.length} new items fetched, ${merged.length} total stored (non-vector).`,
"color: #00c46f",
);
} catch (err) {
console.debug(`%c[Indexer] ${job.label} failed:`, "color: red");
console.debug(`%c[Indexer] ${job.label} failed:`, "color: red");
console.error(err);
}
@@ -155,7 +155,7 @@ export const jobs: Record<string, Job> = {
id: "messages",
label: "Messages",
renderComponentId: "message",
frequency: { type: "expiry", afterMs: 1000 * 60 * 5 }, // every 5 minutes
frequency: { type: "expiry", afterMs: 1000 }, // expires after 1 second (1000 ms)
run: async (ctx) => {
// Get existing items first
@@ -258,8 +258,7 @@ export const jobs: Record<string, Job> = {
},
purge: (items) => {
// Keep messages from the last 4 * 12 * 30 = 1440 days (roughly four years)
const cutoff = Date.now() - 30 * 24 * 60 * 60 * 1000;
const cutoff = Date.now() - 4 * 12 * 30 * 24 * 60 * 60 * 1000;
return items.filter((i) => i.dateAdded >= cutoff);
},
},
@@ -4,7 +4,7 @@ import AssessmentComponent from "../components/AssessmentItem.svelte";
export const renderComponentMap: Record<string, typeof SvelteComponent> = {
assessment: AssessmentComponent as unknown as typeof SvelteComponent,
// messages: MessageComponent,
message: AssessmentComponent as unknown as typeof SvelteComponent,
// subject: SubjectComponent,
// etc...
};
@@ -86,7 +86,6 @@ async function processItems(items: HydratedIndexItem[], signal: AbortSignal) {
const unprocessedItems = items.filter((item) => {
if (signal.aborted) return false; // Check cancellation during filtering
try {
// Check if the item ID already exists in the index (loaded or added)
return !vectorIndex!.get({ id: item.id });
} catch (e) {
// If get throws (e.g., item not found), it means it's unprocessed
@@ -42,6 +42,8 @@ export class VectorWorkerManager {
// Create the worker
this.worker = vectorWorker();
console.log('Worker initialized', this.worker);
const timeout = setTimeout(() => {
console.error('Vector worker initialization timed out');
this.worker?.terminate(); // Clean up worker if it exists
@@ -140,9 +142,12 @@ export class VectorWorkerManager {
this.cancelAllSearches("Processing started");
console.debug(`Sending ${items.length} items to worker for processing.`);
const serialisableItems = items.map(({ renderComponent, ...rest }) => rest);
this.worker!.postMessage({
type: 'process',
data: { items }
data: { items: serialisableItems }
});
}
@@ -173,6 +178,7 @@ export class VectorWorkerManager {
this.searchPromises.set(messageId, { resolve: currentParams.resolve, reject: currentParams.reject, timer: searchTimer });
console.debug(`Sending search request (ID: ${messageId}) to worker: "${currentParams.query}"`);
console.log(this.worker);
this.worker.postMessage({
type: "search",
data: { query: currentParams.query, topK: currentParams.topK },