mirror of
https://github.com/BetterSEQTA/BetterSEQTA-Plus.git
synced 2026-06-06 19:54:39 +00:00
format: run prettify
This commit is contained in:
@@ -11,7 +11,7 @@ import { waitForElm } from "@/seqta/utils/waitForElm";
|
||||
import { runIndexing } from "../indexing/indexer";
|
||||
import { initVectorSearch } from "../search/vector/vectorSearch";
|
||||
import { cleanupSearchBar, mountSearchBar } from "./mountSearchBar";
|
||||
import { IndexedDbManager } from 'embeddia';
|
||||
import { IndexedDbManager } from "embeddia";
|
||||
|
||||
const settings = defineSettings({
|
||||
searchHotkey: stringSetting({
|
||||
@@ -65,12 +65,11 @@ const globalSearchPlugin: Plugin<typeof settings> = {
|
||||
run: async (api) => {
|
||||
const appRef = { current: null };
|
||||
|
||||
await IndexedDbManager.create(
|
||||
'embeddiaDB',
|
||||
'embeddiaObjectStore',
|
||||
{ primaryKey: 'id', autoIncrement: false }
|
||||
);
|
||||
|
||||
await IndexedDbManager.create("embeddiaDB", "embeddiaObjectStore", {
|
||||
primaryKey: "id",
|
||||
autoIncrement: false,
|
||||
});
|
||||
|
||||
initVectorSearch();
|
||||
|
||||
if (api.settings.runIndexingOnLoad) {
|
||||
|
||||
@@ -6,7 +6,7 @@ import { VectorWorkerManager } from "../indexing/worker/vectorWorkerManager";
|
||||
export function mountSearchBar(
|
||||
titleElement: Element,
|
||||
api: any,
|
||||
appRef: { current: any }
|
||||
appRef: { current: any },
|
||||
) {
|
||||
if (titleElement.querySelector(".search-trigger")) {
|
||||
return;
|
||||
|
||||
@@ -15,7 +15,10 @@ async function loadProgress<T = any>(jobId: string): Promise<T | undefined> {
|
||||
return rec?.progress as T | undefined;
|
||||
}
|
||||
|
||||
async function saveProgress<T = any>(jobId: string, progress: T): Promise<void> {
|
||||
async function saveProgress<T = any>(
|
||||
jobId: string,
|
||||
progress: T,
|
||||
): Promise<void> {
|
||||
await put(META_STORE, { jobId, progress }, `progress:${jobId}`);
|
||||
}
|
||||
/* ───────────────────────────────────────────── */
|
||||
@@ -67,7 +70,13 @@ function stopHeartbeat() {
|
||||
localStorage.removeItem(LOCK_KEY);
|
||||
}
|
||||
|
||||
function dispatchProgress(completed: number, total: number, indexing: boolean, status?: string, detail?: string) {
|
||||
function dispatchProgress(
|
||||
completed: number,
|
||||
total: number,
|
||||
indexing: boolean,
|
||||
status?: string,
|
||||
detail?: string,
|
||||
) {
|
||||
const event = new CustomEvent("indexing-progress", {
|
||||
detail: { completed, total, indexing, status, detail },
|
||||
});
|
||||
@@ -79,31 +88,41 @@ export async function loadAllStoredItems(): Promise<HydratedIndexItem[]> {
|
||||
const jobIds = Object.keys(jobs);
|
||||
|
||||
for (const jobId of jobIds) {
|
||||
try {
|
||||
const items = await getAll(jobId) as IndexItem[];
|
||||
const job = jobs[jobId];
|
||||
const renderComponent = renderComponentMap[job.renderComponentId];
|
||||
try {
|
||||
const items = (await getAll(jobId)) as IndexItem[];
|
||||
const job = jobs[jobId];
|
||||
const renderComponent = renderComponentMap[job.renderComponentId];
|
||||
|
||||
if (!renderComponent) {
|
||||
console.warn(`Render component not found for job ${jobId} (ID: ${job.renderComponentId})`);
|
||||
}
|
||||
|
||||
for (const item of items) {
|
||||
// Ensure item has all required fields before pushing
|
||||
if (item && item.id && item.text && item.category && item.actionId && job.renderComponentId) {
|
||||
all.push({
|
||||
...item,
|
||||
renderComponent: renderComponent || undefined, // Assign undefined if not found
|
||||
});
|
||||
} else {
|
||||
console.warn(`Skipping invalid item from job ${jobId}:`, item);
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(`Error loading items for job ${jobId}:`, error);
|
||||
if (!renderComponent) {
|
||||
console.warn(
|
||||
`Render component not found for job ${jobId} (ID: ${job.renderComponentId})`,
|
||||
);
|
||||
}
|
||||
|
||||
for (const item of items) {
|
||||
if (
|
||||
item &&
|
||||
item.id &&
|
||||
item.text &&
|
||||
item.category &&
|
||||
item.actionId &&
|
||||
job.renderComponentId
|
||||
) {
|
||||
all.push({
|
||||
...item,
|
||||
renderComponent: renderComponent || undefined,
|
||||
});
|
||||
} else {
|
||||
console.warn(`Skipping invalid item from job ${jobId}:`, item);
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(`Error loading items for job ${jobId}:`, error);
|
||||
}
|
||||
}
|
||||
console.debug(`[Indexer] Loaded ${all.length} items from non-vector storage.`);
|
||||
console.debug(
|
||||
`[Indexer] Loaded ${all.length} items from non-vector storage.`,
|
||||
);
|
||||
return all;
|
||||
}
|
||||
|
||||
@@ -129,7 +148,12 @@ export async function runIndexing(): Promise<void> {
|
||||
|
||||
// --- Step 1: Run Fetching/Storing Jobs (Main Thread) ---
|
||||
for (const jobId of jobIds) {
|
||||
dispatchProgress(completedJobs, totalSteps, true, `Running job: ${jobs[jobId].label}`);
|
||||
dispatchProgress(
|
||||
completedJobs,
|
||||
totalSteps,
|
||||
true,
|
||||
`Running job: ${jobs[jobId].label}`,
|
||||
);
|
||||
const job = jobs[jobId];
|
||||
const lastRun = await getLastRunMeta(jobId);
|
||||
|
||||
@@ -139,26 +163,37 @@ export async function runIndexing(): Promise<void> {
|
||||
"color: gray",
|
||||
);
|
||||
completedJobs++;
|
||||
dispatchProgress(completedJobs, totalSteps, true, `Skipped job: ${job.label}`);
|
||||
dispatchProgress(
|
||||
completedJobs,
|
||||
totalSteps,
|
||||
true,
|
||||
`Skipped job: ${job.label}`,
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
const getStoredItems = async (storeId?: string) => await getAll(storeId ?? jobId);
|
||||
const getStoredItems = async (storeId?: string) =>
|
||||
await getAll(storeId ?? jobId);
|
||||
const setStoredItems = async (items: IndexItem[], storeId?: string) => {
|
||||
const targetStore = storeId ?? jobId;
|
||||
await clear(targetStore);
|
||||
const validItems = items.filter(i => i && i.id);
|
||||
const validItems = items.filter((i) => i && i.id);
|
||||
if (validItems.length !== items.length) {
|
||||
console.warn(`[Indexer Job ${jobId} -> Store ${targetStore}] Filtered out ${items.length - validItems.length} invalid items before storing.`);
|
||||
console.warn(
|
||||
`[Indexer Job ${jobId} -> Store ${targetStore}] Filtered out ${items.length - validItems.length} invalid items before storing.`,
|
||||
);
|
||||
}
|
||||
await Promise.all(validItems.map((i) => put(targetStore, i, i.id)));
|
||||
};
|
||||
const addItem = async (item: IndexItem, storeId?: string) => {
|
||||
const targetStore = storeId ?? jobId;
|
||||
if (item && item.id) {
|
||||
await put(targetStore, item, item.id);
|
||||
await put(targetStore, item, item.id);
|
||||
} else {
|
||||
console.warn(`[Indexer Job ${jobId} -> Store ${targetStore}] Attempted to add invalid item:`, item);
|
||||
console.warn(
|
||||
`[Indexer Job ${jobId} -> Store ${targetStore}] Attempted to add invalid item:`,
|
||||
item,
|
||||
);
|
||||
}
|
||||
};
|
||||
const removeItem = async (id: string, storeId?: string) => {
|
||||
@@ -193,18 +228,30 @@ export async function runIndexing(): Promise<void> {
|
||||
// Hydrate items for vector processing
|
||||
const renderComponent = renderComponentMap[job.renderComponentId];
|
||||
if (!renderComponent) {
|
||||
console.warn(`Render component not found for job ${jobId} (ID: ${job.renderComponentId}) during hydration`);
|
||||
console.warn(
|
||||
`Render component not found for job ${jobId} (ID: ${job.renderComponentId}) during hydration`,
|
||||
);
|
||||
}
|
||||
const hydratedItems = merged
|
||||
.filter(item => item && item.id && item.text && item.category && item.actionId && job.renderComponentId) // Filter invalid before hydrating
|
||||
.map((item) => ({
|
||||
...item,
|
||||
renderComponent: renderComponent || undefined, // Assign undefined if not found
|
||||
}));
|
||||
.filter(
|
||||
(item) =>
|
||||
item &&
|
||||
item.id &&
|
||||
item.text &&
|
||||
item.category &&
|
||||
item.actionId &&
|
||||
job.renderComponentId,
|
||||
) // Filter invalid before hydrating
|
||||
.map((item) => ({
|
||||
...item,
|
||||
renderComponent: renderComponent || undefined, // Assign undefined if not found
|
||||
}));
|
||||
|
||||
if (hydratedItems.length !== merged.length) {
|
||||
console.warn(`[Indexer Job ${jobId}] Filtered out ${merged.length - hydratedItems.length} invalid items during hydration.`);
|
||||
}
|
||||
if (hydratedItems.length !== merged.length) {
|
||||
console.warn(
|
||||
`[Indexer Job ${jobId}] Filtered out ${merged.length - hydratedItems.length} invalid items during hydration.`,
|
||||
);
|
||||
}
|
||||
|
||||
allItemsFromJobs.push(...hydratedItems);
|
||||
|
||||
@@ -218,7 +265,12 @@ export async function runIndexing(): Promise<void> {
|
||||
}
|
||||
|
||||
completedJobs++;
|
||||
dispatchProgress(completedJobs, totalSteps, true, `Finished job: ${job.label}`);
|
||||
dispatchProgress(
|
||||
completedJobs,
|
||||
totalSteps,
|
||||
true,
|
||||
`Finished job: ${job.label}`,
|
||||
);
|
||||
}
|
||||
|
||||
// --- Step 2: Delegate Vectorization to Worker (Off Main Thread) ---
|
||||
@@ -233,54 +285,113 @@ export async function runIndexing(): Promise<void> {
|
||||
const workerManager = VectorWorkerManager.getInstance();
|
||||
// Pass a progress callback to the worker manager
|
||||
await workerManager.processItems(allItemsFromJobs, (progress) => {
|
||||
// Update overall progress based on worker feedback
|
||||
let detailMessage = progress.message || '';
|
||||
if (progress.status === 'processing' && progress.total && progress.processed !== undefined) {
|
||||
detailMessage = `Vectorizing: ${progress.processed} / ${progress.total}`;
|
||||
// You could potentially update the 'completed' count more granularly here
|
||||
// For simplicity, we'll just update the detail message
|
||||
} else if (progress.status === 'complete') {
|
||||
detailMessage = "Vectorization complete";
|
||||
// Mark the vectorization step as complete
|
||||
dispatchProgress(totalSteps, totalSteps, true, "Vectorization finished");
|
||||
} else if (progress.status === 'error') {
|
||||
detailMessage = `Vectorization error: ${progress.message}`;
|
||||
dispatchProgress(completedJobs, totalSteps, true, "Vectorization failed", detailMessage); // Show error
|
||||
} else if (progress.status === 'started') {
|
||||
detailMessage = `Vectorization started for ${progress.total} items`;
|
||||
} else if (progress.status === 'cancelled') {
|
||||
detailMessage = `Vectorization cancelled: ${progress.message}`;
|
||||
dispatchProgress(completedJobs, totalSteps, true, "Vectorization cancelled", detailMessage);
|
||||
}
|
||||
// Update overall progress based on worker feedback
|
||||
let detailMessage = progress.message || "";
|
||||
if (
|
||||
progress.status === "processing" &&
|
||||
progress.total &&
|
||||
progress.processed !== undefined
|
||||
) {
|
||||
detailMessage = `Vectorizing: ${progress.processed} / ${progress.total}`;
|
||||
// You could potentially update the 'completed' count more granularly here
|
||||
// For simplicity, we'll just update the detail message
|
||||
} else if (progress.status === "complete") {
|
||||
detailMessage = "Vectorization complete";
|
||||
// Mark the vectorization step as complete
|
||||
dispatchProgress(
|
||||
totalSteps,
|
||||
totalSteps,
|
||||
true,
|
||||
"Vectorization finished",
|
||||
);
|
||||
} else if (progress.status === "error") {
|
||||
detailMessage = `Vectorization error: ${progress.message}`;
|
||||
dispatchProgress(
|
||||
completedJobs,
|
||||
totalSteps,
|
||||
true,
|
||||
"Vectorization failed",
|
||||
detailMessage,
|
||||
); // Show error
|
||||
} else if (progress.status === "started") {
|
||||
detailMessage = `Vectorization started for ${progress.total} items`;
|
||||
} else if (progress.status === "cancelled") {
|
||||
detailMessage = `Vectorization cancelled: ${progress.message}`;
|
||||
dispatchProgress(
|
||||
completedJobs,
|
||||
totalSteps,
|
||||
true,
|
||||
"Vectorization cancelled",
|
||||
detailMessage,
|
||||
);
|
||||
}
|
||||
|
||||
// Update the status detail
|
||||
dispatchProgress(completedJobs, totalSteps, true, "Vectorization in progress", detailMessage);
|
||||
// Update the status detail
|
||||
dispatchProgress(
|
||||
completedJobs,
|
||||
totalSteps,
|
||||
true,
|
||||
"Vectorization in progress",
|
||||
detailMessage,
|
||||
);
|
||||
|
||||
// When worker signals completion of *its* task, mark the final step complete
|
||||
if (progress.status === 'complete') {
|
||||
completedJobs++; // Increment completion count *after* vectorization finishes
|
||||
dispatchProgress(completedJobs, totalSteps, false, "Indexing finished"); // Set indexing to false
|
||||
} else if (progress.status === 'error' || progress.status === 'cancelled') {
|
||||
// Don't increment completed count on failure/cancel, just stop indexing indicator
|
||||
dispatchProgress(completedJobs, totalSteps, false, "Indexing stopped due to error/cancel");
|
||||
}
|
||||
// When worker signals completion of *its* task, mark the final step complete
|
||||
if (progress.status === "complete") {
|
||||
completedJobs++; // Increment completion count *after* vectorization finishes
|
||||
dispatchProgress(
|
||||
completedJobs,
|
||||
totalSteps,
|
||||
false,
|
||||
"Indexing finished",
|
||||
); // Set indexing to false
|
||||
} else if (
|
||||
progress.status === "error" ||
|
||||
progress.status === "cancelled"
|
||||
) {
|
||||
// Don't increment completed count on failure/cancel, just stop indexing indicator
|
||||
dispatchProgress(
|
||||
completedJobs,
|
||||
totalSteps,
|
||||
false,
|
||||
"Indexing stopped due to error/cancel",
|
||||
);
|
||||
}
|
||||
});
|
||||
console.debug("%c[Indexer] Vectorization task sent to worker.", "color: green");
|
||||
console.debug(
|
||||
"%c[Indexer] Vectorization task sent to worker.",
|
||||
"color: green",
|
||||
);
|
||||
// Note: runIndexing might return *before* vectorization is complete now.
|
||||
// The progress updates will signal the true end state.
|
||||
} catch (error) {
|
||||
console.error(`%c[Indexer] ❌ Failed to send items to vector worker:`, "color: red", error);
|
||||
dispatchProgress(completedJobs, totalSteps, false, "Vectorization failed", String(error)); // Stop indexing indicator
|
||||
console.error(
|
||||
`%c[Indexer] ❌ Failed to send items to vector worker:`,
|
||||
"color: red",
|
||||
error,
|
||||
);
|
||||
dispatchProgress(
|
||||
completedJobs,
|
||||
totalSteps,
|
||||
false,
|
||||
"Vectorization failed",
|
||||
String(error),
|
||||
); // Stop indexing indicator
|
||||
}
|
||||
|
||||
} else {
|
||||
console.debug("%c[Indexer] No items to send for vectorization.", "color: gray");
|
||||
console.debug(
|
||||
"%c[Indexer] No items to send for vectorization.",
|
||||
"color: gray",
|
||||
);
|
||||
// If no vectorization needed, indexing is done here.
|
||||
completedJobs++; // Count the "skipped" vectorization step
|
||||
dispatchProgress(completedJobs, totalSteps, false, "Indexing finished (no vectorization needed)");
|
||||
dispatchProgress(
|
||||
completedJobs,
|
||||
totalSteps,
|
||||
false,
|
||||
"Indexing finished (no vectorization needed)",
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
// Stop heartbeat ONLY when all jobs *and* the vectorization dispatch are done.
|
||||
// The actual *completion* of vectorization is now asynchronous.
|
||||
stopHeartbeat();
|
||||
@@ -292,10 +403,10 @@ function mergeItems(existing: IndexItem[], incoming: IndexItem[]): IndexItem[] {
|
||||
const map = new Map<string, IndexItem>();
|
||||
// Prioritize incoming items if IDs clash
|
||||
for (const item of existing) {
|
||||
if (item && item.id) map.set(item.id, item);
|
||||
if (item && item.id) map.set(item.id, item);
|
||||
}
|
||||
for (const item of incoming) {
|
||||
if (item && item.id) map.set(item.id, item);
|
||||
if (item && item.id) map.set(item.id, item);
|
||||
}
|
||||
return Array.from(map.values());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,4 +5,4 @@ import { assessmentsJob } from "./jobs/assessments";
|
||||
export const jobs: Record<string, Job> = {
|
||||
messages: messagesJob,
|
||||
assessments: assessmentsJob,
|
||||
};
|
||||
};
|
||||
|
||||
@@ -49,21 +49,27 @@ const fetchNotifications = async () => {
|
||||
const fetchAssessmentName = async (
|
||||
assessmentId: number,
|
||||
metaclassId: number,
|
||||
programmeId: number
|
||||
programmeId: number,
|
||||
): Promise<string> => {
|
||||
const searchAssessment = (data: any): string | null => {
|
||||
// Search syllabus
|
||||
for (const item of data.syllabus || []) {
|
||||
const found = (item.assessments || []).find((a: any) => a.id === assessmentId);
|
||||
const found = (item.assessments || []).find(
|
||||
(a: any) => a.id === assessmentId,
|
||||
);
|
||||
if (found) return found.title;
|
||||
}
|
||||
|
||||
// Search pending
|
||||
const foundPending = (data.pending || []).find((a: any) => a.id === assessmentId);
|
||||
const foundPending = (data.pending || []).find(
|
||||
(a: any) => a.id === assessmentId,
|
||||
);
|
||||
if (foundPending) return foundPending.title;
|
||||
|
||||
// Search tasks
|
||||
const foundTask = (data.tasks || []).find((a: any) => a.id === assessmentId);
|
||||
const foundTask = (data.tasks || []).find(
|
||||
(a: any) => a.id === assessmentId,
|
||||
);
|
||||
if (foundTask) return foundTask.title;
|
||||
|
||||
return null;
|
||||
@@ -88,11 +94,17 @@ const fetchAssessmentName = async (
|
||||
if (title) return title;
|
||||
|
||||
// Try from /upcoming if not found in /past
|
||||
const upcomingPayload = await fetchAssessments("/seqta/student/assessment/list/upcoming");
|
||||
const foundUpcoming = (upcomingPayload || []).find((a: any) => a.id === assessmentId);
|
||||
const upcomingPayload = await fetchAssessments(
|
||||
"/seqta/student/assessment/list/upcoming",
|
||||
);
|
||||
const foundUpcoming = (upcomingPayload || []).find(
|
||||
(a: any) => a.id === assessmentId,
|
||||
);
|
||||
if (foundUpcoming) return foundUpcoming.title;
|
||||
|
||||
throw new Error(`Assessment with ID ${assessmentId} not found in past or upcoming.`);
|
||||
throw new Error(
|
||||
`Assessment with ID ${assessmentId} not found in past or upcoming.`,
|
||||
);
|
||||
};
|
||||
|
||||
/* ------------- Job ------------- */
|
||||
@@ -103,9 +115,10 @@ export const assessmentsJob: Job = {
|
||||
frequency: { type: "expiry", afterMs: 15 * 60 * 1000 },
|
||||
|
||||
run: async (ctx) => {
|
||||
const progress =
|
||||
(await ctx.getProgress<AssessmentsProgress>()) ?? { lastTs: 0 };
|
||||
|
||||
const progress = (await ctx.getProgress<AssessmentsProgress>()) ?? {
|
||||
lastTs: 0,
|
||||
};
|
||||
|
||||
let notifications: Notification[];
|
||||
try {
|
||||
notifications = await fetchNotifications();
|
||||
@@ -113,25 +126,33 @@ export const assessmentsJob: Job = {
|
||||
console.error("[Assessments job] fetch failed:", e);
|
||||
return [];
|
||||
}
|
||||
|
||||
|
||||
const notificationIsIndexed = async (id: string): Promise<boolean> => {
|
||||
const [inAssessments, inMessages] = await Promise.all([
|
||||
ctx.getStoredItems("assessments").then((items) => items.some((i) => i.id === id)),
|
||||
ctx.getStoredItems("messages").then((items) => items.some((i) => i.id === id)),
|
||||
ctx
|
||||
.getStoredItems("assessments")
|
||||
.then((items) => items.some((i) => i.id === id)),
|
||||
ctx
|
||||
.getStoredItems("messages")
|
||||
.then((items) => items.some((i) => i.id === id)),
|
||||
]);
|
||||
return inAssessments || inMessages;
|
||||
};
|
||||
|
||||
|
||||
const items: IndexItem[] = [];
|
||||
|
||||
|
||||
for (const notif of notifications) {
|
||||
const id = notif.notificationID.toString();
|
||||
if (await notificationIsIndexed(id)) continue;
|
||||
|
||||
|
||||
if (notif.type === "coneqtassessments") {
|
||||
const a = notif.coneqtAssessments;
|
||||
|
||||
const content = await fetchAssessmentName(a.assessmentID, a.metaclassID, a.programmeID);
|
||||
const content = await fetchAssessmentName(
|
||||
a.assessmentID,
|
||||
a.metaclassID,
|
||||
a.programmeID,
|
||||
);
|
||||
items.push({
|
||||
id,
|
||||
text: a.title,
|
||||
@@ -168,11 +189,11 @@ export const assessmentsJob: Job = {
|
||||
actionId: "message",
|
||||
renderComponentId: "message",
|
||||
},
|
||||
"messages"
|
||||
"messages",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (items.length) {
|
||||
const latest = Math.max(
|
||||
...items.map((i) => i.dateAdded),
|
||||
@@ -190,4 +211,4 @@ export const assessmentsJob: Job = {
|
||||
date.setHours(0, 0, 0, 0);
|
||||
return items.filter((i) => i.dateAdded >= date.getTime());
|
||||
},
|
||||
};
|
||||
};
|
||||
|
||||
@@ -49,12 +49,12 @@ export const messagesJob: Job = {
|
||||
|
||||
run: async (ctx) => {
|
||||
const limit = 100;
|
||||
const progress =
|
||||
(await ctx.getProgress<MessagesProgress>()) ?? { offset: 0, done: false };
|
||||
const progress = (await ctx.getProgress<MessagesProgress>()) ?? {
|
||||
offset: 0,
|
||||
done: false,
|
||||
};
|
||||
|
||||
const existingIds = new Set(
|
||||
(await ctx.getStoredItems()).map((i) => i.id),
|
||||
);
|
||||
const existingIds = new Set((await ctx.getStoredItems()).map((i) => i.id));
|
||||
|
||||
let consecutiveExisting = 0;
|
||||
|
||||
@@ -129,4 +129,4 @@ export const messagesJob: Job = {
|
||||
const fourYears = Date.now() - 4 * 365 * 24 * 60 * 60 * 1000;
|
||||
return items.filter((i) => i.dateAdded >= fourYears);
|
||||
},
|
||||
};
|
||||
};
|
||||
|
||||
@@ -36,4 +36,4 @@ export interface Job {
|
||||
renderComponentId: string;
|
||||
run: (ctx: JobContext) => Promise<IndexItem[]>;
|
||||
purge?: (items: IndexItem[]) => IndexItem[];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
export function htmlToPlainText(rawHtml: string): string {
|
||||
const parser = new DOMParser();
|
||||
const doc = parser.parseFromString(rawHtml, 'text/html');
|
||||
const doc = parser.parseFromString(rawHtml, "text/html");
|
||||
const { body } = doc;
|
||||
|
||||
body.querySelectorAll('script,style,template,noscript,meta,link').forEach(el => el.remove());
|
||||
body
|
||||
.querySelectorAll("script,style,template,noscript,meta,link")
|
||||
.forEach((el) => el.remove());
|
||||
|
||||
body.querySelectorAll('.forward').forEach(el => {
|
||||
body.querySelectorAll(".forward").forEach((el) => {
|
||||
let n: ChildNode | null = el;
|
||||
while (n) {
|
||||
const next = n.nextSibling as ChildNode | null;
|
||||
@@ -14,19 +16,19 @@ export function htmlToPlainText(rawHtml: string): string {
|
||||
}
|
||||
});
|
||||
|
||||
let text = body.innerText || '';
|
||||
let text = body.innerText || "";
|
||||
|
||||
text = text
|
||||
.replace(/\u00A0/g, ' ')
|
||||
.replace(/[ \t]{2,}/g, ' ')
|
||||
.replace(/\r\n|\r/g, '\n')
|
||||
.replace(/\n{3,}/g, '\n\n')
|
||||
.replace(/^[.\w#][^{]{0,100}\{[^}]*\}$/gm, '')
|
||||
.split('\n')
|
||||
.map(line => line.trimEnd())
|
||||
.filter(line => line.trim().length > 0 || line === '')
|
||||
.join('\n')
|
||||
.replace(/\u00A0/g, " ")
|
||||
.replace(/[ \t]{2,}/g, " ")
|
||||
.replace(/\r\n|\r/g, "\n")
|
||||
.replace(/\n{3,}/g, "\n\n")
|
||||
.replace(/^[.\w#][^{]{0,100}\{[^}]*\}$/gm, "")
|
||||
.split("\n")
|
||||
.map((line) => line.trimEnd())
|
||||
.filter((line) => line.trim().length > 0 || line === "")
|
||||
.join("\n")
|
||||
.trim();
|
||||
|
||||
return text;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,8 +1,4 @@
|
||||
import {
|
||||
EmbeddingIndex,
|
||||
getEmbedding,
|
||||
initializeModel,
|
||||
} from "embeddia";
|
||||
import { EmbeddingIndex, getEmbedding, initializeModel } from "embeddia";
|
||||
import type { HydratedIndexItem } from "../types";
|
||||
|
||||
let vectorIndex: EmbeddingIndex | null = null;
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
import { refreshVectorCache } from '../../search/vector/vectorSearch';
|
||||
import type { HydratedIndexItem } from '../types';
|
||||
import vectorWorker from './vectorWorker.ts?inlineWorker';
|
||||
import type { SearchResult } from 'embeddia';
|
||||
import { refreshVectorCache } from "../../search/vector/vectorSearch";
|
||||
import type { HydratedIndexItem } from "../types";
|
||||
import vectorWorker from "./vectorWorker.ts?inlineWorker";
|
||||
import type { SearchResult } from "embeddia";
|
||||
|
||||
export type ProgressCallback = (data: {
|
||||
status: 'started' | 'processing' | 'complete' | 'error' | 'cancelled';
|
||||
status: "started" | "processing" | "complete" | "error" | "cancelled";
|
||||
total?: number;
|
||||
processed?: number;
|
||||
message?: string;
|
||||
@@ -16,10 +16,21 @@ export class VectorWorkerManager {
|
||||
private isInitialized = false;
|
||||
private readyPromise: Promise<void> | null = null; // To await initialization
|
||||
private progressCallback: ProgressCallback | null = null;
|
||||
private searchPromises = new Map<string, { resolve: (value: SearchResult[]) => void, reject: (reason?: any) => void, timer: NodeJS.Timeout }>();
|
||||
private searchPromises = new Map<
|
||||
string,
|
||||
{
|
||||
resolve: (value: SearchResult[]) => void;
|
||||
reject: (reason?: any) => void;
|
||||
timer: NodeJS.Timeout;
|
||||
}
|
||||
>();
|
||||
private debounceTimer: NodeJS.Timeout | null = null;
|
||||
private lastSearchParams: { query: string; topK: number; resolve: (results: SearchResult[]) => void, reject: (reason?: any) => void } | null = null;
|
||||
|
||||
private lastSearchParams: {
|
||||
query: string;
|
||||
topK: number;
|
||||
resolve: (results: SearchResult[]) => void;
|
||||
reject: (reason?: any) => void;
|
||||
} | null = null;
|
||||
|
||||
private constructor() {
|
||||
// Start initialization immediately, but allow awaiting it
|
||||
@@ -39,101 +50,115 @@ export class VectorWorkerManager {
|
||||
if (this.readyPromise) return this.readyPromise;
|
||||
|
||||
return new Promise<void>((resolve, reject) => {
|
||||
// Create the worker
|
||||
this.worker = vectorWorker();
|
||||
// Create the worker
|
||||
this.worker = vectorWorker();
|
||||
|
||||
console.log('Worker initialized', this.worker);
|
||||
console.log("Worker initialized", this.worker);
|
||||
|
||||
const timeout = setTimeout(() => {
|
||||
console.error('Vector worker initialization timed out');
|
||||
this.worker?.terminate(); // Clean up worker if it exists
|
||||
this.worker = null;
|
||||
this.isInitialized = false; // Ensure state reflects failure
|
||||
this.readyPromise = null; // Allow retrying init later
|
||||
reject(new Error('Worker initialization timed out'));
|
||||
}, 10000); // Increased timeout
|
||||
const timeout = setTimeout(() => {
|
||||
console.error("Vector worker initialization timed out");
|
||||
this.worker?.terminate(); // Clean up worker if it exists
|
||||
this.worker = null;
|
||||
this.isInitialized = false; // Ensure state reflects failure
|
||||
this.readyPromise = null; // Allow retrying init later
|
||||
reject(new Error("Worker initialization timed out"));
|
||||
}, 10000); // Increased timeout
|
||||
|
||||
// Set up message handling
|
||||
this.worker!.addEventListener('message', (e) => {
|
||||
const { type, data } = e.data;
|
||||
console.debug("Message from vector worker:", type, data);
|
||||
// Set up message handling
|
||||
this.worker!.addEventListener("message", (e) => {
|
||||
const { type, data } = e.data;
|
||||
console.debug("Message from vector worker:", type, data);
|
||||
|
||||
switch (type) {
|
||||
case 'ready':
|
||||
this.isInitialized = true;
|
||||
clearTimeout(timeout);
|
||||
console.debug('Vector worker initialized and ready.');
|
||||
resolve(); // Resolve the init promise
|
||||
break;
|
||||
switch (type) {
|
||||
case "ready":
|
||||
this.isInitialized = true;
|
||||
clearTimeout(timeout);
|
||||
console.debug("Vector worker initialized and ready.");
|
||||
resolve(); // Resolve the init promise
|
||||
break;
|
||||
|
||||
case 'progress':
|
||||
if (this.progressCallback) {
|
||||
this.progressCallback(data);
|
||||
case "progress":
|
||||
if (this.progressCallback) {
|
||||
this.progressCallback(data);
|
||||
|
||||
if (data.status === 'complete') {
|
||||
refreshVectorCache();
|
||||
}
|
||||
if (data.status === "complete") {
|
||||
refreshVectorCache();
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case 'searchResults':
|
||||
const searchInfo = this.searchPromises.get(data.messageId);
|
||||
if (searchInfo) {
|
||||
clearTimeout(searchInfo.timer); // Clear timeout on success
|
||||
searchInfo.resolve(data.results);
|
||||
this.searchPromises.delete(data.messageId);
|
||||
} else {
|
||||
console.warn('Received search results for unknown messageId:', data.messageId);
|
||||
}
|
||||
break;
|
||||
case "searchResults":
|
||||
const searchInfo = this.searchPromises.get(data.messageId);
|
||||
if (searchInfo) {
|
||||
clearTimeout(searchInfo.timer); // Clear timeout on success
|
||||
searchInfo.resolve(data.results);
|
||||
this.searchPromises.delete(data.messageId);
|
||||
} else {
|
||||
console.warn(
|
||||
"Received search results for unknown messageId:",
|
||||
data.messageId,
|
||||
);
|
||||
}
|
||||
break;
|
||||
|
||||
case 'searchError':
|
||||
const errorInfo = this.searchPromises.get(data.messageId);
|
||||
if (errorInfo) {
|
||||
clearTimeout(errorInfo.timer); // Clear timeout on error
|
||||
errorInfo.reject(new Error(data.error));
|
||||
this.searchPromises.delete(data.messageId);
|
||||
} else {
|
||||
console.warn('Received search error for unknown messageId:', data.messageId);
|
||||
}
|
||||
break;
|
||||
case "searchError":
|
||||
const errorInfo = this.searchPromises.get(data.messageId);
|
||||
if (errorInfo) {
|
||||
clearTimeout(errorInfo.timer); // Clear timeout on error
|
||||
errorInfo.reject(new Error(data.error));
|
||||
this.searchPromises.delete(data.messageId);
|
||||
} else {
|
||||
console.warn(
|
||||
"Received search error for unknown messageId:",
|
||||
data.messageId,
|
||||
);
|
||||
}
|
||||
break;
|
||||
|
||||
case 'searchCancelled':
|
||||
const cancelledInfo = this.searchPromises.get(data.messageId);
|
||||
if (cancelledInfo) {
|
||||
clearTimeout(cancelledInfo.timer); // Clear timeout on cancel
|
||||
// Reject with a specific cancellation error or resolve with empty? Let's reject.
|
||||
cancelledInfo.reject(new Error('Search cancelled by worker'));
|
||||
this.searchPromises.delete(data.messageId);
|
||||
} else {
|
||||
console.debug('Received cancellation for unknown messageId:', data.messageId);
|
||||
}
|
||||
break;
|
||||
case "searchCancelled":
|
||||
const cancelledInfo = this.searchPromises.get(data.messageId);
|
||||
if (cancelledInfo) {
|
||||
clearTimeout(cancelledInfo.timer); // Clear timeout on cancel
|
||||
// Reject with a specific cancellation error or resolve with empty? Let's reject.
|
||||
cancelledInfo.reject(new Error("Search cancelled by worker"));
|
||||
this.searchPromises.delete(data.messageId);
|
||||
} else {
|
||||
console.debug(
|
||||
"Received cancellation for unknown messageId:",
|
||||
data.messageId,
|
||||
);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
console.warn('Unknown message from worker:', type, data);
|
||||
}
|
||||
});
|
||||
default:
|
||||
console.warn("Unknown message from worker:", type, data);
|
||||
}
|
||||
});
|
||||
|
||||
// Initialize the worker
|
||||
this.worker!.postMessage({ type: 'init' });
|
||||
// Initialize the worker
|
||||
this.worker!.postMessage({ type: "init" });
|
||||
});
|
||||
}
|
||||
|
||||
// Ensures worker is ready before proceeding
|
||||
private async ensureReady() {
|
||||
if (!this.readyPromise) {
|
||||
// If init wasn't called or failed, try again
|
||||
console.warn("Worker not initialized, attempting init...");
|
||||
this.readyPromise = this.initWorker();
|
||||
// If init wasn't called or failed, try again
|
||||
console.warn("Worker not initialized, attempting init...");
|
||||
this.readyPromise = this.initWorker();
|
||||
}
|
||||
await this.readyPromise;
|
||||
if (!this.isInitialized || !this.worker) {
|
||||
throw new Error("Vector Worker is not available after initialization attempt.");
|
||||
throw new Error(
|
||||
"Vector Worker is not available after initialization attempt.",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
async processItems(items: HydratedIndexItem[], onProgress?: ProgressCallback) {
|
||||
async processItems(
|
||||
items: HydratedIndexItem[],
|
||||
onProgress?: ProgressCallback,
|
||||
) {
|
||||
await this.ensureReady(); // Wait for worker to be ready
|
||||
|
||||
this.progressCallback = onProgress || null;
|
||||
@@ -146,13 +171,16 @@ export class VectorWorkerManager {
|
||||
const serialisableItems = items.map(({ renderComponent, ...rest }) => rest);
|
||||
|
||||
this.worker!.postMessage({
|
||||
type: 'process',
|
||||
data: { items: serialisableItems }
|
||||
type: "process",
|
||||
data: { items: serialisableItems },
|
||||
});
|
||||
}
|
||||
|
||||
// Public search method
|
||||
public async search(query: string, topK: number = 10): Promise<SearchResult[]> {
|
||||
public async search(
|
||||
query: string,
|
||||
topK: number = 10,
|
||||
): Promise<SearchResult[]> {
|
||||
await this.ensureReady();
|
||||
|
||||
return new Promise((resolve, reject) => {
|
||||
@@ -167,54 +195,62 @@ export class VectorWorkerManager {
|
||||
// Set a timeout for the search operation itself
|
||||
const searchTimeout = 10000; // e.g., 10 seconds
|
||||
const searchTimer = setTimeout(() => {
|
||||
if (this.searchPromises.has(messageId)) {
|
||||
console.error(`Search timed out for messageId: ${messageId}`);
|
||||
currentParams.reject(new Error(`Search timed out after ${searchTimeout}ms`));
|
||||
this.searchPromises.delete(messageId);
|
||||
}
|
||||
if (this.searchPromises.has(messageId)) {
|
||||
console.error(`Search timed out for messageId: ${messageId}`);
|
||||
currentParams.reject(
|
||||
new Error(`Search timed out after ${searchTimeout}ms`),
|
||||
);
|
||||
this.searchPromises.delete(messageId);
|
||||
}
|
||||
}, searchTimeout);
|
||||
|
||||
this.searchPromises.set(messageId, {
|
||||
resolve: currentParams.resolve,
|
||||
reject: currentParams.reject,
|
||||
timer: searchTimer,
|
||||
});
|
||||
|
||||
this.searchPromises.set(messageId, { resolve: currentParams.resolve, reject: currentParams.reject, timer: searchTimer });
|
||||
|
||||
console.debug(`Sending search request (ID: ${messageId}) to worker: "${currentParams.query}"`);
|
||||
console.debug(
|
||||
`Sending search request (ID: ${messageId}) to worker: "${currentParams.query}"`,
|
||||
);
|
||||
console.log(this.worker);
|
||||
this.worker.postMessage({
|
||||
type: "search",
|
||||
data: { query: currentParams.query, topK: currentParams.topK },
|
||||
messageId
|
||||
messageId,
|
||||
});
|
||||
} else if (this.lastSearchParams) {
|
||||
// This case might happen if ensureReady failed but didn't throw
|
||||
console.error("Worker unavailable when trying to send search request.");
|
||||
this.lastSearchParams.reject(new Error("Worker unavailable for search"));
|
||||
this.lastSearchParams = null;
|
||||
this.debounceTimer = null;
|
||||
// This case might happen if ensureReady failed but didn't throw
|
||||
console.error("Worker unavailable when trying to send search request.");
|
||||
this.lastSearchParams.reject(
|
||||
new Error("Worker unavailable for search"),
|
||||
);
|
||||
this.lastSearchParams = null;
|
||||
this.debounceTimer = null;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Method to cancel all pending/debounced searches
|
||||
private cancelAllSearches(reason: string = "Cancelled") {
|
||||
if (this.debounceTimer) {
|
||||
clearTimeout(this.debounceTimer);
|
||||
this.debounceTimer = null;
|
||||
if (this.lastSearchParams) {
|
||||
this.lastSearchParams.reject(new Error(`Search cancelled: ${reason}`));
|
||||
this.lastSearchParams = null;
|
||||
}
|
||||
}
|
||||
// We might also want to tell the worker to cancel its *current* search
|
||||
// if it supports it, but this requires worker modification.
|
||||
// For now, just reject pending promises in the manager.
|
||||
for (const [messageId, promiseInfo] of this.searchPromises.entries()) {
|
||||
clearTimeout(promiseInfo.timer);
|
||||
promiseInfo.reject(new Error(`Search cancelled: ${reason}`));
|
||||
this.searchPromises.delete(messageId);
|
||||
}
|
||||
if (this.debounceTimer) {
|
||||
clearTimeout(this.debounceTimer);
|
||||
this.debounceTimer = null;
|
||||
if (this.lastSearchParams) {
|
||||
this.lastSearchParams.reject(new Error(`Search cancelled: ${reason}`));
|
||||
this.lastSearchParams = null;
|
||||
}
|
||||
}
|
||||
// We might also want to tell the worker to cancel its *current* search
|
||||
// if it supports it, but this requires worker modification.
|
||||
// For now, just reject pending promises in the manager.
|
||||
for (const [messageId, promiseInfo] of this.searchPromises.entries()) {
|
||||
clearTimeout(promiseInfo.timer);
|
||||
promiseInfo.reject(new Error(`Search cancelled: ${reason}`));
|
||||
this.searchPromises.delete(messageId);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
terminate() {
|
||||
console.debug("Terminating Vector Worker Manager...");
|
||||
this.cancelAllSearches("Worker terminated"); // Cancel pending searches
|
||||
@@ -229,4 +265,4 @@ export class VectorWorkerManager {
|
||||
// Clear the static instance? Or assume app lifecycle handles this?
|
||||
// VectorWorkerManager.instance = null; // Uncomment if needed
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { EmbeddingIndex, getEmbedding, initializeModel } from 'embeddia';
|
||||
import type { HydratedIndexItem } from '../../indexing/types';
|
||||
import type { SearchResult } from 'embeddia';
|
||||
import { EmbeddingIndex, getEmbedding, initializeModel } from "embeddia";
|
||||
import type { HydratedIndexItem } from "../../indexing/types";
|
||||
import type { SearchResult } from "embeddia";
|
||||
|
||||
let vectorIndex: EmbeddingIndex | null = null;
|
||||
|
||||
@@ -10,7 +10,7 @@ export async function initVectorSearch() {
|
||||
vectorIndex = new EmbeddingIndex([]);
|
||||
vectorIndex.preloadIndexedDB();
|
||||
} catch (e) {
|
||||
console.error('Error initializing vector search', e);
|
||||
console.error("Error initializing vector search", e);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,17 +18,20 @@ export interface VectorSearchResult extends SearchResult {
|
||||
object: HydratedIndexItem & { embedding: number[] };
|
||||
}
|
||||
|
||||
export async function searchVectors(query: string, topK: number = 10): Promise<VectorSearchResult[]> {
|
||||
export async function searchVectors(
|
||||
query: string,
|
||||
topK: number = 10,
|
||||
): Promise<VectorSearchResult[]> {
|
||||
if (!vectorIndex) await initVectorSearch();
|
||||
|
||||
|
||||
const queryEmbedding = await getEmbedding(query.slice(0, 100));
|
||||
|
||||
const results = await vectorIndex!.search(queryEmbedding, {
|
||||
const results = await vectorIndex!.search(queryEmbedding, {
|
||||
topK,
|
||||
useStorage: 'indexedDB',
|
||||
dedupeEntries: true
|
||||
useStorage: "indexedDB",
|
||||
dedupeEntries: true,
|
||||
});
|
||||
|
||||
|
||||
return results as VectorSearchResult[];
|
||||
}
|
||||
|
||||
@@ -36,4 +39,4 @@ export async function refreshVectorCache() {
|
||||
if (!vectorIndex) await initVectorSearch();
|
||||
vectorIndex!.clearIndexedDBCache();
|
||||
vectorIndex!.preloadIndexedDB();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,4 +4,3 @@ import type { HydratedIndexItem } from "../../indexing/types";
|
||||
export interface VectorSearchResult extends SearchResult {
|
||||
object: HydratedIndexItem & { embedding: number[] };
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user