update search to hopefully index correctly this time

This commit is contained in:
StroepWafel
2026-01-22 18:26:45 +10:30
parent 9a002d18b0
commit 9b52bae404
11 changed files with 811 additions and 150 deletions
+60 -23
View File
@@ -42,32 +42,69 @@ const settings = defineSettings({
if (confirmed) {
try {
// Dynamically import the worker manager to avoid loading heavy dependencies
// Dynamically import modules to avoid loading heavy dependencies
const { VectorWorkerManager } = await import("./src/indexing/worker/vectorWorkerManager");
const workerManager = VectorWorkerManager.getInstance();
await workerManager.resetWorker();
console.log("Vector worker reset successfully");
} catch (e) {
console.warn("Failed to reset vector worker:", e);
}
const { resetDatabase } = await import("./src/indexing/db");
// Delete both 'embeddiaDB' and 'betterseqta-index' using native IndexedDB APIs
const deleteDb = (dbName: string) => {
return new Promise<void>((resolve, reject) => {
const req = indexedDB.deleteDatabase(dbName);
req.onsuccess = () => resolve();
req.onerror = () => reject(req.error);
req.onblocked = () => {
reject(new Error(`One database is open, failed to remove: ${dbName}`));
};
});
};
try {
await deleteDb("embeddiaDB");
await deleteDb("betterseqta-index");
alert("Search index and storage have been reset.");
// Reset vector worker first
try {
const workerManager = VectorWorkerManager.getInstance();
await workerManager.resetWorker();
console.log("Vector worker reset successfully");
} catch (e) {
console.warn("Failed to reset vector worker:", e);
}
// Close all database connections properly before deletion
try {
await resetDatabase();
console.log("betterseqta-index database closed and reset");
} catch (e) {
console.warn("Failed to reset betterseqta-index database:", e);
}
// Wait a bit for connections to fully close
await new Promise(resolve => setTimeout(resolve, 100));
// Delete embeddiaDB (vector search database)
const deleteDb = (dbName: string) => {
return new Promise<void>((resolve, reject) => {
const req = indexedDB.deleteDatabase(dbName);
req.onsuccess = () => {
console.log(`Successfully deleted database: ${dbName}`);
resolve();
};
req.onerror = () => {
console.error(`Error deleting database ${dbName}:`, req.error);
reject(req.error);
};
req.onblocked = () => {
console.warn(`Database ${dbName} deletion blocked - connections still open`);
// Wait and retry once
setTimeout(() => {
const retryReq = indexedDB.deleteDatabase(dbName);
retryReq.onsuccess = () => {
console.log(`Successfully deleted database on retry: ${dbName}`);
resolve();
};
retryReq.onerror = () => reject(retryReq.error);
retryReq.onblocked = () => {
reject(new Error(`One database is open, failed to remove: ${dbName}. Please close other tabs and try again.`));
};
}, 500);
};
});
};
try {
await deleteDb("embeddiaDB");
await deleteDb("betterseqta-index");
alert("Search index and storage have been reset successfully.");
} catch (e) {
alert("Failed to reset one or more databases: " + String(e) + "\n\nTry closing other browser tabs and try again.");
}
} catch (e) {
alert("Failed to reset one or more databases: " + String(e));
alert("Failed to reset index: " + String(e));
}
}
},
@@ -168,6 +168,9 @@
term,
commandsFuse,
commandIdToItemMap,
dynamicContentFuse,
dynamicIdToItemMap,
true, // sortByRecent
);
} else {
combinedResults = [];
@@ -50,31 +50,68 @@ const settings = defineSettings({
if (confirmed) {
try {
// Reset the vector worker first
const workerManager = VectorWorkerManager.getInstance();
await workerManager.resetWorker();
console.log("Vector worker reset successfully");
} catch (e) {
console.warn("Failed to reset vector worker:", e);
}
// Import resetDatabase function to properly close connections
const { resetDatabase } = await import("../indexing/db");
// Delete both 'embeddiaDB' and 'betterseqta-index' using native IndexedDB APIs
const deleteDb = (dbName: string) => {
return new Promise<void>((resolve, reject) => {
const req = indexedDB.deleteDatabase(dbName);
req.onsuccess = () => resolve();
req.onerror = () => reject(req.error);
req.onblocked = () => {
reject(new Error(`One database is open, failed to remove: ${dbName}`));
};
});
};
try {
await deleteDb("embeddiaDB");
await deleteDb("betterseqta-index");
alert("Search index and storage have been reset.");
// Reset the vector worker first
try {
const workerManager = VectorWorkerManager.getInstance();
await workerManager.resetWorker();
console.log("Vector worker reset successfully");
} catch (e) {
console.warn("Failed to reset vector worker:", e);
}
// Close all database connections properly before deletion
try {
await resetDatabase();
console.log("betterseqta-index database closed and reset");
} catch (e) {
console.warn("Failed to reset betterseqta-index database:", e);
}
// Wait a bit for connections to fully close
await new Promise(resolve => setTimeout(resolve, 100));
// Delete embeddiaDB (vector search database)
const deleteDb = (dbName: string) => {
return new Promise<void>((resolve, reject) => {
const req = indexedDB.deleteDatabase(dbName);
req.onsuccess = () => {
console.log(`Successfully deleted database: ${dbName}`);
resolve();
};
req.onerror = () => {
console.error(`Error deleting database ${dbName}:`, req.error);
reject(req.error);
};
req.onblocked = () => {
console.warn(`Database ${dbName} deletion blocked - connections still open`);
// Wait and retry once
setTimeout(() => {
const retryReq = indexedDB.deleteDatabase(dbName);
retryReq.onsuccess = () => {
console.log(`Successfully deleted database on retry: ${dbName}`);
resolve();
};
retryReq.onerror = () => reject(retryReq.error);
retryReq.onblocked = () => {
reject(new Error(`One database is open, failed to remove: ${dbName}. Please close other tabs and try again.`));
};
}, 500);
};
});
};
try {
await deleteDb("embeddiaDB");
await deleteDb("betterseqta-index");
alert("Search index and storage have been reset successfully.");
} catch (e) {
alert("Failed to reset one or more databases: " + String(e) + "\n\nTry closing other browser tabs and try again.");
}
} catch (e) {
alert("Failed to reset one or more databases: " + String(e));
alert("Failed to reset index: " + String(e));
}
}
},
@@ -59,69 +59,150 @@ export const actionMap: Record<string, ActionHandler<any>> = {
}) as ActionHandler<any>,
assessment: (async (item: IndexItem & { metadata: AssessmentMetadata }) => {
console.debug("[Assessment Action] Navigating to assessment:", item.id, item.metadata);
// Deep clone the entire item to avoid Firefox XrayWrapper issues
// Firefox XrayWrapper prevents direct access to nested properties
let itemClone: IndexItem & { metadata: AssessmentMetadata };
let metadata: AssessmentMetadata;
if (item.metadata?.isMessageBased) {
try {
// First try to clone the entire item
itemClone = JSON.parse(JSON.stringify(item));
metadata = itemClone.metadata || {};
} catch (e) {
console.warn("[Assessment Action] Failed to clone item, trying to clone metadata separately:", e);
try {
// If full clone fails, try cloning just metadata
metadata = JSON.parse(JSON.stringify(item.metadata || {}));
itemClone = { ...item, metadata };
} catch (e2) {
console.warn("[Assessment Action] Failed to clone metadata, using direct access:", e2);
itemClone = item;
metadata = item.metadata || {} as AssessmentMetadata;
}
}
// Try to extract metadata values using multiple methods to handle XrayWrapper
const getMetadataValue = (key: string, altKey?: string): any => {
try {
// Try direct access first
const value = metadata[key];
if (value !== undefined && value !== null) {
return value;
}
if (altKey) {
const altValue = metadata[altKey];
if (altValue !== undefined && altValue !== null) {
return altValue;
}
}
// Try accessing via Object.keys iteration (works around XrayWrapper)
try {
const keys = Object.keys(metadata);
for (const k of keys) {
if (k === key || k === altKey) {
const val = metadata[k];
if (val !== undefined && val !== null) {
return val;
}
}
}
} catch (e) {
// Object.keys might fail on XrayWrapper, that's okay
}
return undefined;
} catch (e) {
console.warn(`[Assessment Action] Failed to access metadata.${key}:`, e);
return undefined;
}
};
// Log everything for debugging
console.log("[Assessment Action] Item ID:", itemClone.id);
try {
console.log("[Assessment Action] Metadata keys:", Object.keys(metadata));
console.log("[Assessment Action] Full metadata (stringified):", JSON.stringify(metadata, null, 2));
} catch (e) {
console.warn("[Assessment Action] Could not stringify metadata:", e);
console.log("[Assessment Action] Metadata (direct):", metadata);
}
if (getMetadataValue('isMessageBased')) {
window.location.hash = `#?page=/messages`;
await waitForElm('[class*="Viewer__Viewer___"] > div', true, 20);
// Select the specific direct message
ReactFiber.find('[class*="Viewer__Viewer___"] > div').setState({
selected: new Set([item.metadata.messageId]),
selected: new Set([getMetadataValue('messageId')]),
});
} else {
// Use the correct URL format: /assessments/{programmeId}:{metaclassId}&item={assessmentId}
// Convert to numbers to handle string/number inconsistencies
let programmeId = item.metadata?.programmeId;
let metaclassId = item.metadata?.metaclassId;
let assessmentId = item.metadata?.assessmentId;
// Extract values - check both camelCase and PascalCase, and try multiple access methods
let programmeId = getMetadataValue('programmeId', 'programmeID');
let metaclassId = getMetadataValue('metaclassId', 'metaclassID');
let assessmentId = getMetadataValue('assessmentId', 'assessmentID');
// Fallback: try to extract assessmentId from item ID if metadata is missing
if (!assessmentId && item.id && item.id.startsWith('assignment-')) {
const extractedId = item.id.replace('assignment-', '');
if ((assessmentId === undefined || assessmentId === null) && itemClone.id && itemClone.id.startsWith('assignment-')) {
const extractedId = itemClone.id.replace('assignment-', '');
assessmentId = Number(extractedId) || extractedId;
console.debug("[Assessment Action] Extracted assessmentId from item ID:", assessmentId);
console.log("[Assessment Action] Extracted assessmentId from item ID:", assessmentId);
}
// Convert to numbers for consistency
programmeId = Number(programmeId) || programmeId;
metaclassId = Number(metaclassId) || metaclassId;
assessmentId = Number(assessmentId) || assessmentId;
// Convert to numbers, but preserve 0 as valid
if (programmeId !== undefined && programmeId !== null && programmeId !== '') {
const num = Number(programmeId);
programmeId = isNaN(num) ? programmeId : num;
}
if (metaclassId !== undefined && metaclassId !== null && metaclassId !== '') {
const num = Number(metaclassId);
metaclassId = isNaN(num) ? metaclassId : num;
}
if (assessmentId !== undefined && assessmentId !== null && assessmentId !== '') {
const num = Number(assessmentId);
assessmentId = isNaN(num) ? assessmentId : num;
}
// Check if values exist (including 0, which is a valid ID)
const hasProgrammeId = programmeId !== undefined && programmeId !== null && programmeId !== '';
const hasMetaclassId = metaclassId !== undefined && metaclassId !== null && metaclassId !== '';
const hasAssessmentId = assessmentId !== undefined && assessmentId !== null && assessmentId !== '';
// Use typeof check to properly handle 0
const hasProgrammeId = programmeId !== undefined && programmeId !== null && programmeId !== '' && typeof programmeId === 'number';
const hasMetaclassId = metaclassId !== undefined && metaclassId !== null && metaclassId !== '' && typeof metaclassId === 'number';
const hasAssessmentId = assessmentId !== undefined && assessmentId !== null && assessmentId !== '' && typeof assessmentId === 'number';
console.log("[Assessment Action] Extracted values:", {
programmeId,
metaclassId,
assessmentId,
hasProgrammeId,
hasMetaclassId,
hasAssessmentId,
programmeIdType: typeof programmeId,
metaclassIdType: typeof metaclassId,
assessmentIdType: typeof assessmentId,
});
if (hasProgrammeId && hasMetaclassId && hasAssessmentId) {
const url = `#?page=/assessments/${programmeId}:${metaclassId}&item=${assessmentId}`;
console.debug("[Assessment Action] Navigating to:", url, {
programmeId,
metaclassId,
assessmentId,
rawMetadata: item.metadata,
});
console.log("[Assessment Action] Navigating to:", url);
window.location.hash = url;
} else {
// Fallback: try to navigate to assessments page if metadata is incomplete
console.warn("[Assessment Action] Missing required metadata:", {
console.error("[Assessment Action] Missing required metadata:", {
programmeId,
metaclassId,
assessmentId,
hasProgrammeId,
hasMetaclassId,
hasAssessmentId,
fullMetadata: item.metadata,
itemId: item.id,
itemKeys: Object.keys(item),
metadataKeys: Object.keys(metadata),
metadataString: JSON.stringify(metadata),
itemId: itemClone.id,
});
// If we at least have an assessmentId, try to navigate to the general assessments page
// The user can then find it manually
if (hasAssessmentId) {
console.info("[Assessment Action] Attempting to navigate to assessments page with item filter");
window.location.hash = `#?page=/assessments/upcoming&item=${assessmentId}`;
} else {
console.warn("[Assessment Action] No valid assessment ID, redirecting to upcoming");
window.location.hash = `#?page=/assessments/upcoming`;
}
}
@@ -213,25 +213,54 @@ export async function clear(store: string): Promise<void> {
}
/**
 * Closes the connections this module holds to the index database and then
 * deletes the database.
 *
 * Steps:
 * 1. Close the cached connection, if any (a failing `close()` is logged and ignored).
 * 2. Await the in-flight open promise, if any, and close the connection it yields.
 * 3. Pause briefly so the closes can take effect.
 * 4. Issue a single `deleteDatabase` request for `DB_NAME`.
 *
 * A `blocked` event does not end the delete request: it stays pending and
 * completes on its own once the blocking connections close. Issuing a second
 * delete request therefore gains nothing (it would just wait behind the first),
 * so instead the pending request is given a bounded grace period before the
 * returned promise rejects.
 *
 * @returns A promise that resolves once the database has been deleted and
 *   `VERSION_KEY` has been removed from localStorage.
 * @throws Rejects with the request error if deletion fails, or with an `Error`
 *   if deletion is still blocked after the grace period.
 */
export async function resetDatabase(): Promise<void> {
  // How long a blocked delete may stay pending before we report failure.
  const BLOCKED_GRACE_MS = 1000;

  // Close cached database connection
  if (cachedDb) {
    try {
      cachedDb.close();
    } catch (e) {
      console.warn("[DB] Error closing cached database:", e);
    }
    cachedDb = null;
  }

  // Close pending database promise
  if (dbPromise) {
    try {
      const db = await dbPromise;
      db.close();
    } catch (e) {
      // The open request itself failed, so there is no connection to close.
    }
    dbPromise = null;
  }

  // Wait a bit for connections to fully close
  await new Promise(resolve => setTimeout(resolve, 100));

  return new Promise<void>((resolve, reject) => {
    let settled = false;
    let blockedTimer: ReturnType<typeof setTimeout> | undefined;

    // Ensures the promise is settled exactly once and the timer never outlives it.
    const settle = (action: () => void) => {
      if (blockedTimer !== undefined) {
        clearTimeout(blockedTimer);
        blockedTimer = undefined;
      }
      if (settled) return;
      settled = true;
      action();
    };

    const req = indexedDB.deleteDatabase(DB_NAME);

    req.onsuccess = () => {
      // The database is gone even if we already stopped waiting, so the stored
      // schema version is stale either way.
      localStorage.removeItem(VERSION_KEY);
      settle(resolve);
    };

    req.onerror = () => {
      console.error("[DB] Error deleting database:", req.error);
      settle(() => reject(req.error));
    };

    req.onblocked = () => {
      console.warn("[DB] Database deletion blocked - waiting for connections to close");
      if (settled || blockedTimer !== undefined) return;
      blockedTimer = setTimeout(() => {
        blockedTimer = undefined;
        settle(() =>
          reject(new Error(`Database is still open. Please close other tabs/windows and try again.`)),
        );
      }, BLOCKED_GRACE_MS);
    };
  });
}
@@ -406,8 +406,17 @@ export async function runIndexing(): Promise<void> {
} else if (renderComponentMap[item.renderComponentId]) {
renderComponent = renderComponentMap[item.renderComponentId];
}
// Create a new object instead of modifying the existing one
return { ...item, renderComponent };
// Deep clone to avoid Firefox XrayWrapper issues with nested objects like metadata
// Use JSON serialization to ensure all nested properties are accessible
try {
const cloned = JSON.parse(JSON.stringify(item));
cloned.renderComponent = renderComponent;
return cloned;
} catch (e) {
// Fallback to shallow copy if deep clone fails
console.warn("[Indexer] Failed to deep clone item, using shallow copy:", e);
return { ...item, renderComponent };
}
} catch (error) {
// Fallback: return item as-is if modification fails (Firefox XrayWrapper)
console.warn("[Indexer] Failed to add render component to item (Firefox XrayWrapper):", error);
@@ -151,16 +151,38 @@ export const assignmentsJob: Job = {
// Fetch past assessments
const past = await fetchPastAssessments(student, subjects);
// Create a lookup map from subject code to programme/metaclass
const subjectLookup = new Map<string, { programme: number; metaclass: number }>();
subjects.forEach((s: any) => {
if (s.code && s.programme && s.metaclass) {
subjectLookup.set(s.code, { programme: s.programme, metaclass: s.metaclass });
}
});
// Combine and deduplicate
const allAssessments = new Map<number, any>();
upcoming.forEach((a: any) => {
if (a && a.id) {
// Normalize field names - handle both programme/programmeID and metaclass/metaclassID
// Prioritize capital ID fields (programmeID, metaclassID) as that's what the API returns
let programme = a.programmeID || a.programme;
let metaclass = a.metaclassID || a.metaclass;
// If missing, try to get from subject lookup
if ((!programme || !metaclass) && a.code) {
const subjectInfo = subjectLookup.get(a.code);
if (subjectInfo) {
programme = programme || subjectInfo.programme;
metaclass = metaclass || subjectInfo.metaclass;
}
}
allAssessments.set(a.id, {
...a,
programme: a.programme || a.programmeID,
metaclass: a.metaclass || a.metaclassID,
programme,
metaclass,
programmeID: programme, // Ensure both formats are available
metaclassID: metaclass,
isUpcoming: true,
});
}
@@ -168,11 +190,33 @@ export const assignmentsJob: Job = {
past.forEach((a: any) => {
if (a && a.id) {
// Prioritize capital ID fields (programmeID, metaclassID) as that's what the API returns
let programme = a.programmeID || a.programme;
let metaclass = a.metaclassID || a.metaclass;
const existing = allAssessments.get(a.id);
if (existing) {
Object.assign(existing, a);
// Merge past assessment data, ensuring programme/metaclass are preserved
// Use existing values if new ones are missing
programme = programme || existing.programme || existing.programmeID;
metaclass = metaclass || existing.metaclass || existing.metaclassID;
Object.assign(existing, {
...a,
programme,
metaclass,
programmeID: programme,
metaclassID: metaclass,
});
} else {
allAssessments.set(a.id, { ...a, isUpcoming: false });
allAssessments.set(a.id, {
...a,
programme,
metaclass,
programmeID: programme,
metaclassID: metaclass,
isUpcoming: false
});
}
}
});
@@ -182,6 +226,9 @@ export const assignmentsJob: Job = {
// Process assessments in batches to avoid overwhelming the API
const assessmentArray = Array.from(allAssessments.values());
const pastCount = assessmentArray.filter(a => !a.isUpcoming).length;
const upcomingCount = assessmentArray.filter(a => a.isUpcoming).length;
console.debug(`[Assignments job] Processing ${assessmentArray.length} total assessments (${upcomingCount} upcoming, ${pastCount} past)`);
const batchSize = 15; // Increased batch size for better performance
// Skip fetching assessment details - the API endpoint doesn't exist or returns 404
@@ -196,21 +243,25 @@ export const assignmentsJob: Job = {
batch.map(async (assessment) => {
const id = `assignment-${assessment.id}`;
if (existingIds.has(id) || processedIds.has(id)) {
// Skip if already processed in this batch
if (processedIds.has(id)) {
return null;
}
processedIds.add(id);
// Process all assessments (both new and existing) to ensure metadata is up-to-date
// The indexer's merge logic will handle updates properly
// Skip fetching details - API endpoint doesn't exist
const description = "";
const subjectName = assessment.subject || assessment.code || "Unknown Subject";
const dueDate = assessment.due ? new Date(assessment.due).getTime() : null;
// Normalize programme and metaclass IDs - handle both camelCase and PascalCase
const programmeId = assessment.programme || assessment.programmeID;
const metaclassId = assessment.metaclass || assessment.metaclassID;
// Prioritize capital ID fields (programmeID, metaclassID) as that's what the API returns
const programmeId = assessment.programmeID || assessment.programme;
const metaclassId = assessment.metaclassID || assessment.metaclass;
// Validate that we have the required IDs for navigation
if (!programmeId || !metaclassId || !assessment.id) {
@@ -218,6 +269,37 @@ export const assignmentsJob: Job = {
programmeId,
metaclassId,
assessmentId: assessment.id,
programmeID: assessment.programmeID,
metaclassID: assessment.metaclassID,
programme: assessment.programme,
metaclass: assessment.metaclass,
assessment,
});
return null;
}
// Convert to numbers, preserving 0 as valid
let finalProgrammeId: number | undefined;
let finalMetaclassId: number | undefined;
if (programmeId !== undefined && programmeId !== null && programmeId !== '') {
const num = Number(programmeId);
finalProgrammeId = isNaN(num) ? undefined : num;
}
if (metaclassId !== undefined && metaclassId !== null && metaclassId !== '') {
const num = Number(metaclassId);
finalMetaclassId = isNaN(num) ? undefined : num;
}
// Final validation - check for actual numbers (including 0)
if (finalProgrammeId === undefined || finalMetaclassId === undefined || !assessment.id) {
console.error(`[Assignments job] ❌ Skipping assignment ${assessment.id} - invalid IDs after conversion:`, {
programmeId: finalProgrammeId,
metaclassId: finalMetaclassId,
assessmentId: assessment.id,
rawProgrammeId: programmeId,
rawMetaclassId: metaclassId,
assessment,
});
return null;
@@ -231,11 +313,14 @@ export const assignmentsJob: Job = {
dateAdded: dueDate || Date.now(),
metadata: {
assessmentId: assessment.id,
assessmentID: assessment.id, // Store both variants for compatibility
subject: subjectName,
subjectCode: assessment.code,
dueDate: assessment.due,
programmeId: Number(programmeId) || programmeId, // Ensure it's a number
metaclassId: Number(metaclassId) || metaclassId, // Ensure it's a number
programmeId: finalProgrammeId,
programmeID: finalProgrammeId, // Store both variants for compatibility
metaclassId: finalMetaclassId,
metaclassID: finalMetaclassId, // Store both variants for compatibility
submitted: assessment.submitted || false,
isUpcoming: assessment.isUpcoming || false,
term: assessment.term,
@@ -245,6 +330,16 @@ export const assignmentsJob: Job = {
renderComponentId: "assessment",
};
console.debug(`[Assignments job] ✅ Created item for assignment ${assessment.id}:`, {
id: item.id,
programmeId: item.metadata.programmeId,
programmeID: item.metadata.programmeID,
metaclassId: item.metadata.metaclassId,
metaclassID: item.metadata.metaclassID,
assessmentId: item.metadata.assessmentId,
assessmentID: item.metadata.assessmentID,
});
return item;
})
);
@@ -262,7 +357,9 @@ export const assignmentsJob: Job = {
}
}
console.debug(`[Assignments job] Indexed ${items.length} assignment items`);
const newItemsCount = items.filter(item => !existingIds.has(item.id)).length;
const updatedItemsCount = items.length - newItemsCount;
console.debug(`[Assignments job] Indexed ${items.length} assignment items (${newItemsCount} new, ${updatedItemsCount} updated)`);
return items;
},
@@ -617,8 +617,15 @@ export const messagesJob: Job = {
} else if (renderComponentMap[item.renderComponentId]) {
renderComponent = renderComponentMap[item.renderComponentId];
}
// Create a new object instead of modifying the existing one
return { ...item, renderComponent };
// Deep clone to avoid Firefox XrayWrapper issues with nested objects like metadata
try {
const cloned = JSON.parse(JSON.stringify(item));
cloned.renderComponent = renderComponent;
return cloned;
} catch (e) {
// Fallback to shallow copy if deep clone fails
return { ...item, renderComponent };
}
} catch (error) {
// Fallback: return item as-is if modification fails (Firefox XrayWrapper)
return item;
@@ -385,8 +385,15 @@ export const notificationsJob: Job = {
} else if (renderComponentMap[item.renderComponentId]) {
renderComponent = renderComponentMap[item.renderComponentId];
}
// Create a new object instead of modifying the existing one
return { ...item, renderComponent };
// Deep clone to avoid Firefox XrayWrapper issues with nested objects like metadata
try {
const cloned = JSON.parse(JSON.stringify(item));
cloned.renderComponent = renderComponent;
return cloned;
} catch (e) {
// Fallback to shallow copy if deep clone fails
return { ...item, renderComponent };
}
} catch (error) {
// Fallback: return item as-is if modification fails (Firefox XrayWrapper)
return item;
@@ -0,0 +1,280 @@
import type { IndexItem } from "../indexing/types";
import type { CombinedResult } from "../core/types";
import { searchVectors, type VectorSearchResult } from "./vector/vectorSearch";
import { jobs } from "../indexing/jobs";
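/**
 * Hybrid Search Implementation
 *
 * Flow:
 * 1. Lexical search (Fuse.js fuzzy matching, called "BM25" throughout this module) gets the top N results fast
 * 2. Vector search reranks those results by semantic similarity
 * 3. Additive boosts are applied (recency, popularity, and each job's optional boostCriteria)
 */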
/**
 * Tuning options accepted by hybridSearch and hybridSearchWithExpansion.
 * Every field is optional; omitted fields fall back to DEFAULT_OPTIONS.
 */
export interface HybridSearchOptions {
/**
 * Maximum number of BM25 (lexical) results kept for reranking.
 * Also sizes the vector query: hybridSearch asks for twice this many vector
 * hits, hybridSearchWithExpansion asks for exactly this many.
 */
bm25TopK?: number;
/** Maximum number of final results to return (applied as a slice after sorting) */
finalLimit?: number;
/** Whether to apply recency boost; when false, recency contributes 0 to the score */
recencyBoost?: boolean;
/**
 * Weight for BM25 scores (0-1). Multiplies the normalized lexical score.
 * The weights are used as given; they are not rescaled to sum to 1.
 */
bm25Weight?: number;
/** Weight for vector similarity scores (0-1). Multiplies the vector similarity. */
vectorWeight?: number;
/** Weight for recency boost: the recency factor is multiplied by this before being added */
recencyWeight?: number;
}
/**
 * Baseline option values. Both search functions spread caller-supplied options
 * over this object, so a caller only needs to pass the fields it wants to change.
 */
const DEFAULT_OPTIONS: Required<HybridSearchOptions> = {
bm25TopK: 50, // Get top 50 from BM25, then rerank
finalLimit: 10, // Number of results handed back to the caller
recencyBoost: true, // Recency contributes to the score unless switched off
bm25Weight: 0.4, // 40% BM25, 60% vector
vectorWeight: 0.6,
recencyWeight: 0.1, // Scales the recency factor before it is added to the score
};
/**
 * Linearly rescales `score` from the interval [min, max] onto [0, 1].
 *
 * Results that would fall outside [0, 1] are clamped to the nearest bound.
 * When the interval is degenerate (`max === min`) there is nothing to scale
 * against, so the midpoint 0.5 is returned.
 *
 * @param score Value to rescale.
 * @param min Lower end of the source interval.
 * @param max Upper end of the source interval.
 * @returns The rescaled value in [0, 1].
 */
function normalizeScore(score: number, min: number, max: number): number {
  if (max === min) {
    return 0.5;
  }

  const ratio = (score - min) / (max - min);
  const cappedAtOne = Math.min(1, ratio);
  return Math.max(0, cappedAtOne);
}
/**
 * Scores how recent an item is, in the range [0, 1].
 *
 * The curve is hyperbolic, `1 / (1 + ageInDays / 7)`, not exponential:
 * an item dated now scores 1, a 7-day-old item scores 0.5, and a 30-day-old
 * item scores about 0.19.
 *
 * Items dated in the future (for example when `dateAdded` holds an upcoming
 * due date) are treated as brand new and score 1. Without that clamp the
 * denominator reaches zero at exactly 7 days ahead (Infinity) and goes
 * negative beyond that, which corrupts the combined ranking score.
 *
 * @param item Item whose `dateAdded` is an epoch-millisecond timestamp.
 * @param now Current time in epoch milliseconds.
 * @returns The recency factor, or 0 when the age cannot be computed as a
 *   finite number (missing or invalid `dateAdded`).
 */
function calculateRecencyBoost(item: IndexItem, now: number): number {
  const MS_PER_DAY = 1000 * 60 * 60 * 24;
  // Age at which the factor has dropped to one half.
  const HALF_SCORE_AGE_DAYS = 7;

  const ageMs = now - item.dateAdded;
  if (!Number.isFinite(ageMs)) {
    // NaN would propagate into the hybrid score and make sorting unreliable.
    return 0;
  }

  const ageInDays = Math.max(0, ageMs) / MS_PER_DAY;
  return 1 / (1 + ageInDays / HALF_SCORE_AGE_DAYS);
}
/**
 * Heuristic popularity bonus derived from an item's category and metadata.
 * (Intended as a placeholder that can later be fed by click tracking or similar.)
 *
 * Contributions, summed in this order:
 * - 0.1 when the category is "assignments"
 * - 0.15 when `metadata.isUpcoming` is truthy
 * - 0.05 when `metadata.subjectCode` is truthy
 *
 * @param item Item to score.
 * @returns The summed bonus, never more than 0.3.
 */
function calculatePopularityBoost(item: IndexItem): number {
  const MAX_BOOST = 0.3;

  const contributions: Array<[boolean, number]> = [
    [item.category === "assignments", 0.1],
    [Boolean(item.metadata?.isUpcoming), 0.15],
    [Boolean(item.metadata?.subjectCode), 0.05],
  ];

  let total = 0;
  for (const [applies, amount] of contributions) {
    if (applies) {
      total += amount;
    }
  }

  return Math.min(total, MAX_BOOST);
}
/**
 * Reranks lexical ("BM25" / Fuse.js) results by blending their score with
 * vector similarity and additive boosts.
 *
 * Only the first `bm25TopK` entries of `bm25Results` are considered. For each
 * of them the combined score is
 *
 *   bm25 * bm25Weight + vector * vectorWeight + recency + popularity + jobBoost
 *
 * and the stored `score` is that value multiplied by 100.
 *
 * Fallbacks, all of which return the lexical results in their incoming order
 * (truncated to `finalLimit`):
 * - the trimmed query has two characters or fewer (vector search is skipped);
 * - vector search or reranking throws.
 *
 * @param bm25Results Lexical hits; `score` is expected on a 0-100 scale where
 *   higher is better.
 * @param query Raw user query; it is trimmed and lower-cased before use.
 * @param options Overrides for DEFAULT_OPTIONS.
 * @returns Up to `finalLimit` results, sorted by combined score descending when
 *   reranking ran. An empty input yields an empty array.
 */
export async function hybridSearch(
  bm25Results: CombinedResult[],
  query: string,
  options: HybridSearchOptions = {},
): Promise<CombinedResult[]> {
  const opts = { ...DEFAULT_OPTIONS, ...options };
  const trimmedQuery = query.trim().toLowerCase();

  // If no BM25 results, return empty
  if (bm25Results.length === 0) {
    return [];
  }

  // Limit BM25 results to top K
  const topBm25Results = bm25Results.slice(0, opts.bm25TopK);

  // If query is too short for vector search, just return BM25 results
  if (trimmedQuery.length <= 2) {
    return topBm25Results.slice(0, opts.finalLimit);
  }

  try {
    // Fetch more vector hits than lexical hits so that most lexical hits
    // have a measured similarity rather than the default.
    const vectorSearchResults = await searchVectors(trimmedQuery, opts.bm25TopK * 2);

    // Item ID -> best similarity seen for that item.
    const vectorMap = new Map<string, number>();
    for (const v of vectorSearchResults) {
      const existing = vectorMap.get(v.object.id);
      // Compare against undefined explicitly: a stored similarity of 0 is a
      // real value and must not be overwritten by a lower one.
      if (existing === undefined || v.similarity > existing) {
        vectorMap.set(v.object.id, v.similarity);
      }
    }

    const now = Date.now();
    const rerankedResults = topBm25Results.map(result => {
      const item = result.item;

      // Incoming scores are on a 0-100, higher-is-better scale; map to 0-1.
      // A missing or non-numeric score counts as 0 instead of becoming NaN,
      // which would make the sort below unreliable.
      const rawScore = Number(result.score);
      const normalizedBm25Score = Number.isFinite(rawScore)
        ? Math.max(0, Math.min(1, rawScore / 100))
        : 0;

      // Items absent from the vector hits get a low default similarity.
      // `??` keeps a measured similarity of 0 as 0.
      const vectorSimilarity = vectorMap.get(item.id) ?? 0.3;

      // Recency factor scaled by its weight; 0 when disabled.
      const recencyBoost = opts.recencyBoost
        ? calculateRecencyBoost(item, now) * opts.recencyWeight
        : 0;

      const popularityBoost = calculatePopularityBoost(item);

      // Apply job-specific boost if available
      const job = jobs[item.category];
      let jobBoost = 0;
      if (job && typeof job.boostCriteria === 'function') {
        const boost = job.boostCriteria(item, trimmedQuery);
        if (boost) {
          jobBoost = boost / 100; // Bring the job boost onto the same scale as the other terms
        }
      }

      // BM25 and vector are weighted, boosts are additive
      const hybridScore =
        (normalizedBm25Score * opts.bm25Weight) +
        (vectorSimilarity * opts.vectorWeight) +
        recencyBoost +
        popularityBoost +
        jobBoost;

      return {
        ...result,
        score: hybridScore * 100, // Scale back to 0-100 for consistency
        // Store component scores for debugging (optional, can be removed in production)
        _hybridScores: {
          bm25: normalizedBm25Score,
          vector: vectorSimilarity,
          recency: recencyBoost,
          popularity: popularityBoost,
          jobBoost: jobBoost,
          final: hybridScore,
        },
      };
    });

    // Sort by hybrid score descending
    rerankedResults.sort((a, b) => b.score - a.score);

    // Return top results
    return rerankedResults.slice(0, opts.finalLimit);
  } catch (e) {
    console.warn("[Hybrid Search] Vector reranking failed, using BM25 only:", e);
    // Fallback to BM25 only
    return topBm25Results.slice(0, opts.finalLimit);
  }
}
/**
 * Hybrid search that also surfaces vector-only matches, i.e. items the
 * lexical ("BM25") search did not return at all.
 *
 * The lexical results are first reranked via hybridSearch. Vector hits whose
 * id does not occur anywhere in `bm25Results` are then scored as
 *
 *   similarity * vectorWeight + recency + popularity + jobBoost
 *
 * (no lexical component), multiplied by 100, and merged with the reranked
 * list. When the same item id occurs in several vector hits only the hit with
 * the highest similarity is used, so the merged list never contains the same
 * vector-only item twice.
 *
 * @param bm25Results Lexical hits passed through to hybridSearch.
 * @param query Raw user query; trimmed and lower-cased before vector search.
 * @param allItems Currently unused by this function; kept for interface compatibility.
 * @param options Overrides for DEFAULT_OPTIONS.
 * @returns Up to `finalLimit` results sorted by score descending. If the
 *   trimmed query has two characters or fewer, or vector search throws, the
 *   reranked lexical results are returned unchanged.
 */
export async function hybridSearchWithExpansion(
  bm25Results: CombinedResult[],
  query: string,
  allItems: IndexItem[],
  options: HybridSearchOptions = {},
): Promise<CombinedResult[]> {
  const opts = { ...DEFAULT_OPTIONS, ...options };
  const trimmedQuery = query.trim().toLowerCase();

  // First, rerank BM25 results
  const rerankedBm25 = await hybridSearch(bm25Results, query, options);

  // If query is too short, skip vector expansion
  if (trimmedQuery.length <= 2) {
    return rerankedBm25;
  }

  // Get vector search results
  let vectorResults: VectorSearchResult[];
  try {
    vectorResults = await searchVectors(trimmedQuery, opts.bm25TopK);
  } catch (e) {
    console.warn("[Hybrid Search] Vector search failed:", e);
    return rerankedBm25;
  }

  // Collect semantic matches that BM25 missed, keeping only the best hit per
  // item id so one item cannot appear several times in the output.
  const bm25Ids = new Set(bm25Results.map(r => r.item.id));
  const bestVectorOnlyHits = new Map<string, VectorSearchResult>();
  for (const v of vectorResults) {
    const id = v.object.id;
    if (bm25Ids.has(id)) {
      continue;
    }
    const current = bestVectorOnlyHits.get(id);
    if (current === undefined || v.similarity > current.similarity) {
      bestVectorOnlyHits.set(id, v);
    }
  }

  const vectorOnlyResults: CombinedResult[] = [];
  const now = Date.now();

  for (const v of bestVectorOnlyHits.values()) {
    const item = v.object;

    // Calculate boosts
    const recencyBoost = opts.recencyBoost
      ? calculateRecencyBoost(item, now) * opts.recencyWeight
      : 0;
    const popularityBoost = calculatePopularityBoost(item);

    // No lexical component here, so these results start from a lower base
    // than reranked BM25 hits with the same similarity.
    const vectorScore = v.similarity * opts.vectorWeight + recencyBoost + popularityBoost;

    // Apply job-specific boost if available
    const job = jobs[item.category];
    let jobBoost = 0;
    if (job && typeof job.boostCriteria === 'function') {
      const boost = job.boostCriteria(item, trimmedQuery);
      if (boost) {
        jobBoost = boost / 100; // Normalize boost
      }
    }

    vectorOnlyResults.push({
      id: item.id,
      type: "dynamic" as const,
      score: (vectorScore + jobBoost) * 100,
      item,
      _hybridScores: {
        bm25: 0,
        vector: v.similarity,
        recency: recencyBoost,
        popularity: popularityBoost,
        final: vectorScore + jobBoost,
      },
    });
  }

  // Combine reranked BM25 results with vector-only results
  const allResults = [...rerankedBm25, ...vectorOnlyResults];

  // Sort by score and return top results
  allResults.sort((a, b) => b.score - a.score);
  return allResults.slice(0, opts.finalLimit);
}
@@ -6,6 +6,7 @@ import type { IndexItem } from "../indexing/types";
import { searchVectors } from "./vector/vectorSearch";
import type { VectorSearchResult } from "./vector/vectorTypes";
import { jobs } from "../indexing/jobs";
import { hybridSearchWithExpansion } from "./hybridSearch";
// Search result cache for better performance
const searchCache = new Map<string, { results: CombinedResult[]; timestamp: number }>();
@@ -56,12 +57,12 @@ export function createSearchIndexes() {
],
includeScore: true,
includeMatches: true,
threshold: 0.35, // Slightly more permissive
minMatchCharLength: 2,
distance: 50, // Reduced from 100 for better performance
threshold: 0.5, // More permissive for better partial word matching (previously 0.35)
minMatchCharLength: 2, // Minimum 2 characters for Fuse.js matches (substring fallback handles shorter queries)
distance: 100, // Increased to allow matches across longer strings
useExtendedSearch: true,
ignoreLocation: false,
findAllMatches: false, // Performance optimization
ignoreLocation: true, // Allow matches anywhere in the string for better partial word matching
findAllMatches: true, // Enable to find all matches for better partial word support
shouldSort: true,
};
@@ -136,10 +137,40 @@ export function searchDynamicItems(
}
const now = Date.now();
// Increase limit for better results, then trim later
const queryLower = query.toLowerCase();
const queryTrimmed = query.trim();
// For short queries (3 chars or less), use a more permissive approach
const isShortQuery = queryTrimmed.length <= 3;
const searchLimit = Math.min(limit * 3, 50);
// First, try Fuse.js search
const searchResults = dynamicContentFuse.search(query, { limit: searchLimit });
// Short queries get a plain case-insensitive substring pass on top of Fuse.js,
// so partial-word hits such as "SAT" in "SAT 1: Differential Calculus" are not lost.
const additionalMatches: IndexItem[] = [];
if (isShortQuery) {
  // Collect the ids Fuse.js already returned once, instead of rescanning
  // searchResults for every indexed item.
  const fuseMatchedIds = new Set(searchResults.map((r) => r.item.id));
  for (const item of dynamicIdToItemMap.values()) {
    // Items Fuse.js already found are scored together with the Fuse.js results.
    if (fuseMatchedIds.has(item.id)) {
      continue;
    }
    // Fields checked: text, content, and the subjectName / subjectCode metadata.
    const haystacks = [
      item.text,
      item.content || '',
      item.metadata?.subjectName || '',
      item.metadata?.subjectCode || '',
    ];
    if (haystacks.some((field) => field.toLowerCase().includes(queryLower))) {
      additionalMatches.push(item);
    }
  }
}
const results = searchResults.map((result: FuseResult<IndexItem>) => {
const item = result.item;
// Fuse.js reports 0 for a perfect match, so 0 must be kept as a real score;
// only a missing score falls back to the neutral 0.5 (`||` would discard the 0).
const fuseScore = 10 * (1 - (result.score ?? 0.5));
@@ -151,13 +182,16 @@ export function searchDynamicItems(
const recencyBoost = sortByRecent ? 1 / (ageInDays + 1) : 0;
score += recencyBoost;
// Boost for exact text matches
if (item.text.toLowerCase().includes(query.toLowerCase())) {
score += 2;
// Boost for exact text matches (especially at the start)
const textLower = item.text.toLowerCase();
if (textLower.startsWith(queryLower)) {
score += 5; // Strong boost for prefix matches
} else if (textLower.includes(queryLower)) {
score += 2; // Boost for substring matches
}
// Boost for category matches
if (item.category.toLowerCase().includes(query.toLowerCase())) {
if (item.category.toLowerCase().includes(queryLower)) {
score += 1;
}
@@ -170,6 +204,32 @@ export function searchDynamicItems(
};
});
// Merge the substring-only matches into the scored results.
// Ids are tracked in a Set that is kept current as items are appended, so each
// duplicate check is O(1) instead of a linear scan of the growing results array.
const substringSeenIds = new Set(results.map((r) => r.id));
for (const item of additionalMatches) {
  if (substringSeenIds.has(item.id)) {
    continue;
  }
  // Base score of 5 for a substring hit, plus 5 more when the text starts with the query.
  let score = 5;
  if (item.text.toLowerCase().startsWith(queryLower)) {
    score += 5;
  }
  // Recency boost: applied only when sortByRecent is set; shrinks as the item ages.
  if (sortByRecent) {
    const ageInDays = (now - item.dateAdded) / (1000 * 60 * 60 * 24);
    score += 1 / (ageInDays + 1);
  }
  results.push({
    id: item.id,
    type: "dynamic" as const,
    score,
    item,
  });
  substringSeenIds.add(item.id);
}
// Sort by score and return top results
return results.sort((a, b) => b.score - a.score).slice(0, limit);
}
@@ -178,6 +238,9 @@ export async function performSearch(
query: string,
commandsFuse: Fuse<StaticCommandItem>,
commandIdToItemMap: Map<string, StaticCommandItem>,
dynamicContentFuse?: Fuse<IndexItem>,
dynamicIdToItemMap?: Map<string, IndexItem>,
sortByRecent: boolean = true,
): Promise<CombinedResult[]> {
const trimmedQuery = query.trim().toLowerCase();
@@ -189,64 +252,75 @@ export async function performSearch(
}
}
// Get all results first
// Step 1: Get command results (these don't need hybrid search)
const commandResults = searchCommands(
commandsFuse,
trimmedQuery,
commandIdToItemMap,
);
// Get vector results in parallel (only for queries longer than 3 chars for performance)
let vectorResults: VectorSearchResult[] = [];
if (trimmedQuery.length > 3) {
try {
vectorResults = await searchVectors(trimmedQuery, 15); // Reduced from 20 for performance
} catch (e) {
console.warn("[Search] Vector search failed:", e);
// Step 2: Get BM25 results for dynamic items
let dynamicResults: CombinedResult[] = [];
if (dynamicContentFuse && dynamicIdToItemMap) {
// Get BM25 results first (fast text-based search)
const bm25Results = searchDynamicItems(
dynamicContentFuse,
trimmedQuery,
dynamicIdToItemMap,
50, // Get top 50 for reranking
sortByRecent,
);
// Step 3: Apply hybrid search (BM25 + Vector reranking + boosting)
if (trimmedQuery.length > 2 && bm25Results.length > 0) {
try {
// Get all items for expansion
const allItems = Array.from(dynamicIdToItemMap.values());
// Apply hybrid search with expansion
dynamicResults = await hybridSearchWithExpansion(
bm25Results,
trimmedQuery,
allItems,
{
bm25TopK: 50,
finalLimit: 20, // Return top 20 after reranking
recencyBoost: sortByRecent,
bm25Weight: 0.4, // 40% BM25, 60% vector
vectorWeight: 0.6,
recencyWeight: 0.1,
},
);
} catch (e) {
console.warn("[Search] Hybrid search failed, using BM25 only:", e);
// Fallback to BM25 only
dynamicResults = bm25Results.slice(0, 20);
}
} else {
// For very short queries or no BM25 results, use BM25 only
dynamicResults = bm25Results.slice(0, 20);
}
}
// Create a map to store our final results, using ID as key to avoid duplicates
const resultMap = new Map<string, CombinedResult>();
// Step 4: Combine command and dynamic results
const allResults = [...commandResults, ...dynamicResults];
// Add command results first (they keep their original scores)
commandResults.forEach((r) => resultMap.set(r.id, r));
// Process vector results
const seenIds = new Set<string>();
commandResults.forEach((r) => seenIds.add(r.id));
vectorResults.forEach((v) => {
const id = v.object.id;
if (!seenIds.has(id)) {
// This is a semantic match that Fuse missed - add it with the vector similarity as score
let score = v.similarity * 0.5; // High base score for semantic matches
const job = jobs[v.object.category];
if (job && typeof job.boostCriteria === 'function') {
const boost = job.boostCriteria(v.object, trimmedQuery);
if (boost) {
score += boost;
}
}
resultMap.set(id, {
id,
type: "dynamic" as const,
score,
item: v.object,
});
seenIds.add(id);
// Sort by score (commands typically have higher priority)
allResults.sort((a, b) => {
// Commands always come first if scores are similar
if (a.type === "command" && b.type === "dynamic") {
return b.score - a.score - 10; // Commands get +10 boost
}
if (a.type === "dynamic" && b.type === "command") {
return b.score - a.score + 10; // Commands get +10 boost
}
return b.score - a.score;
});
// Convert to array and sort by score
const results = Array.from(resultMap.values());
results.sort((a, b) => b.score - a.score);
// Cache results for queries longer than 2 chars
if (trimmedQuery.length > 2) {
setCachedResults(trimmedQuery, results);
setCachedResults(trimmedQuery, allResults);
}
return results;
return allResults;
}