Feat: Updated global search

- Add shared SEQTA fetch layer, extract helpers, passive JSON capture, many new index jobs
- Schema version + shared reset; auto-reset on extension update; fix manual reset (no dynamic import)
- Stabilize hybrid search: lexical title scoring, stale-query guard, vector guardrails, drop route from Fuse keys
- Improve passive titles/routing; tighten message/people handling
- Fix assignment/course indexing (lenient envelopes, subjects mode:list, student id fallback)
- Top bar: indexing label + stacked progress bar under quick search trigger
This commit is contained in:
2026-04-30 17:09:34 +09:30
parent a875f35f1a
commit 710c03f463
32 changed files with 4100 additions and 398 deletions
+20 -64
View File
@@ -6,6 +6,7 @@ import {
hotkeySetting, hotkeySetting,
} from "../../core/settingsHelpers"; } from "../../core/settingsHelpers";
import styles from "./src/core/styles.css?inline"; import styles from "./src/core/styles.css?inline";
import { resetSearchIndexes } from "./src/indexing/resetIndexes";
// Platform-aware default hotkey // Platform-aware default hotkey
const getDefaultHotkey = () => { const getDefaultHotkey = () => {
@@ -34,78 +35,33 @@ const settings = defineSettings({
title: "Index on Page Load", title: "Index on Page Load",
description: "Run content indexing when SEQTA loads", description: "Run content indexing when SEQTA loads",
}), }),
passiveIndexing: booleanSetting({
default: true,
title: "Index Browsed Content",
description:
"Capture safe text from SEQTA pages you visit so they're searchable. Sensitive routes (settings, files, login) are always excluded.",
}),
resetIndex: buttonSetting({ resetIndex: buttonSetting({
title: "Reset Index", title: "Reset Index",
description: "Reset the search index and storage", description: "Reset the search index and storage",
trigger: async () => { trigger: async () => {
const confirmed = confirm("Are you sure you want to reset the search index and storage?"); const confirmed = confirm(
"Are you sure you want to reset the search index and storage?",
if (confirmed) { );
try { if (!confirmed) return;
// Dynamically import modules to avoid loading heavy dependencies
const { VectorWorkerManager } = await import("./src/indexing/worker/vectorWorkerManager");
const { resetDatabase } = await import("./src/indexing/db");
// Reset vector worker first
try {
const workerManager = VectorWorkerManager.getInstance();
await workerManager.resetWorker();
console.log("Vector worker reset successfully");
} catch (e) {
console.warn("Failed to reset vector worker:", e);
}
// Close all database connections properly before deletion
try {
await resetDatabase();
console.log("betterseqta-index database closed and reset");
} catch (e) {
console.warn("Failed to reset betterseqta-index database:", e);
}
// Wait a bit for connections to fully close
await new Promise(resolve => setTimeout(resolve, 100));
// Delete embeddiaDB (vector search database)
const deleteDb = (dbName: string) => {
return new Promise<void>((resolve, reject) => {
const req = indexedDB.deleteDatabase(dbName);
req.onsuccess = () => {
console.log(`Successfully deleted database: ${dbName}`);
resolve();
};
req.onerror = () => {
console.error(`Error deleting database ${dbName}:`, req.error);
reject(req.error);
};
req.onblocked = () => {
console.warn(`Database ${dbName} deletion blocked - connections still open`);
// Wait and retry once
setTimeout(() => {
const retryReq = indexedDB.deleteDatabase(dbName);
retryReq.onsuccess = () => {
console.log(`Successfully deleted database on retry: ${dbName}`);
resolve();
};
retryReq.onerror = () => reject(retryReq.error);
retryReq.onblocked = () => {
reject(new Error(`One database is open, failed to remove: ${dbName}. Please close other tabs and try again.`));
};
}, 500);
};
});
};
try { try {
await deleteDb("embeddiaDB"); // `resetSearchIndexes` is a tiny statically-imported helper: no
await deleteDb("betterseqta-index"); // dynamic chunks to chase, so the button keeps working even when
// the settings page has been open across an extension update.
await resetSearchIndexes();
alert("Search index and storage have been reset successfully."); alert("Search index and storage have been reset successfully.");
} catch (e) { } catch (e) {
alert("Failed to reset one or more databases: " + String(e) + "\n\nTry closing other browser tabs and try again."); alert(
} "Failed to reset index: " +
} catch (e) { String(e) +
alert("Failed to reset index: " + String(e)); "\n\nTry closing other browser tabs and try again.",
} );
} }
}, },
}), }),
@@ -48,6 +48,13 @@
let calculatorResult = $state<string | null>(null); let calculatorResult = $state<string | null>(null);
let resultsList = $state<HTMLUListElement>(); let resultsList = $state<HTMLUListElement>();
// Monotonic counter so a slow async search (vector reranking) cannot
// overwrite results from a newer keystroke. Without this guard, the user
// observes results "flickering" — e.g. typing `world w` finds the assessment
// but `world wa` triggers a new search whose vector pass returns later than
// the `world w` pass and clobbers the more relevant matches.
let searchRequestId = 0;
const updateCalculatorState = (hasResult: string | null) => { const updateCalculatorState = (hasResult: string | null) => {
calculatorResult = hasResult; calculatorResult = hasResult;
}; };
@@ -166,9 +173,10 @@
}); });
const term = searchTerm.trim().toLowerCase(); const term = searchTerm.trim().toLowerCase();
const requestId = ++searchRequestId;
if (commandsFuse && dynamicContentFuse) { if (commandsFuse && dynamicContentFuse) {
combinedResults = await doSearch( const results = await doSearch(
term, term,
commandsFuse, commandsFuse,
commandIdToItemMap, commandIdToItemMap,
@@ -176,7 +184,16 @@
dynamicIdToItemMap, dynamicIdToItemMap,
true, // sortByRecent true, // sortByRecent
); );
// Drop the result if the user has typed since this search started, or
// if the current term no longer matches what we searched for. This
// keeps the visible list anchored to the latest query.
if (requestId !== searchRequestId) return;
if (searchTerm.trim().toLowerCase() !== term) return;
combinedResults = results;
} else { } else {
if (requestId !== searchRequestId) return;
combinedResults = []; combinedResults = [];
} }
@@ -0,0 +1,89 @@
<script lang="ts">
import HighlightedText from '../../utils/HighlightedText.svelte';
import type { DynamicContentItem } from '../../utils/dynamicItems';
import type { FuseResultMatch } from '../../core/types';
/**
 * Component props.
 * - `item`: the dynamic content entry rendered by this row.
 * - `isSelected`: switches the row to its highlighted styling.
 * - `searchTerm` / `matches`: forwarded to `HighlightedText` for both the
 *   title and the optional content preview.
 * - `onclick`: handler attached to the row's `<button>`.
 */
const { item, isSelected, searchTerm, matches, onclick } = $props<{
item: DynamicContentItem;
isSelected: boolean;
searchTerm: string;
matches?: readonly FuseResultMatch[];
onclick: () => void;
}>();
/**
 * Human-readable label for a category key: the key with its first
 * character upper-cased. An empty (or otherwise falsy) category yields ''.
 */
const categoryLabel = (category: string): string =>
  category ? category.charAt(0).toUpperCase() + category.slice(1) : '';
/**
 * Gradient utility classes for each known category badge. A Map (rather
 * than a plain object) keeps lookups limited to the keys listed here.
 */
const CATEGORY_GRADIENTS = new Map<string, string>([
  ['courses', 'from-[#7c5fe0] to-[#4d2bb8]'],
  ['notices', 'from-[#f6c453] to-[#d39007]'],
  ['documents', 'from-[#4FBBFE] to-[#2090F3]'],
  ['folio', 'from-[#22c55e] to-[#0f9b3a]'],
  ['portals', 'from-[#22d3ee] to-[#0e7490]'],
  ['reports', 'from-[#f97316] to-[#c2410c]'],
  ['goals', 'from-[#10b981] to-[#047857]'],
  ['passive', 'from-[#6b7280] to-[#374151]'],
]);

/**
 * Returns the `from-… to-…` gradient classes used for the icon tile of the
 * given category. Unknown categories fall back to the same blue gradient
 * that `documents` uses.
 */
const gradientForCategory = (category: string): string =>
  CATEGORY_GRADIENTS.get(category) ?? 'from-[#4FBBFE] to-[#2090F3]';
/**
 * Per-category glyphs used when an item carries no icon of its own. The
 * values are single code points rendered inside the `font-IconFamily`
 * element of the row.
 */
const CATEGORY_FALLBACK_ICONS = new Map<string, string>([
  ['courses', '\ueb4d'],
  ['notices', '\ueb24'],
  ['documents', '\ueb6f'],
  ['folio', '\ueb16'],
  ['portals', '\ueb01'],
  ['reports', '\ueb70'],
  ['goals', '\uea15'],
  ['passive', '\ueb71'],
]);

/**
 * Returns the icon glyph for a category, or the generic '\ue924' glyph
 * when the category is not one of the known keys.
 */
const fallbackIcon = (category: string): string =>
  CATEGORY_FALLBACK_ICONS.get(category) ?? '\ue924';
</script>
<button
class="w-full flex flex-col px-2 py-1.5 rounded-lg select-none cursor-pointer group transition-colors duration-100 ring-0 dark:ring-zinc-600/50
{isSelected ? 'bg-zinc-900/5 dark:bg-white/10 text-zinc-900 dark:text-white dark:ring-[1px] dark:shadow' : 'hover:bg-zinc-500/5 dark:hover:bg-white/5 text-zinc-800 dark:text-zinc-200'}"
onclick={onclick}
>
<div class="flex items-center w-full">
<div
class="flex-none scale-90 w-8 h-8 text-xl font-IconFamily flex items-center justify-center text-white rounded-md bg-gradient-to-br {gradientForCategory(item.category)}"
>
{item.metadata?.icon || fallbackIcon(item.category)}
</div>
<span class="ml-4 text-lg truncate">
<HighlightedText text={item.text} term={searchTerm} matches={matches} />
</span>
<span class="flex-none ml-auto text-xs text-zinc-500 dark:text-zinc-400">
{item.metadata?.subjectCode || categoryLabel(item.category)}
</span>
</div>
{#if item.content}
<div class="mt-1 ml-12 text-sm text-zinc-600 dark:text-zinc-400 line-clamp-2 text-start">
<HighlightedText text={item.content} term={searchTerm} matches={matches} />
</div>
{/if}
</button>
@@ -1,6 +1,7 @@
import { settingsState } from "@/seqta/utils/listeners/SettingsState"; import { settingsState } from "@/seqta/utils/listeners/SettingsState";
import { loadHomePage } from "@/seqta/utils/Loaders/LoadHomePage"; import { loadHomePage } from "@/seqta/utils/Loaders/LoadHomePage";
import { waitForElm } from "@/seqta/utils/waitForElm"; import { waitForElm } from "@/seqta/utils/waitForElm";
import { getCurrentStudentId } from "../indexing/api";
export interface BaseCommandItem { export interface BaseCommandItem {
id: string; id: string;
@@ -23,13 +24,19 @@ async function getCurrentLesson() {
const todayFormatted = formatDate(date); const todayFormatted = formatDate(date);
try { try {
const student = await getCurrentStudentId();
if (typeof student !== "number") {
alert("Could not determine the active SEQTA student.");
return null;
}
const response = await fetch(`${location.origin}/seqta/student/load/timetable?`, { const response = await fetch(`${location.origin}/seqta/student/load/timetable?`, {
method: "POST", method: "POST",
credentials: "include",
headers: { "Content-Type": "application/json" }, headers: { "Content-Type": "application/json" },
body: JSON.stringify({ body: JSON.stringify({
from: todayFormatted, from: todayFormatted,
until: todayFormatted, until: todayFormatted,
student: 69, student,
}), }),
}); });
@@ -15,6 +15,10 @@ import { cleanupSearchBar, mountSearchBar } from "./mountSearchBar";
import { IndexedDbManager } from "embeddia"; import { IndexedDbManager } from "embeddia";
import { VectorWorkerManager } from "../indexing/worker/vectorWorkerManager"; import { VectorWorkerManager } from "../indexing/worker/vectorWorkerManager";
import { checkAndHandleUpdate } from "../utils/versionCheck"; import { checkAndHandleUpdate } from "../utils/versionCheck";
import {
getStoredPassiveItems,
installPassiveObserver,
} from "../indexing/passiveObserver";
// Platform-aware default hotkey // Platform-aware default hotkey
const getDefaultHotkey = () => { const getDefaultHotkey = () => {
@@ -43,6 +47,12 @@ const settings = defineSettings({
title: "Index on Page Load", title: "Index on Page Load",
description: "Run content indexing when SEQTA loads", description: "Run content indexing when SEQTA loads",
}), }),
passiveIndexing: booleanSetting({
default: true,
title: "Index Browsed Content",
description:
"Capture safe text from SEQTA pages you visit so they're searchable. Sensitive routes (settings, files, login) are always excluded.",
}),
resetIndex: buttonSetting({ resetIndex: buttonSetting({
title: "Reset Index", title: "Reset Index",
description: "Reset the search index and storage", description: "Reset the search index and storage",
@@ -131,6 +141,9 @@ class GlobalSearchPlugin extends BasePlugin<typeof settings> {
@Setting(settings.runIndexingOnLoad) @Setting(settings.runIndexingOnLoad)
runIndexingOnLoad!: boolean; runIndexingOnLoad!: boolean;
@Setting(settings.passiveIndexing)
passiveIndexing!: boolean;
@Setting(settings.resetIndex) @Setting(settings.resetIndex)
resetIndex!: () => void; resetIndex!: () => void;
} }
@@ -150,26 +163,35 @@ const globalSearchPlugin: Plugin<typeof settings> = {
run: async (api) => { run: async (api) => {
const appRef = { current: null }; const appRef = { current: null };
// Check for extension updates and clear caches if needed // Run the version check BEFORE we open any IndexedDB connections.
// Use a timeout to avoid blocking initialization // On a normal load (no version change) this is just a string compare
setTimeout(async () => { // and a manifest read, so the cost is negligible. On a real update,
// we want the database wipe to complete before `IndexedDbManager`
// grabs a handle on `embeddiaDB`, otherwise the delete request comes
// back blocked.
try { try {
const wasUpdated = await checkAndHandleUpdate(); const wasUpdated = await checkAndHandleUpdate();
if (wasUpdated) { if (wasUpdated) {
console.log("[Global Search] Extension updated - caches cleared"); console.log(
"[Global Search] Extension updated — search index reset; the next indexing pass will repopulate.",
);
} }
} catch (error: any) { } catch (error: any) {
// Handle CSS preload errors and other failures gracefully // Firefox sometimes refuses CSS preloads or asset reads; we never
// These can happen in Firefox or when assets aren't available // want this path to take the whole plugin down.
if (error?.message?.includes("preload CSS") || if (
error?.message?.includes("preload CSS") ||
error?.message?.includes("MIME type") || error?.message?.includes("MIME type") ||
error?.message?.includes("NS_ERROR_CORRUPTED_CONTENT")) { error?.message?.includes("NS_ERROR_CORRUPTED_CONTENT")
console.debug("[Global Search] Version check skipped due to asset loading restrictions:", error.message); ) {
console.debug(
"[Global Search] Version check skipped due to asset loading restrictions:",
error.message,
);
} else { } else {
console.warn("[Global Search] Failed to check for updates:", error); console.warn("[Global Search] Failed to check for updates:", error);
} }
} }
}, 100);
try { try {
await IndexedDbManager.create("embeddiaDB", "embeddiaObjectStore", { await IndexedDbManager.create("embeddiaDB", "embeddiaObjectStore", {
@@ -210,6 +232,17 @@ const globalSearchPlugin: Plugin<typeof settings> = {
const workerManager = VectorWorkerManager.getInstance(); const workerManager = VectorWorkerManager.getInstance();
console.log("Streaming active:", workerManager.isStreamingActive()); console.log("Streaming active:", workerManager.isStreamingActive());
}, },
passiveItems: async () => {
const items = await getStoredPassiveItems();
console.log(`Captured ${items.length} passive items`);
return items;
},
runSelfTests: async () => {
const { runGlobalSearchSelfTests } = await import(
"../indexing/selfTests"
);
return runGlobalSearchSelfTests();
},
checkIndexedDBSize: async () => { checkIndexedDBSize: async () => {
try { try {
const estimate = await navigator.storage.estimate(); const estimate = await navigator.storage.estimate();
@@ -232,6 +265,14 @@ const globalSearchPlugin: Plugin<typeof settings> = {
} }
}; };
if (api.settings.passiveIndexing) {
try {
installPassiveObserver();
} catch (error) {
console.warn("[Global Search] Passive observer install failed:", error);
}
}
if (api.settings.runIndexingOnLoad) { if (api.settings.runIndexingOnLoad) {
setTimeout(async () => { setTimeout(async () => {
await runIndexing(); await runIndexing();
@@ -18,36 +18,39 @@ export function mountSearchBar(
let currentHotkey = isValidHotkey(api.settings.searchHotkey) ? api.settings.searchHotkey : "ctrl+k"; let currentHotkey = isValidHotkey(api.settings.searchHotkey) ? api.settings.searchHotkey : "ctrl+k";
let hotkeyDisplay = formatHotkeyForDisplay(currentHotkey); let hotkeyDisplay = formatHotkeyForDisplay(currentHotkey);
// Search trigger + progress UI live in one wrapper so the auto-margin
// pushes the whole group to the left edge of the topbar instead of
// stranding the progress text on the far right of the screen.
const searchWrapper = document.createElement("div");
searchWrapper.className = "search-trigger-wrapper";
// Anchor lets us absolutely position the progress bar directly beneath
// the search button without disturbing the topbar's vertical rhythm.
const searchAnchor = document.createElement("div");
searchAnchor.className = "search-trigger-anchor";
const searchButton = document.createElement("div"); const searchButton = document.createElement("div");
searchButton.className = "search-trigger"; searchButton.className = "search-trigger";
// Create progress indicator container
const progressContainer = document.createElement("div");
progressContainer.className = "search-progress-container";
progressContainer.style.cssText = "display: flex; align-items: center; gap: 8px; margin-left: 8px; min-width: 120px;";
// Create progress bar
const progressBarWrapper = document.createElement("div"); const progressBarWrapper = document.createElement("div");
progressBarWrapper.className = "search-progress-bar-wrapper"; progressBarWrapper.className = "search-progress-bar-wrapper";
progressBarWrapper.style.cssText = "flex: 1; height: 4px; background: rgba(0, 0, 0, 0.1); border-radius: 2px; overflow: hidden; display: none;";
const progressBar = document.createElement("div"); const progressBar = document.createElement("div");
progressBar.className = "search-progress-bar"; progressBar.className = "search-progress-bar";
progressBar.style.cssText = "height: 100%; background: linear-gradient(90deg, #3b82f6, #2563eb, #3b82f6); transition: width 0.3s ease-out; width: 0%; position: relative;";
// Add shimmer effect
const shimmer = document.createElement("div");
shimmer.style.cssText = "position: absolute; inset: 0; background: linear-gradient(90deg, transparent, rgba(255,255,255,0.3), transparent); animation: shimmer 2s infinite;";
progressBar.appendChild(shimmer);
progressBarWrapper.appendChild(progressBar); progressBarWrapper.appendChild(progressBar);
// Create progress text // Use a block-level <div> so the label reliably participates in flex
const progressText = document.createElement("span"); // layout. A <span> defaults to `display: inline`, which silently ignores
// `max-width`, `overflow`, and `text-overflow: ellipsis`, and was the
// reason the label appeared blank when the bar was visible.
const progressText = document.createElement("div");
progressText.className = "search-progress-text"; progressText.className = "search-progress-text";
progressText.style.cssText = "font-size: 11px; color: #666; white-space: nowrap; display: none;"; progressText.setAttribute("aria-live", "polite");
progressContainer.appendChild(progressBarWrapper); searchAnchor.appendChild(searchButton);
progressContainer.appendChild(progressText); searchAnchor.appendChild(progressBarWrapper);
searchWrapper.appendChild(searchAnchor);
searchWrapper.appendChild(progressText);
// Indexing state // Indexing state
let isIndexing = false; let isIndexing = false;
@@ -59,18 +62,21 @@ export function mountSearchBar(
if (isIndexing && totalJobs > 0) { if (isIndexing && totalJobs > 0) {
const percentage = Math.round((completedJobs / totalJobs) * 100); const percentage = Math.round((completedJobs / totalJobs) * 100);
progressBar.style.width = `${Math.max(2, percentage)}%`; progressBar.style.width = `${Math.max(2, percentage)}%`;
progressBarWrapper.style.display = "block"; progressBarWrapper.classList.add("is-active");
if (indexingStatus) { if (indexingStatus) {
progressText.textContent = indexingStatus.length > 20 ? indexingStatus.substring(0, 20) + "..." : indexingStatus; const statusText =
progressText.style.display = "block"; indexingStatus.length > 28
? indexingStatus.substring(0, 28) + "…"
: indexingStatus;
progressText.textContent = `${statusText} · ${percentage}%`;
} else { } else {
progressText.textContent = `${completedJobs}/${totalJobs} (${percentage}%)`; progressText.textContent = `Indexing ${completedJobs}/${totalJobs} (${percentage}%)`;
progressText.style.display = "block";
} }
progressText.classList.add("is-active");
} else { } else {
progressBarWrapper.style.display = "none"; progressBarWrapper.classList.remove("is-active");
progressText.style.display = "none"; progressText.classList.remove("is-active");
} }
}; };
@@ -99,8 +105,7 @@ export function mountSearchBar(
}; };
updateSearchButtonDisplay(); updateSearchButtonDisplay();
titleElement.appendChild(searchButton); titleElement.appendChild(searchWrapper);
titleElement.appendChild(progressContainer);
// Listen for hotkey setting changes // Listen for hotkey setting changes
const handleStorageChange = (changes: any, area: string) => { const handleStorageChange = (changes: any, area: string) => {
@@ -155,17 +160,16 @@ export function cleanupSearchBar(appRef: { current: any; storageChangeHandler?:
appRef.progressHandler = null; appRef.progressHandler = null;
} }
// Remove search trigger button // Remove search trigger wrapper (which contains the button and progress UI)
const searchTrigger = document.querySelector(".search-trigger"); const searchWrapper = document.querySelector(".search-trigger-wrapper");
if (searchTrigger) { if (searchWrapper) {
searchTrigger.remove(); searchWrapper.remove();
} }
// Remove progress container // Defensive cleanup for older mounts that may have left the trigger or
const progressContainer = document.querySelector(".search-progress-container"); // progress container as direct children of the topbar.
if (progressContainer) { document.querySelector(".search-trigger")?.remove();
progressContainer.remove(); document.querySelector(".search-progress-container")?.remove();
}
// Remove search root // Remove search root
const searchRoot = document.querySelector("div[data-search-root]"); const searchRoot = document.querySelector("div[data-search-root]");
@@ -1,13 +1,36 @@
/*
* Wrapper that owns the auto-margin so the whole search-trigger-and-progress
* group sits at the left of the SEQTA topbar. Previously, only the
* `.search-trigger` had `margin-right: auto`, which pushed the progress text
* all the way to the far right of the screen.
*/
.search-trigger-wrapper {
display: flex !important;
align-items: center;
gap: 12px;
margin-left: 10px;
margin-right: auto !important;
/* Allow the bar's bottom portion to peek out below the wrapper without
getting clipped by the topbar's flex line. */
overflow: visible;
}
.search-trigger-anchor {
position: relative;
display: inline-flex;
isolation: isolate; /* new stacking context so the bar's z-index is local */
}
.search-trigger { .search-trigger {
position: relative;
z-index: 2; /* sits above the progress bar so the bar tucks under */
display: flex; display: flex;
align-items: center; align-items: center;
justify-content: center; justify-content: center;
height: 32px; height: 32px;
margin-left: 10px;
border-radius: 8px; border-radius: 8px;
cursor: pointer; cursor: pointer;
transition: all 0.2s ease; transition: all 0.2s ease;
margin-right: auto !important;
padding: 3px 12px; padding: 3px 12px;
box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2); box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
backdrop-filter: blur(4px); backdrop-filter: blur(4px);
@@ -83,29 +106,45 @@
animation: shimmer 2s infinite; animation: shimmer 2s infinite;
} }
/* Progress indicator next to search trigger */ /*
.search-progress-container { * Progress bar that hugs the bottom of the search button like the next
display: flex; * card peeking from a small stack. The bar is intentionally inset on the
align-items: center; * sides and slightly shorter than the button so it reads as a stacked
gap: 8px; * shadow card rather than a separate, floating element.
margin-left: 8px; */
min-width: 120px; .search-progress-bar-wrapper {
max-width: 200px; position: absolute;
height: 32px; left: 6px;
right: 6px;
/*
* `top: 100%; margin-top: -6px;` makes the bar slide UP into the button
* by 6px while still extending below it. Combined with z-index: 1 (vs
* the button's z-index: 2), the bar's top edge tucks under the button
* so only the bottom portion peeks out — the card-stack look.
*/
top: 100%;
margin-top: -6px;
height: 10px;
z-index: 1;
background: rgba(0, 0, 0, 0.1);
border-radius: 0 0 7px 7px;
overflow: hidden;
opacity: 0;
transform: translateY(-3px) scaleX(0.94);
transform-origin: top center;
transition: opacity 0.2s ease, transform 0.25s cubic-bezier(0.2, 0.7, 0.3, 1);
pointer-events: none;
box-shadow: 0 3px 6px rgba(0, 0, 0, 0.12);
} }
.search-progress-bar-wrapper { .search-progress-bar-wrapper.is-active {
flex: 1; opacity: 1;
height: 4px; transform: translateY(0) scaleX(1);
background: rgba(0, 0, 0, 0.1);
border-radius: 2px;
overflow: hidden;
display: none;
min-width: 60px;
} }
.dark .search-progress-bar-wrapper { .dark .search-progress-bar-wrapper {
background: rgba(255, 255, 255, 0.1); background: rgba(255, 255, 255, 0.08);
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.35);
} }
.search-progress-bar { .search-progress-bar {
@@ -114,7 +153,7 @@
transition: width 0.3s ease-out; transition: width 0.3s ease-out;
width: 0%; width: 0%;
position: relative; position: relative;
border-radius: 2px; border-radius: 0 0 6px 6px;
} }
.search-progress-bar::after { .search-progress-bar::after {
@@ -123,17 +162,37 @@
inset: 0; inset: 0;
background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.3), transparent); background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.3), transparent);
animation: shimmer 2s infinite; animation: shimmer 2s infinite;
border-radius: 2px; border-radius: 0 0 6px 6px;
} }
/*
* Progress label sits as a flex child immediately to the right of the
* search button (gap is provided by .search-trigger-wrapper). It's hidden
* by default and fades in once an indexing pass is active.
*/
.search-progress-text { .search-progress-text {
font-size: 11px; display: block;
color: #666; font-size: 12px;
color: #475569;
white-space: nowrap; white-space: nowrap;
display: none;
font-weight: 500; font-weight: 500;
opacity: 0;
transform: translateX(-4px);
transition: opacity 0.2s ease, transform 0.2s ease;
pointer-events: none;
max-width: 240px;
overflow: hidden;
text-overflow: ellipsis;
line-height: 32px;
letter-spacing: 0.01em;
flex: 0 0 auto;
}
.search-progress-text.is-active {
opacity: 1;
transform: translateX(0);
} }
.dark .search-progress-text { .dark .search-progress-text {
color: #999; color: #cbd5e1;
} }
@@ -0,0 +1,161 @@
/**
* Representative SEQTA response shapes captured from a real `/seqta/student/`
* session via the websiteskimmer recorder. These are static fixtures used
* by `selfTests.ts` to verify our extractors and the passive observer
* remain compatible with the upstream API as it evolves.
*
* NOTE: These fixtures are scrubbed of any secrets and reduced in size; the
* structure (keys, types, nesting) faithfully matches what SEQTA returns
* but the values are illustrative rather than real student data.
*/
export const subjectsListPayload = [
{
code: "2026S1",
description: "Sample Semester 1 timetable",
active: 1,
id: 77,
subjects: [
{
code: "ENGG1",
classunit: 29248,
description: "English GEN 1",
metaclass: 29611,
title: "English GEN 1",
programme: 3830,
marksbook_type: "numeric",
},
{
code: "MASA1",
classunit: 29247,
description: "Mathematics Specialist 1",
metaclass: 29610,
title: "Mathematics Specialist 1",
programme: 3831,
marksbook_type: "numeric",
},
],
},
];
export const coursesPayload = {
c: "ENGG1#1",
t: "English GEN 1",
i: 3830,
m: 29611,
document:
'{"document":{"modules":[{"uuid":"1641cf87-ae08-4bcb-832d-d5709d84d0c5"}]}}',
w: [
[
{ t: "", h: "", i: 248293, l: "", n: 0, o: "" },
{
t: "",
i: 248316,
l: '<p><a href="http://ed.ted.com/on/r80lnJL0#watch">http://ed.ted.com/on/r80lnJL0#watch</a></p>',
n: 1,
o: "",
},
],
[{ t: "Lesson 2", h: "<h1>Module 2</h1>", i: 248294, l: "", n: 0, o: "" }],
],
};
/**
 * Sample message-list response: an envelope with a `messages` array of
 * message summaries (subject, sender, read state, attachment info — no
 * body text), a `hasMore` flag (presumably for paging — confirm against the
 * consumer) and a `ts` timestamp string.
 */
export const messagesListPayload = {
hasMore: false,
messages: [
{
date: "2026-04-29 04:26:25.075868+00",
attachments: false,
read: 1,
sender: "Jacob Johannesburg",
subject: "test",
sender_type: "student",
attachmentCount: 0,
id: 81469,
sender_id: 3111,
},
],
ts: "2026-04-30 03:25:02.27900",
};
export const documentsPayload = [
{
docs: [
{
file: 49555,
filename: "School Glossary.docx",
size: "14931",
context_uuid: "3162189c-2052-4f83-ad83-a66c57460ea2",
mimetype:
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
created_date: "2021-08-04 12:55:55.102653+00",
title: "School Glossary",
uuid: "3162189c-2052-4f83-ad83-a66c57460ea2",
created_by: "537",
},
],
id: 9,
category: "Document repository",
},
];
/**
 * Sample notices response: a bare array of notices. Note that `contents`
 * carries an HTML fragment rather than plain text.
 */
export const noticesPayload = [
{
id: 12345,
title: "Lunchtime sport tomorrow",
contents: "<p>Bring shoes.</p>",
staff: "Mr Coach",
staff_id: 246,
label: 9,
label_title: "All Students",
colour: "#ff5722",
},
];
/**
 * Sample portals response: a bare array of portal entries. Note that `url`
 * in this sample has no scheme ("www.…"), so it is not an absolute URL
 * as-is.
 */
export const portalsPayload = [
{
is_power_portal: false,
inherit_styles: true,
icon: "colour-cerulean",
id: 328,
label: "Mathletics",
priority: 20,
uuid: "9d20f40c-fdc9-4aa3-91f1-905d86e240c4",
url: "www.mathletics.com/",
},
];
/**
 * Sample folio-list response: an envelope with `me` (a name string) and a
 * `list` of folio summaries (id, title, student name, published
 * timestamp). The summaries carry no folio body content.
 */
export const folioListPayload = {
me: "Jacob Johannesburg",
list: [
{
student: "Jacob Johannesburg",
id: 203,
published: "2026-04-14 20:02:50",
title: "My folio",
},
],
};
export const folioEntryPayload = {
forum: 478,
contents:
'[[embed:raw|<p>Some <strong>reflection</strong> text.</p>]] Plain trailing text.',
created: "2026-04-14 10:32:34.264641+00",
allow_comments: true,
author: { year: "Year 10", name: "Jacob Johannesburg", id: 3111 },
files: [],
id: 203,
published: "2026-04-14 20:02:50",
title: "My folio",
updated: "2026-04-14 10:32:50.696678+00",
};
/**
* Settings payload contains tenant-wide configuration including third-party
* URLs and API keys. The passive observer must NEVER index this route.
*/
export const settingsPayload = {
"global.dropbox.api.key": { value: "xxx-do-not-index" },
"global.ai.api.baseurl": { value: "https://example.com" },
};
@@ -28,6 +28,40 @@ interface AssessmentMetadata {
type ActionHandler<T = any> = (item: IndexItem & { metadata: T }) => void; type ActionHandler<T = any> = (item: IndexItem & { metadata: T }) => void;
/**
 * Navigate to a SEQTA SPA page by assigning a full URL to `location.href`.
 *
 * The URL is built as `${location.origin}/#?page=<route>`, i.e. always
 * against the root path of the current origin. This is the variant used
 * for destinations whose module the SPA may not have loaded yet (courses,
 * forums, folios, portals, documents, reports, goals, notices, ...):
 *
 * - While the current path is already `/`, the browser treats the
 *   assignment as a hash-only change.
 * - If the path is anything else, the assignment triggers a clean reload
 *   that bootstraps the SPA fresh, which is preferable to a blank screen.
 *
 * Destinations already registered with SEQTA's hashchange router (the
 * existing `message` / `assessment` actions) can keep setting
 * `location.hash` directly instead.
 *
 * @param routeWithLeadingSlash Page route including its leading `/`,
 *   e.g. `/courses/<programme>:<subjectId>`; it is interpolated verbatim.
 */
function navigateToHashRoute(routeWithLeadingSlash: string): void {
  window.location.href = `${location.origin}/#?page=${routeWithLeadingSlash}`;
}
/**
 * Navigate within the already-loaded SEQTA page by setting only
 * `location.hash` to `#?page=<route>`. Unlike `navigateToHashRoute`, this
 * never assigns `location.href`, so the current path is left untouched.
 *
 * @param routeWithLeadingSlash Page route including its leading `/`; it is
 *   interpolated verbatim.
 */
function navigateInCurrentSeqtaApp(routeWithLeadingSlash: string): void {
window.location.hash = `#?page=${routeWithLeadingSlash}`;
}
/**
* Final-fallback hub when an item has no usable deep-link metadata.
*
* `/dashboard` is the standard SEQTA Learn landing page and is the
* destination the websiteskimmer recording captured for unknown routes.
* `/home` is BetterSEQTA-Plus's custom replacement which only renders
* after our content script has hooked the SPA — using it as a fallback
* from a fresh nav can produce a blank frame.
*/
const FALLBACK_ROUTE = "/dashboard";
export const actionMap: Record<string, ActionHandler<any>> = { export const actionMap: Record<string, ActionHandler<any>> = {
message: (async (item: IndexItem & { metadata: MessageMetadata }) => { message: (async (item: IndexItem & { metadata: MessageMetadata }) => {
window.location.hash = `#?page=/messages`; window.location.hash = `#?page=/messages`;
@@ -81,32 +115,34 @@ export const actionMap: Record<string, ActionHandler<any>> = {
} }
} }
// Try to extract metadata values using multiple methods to handle XrayWrapper // Try to extract metadata values using multiple methods to handle XrayWrapper.
// The metadata bag is intentionally typed loosely here because Firefox's
// XrayWrapper occasionally surfaces extra/casing-variant keys we still
// want to read defensively.
const getMetadataValue = (key: string, altKey?: string): any => { const getMetadataValue = (key: string, altKey?: string): any => {
const bag = metadata as unknown as Record<string, any>;
try { try {
// Try direct access first const value = bag[key];
const value = metadata[key];
if (value !== undefined && value !== null) { if (value !== undefined && value !== null) {
return value; return value;
} }
if (altKey) { if (altKey) {
const altValue = metadata[altKey]; const altValue = bag[altKey];
if (altValue !== undefined && altValue !== null) { if (altValue !== undefined && altValue !== null) {
return altValue; return altValue;
} }
} }
// Try accessing via Object.keys iteration (works around XrayWrapper)
try { try {
const keys = Object.keys(metadata); const keys = Object.keys(bag);
for (const k of keys) { for (const k of keys) {
if (k === key || k === altKey) { if (k === key || k === altKey) {
const val = metadata[k]; const val = bag[k];
if (val !== undefined && val !== null) { if (val !== undefined && val !== null) {
return val; return val;
} }
} }
} }
} catch (e) { } catch {
// Object.keys might fail on XrayWrapper, that's okay // Object.keys might fail on XrayWrapper, that's okay
} }
return undefined; return undefined;
@@ -189,14 +225,218 @@ export const actionMap: Record<string, ActionHandler<any>> = {
}) as ActionHandler<any>, }) as ActionHandler<any>,
subjectassessment: ((item: IndexItem) => { subjectassessment: ((item: IndexItem) => {
window.location.href = `/#?page=/assessments/${item.metadata.programme}:${item.metadata.subjectId}`; navigateToHashRoute(
`/assessments/${item.metadata.programme}:${item.metadata.subjectId}`,
);
}) as ActionHandler<any>, }) as ActionHandler<any>,
subjectcourse: ((item: IndexItem) => { subjectcourse: ((item: IndexItem) => {
window.location.href = `/#?page=/courses/${item.metadata.programme}:${item.metadata.subjectId}`; navigateToHashRoute(
`/courses/${item.metadata.programme}:${item.metadata.subjectId}`,
);
}) as ActionHandler<any>, }) as ActionHandler<any>,
forum: ((item: IndexItem) => { forum: ((item: IndexItem) => {
window.location.href = `/#?page=/forums/${item.metadata.forumId}`; navigateToHashRoute(`/forums/${item.metadata.forumId}`);
}) as ActionHandler<any>,
course: ((item: IndexItem) => {
const programme = item.metadata?.programme;
const metaclass = item.metadata?.metaclass ?? item.metadata?.subjectId;
if (programme !== undefined && metaclass !== undefined) {
navigateToHashRoute(`/courses/${programme}:${metaclass}`);
return;
}
if (item.metadata?.route) {
navigateToHashRoute(String(item.metadata.route));
return;
}
navigateToHashRoute(FALLBACK_ROUTE);
}) as ActionHandler<any>,
notice: ((_item: IndexItem) => {
// SEQTA's notices route doesn't honour `&date=` from the hash, so just
// open the listing.
navigateToHashRoute("/notices");
}) as ActionHandler<any>,
document: ((_item: IndexItem) => {
// We don't trigger downloads automatically: opening the documents page
// gives users full SEQTA controls (preview, download, share) without
// needing the JWT-stamped streaming URL we deliberately avoid storing.
navigateToHashRoute("/documents");
}) as ActionHandler<any>,
folio: ((_item: IndexItem) => {
// SEQTA's folio SPA does not expose a per-id route; the previous
// `?page=/folios/read?id=N` shape contained a literal `?` inside the
// `page` query value and was unmatchable, which sent users to the
// dashboard. Always land on the read view and let the user pick.
navigateToHashRoute("/folios/read");
}) as ActionHandler<any>,
portal: ((item: IndexItem) => {
// SEQTA renders portals via the in-app viewer at `?page=/portals/<uuid>`
// (verified via the websiteskimmer capture). Prefer that so SSO/headers
// are preserved; only pop the external URL as a fallback if we don't
// have a UUID; final fallback to the dashboard rather than blanking.
const uuid = item.metadata?.portalUuid;
if (typeof uuid === "string" && uuid) {
navigateToHashRoute(`/portals/${uuid}`);
return;
}
const url = item.metadata?.url;
if (typeof url === "string" && url) {
window.open(url, "_blank", "noopener,noreferrer");
return;
}
navigateToHashRoute(FALLBACK_ROUTE);
}) as ActionHandler<any>,
report: ((_item: IndexItem) => {
navigateToHashRoute("/reports");
}) as ActionHandler<any>,
goal: ((item: IndexItem) => {
const year = item.metadata?.year;
if (year !== undefined) {
navigateToHashRoute(`/goals/${year}`);
} else {
navigateToHashRoute("/goals");
}
}) as ActionHandler<any>,
/**
* Routes for passively-captured items.
*
* The passive observer captures whatever `/seqta/student/...` JSON the
* page is fetching, so we can't trust a single category to imply a
* single SEQTA SPA route. Instead, derive the destination from the API
* route the entity came from, augmented with entity-shaped hints
* (programme/metaclass/year/uuid/...) that the observer hoists into
* metadata. We never replay the original POST: actions are user-driven
* and must stay safe even though the observer's own denylist excludes
* `save/*` and friends.
*/
passive: ((item: IndexItem) => {
const md = (item.metadata ?? {}) as Record<string, unknown>;
const route = typeof md.route === "string" ? (md.route as string) : "";
const sourcePage =
typeof md.sourcePage === "string" ? (md.sourcePage as string) : "";
const routeParts = route
.replace(/^\/seqta\/student\/?/, "")
.replace(/^load\//, "")
.split("/")
.filter(Boolean)
.map((part) => part.toLowerCase());
const tail = routeParts[0] ?? "";
const child = routeParts[1] ?? "";
const num = (key: string): number | undefined => {
const value = md[key];
if (typeof value === "number" && Number.isFinite(value)) return value;
if (typeof value === "string" && value && Number.isFinite(Number(value))) {
return Number(value);
}
return undefined;
};
const str = (key: string): string | undefined => {
const value = md[key];
return typeof value === "string" && value ? value : undefined;
};
const programme = num("programme") ?? num("programmeId") ?? num("programmeID");
const metaclass =
num("metaclass") ?? num("metaclassId") ?? num("metaclassID");
const portalUuid = str("portalUuid") ?? str("uuid");
const forumId = num("forumId") ?? num("forum");
const year = num("year");
const assessmentId =
num("assessmentId") ?? num("assessmentID") ?? num("id");
const messageId = num("messageId");
if (sourcePage === "/messages") {
navigateInCurrentSeqtaApp("/messages");
return;
}
switch (tail) {
case "courses":
if (programme !== undefined && metaclass !== undefined) {
navigateToHashRoute(`/courses/${programme}:${metaclass}`);
return;
}
break;
case "assessments":
if (programme !== undefined && metaclass !== undefined) {
const itemSuffix =
assessmentId !== undefined ? `&item=${assessmentId}` : "";
navigateToHashRoute(
`/assessments/${programme}:${metaclass}${itemSuffix}`,
);
return;
}
if (assessmentId !== undefined) {
navigateToHashRoute(`/assessments/upcoming&item=${assessmentId}`);
return;
}
navigateToHashRoute("/assessments/upcoming");
return;
case "forums":
case "forum":
if (forumId !== undefined) {
navigateToHashRoute(`/forums/${forumId}`);
return;
}
break;
case "portals":
case "portal":
if (portalUuid) {
navigateToHashRoute(`/portals/${portalUuid}`);
return;
}
break;
case "goals":
case "goal":
navigateToHashRoute(year !== undefined ? `/goals/${year}` : "/goals");
return;
case "folio":
case "folios":
navigateToHashRoute("/folios/read");
return;
case "notices":
case "notice":
navigateToHashRoute("/notices");
return;
case "documents":
case "document":
navigateToHashRoute("/documents");
return;
case "reports":
case "report":
navigateToHashRoute("/reports");
return;
case "messages":
case "message":
// `/seqta/student/load/message/people` and related endpoints are
// only meaningful while SEQTA's message module is mounted. Use the
// same live hash navigation as the real message action instead of
// forcing a fresh bootstrap, which can drop back to dashboard for
// context-only endpoints.
void messageId; // noqa — preserved for future deep-select work
navigateInCurrentSeqtaApp("/messages");
return;
case "people":
if (route.includes("/load/message/people") || child === "people") {
navigateInCurrentSeqtaApp("/messages");
return;
}
break;
case "timetable":
navigateToHashRoute("/timetable");
return;
}
navigateToHashRoute(FALLBACK_ROUTE);
}) as ActionHandler<any>, }) as ActionHandler<any>,
}; };
@@ -0,0 +1,386 @@
import { delay } from "@/seqta/utils/delay";
/**
* Shared SEQTA HTTP layer used by every indexing job.
*
* - All requests are same-origin POSTs against `/seqta/student/...` with
* `credentials: "include"` so they inherit the user's existing session.
* - Responses are parsed as JSON and normalized into a `{ status, payload }`
*   envelope; a non-"200" status is logged but the payload is still returned.
* - Failures are retried with exponential backoff up to a configurable limit.
* - A simple per-route concurrency / spacing limiter prevents heavy jobs (e.g.
* per-subject course crawls) from hammering SEQTA.
*/
/**
 * Normalized SEQTA response envelope returned by `seqtaFetchJson`.
 *
 * Responses that arrive as a bare array, or as an object without `payload` /
 * `status` keys, are wrapped so callers always see this shape.
 */
export interface SeqtaResponse<T = any> {
/** Endpoint-specific body. May be `undefined` when the envelope had no `payload` key. */
payload: T;
/** Envelope status as a string; synthesized as "200" when the response carried none. */
status: string;
}
/** Per-call tuning knobs accepted by `seqtaFetchJson` / `seqtaFetchPayload`. */
export interface SeqtaFetchOptions {
/** Defaults to "POST". */
method?: "POST" | "GET";
/** Maximum number of retries for transient failures (default 2, clamped to >= 0). */
retries?: number;
/** Initial backoff delay in ms (default 200, clamped to >= 50); doubles per retry. */
baseDelayMs?: number;
/**
 * Timeout in ms applied to each individual attempt (default 20s, clamped to
 * >= 1s). The timer is restarted for every retry, so it is not a cap on the
 * total time spent across retries.
 */
timeoutMs?: number;
/** AbortSignal for cancellation. */
signal?: AbortSignal;
/** Skip the routing limiter (rare; only for already-throttled callers). */
skipLimiter?: boolean;
}
// Defaults used when the corresponding `SeqtaFetchOptions` field is omitted.
const DEFAULT_RETRIES = 2;
const DEFAULT_BASE_DELAY = 200;
const DEFAULT_TIMEOUT = 20_000;
/* ------------------------------------------------------------------ */
/* limiter */
/* ------------------------------------------------------------------ */
/**
 * Per-route concurrency gate. At most `maxConcurrent` callers may hold a slot
 * for the same normalized SEQTA route; further callers are queued in FIFO
 * order and woken one at a time as slots are released. Fan-out jobs (e.g. one
 * /load/courses request per subject) use this to avoid firing dozens of
 * parallel requests.
 */
class RouteLimiter {
  private inFlight = new Map<string, number>();
  private waiters = new Map<string, Array<() => void>>();
  private readonly maxConcurrent: number;

  constructor(maxConcurrent = 4) {
    this.maxConcurrent = maxConcurrent;
  }

  /**
   * Reserves a slot for `route`, waiting if the route is saturated.
   * Resolves with the function the caller must invoke to give the slot back.
   */
  async acquire(route: string): Promise<() => void> {
    const giveBack = () => this.release(route);
    const claim = () => {
      this.inFlight.set(route, (this.inFlight.get(route) ?? 0) + 1);
    };

    const active = this.inFlight.get(route) ?? 0;
    if (active < this.maxConcurrent) {
      claim();
      return giveBack;
    }

    // Saturated: park a wake-up callback; the slot is claimed only when woken.
    return new Promise((resolve) => {
      const pending = this.waiters.get(route) ?? [];
      pending.push(() => {
        claim();
        resolve(giveBack);
      });
      this.waiters.set(route, pending);
    });
  }

  /** Returns one slot for `route` and hands it to the oldest waiter, if any. */
  private release(route: string) {
    const remaining = (this.inFlight.get(route) ?? 1) - 1;
    if (remaining > 0) {
      this.inFlight.set(route, remaining);
    } else {
      this.inFlight.delete(route);
    }

    const pending = this.waiters.get(route);
    if (!pending || pending.length === 0) return;

    const wakeNext = pending.shift()!;
    if (pending.length === 0) this.waiters.delete(route);
    wakeNext();
  }
}

const routeLimiter = new RouteLimiter(4);
/* ------------------------------------------------------------------ */
/* route normalization */
/* ------------------------------------------------------------------ */
/**
 * Reduces a SEQTA URL to its canonical route by discarding the query string
 * (SEQTA appends a volatile token such as `?mokx3qef`), so caches and the
 * limiter can key off a stable value.
 *
 * @param url Absolute or relative URL.
 * @returns The URL's pathname; if URL parsing throws, everything before the
 *   first `?` of the input.
 */
export function normalizeSeqtaPath(url: string): string {
  try {
    // Resolving against the page origin lets relative routes parse too.
    const { pathname } = new URL(url, location.origin);
    return pathname;
  } catch {
    // Parsing failed: strip the query by hand.
    const queryStart = url.indexOf("?");
    return queryStart === -1 ? url : url.slice(0, queryStart);
  }
}
/* ------------------------------------------------------------------ */
/* sensitive routes */
/* ------------------------------------------------------------------ */
/**
 * Deny-list of SEQTA routes whose responses must never reach the index:
 * they carry credentials, secrets, JWTs, or arbitrary configuration blobs.
 * All patterns are case-insensitive.
 */
const SENSITIVE_PATH_PATTERNS: RegExp[] = [
  /\/seqta\/student\/login(\b|\/)/i,
  /\/seqta\/student\/save\//i,
  /\/seqta\/student\/load\/settings(\b|\/)/i,
  /\/seqta\/student\/load\/prefs(\b|\/)/i,
  /\/seqta\/student\/heartbeat(\b|\/)/i,
  /\/seqta\/student\/storage(\b|\/)/i,
  /\/seqta\/student\/themes\//i,
  /\/seqta\/student\/branding\//i,
  /\/seqta\/student\/releasealert\//i,
  /\/seqta\/student\/files\/stream(\b|\/)/i,
  /\/seqta\/student\/load\/file(\b|\/)/i,
  /\/seqta\/ta\/masquerade(\b|\/)/i,
];

/**
 * Tells whether `path` (query string ignored) hits the sensitive-route
 * deny-list.
 */
export function isSensitiveSeqtaPath(path: string): boolean {
  const route = normalizeSeqtaPath(path);
  for (const pattern of SENSITIVE_PATH_PATTERNS) {
    if (pattern.test(route)) return true;
  }
  return false;
}
/* ------------------------------------------------------------------ */
/* student / user identity */
/* ------------------------------------------------------------------ */
/**
 * Shape of the `payload` returned by the SEQTA `login` endpoint, as far as
 * this module relies on it. Every known field is optional and unknown extra
 * fields are tolerated through the index signature.
 */
interface SeqtaUserInfo {
/** Read by `getCurrentStudentId` as the active student id. */
id?: number;
personUUID?: string;
username?: string;
[key: string]: unknown;
}
// Last successfully resolved identity; only ever set on success.
let cachedUserInfo: SeqtaUserInfo | null = null;
// Lookup currently in progress, shared so concurrent callers issue one request.
let inflightUserInfo: Promise<SeqtaUserInfo | null> | null = null;
/**
 * Looks up the signed-in SEQTA user by issuing the same `login` POST the host
 * page performs. This replaces the historical hard-coded `student: 69`
 * placeholder, which was wrong on every real instance.
 *
 * Caching rules:
 * - a successful payload is cached for the lifetime of the page;
 * - concurrent callers share a single in-flight request;
 * - failures are deliberately NOT cached, so one transient glitch cannot
 *   make later indexing passes (e.g. the assignments job) skip silently.
 *
 * @returns The login payload, or `null` when the request fails, returns a
 *   non-OK status, or carries no object payload.
 */
export async function getCurrentUserInfo(): Promise<SeqtaUserInfo | null> {
  if (cachedUserInfo) return cachedUserInfo;
  if (inflightUserInfo) return inflightUserInfo;

  const requestIdentity = async (): Promise<SeqtaUserInfo | null> => {
    try {
      const response = await fetch(`${location.origin}/seqta/student/login`, {
        method: "POST",
        credentials: "include",
        headers: { "Content-Type": "application/json; charset=utf-8" },
        body: JSON.stringify({
          mode: "normal",
          query: null,
          redirect_url: location.origin,
        }),
      });
      if (!response.ok) return null;

      const envelope = (await response.json()) as { payload?: SeqtaUserInfo };
      const info = envelope?.payload ?? null;
      if (!info || typeof info !== "object") return null;

      cachedUserInfo = info;
      return info;
    } catch (e) {
      console.warn(
        "[Global Search API] Failed to resolve current user info:",
        e,
      );
      return null;
    } finally {
      // Clear the shared handle whatever the outcome so a failure can be retried.
      inflightUserInfo = null;
    }
  };

  inflightUserInfo = requestIdentity();
  return inflightUserInfo;
}
/**
 * Best-effort accessor for the active student's numeric id.
 *
 * @returns The `id` from the current user info when it is a finite number;
 *   otherwise `undefined`, so jobs can fall back gracefully instead of
 *   fabricating an id.
 */
export async function getCurrentStudentId(): Promise<number | undefined> {
  const candidate = (await getCurrentUserInfo())?.id;
  return typeof candidate === "number" && Number.isFinite(candidate)
    ? candidate
    : undefined;
}
/* ------------------------------------------------------------------ */
/* core fetch */
/* ------------------------------------------------------------------ */
/**
 * Error raised for a failed SEQTA request. Carries the HTTP status and the
 * normalized route so callers can classify the failure.
 */
class SeqtaApiError extends Error {
  constructor(
    message: string,
    public status: number,
    public route: string,
  ) {
    super(message);
    this.name = "SeqtaApiError";
  }
}
/**
 * Decides whether a failed attempt is worth retrying.
 *
 * - `SeqtaApiError`: retry only for status 0, 429, or any status >= 500.
 * - `TypeError`: always retry.
 * - Anything named "AbortError": never retry.
 * - Every other error: retry.
 */
function isTransientError(err: unknown): boolean {
  if (err instanceof SeqtaApiError) {
    const { status } = err;
    return status === 0 || status === 429 || status >= 500;
  }
  if (err instanceof TypeError) return true;
  return (err as any)?.name !== "AbortError";
}
/**
 * Sends a JSON request against a SEQTA route and returns the parsed envelope.
 *
 * - The request always targets `location.origin` + the normalized route (the
 *   query string of `path` is dropped).
 * - Adds `credentials: "include"` so requests reuse the active session.
 * - Sets `X-Requested-With: XMLHttpRequest` so SEQTA classifies the request
 *   the same way as the first-party SPA (some routes 4xx without it).
 * - Retries transient network/server errors with exponential backoff
 *   (capped at 5s per wait). Each attempt gets its own `timeoutMs` timer.
 * - Normalizes the response permissively: bare arrays and objects without
 *   `payload`/`status` keys are wrapped as `{ status: "200", payload }`. A
 *   non-"200" envelope status is logged, not rejected; callers must handle
 *   missing or empty payloads themselves.
 *
 * @param path Route or URL; only its pathname is used.
 * @param body JSON body for POST requests (defaults to `{}`). Ignored for
 *   `GET`, because `fetch` rejects GET requests that carry a body.
 * @param options Retry, timeout, cancellation and limiter settings.
 * @returns The normalized `{ status, payload }` envelope.
 * @throws SeqtaApiError on a non-2xx HTTP status or a non-object JSON body
 *   (after retries, when the status is retryable).
 * @throws The underlying error for aborts and for other failures once
 *   retries are exhausted.
 */
export async function seqtaFetchJson<T = any>(
  path: string,
  body: Record<string, unknown> | undefined = {},
  options: SeqtaFetchOptions = {},
): Promise<SeqtaResponse<T>> {
  const route = normalizeSeqtaPath(path);
  const retries = Math.max(0, options.retries ?? DEFAULT_RETRIES);
  const baseDelay = Math.max(50, options.baseDelayMs ?? DEFAULT_BASE_DELAY);
  const timeoutMs = Math.max(1_000, options.timeoutMs ?? DEFAULT_TIMEOUT);
  const method = options.method ?? "POST";
  // A GET with a body makes fetch() reject with a TypeError, which the retry
  // classifier treats as transient; since `body` defaults to `{}`, every GET
  // call would otherwise fail after burning all its retries.
  const sendsBody = method !== "GET" && body !== undefined;

  let release: (() => void) | null = null;
  if (!options.skipLimiter) {
    release = await routeLimiter.acquire(route);
  }

  try {
    let attempt = 0;
    let lastError: unknown = null;

    while (attempt <= retries) {
      // Fresh controller per attempt: it is aborted by the per-attempt timer
      // or by the caller's signal, whichever comes first.
      const controller = new AbortController();
      const timer = setTimeout(() => controller.abort(), timeoutMs);
      const onAbort = () => controller.abort();
      if (options.signal) {
        if (options.signal.aborted) controller.abort();
        else options.signal.addEventListener("abort", onAbort, { once: true });
      }

      try {
        const res = await fetch(`${location.origin}${route}`, {
          method,
          credentials: "include",
          headers: {
            "Content-Type": "application/json; charset=utf-8",
            "X-Requested-With": "XMLHttpRequest",
            Accept: "text/javascript, text/html, application/xml, text/xml, */*",
          },
          body: sendsBody ? JSON.stringify(body) : undefined,
          signal: controller.signal,
        });

        if (!res.ok) {
          throw new SeqtaApiError(
            `HTTP ${res.status} ${res.statusText} for ${route}`,
            res.status,
            route,
          );
        }

        const rawJson = (await res.json()) as unknown;
        if (!rawJson || typeof rawJson !== "object") {
          throw new SeqtaApiError(
            `Invalid SEQTA response (not a JSON object) for ${route}`,
            res.status,
            route,
          );
        }

        // SEQTA's "envelope" convention is `{ status, payload }`, but in
        // practice some endpoints — notably `/seqta/student/load/subjects`
        // and `/seqta/student/assessment/list/*` — occasionally return
        // either a bare array or an envelope with a non-"200" status.
        // Strict validation here was historically silently killing the
        // assignments + courses indexing pipelines when those endpoints
        // returned a quirky shape, so we normalize permissively and let
        // callers handle missing/empty payloads.
        let json: SeqtaResponse<T>;
        if (Array.isArray(rawJson)) {
          json = { payload: rawJson as unknown as T, status: "200" };
        } else {
          const obj = rawJson as Record<string, unknown>;
          const hasEnvelopeKey = "payload" in obj || "status" in obj;
          if (hasEnvelopeKey) {
            json = {
              payload: ("payload" in obj ? obj.payload : undefined) as T,
              status:
                typeof obj.status === "string"
                  ? obj.status
                  : typeof obj.status === "number"
                    ? String(obj.status)
                    : "200",
            };
          } else {
            json = { payload: rawJson as unknown as T, status: "200" };
          }
        }

        if (json.status && json.status !== "200") {
          console.warn(
            `[Global Search API] Non-200 SEQTA status "${json.status}" for ${route} — returning payload anyway`,
          );
        }
        return json;
      } catch (err) {
        lastError = err;
        if (!isTransientError(err) || attempt === retries) {
          throw err;
        }
        // Exponential backoff: baseDelay, 2x, 4x, ... capped at 5 seconds.
        const wait = Math.min(5_000, baseDelay * Math.pow(2, attempt));
        await delay(wait);
        attempt++;
      } finally {
        clearTimeout(timer);
        if (options.signal) options.signal.removeEventListener("abort", onAbort);
      }
    }

    throw lastError ?? new Error(`seqtaFetchJson exhausted retries for ${route}`);
  } finally {
    // Always hand the limiter slot back, on success and on failure.
    if (release) release();
  }
}
/**
 * Fetches a SEQTA route and hands back only the envelope's `payload`.
 *
 * Never throws: any failure is logged with the normalized route and reported
 * as `null`, as is a missing (`null`/`undefined`) payload.
 */
export async function seqtaFetchPayload<T = any>(
  path: string,
  body: Record<string, unknown> | undefined = {},
  options: SeqtaFetchOptions = {},
): Promise<T | null> {
  try {
    const { payload } = await seqtaFetchJson<T>(path, body, options);
    return payload ?? null;
  } catch (e) {
    console.warn(
      `[Global Search API] Request to ${normalizeSeqtaPath(path)} failed:`,
      e,
    );
    return null;
  }
}
@@ -0,0 +1,303 @@
import { htmlToPlainText } from "./utils";
import type { IndexItem } from "./types";
/**
* Safe extraction helpers used by both active SEQTA jobs and the passive
* network observer.
*
* The goal is to take arbitrary SEQTA JSON / embedded HTML fragments and
* derive concise, redacted, search-friendly text without ever indexing
* obvious credentials, tokens, JWTs, or large binary blobs.
*/
/* ------------------------------------------------------------------ */
/* sensitive keys */
/* ------------------------------------------------------------------ */
/**
 * Lower-case fragments that mark a field name as sensitive. A key is treated
 * as sensitive when it contains ANY of these as a substring, so compound
 * names such as `client_secret`, `apiKey` and `dropboxKey` all match.
 *
 * NOTE(review): because matching is by substring, unrelated names that merely
 * contain a fragment also match (e.g. `author` contains `auth`).
 */
const SENSITIVE_KEY_FRAGMENTS: readonly string[] = [
  "password",
  "passwd",
  "pwd",
  "secret",
  "token",
  "jwt",
  "session",
  "cookie",
  "auth",
  "apikey",
  "api_key",
  "clientid",
  "client_id",
  "clientsecret",
  "client_secret",
  "credential",
  "private",
  "salt",
  "hash",
  "csrf",
  "x-api",
  "bearer",
  "dropbox",
  "oauth",
  "signature",
];

/**
 * Case-insensitive check of a field name against the sensitive fragments.
 * An empty key is never sensitive.
 */
export function isSensitiveKey(key: string): boolean {
  if (!key) return false;
  const haystack = key.toLowerCase();
  for (const fragment of SENSITIVE_KEY_FRAGMENTS) {
    if (haystack.includes(fragment)) return true;
  }
  return false;
}
/**
 * Heuristic for credential-shaped scalars, used to catch tokens whose field
 * name gives no hint.
 *
 * A value is flagged when, after trimming, it is a string of at least 32
 * characters without whitespace that is either
 * - made only of base64/hex-style characters with no `.`, `,`, `!` or `?`
 *   (UUIDs are exempt), or
 * - three dot-separated base64url segments (JWT shape).
 */
export function looksLikeSecretValue(value: unknown): boolean {
  if (typeof value !== "string") return false;

  const candidate = value.trim();
  if (candidate.length < 32 || /\s/.test(candidate)) return false;

  // Long contiguous base64 / hex with no humanish punctuation.
  const isOpaqueBlob =
    /^[A-Za-z0-9+/=._-]{32,}$/.test(candidate) && !/[.,!?]/.test(candidate);
  if (isOpaqueBlob) {
    // URLs and UUIDs are useful and not secret. (The URL test cannot match
    // here because `:` is outside the blob character class; it is kept for
    // parity with the intent.)
    const isUrl = /^https?:\/\//i.test(candidate);
    const isUuid =
      /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(
        candidate,
      );
    return !(isUrl || isUuid);
  }

  // JWT detection: three base64url segments separated by dots.
  return /^[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+$/.test(candidate);
}
/* ------------------------------------------------------------------ */
/* text extraction */
/* ------------------------------------------------------------------ */
/**
* Recursively pulls human-readable text out of an arbitrary JSON value.
*
* - HTML strings are passed through `htmlToPlainText`.
* - Sensitive keys and secret-shaped values are skipped.
* - Long blobs are truncated to keep the index lean.
* - Arrays and objects are walked; depth is bounded to avoid pathological
* structures.
*/
/** Limits applied by `extractTextFromValue` while walking a value. */
export interface ExtractTextOptions {
/**
 * Hard cap on combined characters across the walk (default 4000). Each
 * emitted fragment is charged its length plus one, and the fragment that
 * crosses the cap is truncated to the remaining budget.
 */
maxChars?: number;
/** Maximum recursion depth (default 6); arrays/objects at or beyond it are not descended. */
maxDepth?: number;
/** Maximum array length to traverse (default 200); later elements are ignored. */
maxArrayItems?: number;
/** Skip individual string values longer than this (default 8000); they are dropped, not truncated. */
maxStringLength?: number;
}
// Fallback values merged under any caller-supplied options.
const DEFAULT_EXTRACT_OPTIONS: Required<ExtractTextOptions> = {
maxChars: 4000,
maxDepth: 6,
maxArrayItems: 200,
maxStringLength: 8000,
};
/**
 * Flattens an arbitrary JSON-like value into newline-separated, trimmed text
 * fragments suitable for indexing.
 *
 * Strings that contain both `<` and `>` go through `htmlToPlainText`; numbers
 * and booleans are ignored; values under sensitive keys and secret-shaped
 * strings are skipped; the walk stops once the character budget is spent.
 *
 * @param value Any JSON-like value.
 * @param options Overrides for the default limits.
 * @returns The collected fragments joined with `\n` (possibly empty).
 */
export function extractTextFromValue(
  value: unknown,
  options: ExtractTextOptions = {},
): string {
  const limits = { ...DEFAULT_EXTRACT_OPTIONS, ...options };
  const fragments: string[] = [];
  let budget = limits.maxChars;

  // Records one fragment, clipping it to whatever budget is left.
  const emit = (text: string) => {
    if (!text || budget <= 0) return;
    const cleaned = text.trim();
    if (!cleaned) return;
    const clipped =
      cleaned.length > budget ? cleaned.slice(0, budget) : cleaned;
    fragments.push(clipped);
    // +1 accounts for the newline used when joining.
    budget -= clipped.length + 1;
  };

  const visit = (node: unknown, depth: number, parentKey: string | null) => {
    if (budget <= 0 || node === null || node === undefined) return;
    if (parentKey && isSensitiveKey(parentKey)) return;

    switch (typeof node) {
      case "string": {
        if (node.length > limits.maxStringLength) return;
        if (looksLikeSecretValue(node)) return;
        const looksLikeMarkup = node.includes("<") && node.includes(">");
        emit(looksLikeMarkup ? htmlToPlainText(node) : node);
        return;
      }
      case "number":
      case "boolean":
        // Numbers/booleans rarely help search recall; keep the index textual.
        return;
      case "object":
        break;
      default:
        return;
    }

    if (depth >= limits.maxDepth) return;

    if (Array.isArray(node)) {
      const count = Math.min(node.length, limits.maxArrayItems);
      for (let index = 0; index < count; index++) {
        // Elements inherit the array's own key for the sensitivity check.
        visit(node[index], depth + 1, parentKey);
        if (budget <= 0) return;
      }
      return;
    }

    for (const [key, child] of Object.entries(node as Record<string, unknown>)) {
      if (budget <= 0) return;
      if (isSensitiveKey(key)) continue;
      visit(child, depth + 1, key);
    }
  };

  visit(value, 0, null);
  return fragments.join("\n").trim();
}
/* ------------------------------------------------------------------ */
/* redacted clones */
/* ------------------------------------------------------------------ */
/**
 * Returns a deep clone of `value` with sensitive keys/values stripped. The
 * passive observer uses this when persisting metadata so we never store
 * raw tokens or settings blobs in IndexedDB.
 *
 * Rules:
 * - object entries whose key is sensitive, or whose string value looks like
 *   a secret, are dropped;
 * - secret-looking strings elsewhere (array elements, the root) become `""`;
 * - arrays are truncated to their first 200 elements;
 * - containers nested at depth 8 or deeper are replaced by an empty
 *   array/object. They are never copied through unredacted, since that would
 *   let secrets in deeply nested data bypass redaction entirely.
 *
 * @param value Any JSON-like value.
 * @param depth Current nesting depth; callers normally omit it.
 * @returns The redacted clone (primitives are returned as-is unless they are
 *   secret-looking strings).
 */
export function redactSensitive<T>(value: T, depth = 0): T {
  if (value === null || value === undefined) return value;

  if (typeof value === "string") {
    return (looksLikeSecretValue(value) ? "" : value) as unknown as T;
  }
  if (typeof value !== "object") return value;

  // Past the depth cap nothing below can be inspected, so drop it rather
  // than returning the raw subtree.
  const atDepthLimit = depth >= 8;

  if (Array.isArray(value)) {
    if (atDepthLimit) return [] as unknown as T;
    return value
      .slice(0, 200)
      .map((v) => redactSensitive(v, depth + 1)) as unknown as T;
  }

  const out: Record<string, unknown> = {};
  if (atDepthLimit) return out as T;

  for (const [key, child] of Object.entries(value as Record<string, unknown>)) {
    if (isSensitiveKey(key)) continue;
    if (typeof child === "string" && looksLikeSecretValue(child)) continue;
    out[key] = redactSensitive(child, depth + 1);
  }
  return out as T;
}
/* ------------------------------------------------------------------ */
/* title / id heuristics */
/* ------------------------------------------------------------------ */
// Field names probed by `pickTitle`, in priority order: the first key whose
// value is a non-blank string wins.
const TITLE_KEYS = [
"title",
"subject",
"name",
"label",
"heading",
"displayName",
"filename",
"code",
];
// Field names probed by `pickId`, in priority order: the first key holding a
// non-blank string or a finite number wins.
const ID_KEYS = ["id", "uuid", "messageID", "assessmentID", "notificationID"];
/**
 * Best-effort title lookup: scans the title-like keys in priority order and
 * returns the first value that is a non-blank string, trimmed.
 *
 * @param node Candidate entity; anything that is not a non-null object
 *   yields `fallback`.
 * @param fallback Returned when no title-like field is present.
 */
export function pickTitle(node: unknown, fallback = ""): string {
  if (!node || typeof node !== "object") return fallback;

  const record = node as Record<string, unknown>;
  const winner = TITLE_KEYS.find((key) => {
    const candidate = record[key];
    return typeof candidate === "string" && candidate.trim().length > 0;
  });
  return winner === undefined ? fallback : (record[winner] as string).trim();
}
/**
 * Best-effort identifier lookup: scans the id-like keys in priority order and
 * returns the first non-blank string (trimmed) or finite number (stringified).
 *
 * @param node Candidate entity; anything that is not a non-null object
 *   yields `fallback`.
 * @param fallback Returned when no usable id field is present.
 */
export function pickId(node: unknown, fallback = ""): string {
  if (!node || typeof node !== "object") return fallback;

  const record = node as Record<string, unknown>;
  for (const key of ID_KEYS) {
    const candidate = record[key];
    switch (typeof candidate) {
      case "string":
        if (candidate.trim()) return candidate.trim();
        break;
      case "number":
        if (Number.isFinite(candidate)) return String(candidate);
        break;
      default:
        break;
    }
  }
  return fallback;
}
/* ------------------------------------------------------------------ */
/* IndexItem builders */
/* ------------------------------------------------------------------ */
/**
 * Assembles an `IndexItem` from caller-supplied fields.
 *
 * The caller provides the domain-specific parts (`category`, `actionId`,
 * `metadata`, route hints); this helper derives the searchable `content`
 * and redacts the metadata.
 *
 * - `content` is `contentOverride` when provided (even if empty); otherwise
 *   it is extracted from `rawForContent`, capped at `contentMaxChars`
 *   (default 1500).
 * - `metadata` is passed through `redactSensitive`; it defaults to `{}`.
 * - `dateAdded` defaults to the current time.
 */
export function buildIndexItem(input: {
  id: string;
  text: string;
  category: string;
  rawForContent?: unknown;
  contentOverride?: string;
  metadata?: Record<string, unknown>;
  actionId: string;
  renderComponentId: string;
  dateAdded?: number;
  contentMaxChars?: number;
}): IndexItem {
  const { id, text, category, actionId, renderComponentId } = input;

  let content: string;
  if (input.contentOverride !== undefined) {
    content = input.contentOverride;
  } else {
    content = extractTextFromValue(input.rawForContent, {
      maxChars: input.contentMaxChars ?? 1500,
    });
  }

  const metadata = input.metadata ? redactSensitive(input.metadata) : {};
  const dateAdded = input.dateAdded ?? Date.now();

  return {
    id,
    text,
    category,
    content,
    dateAdded,
    metadata,
    actionId,
    renderComponentId,
  };
}
@@ -1,10 +1,11 @@
import { clear, get, getAll, put, remove } from "./db"; import { clear, get, getAll, put, remove, resetDatabase } from "./db";
import { jobs } from "./jobs"; import { jobs } from "./jobs";
import { renderComponentMap } from "./renderComponents"; import { renderComponentMap } from "./renderComponents";
import type { IndexItem, Job, JobContext } from "./types"; import type { IndexItem, Job, JobContext } from "./types";
import { VectorWorkerManager } from "./worker/vectorWorkerManager"; import { VectorWorkerManager } from "./worker/vectorWorkerManager";
import { loadDynamicItems } from "../utils/dynamicItems"; import { loadDynamicItems } from "../utils/dynamicItems";
import { getVectorizedItemIds } from "./utils"; import { getVectorizedItemIds } from "./utils";
import { INDEX_SCHEMA_VERSION, SCHEMA_VERSION_KEY } from "./schemaVersion";
const META_STORE = "meta"; const META_STORE = "meta";
const LOCK_KEY = "bsq-indexer-lock"; const LOCK_KEY = "bsq-indexer-lock";
@@ -12,6 +13,50 @@ const HEARTBEAT_INTERVAL = 10000;
const LOCK_TIMEOUT = 20000; const LOCK_TIMEOUT = 20000;
const LOCK_ACQUIRE_TIMEOUT = 5000; const LOCK_ACQUIRE_TIMEOUT = 5000;
let schemaCheckPromise: Promise<void> | null = null;
/**
 * Wipes the stored indexes when the persisted schema version differs from
 * `INDEX_SCHEMA_VERSION`.
 *
 * The check runs at most once per page: the first call creates a promise that
 * every later call returns. When the versions differ, the structured database
 * is reset, the `embeddiaDB` IndexedDB database is deleted, and the current
 * version is written to localStorage. Each step is best-effort: a failure is
 * logged (or ignored) and the following steps still run, so the version is
 * recorded even if a reset step failed.
 */
async function ensureSchemaCurrent(): Promise<void> {
// Reuse the in-progress/completed check instead of running it again.
if (schemaCheckPromise) return schemaCheckPromise;
schemaCheckPromise = (async () => {
let storedRaw: string | null = null;
try {
storedRaw = localStorage.getItem(SCHEMA_VERSION_KEY);
} catch {
// localStorage is unavailable: skip the check entirely rather than reset.
return;
}
// A missing value is treated as version 0.
const stored = storedRaw ? parseInt(storedRaw, 10) : 0;
if (stored === INDEX_SCHEMA_VERSION) return;
console.warn(
`[Indexer] Schema version changed (${stored} -> ${INDEX_SCHEMA_VERSION}); resetting structured + vector indexes.`,
);
try {
await resetDatabase();
} catch (e) {
console.warn("[Indexer] Failed to reset structured database:", e);
}
try {
// Resolve on every outcome (success, error, blocked) so this never hangs.
await new Promise<void>((resolve) => {
const req = indexedDB.deleteDatabase("embeddiaDB");
req.onsuccess = () => resolve();
req.onerror = () => resolve();
req.onblocked = () => resolve();
});
} catch (e) {
console.warn("[Indexer] Failed to reset embeddiaDB:", e);
}
try {
// Persist the new version so the reset is not repeated on the next load.
localStorage.setItem(SCHEMA_VERSION_KEY, String(INDEX_SCHEMA_VERSION));
} catch {
/* ignore */
}
})();
return schemaCheckPromise;
}
/* ─────────── Progressmeta helpers ─────────── */ /* ─────────── Progressmeta helpers ─────────── */
async function loadProgress<T = any>(jobId: string): Promise<T | undefined> { async function loadProgress<T = any>(jobId: string): Promise<T | undefined> {
const rec = await get(META_STORE, `progress:${jobId}`); const rec = await get(META_STORE, `progress:${jobId}`);
@@ -162,6 +207,8 @@ export async function loadAllStoredItems(): Promise<IndexItem[]> {
} }
export async function runIndexing(): Promise<void> { export async function runIndexing(): Promise<void> {
await ensureSchemaCurrent();
if (!(await acquireLock())) { if (!(await acquireLock())) {
console.debug( console.debug(
"%c[Indexer] Could not acquire lock - another tab is indexing or this tab is already indexing", "%c[Indexer] Could not acquire lock - another tab is indexing or this tab is already indexing",
@@ -178,8 +225,6 @@ export async function runIndexing(): Promise<void> {
const totalSteps = jobIds.length + 1; const totalSteps = jobIds.length + 1;
dispatchProgress(completedJobs, totalSteps, true, "Starting jobs"); dispatchProgress(completedJobs, totalSteps, true, "Starting jobs");
let hasStreamingJobs = false;
for (const jobId of jobIds) { for (const jobId of jobIds) {
dispatchProgress( dispatchProgress(
completedJobs, completedJobs,
@@ -255,10 +300,6 @@ export async function runIndexing(): Promise<void> {
await setStoredItems(merged); await setStoredItems(merged);
await updateLastRunMeta(jobId); await updateLastRunMeta(jobId);
if (jobId === 'messages' || jobId === 'notifications') {
hasStreamingJobs = true;
}
console.debug( console.debug(
`%c[Indexer] ${job.label}: ${newItemsRaw.length} new items reported by run, ${merged.length} total items now in '${jobId}' store.`, `%c[Indexer] ${job.label}: ${newItemsRaw.length} new items reported by run, ${merged.length} total items now in '${jobId}' store.`,
"color: #00c46f", "color: #00c46f",
@@ -4,6 +4,14 @@ import { notificationsJob } from "./jobs/notifications";
import { forumsJob } from "./jobs/forums"; import { forumsJob } from "./jobs/forums";
import { subjectsJob } from "./jobs/subjects"; import { subjectsJob } from "./jobs/subjects";
import { assignmentsJob } from "./jobs/assignments"; import { assignmentsJob } from "./jobs/assignments";
import { coursesJob } from "./jobs/courses";
import { noticesJob } from "./jobs/notices";
import { documentsJob } from "./jobs/documents";
import { folioJob } from "./jobs/folio";
import { portalsJob } from "./jobs/portals";
import { reportsJob } from "./jobs/reports";
import { goalsJob } from "./jobs/goals";
import { passiveJob } from "./jobs/passive";
export const jobs: Record<string, Job> = { export const jobs: Record<string, Job> = {
messages: messagesJob, messages: messagesJob,
@@ -11,4 +19,12 @@ export const jobs: Record<string, Job> = {
forums: forumsJob, forums: forumsJob,
subjects: subjectsJob, subjects: subjectsJob,
assignments: assignmentsJob, assignments: assignmentsJob,
courses: coursesJob,
notices: noticesJob,
documents: documentsJob,
folio: folioJob,
portals: portalsJob,
reports: reportsJob,
goals: goalsJob,
passive: passiveJob,
}; };
@@ -1,41 +1,67 @@
import type { IndexItem, Job } from "../types"; import type { IndexItem, Job } from "../types";
import { getCurrentStudentId, seqtaFetchPayload } from "../api";
import { getUserInfo } from "@/seqta/ui/AddBetterSEQTAElements";
const fetchJSON = async (url: string, body: any) => { /**
const res = await fetch(`${location.origin}${url}`, { * Resolves the active student id from whatever source is available.
method: "POST", *
credentials: "include", * The shared `getCurrentStudentId()` calls `/seqta/student/login` with a
headers: { "Content-Type": "application/json; charset=utf-8" }, * specific body shape; on some SEQTA installs that endpoint can return a
body: JSON.stringify(body), * response that confuses the helper (no `id`, or a non-"200" envelope).
}); * To make sure we never silently skip the entire assignments pass, we
return res.json(); * also fall back to `getUserInfo()` from `AddBetterSEQTAElements.ts` —
}; * it's the same handshake the host page uses to render the avatar, so
* if the user is logged in at all this path resolves.
const fetchUpcomingAssessments = async (student: number = 69) => { */
async function resolveStudentId(): Promise<number | undefined> {
try { try {
const res = await fetchJSON("/seqta/student/assessment/list/upcoming?", { const direct = await getCurrentStudentId();
student, if (typeof direct === "number" && Number.isFinite(direct)) return direct;
});
// Match analytics.rs: payload is an array, return empty array if not found
return Array.isArray(res.payload) ? res.payload : [];
} catch (e) { } catch (e) {
console.error("[Assignments job] Failed to fetch upcoming assessments:", e); console.warn(
return []; "[Assignments job] getCurrentStudentId() threw, falling back to getUserInfo()",
e,
);
} }
try {
const info = (await getUserInfo()) as { id?: unknown } | null;
const id = info?.id;
if (typeof id === "number" && Number.isFinite(id)) return id;
if (typeof id === "string" && id && Number.isFinite(Number(id))) {
return Number(id);
}
} catch (e) {
console.warn("[Assignments job] getUserInfo() fallback failed:", e);
}
return undefined;
}
const fetchUpcomingAssessments = async (student: number) => {
const payload = await seqtaFetchPayload<any[]>(
"/seqta/student/assessment/list/upcoming",
{ student },
);
return Array.isArray(payload) ? payload : [];
}; };
const fetchSubjects = async () => { const fetchSubjects = async () => {
try { // SEQTA accepts both `{}` and `{ mode: "list" }` here; the latter is the
const res = await fetchJSON("/seqta/student/load/subjects?", {}); // shape every BetterSEQTA-Plus path uses elsewhere and is the more
return res.payload // reliable response format on schools that customize the `student/load`
?.filter((s: any) => s.active === 1) // endpoint.
?.flatMap((s: any) => s.subjects) || []; const payload = await seqtaFetchPayload<any[]>(
} catch (e) { "/seqta/student/load/subjects",
console.error("[Assignments job] Failed to fetch subjects:", e); { mode: "list" },
return []; );
} if (!Array.isArray(payload)) return [];
return payload
.filter((s: any) => s && s.active === 1)
.flatMap((s: any) => (Array.isArray(s.subjects) ? s.subjects : []));
}; };
const fetchPastAssessments = async (student: number = 69, subjects: any[]) => { const fetchPastAssessments = async (student: number, subjects: any[]) => {
const map: Record<number, any> = {}; const map: Record<number, any> = {};
// Fetch past assessments for all subjects in parallel (like assessmentsOverview does) // Fetch past assessments for all subjects in parallel (like assessmentsOverview does)
@@ -43,12 +69,16 @@ const fetchPastAssessments = async (student: number = 69, subjects: any[]) => {
await Promise.all( await Promise.all(
subjects.map(async (subject) => { subjects.map(async (subject) => {
try { try {
// Match analytics.rs exactly: parameter order is programme, metaclass, student const payload = await seqtaFetchPayload<any>(
const res = await fetchJSON("/seqta/student/assessment/list/past?", { "/seqta/student/assessment/list/past",
{
programme: subject.programme, programme: subject.programme,
metaclass: subject.metaclass, metaclass: subject.metaclass,
student, student,
}); },
);
if (!payload) return;
// Past assessments API can return data in payload.tasks OR payload.pending (or both) // Past assessments API can return data in payload.tasks OR payload.pending (or both)
// Based on analytics.rs fetch_past_assessments, we need to check both arrays // Based on analytics.rs fetch_past_assessments, we need to check both arrays
@@ -68,13 +98,13 @@ const fetchPastAssessments = async (student: number = 69, subjects: any[]) => {
// Match analytics.rs: Check both pending and tasks arrays // Match analytics.rs: Check both pending and tasks arrays
// Check for pending array first (matching Rust code order) // Check for pending array first (matching Rust code order)
if (res.payload?.pending && Array.isArray(res.payload.pending)) { if (payload?.pending && Array.isArray(payload.pending)) {
res.payload.pending.forEach(processAssessment); payload.pending.forEach(processAssessment);
} }
// Check for tasks array // Check for tasks array
if (res.payload?.tasks && Array.isArray(res.payload.tasks)) { if (payload?.tasks && Array.isArray(payload.tasks)) {
res.payload.tasks.forEach(processAssessment); payload.tasks.forEach(processAssessment);
} }
} catch (e) { } catch (e) {
console.warn(`[Assignments job] Failed to fetch past assessments for subject ${subject.code || subject.subject || 'unknown'}:`, e); console.warn(`[Assignments job] Failed to fetch past assessments for subject ${subject.code || subject.subject || 'unknown'}:`, e);
@@ -126,9 +156,27 @@ export const assignmentsJob: Job = {
const existingItems = await ctx.getStoredItems("assignments"); const existingItems = await ctx.getStoredItems("assignments");
const existingIds = new Set(existingItems.map((i) => i.id)); const existingIds = new Set(existingItems.map((i) => i.id));
const student = 69; // TODO: Get from context if available // Resolve the active student id from the live SEQTA session. Historically
// this was hard-coded to 69, which only happens to be correct on a few
// local dev instances; the shared helper now reuses the same `login`
// handshake that the host page performs so every install gets the right
// value without configuration.
//
// We *throw* instead of returning [] when resolution fails, so the
// indexer's "lastRun" meta is NOT updated. Otherwise the job would be
// marked complete (with zero items) and `shouldRun` would skip it for
// the entire 24h frequency window — meaning a single bad page load
// could leave the user without any assessment results until tomorrow.
const student = await resolveStudentId();
if (typeof student !== "number") {
throw new Error(
"[Assignments job] Could not resolve current student id from /seqta/student/login. The job will retry on the next page load.",
);
}
console.debug("[Assignments job] Starting indexing - fetching all assessments (upcoming and past)..."); console.debug(
`[Assignments job] Starting indexing for student=${student} - fetching all assessments (upcoming and past)...`,
);
// Fetch data in parallel // Fetch data in parallel
const [upcoming, subjects] = await Promise.all([ const [upcoming, subjects] = await Promise.all([
@@ -0,0 +1,179 @@
import type { IndexItem, Job } from "../types";
import { seqtaFetchPayload } from "../api";
import { buildIndexItem } from "../extract";
import { htmlToPlainText } from "../utils";
/**
* Indexes per-subject course content from `/seqta/student/load/courses`.
*
* The course payload contains the lesson grid in `w[][]` where each cell's
* `l` field is a (possibly empty) HTML snippet authored by teachers. We
* concatenate these into searchable text per course, plus the course title
* and code from `t` / `c`. Embedded files referenced via TED/SEQTA URLs are
* preserved as plain-text links so users can find them by URL fragment.
*/
/**
 * One element of the array returned by `/seqta/student/load/subjects`.
 *
 * `fetchActiveSubjects` walks these elements (it calls each one a
 * "semester"), keeps only those whose `active` is exactly `1`, and collects
 * the entries of their `subjects` arrays.
 */
interface SubjectsListPayload {
code: string;
description?: string;
// Only elements with `active === 1` contribute subjects.
active: number;
subjects: Array<{
code: string;
title?: string;
description?: string;
// `programme` and `metaclass` must both be finite numbers for a subject to
// be kept; together they form the course item id and the course request body.
metaclass: number;
programme: number;
}>;
}
/**
 * Response payload of `/seqta/student/load/courses` as consumed by this job.
 *
 * Only `c`, `t` and `w` are read by the code visible here; `i`, `m` and
 * `document` are declared but unused.
 */
interface CoursePayload {
// Course code; preferred over the subject's own code when non-blank.
c?: string;
// Course title; preferred over the subject's title when non-blank.
t?: string;
i?: number;
m?: number;
// Lesson grid. The `l`, `h`, `t` and `o` strings of every cell are flattened
// into the searchable text; the per-cell `i` is not read.
w?: Array<Array<{ l?: string; h?: string; t?: string; o?: string; i?: number }>>;
document?: string;
}
/**
 * Loads the subject list and returns every subject that belongs to an
 * active entry and carries a finite numeric `programme` and `metaclass`.
 *
 * The request body is `{ mode: "list" }` - the same shape the assignments
 * job sends to this endpoint - so both jobs read the same response format.
 *
 * @returns A flat list of subjects; empty when the payload is not an array.
 */
const fetchActiveSubjects = async (): Promise<
  SubjectsListPayload["subjects"]
> => {
  const payload = await seqtaFetchPayload<SubjectsListPayload[]>(
    "/seqta/student/load/subjects",
    { mode: "list" },
  );
  if (!Array.isArray(payload)) return [];

  const out: SubjectsListPayload["subjects"] = [];
  for (const semester of payload) {
    // Tolerate malformed entries instead of failing the whole job.
    if (!semester || !Array.isArray(semester.subjects)) continue;
    if (semester.active !== 1) continue;

    for (const subject of semester.subjects) {
      // Both ids are required later to build the course id and request.
      if (
        subject &&
        Number.isFinite(subject.programme) &&
        Number.isFinite(subject.metaclass)
      ) {
        out.push(subject);
      }
    }
  }
  return out;
};
/**
 * Gathers the non-blank `l`, `h`, `t` and `o` strings of every cell in the
 * lesson grid `payload.w` (row by row, in that field order), joins them with
 * newlines and converts the result to plain text.
 *
 * @returns "" when there is no grid or the grid holds no non-blank string.
 */
function flattenLessonHtml(payload: CoursePayload): string {
  const grid = payload.w;
  if (!Array.isArray(grid)) return "";

  const pieces: string[] = [];
  for (const row of grid) {
    if (!Array.isArray(row)) continue;
    for (const cell of row) {
      if (!cell) continue;
      for (const value of [cell.l, cell.h, cell.t, cell.o]) {
        if (typeof value === "string" && value.trim()) pieces.push(value);
      }
    }
  }

  return pieces.length > 0 ? htmlToPlainText(pieces.join("\n")) : "";
}
/**
 * Indexer job that emits one search item per active course.
 *
 * Each distinct programme/metaclass pair is fetched once, one request at a
 * time. The item's text is the course title and its content is the course
 * code plus the flattened lesson text, capped at 4000 characters.
 */
export const coursesJob: Job = {
  id: "courses",
  label: "Courses",
  renderComponentId: "course",
  // Expires 24 hours after the previous run.
  frequency: { type: "expiry", afterMs: 1000 * 60 * 60 * 24 },
  boostCriteria: (item, searchTerm) => {
    // Without a query, courses are pushed down the list.
    if (!searchTerm) return -50;

    const meta = item.metadata;
    let bonus = 0;
    if (meta?.subjectCode) bonus += 0.05;
    if (meta?.isActive) bonus += 0.02;
    return bonus;
  },
  run: async (_ctx) => {
    const subjects = await fetchActiveSubjects();
    if (subjects.length === 0) {
      console.debug("[Courses job] No active subjects discovered.");
      return [];
    }

    const items: IndexItem[] = [];
    const handledIds = new Set<string>();

    // Requests are awaited one after another rather than in parallel.
    for (const subject of subjects) {
      const { programme, metaclass } = subject;
      const id = `course-${programme}-${metaclass}`;
      if (handledIds.has(id)) continue;
      handledIds.add(id);

      const payload = await seqtaFetchPayload<CoursePayload>(
        "/seqta/student/load/courses",
        {
          programme: String(programme),
          metaclass: String(metaclass),
        },
      );
      if (!payload) continue;

      // Prefer the values from the course payload, then fall back to the subject.
      const payloadTitle = typeof payload.t === "string" ? payload.t.trim() : "";
      const title =
        payloadTitle ||
        subject.title ||
        subject.description ||
        subject.code ||
        "Course";

      const payloadCode = typeof payload.c === "string" ? payload.c.trim() : "";
      const courseCode = payloadCode || subject.code;

      const lessonText = flattenLessonHtml(payload);
      const summary = [courseCode, lessonText]
        .filter((part) => part && part.length > 0)
        .join("\n")
        .slice(0, 4000);

      const item = buildIndexItem({
        id,
        text: title,
        category: "courses",
        contentOverride: summary || `Course content for ${title}`,
        metadata: {
          subjectCode: subject.code,
          subjectName: subject.title ?? title,
          programme: subject.programme,
          metaclass: subject.metaclass,
          courseCode,
          isActive: true,
          route: `/courses/${subject.programme}:${subject.metaclass}`,
          entityType: "course",
          icon: "\ueb4d",
        },
        actionId: "course",
        renderComponentId: "course",
      });
      items.push(item);
    }

    console.debug(
      `[Courses job] Indexed ${items.length} courses across ${subjects.length} subjects.`,
    );
    return items;
  },
  // Nothing is ever purged for this job.
  purge: (items) => items,
};
@@ -0,0 +1,139 @@
import type { IndexItem, Job } from "../types";
import { seqtaFetchPayload } from "../api";
/**
* Indexes file metadata from `/seqta/student/load/documents`.
*
* Each top-level entry is a category containing one or more documents
* (`docs[]`). We capture the human-readable title, filename, mimetype, and
* stable UUID/category for every doc, but never download or index the
* binary content itself - the document streaming endpoint uses one-time
* JWTs that are unsafe to persist or replay.
*/
/**
 * A single document inside a category's `docs` array.
 *
 * `documentsJob` builds the item id from `uuid` (falling back to
 * `context_uuid`, then `file`), the display title from `title` or
 * `filename`, and descriptive content/metadata from `size`, `mimetype` and
 * `created_date`. `created_by` is declared but not read by the job.
 */
interface DocumentEntry {
file?: number | string;
filename?: string;
// May arrive as a numeric string; it is parsed with base 10 where needed.
size?: string | number;
context_uuid?: string;
mimetype?: string;
created_date?: string;
title?: string;
uuid?: string;
created_by?: string;
}
/**
 * A top-level entry of the `/seqta/student/load/documents` payload: a named
 * category holding its documents. `id` and `category` are copied into each
 * item's metadata; `colour` is declared but not read by the job.
 */
interface DocumentCategory {
id: number | string;
category: string;
colour?: string;
docs: DocumentEntry[];
}
/**
 * Formats a byte count as a short human-readable size such as "512 B",
 * "1.5 KB" or "15 MB".
 *
 * Values are scaled by 1024 up to GB. One decimal is shown for scaled values
 * below 10, none otherwise. If rounding would print "1024" of a unit, the
 * value rolls over to the next unit (e.g. 1048575 bytes is "1.0 MB", not
 * "1024 KB").
 *
 * @param size Byte count as a number or base-10 numeric string.
 * @returns The formatted size, or null when `size` is missing, not a finite
 *   number, or not positive.
 */
function prettySize(size: string | number | undefined): string | null {
  if (size === undefined || size === null) return null;
  const bytes = typeof size === "string" ? parseInt(size, 10) : size;
  if (!Number.isFinite(bytes) || bytes <= 0) return null;

  const units = ["B", "KB", "MB", "GB"];
  const lastUnit = units.length - 1;
  const decimalsFor = (value: number, unit: number): number =>
    value < 10 && unit > 0 ? 1 : 0;

  let value = bytes;
  let unit = 0;
  while (value >= 1024 && unit < lastUnit) {
    value /= 1024;
    unit++;
  }

  let text = value.toFixed(decimalsFor(value, unit));
  // Rounding can push e.g. 1023.999 up to "1024"; promote to the next unit.
  if (Number(text) >= 1024 && unit < lastUnit) {
    value = Number(text) / 1024;
    unit++;
    text = value.toFixed(decimalsFor(value, unit));
  }
  return `${text} ${units[unit]}`;
}
/**
 * Maps a MIME type to a short label ("PDF", "Word", "Excel", "PowerPoint",
 * "Image", "Video", "Audio"). Rules are tried in that order and the first
 * match wins.
 *
 * @returns The label, or null for a missing, empty or unrecognised type.
 */
function describeMime(mime: string | undefined): string | null {
  if (!mime) return null;

  const rules: Array<[(value: string) => boolean, string]> = [
    [(value) => value.startsWith("application/pdf"), "PDF"],
    [(value) => value.includes("officedocument.wordprocessingml"), "Word"],
    [(value) => value.includes("officedocument.spreadsheetml"), "Excel"],
    [(value) => value.includes("officedocument.presentationml"), "PowerPoint"],
    [(value) => value.startsWith("image/"), "Image"],
    [(value) => value.startsWith("video/"), "Video"],
    [(value) => value.startsWith("audio/"), "Audio"],
  ];

  for (const [matches, label] of rules) {
    if (matches(mime)) return label;
  }
  return null;
}
/**
 * Indexer job for document metadata from `/seqta/student/load/documents`.
 *
 * One item is emitted per distinct document. The item text is the title
 * (falling back to the filename, then "Document <id>"), and the content is a
 * bullet-separated summary of filename, category, type, size and date.
 *
 * Malformed `docs` entries (null or non-object) are skipped rather than
 * aborting the run. The id key is the first non-empty value of `uuid`,
 * `context_uuid` and `file`, so an empty-string uuid can no longer produce
 * the colliding id "document-".
 */
export const documentsJob: Job = {
  id: "documents",
  label: "Documents",
  renderComponentId: "document",
  frequency: { type: "expiry", afterMs: 1000 * 60 * 60 * 12 }, // 12 hours
  boostCriteria: (_item, searchTerm) => {
    // Without a query, documents are pushed down the list.
    if (!searchTerm) return -20;
    return 0;
  },
  run: async (_ctx) => {
    const payload = await seqtaFetchPayload<DocumentCategory[] | null>(
      "/seqta/student/load/documents",
      {},
    );
    if (!Array.isArray(payload)) return [];

    const items: IndexItem[] = [];
    const seen = new Set<string>();

    for (const category of payload) {
      if (!category || !Array.isArray(category.docs)) continue;

      for (const doc of category.docs) {
        // Guard against null / primitive entries in the server payload.
        if (!doc || typeof doc !== "object") continue;

        const uuid = doc.uuid || doc.context_uuid;
        // `||` (not `??`) so an empty-string uuid falls through to `file`.
        const key = uuid || doc.file;
        if (!key) continue;

        const id = `document-${key}`;
        if (seen.has(id)) continue;
        seen.add(id);

        const title =
          doc.title?.trim() ||
          doc.filename?.trim() ||
          `Document ${doc.file ?? uuid}`;
        const sizeText = prettySize(doc.size);
        const mimeLabel = describeMime(doc.mimetype);

        const contentParts: string[] = [];
        if (doc.filename && doc.filename !== title) contentParts.push(doc.filename);
        if (category.category) contentParts.push(`Category: ${category.category}`);
        if (mimeLabel) contentParts.push(mimeLabel);
        if (sizeText) contentParts.push(sizeText);
        if (doc.created_date) contentParts.push(`Added ${doc.created_date}`);

        // An unparsable date yields NaN, which falls back to "now".
        const dateAdded = doc.created_date
          ? new Date(doc.created_date).getTime() || Date.now()
          : Date.now();

        items.push({
          id,
          text: title,
          category: "documents",
          content: contentParts.join(" \u2022 "),
          dateAdded,
          metadata: {
            documentUuid: uuid,
            fileId: doc.file,
            filename: doc.filename,
            mimetype: doc.mimetype,
            sizeBytes:
              typeof doc.size === "string" ? parseInt(doc.size, 10) : doc.size,
            categoryId: category.id,
            categoryName: category.category,
            createdDate: doc.created_date,
            entityType: "document",
            route: "/documents",
            icon: "\ueb6f",
          },
          actionId: "document",
          renderComponentId: "document",
        });
      }
    }

    console.debug(`[Documents job] Indexed ${items.length} document entries.`);
    return items;
  },
  // Nothing is ever purged for this job.
  purge: (items) => items,
};
@@ -0,0 +1,134 @@
import type { IndexItem, Job } from "../types";
import { seqtaFetchPayload } from "../api";
import { htmlToPlainText } from "../utils";
import { delay } from "@/seqta/utils/delay";
/**
* Indexes student folio entries from `/seqta/student/folio`.
*
* The list mode returns `{ me, list: [{ id, title, published, student }] }`,
* and the load mode returns the full body via `{ contents, files, ... }`.
* Folio bodies frequently contain `[[embed:raw|<html>]]` blocks which we
* normalize to plain text before indexing - the htmlToPlainText sanitizer
* never executes scripts because it parses into an inert document.
*/
/**
 * Payload of `/seqta/student/folio` requested with `mode: "list"`.
 *
 * `folioJob` iterates `list`, skipping entries without an `id`. `me` is
 * stored as the item's `student` metadata, with the entry's own `student`
 * as the fallback.
 */
interface FolioListPayload {
me?: string;
list?: Array<{
id: number | string;
title?: string;
// Parsed with `new Date(...)` to derive the item's `dateAdded`.
published?: string;
student?: string;
}>;
}
/**
 * Payload of `/seqta/student/folio` requested with `mode: "load"` for one
 * folio.
 *
 * `folioJob` reads `contents` (flattened to text), `updated`, `created`,
 * `author.name`, `author.id`, `forum` and `allow_comments`, and counts
 * `files`. `id`, `published`, `title` and `author.year` are declared here
 * but the job takes id, title and published date from the list entry.
 */
interface FolioEntryPayload {
forum?: number;
contents?: string;
created?: string;
allow_comments?: boolean;
author?: { name?: string; year?: string; id?: number };
// Only the array length is used (as `fileCount`).
files?: unknown[];
id?: number | string;
published?: string;
title?: string;
updated?: string;
}
// Passed to `delay()` after each folio detail request attempt
// (presumably milliseconds, per the name - confirm against `delay`).
const PER_ITEM_DELAY_MS = 80;
/**
 * Replaces every `[[embed:raw|...]]` block in `text` with the plain-text
 * rendering of the markup it wraps. Text outside such blocks is untouched.
 *
 * @returns "" for empty input, otherwise the text with embeds flattened.
 */
function stripEmbedRaw(text: string): string {
  if (!text) return "";

  const embedPattern = /\[\[embed:raw\|([\s\S]*?)\]\]/g;
  const flattenEmbed = (_whole: string, html: unknown): string =>
    htmlToPlainText(typeof html === "string" ? html : "");

  return text.replace(embedPattern, flattenEmbed);
}
/**
 * Indexer job for student folio entries.
 *
 * The folio list is fetched once. For every entry the job either reuses the
 * previously stored item (when its title is unchanged) or loads the full
 * folio body, flattens it to plain text and caps it at 4000 characters.
 *
 * The "title unchanged" check compares the stored text against the same
 * normalised title that is written to the index (trimmed, with the
 * "Folio <id>" fallback). Comparing against the raw list title meant that
 * untitled or whitespace-padded folios never matched and were refetched on
 * every run.
 */
export const folioJob: Job = {
  id: "folio",
  label: "Folio",
  renderComponentId: "folio",
  frequency: { type: "expiry", afterMs: 1000 * 60 * 60 * 24 },
  boostCriteria: (_item, searchTerm) => {
    // Without a query, folio entries are pushed down the list.
    if (!searchTerm) return -30;
    return 0;
  },
  run: async (ctx) => {
    const stored = await ctx.getStoredItems("folio");
    const existing = new Map(stored.map((i) => [i.id, i]));

    const list = await seqtaFetchPayload<FolioListPayload | null>(
      "/seqta/student/folio",
      { mode: "list", page: 0, filters: {} },
    );
    if (!list || !Array.isArray(list.list)) return [];

    const items: IndexItem[] = [];

    for (const entry of list.list) {
      if (!entry || entry.id === undefined) continue;

      const id = `folio-${entry.id}`;
      // Same normalisation for both the cache check and the stored text.
      const title = entry.title?.trim() || `Folio ${entry.id}`;
      // An unparsable date yields NaN, which falls back to "now".
      const dateAdded = entry.published
        ? new Date(entry.published).getTime() || Date.now()
        : Date.now();

      // Reuse the stored content when the title is unchanged, saving a
      // detail request (and its delay) for this entry.
      const existingItem = existing.get(id);
      if (existingItem && existingItem.text === title) {
        items.push({
          ...existingItem,
          dateAdded,
        });
        continue;
      }

      try {
        const detail = await seqtaFetchPayload<FolioEntryPayload | null>(
          "/seqta/student/folio",
          { mode: "load", id: entry.id },
        );
        const rawContents = detail?.contents ?? "";
        const flattened = stripEmbedRaw(rawContents);
        const content = flattened.slice(0, 4000);
        const files = detail?.files;

        items.push({
          id,
          text: title,
          category: "folio",
          content,
          dateAdded,
          metadata: {
            folioId: entry.id,
            student: list.me ?? entry.student,
            publishedAt: entry.published,
            updatedAt: detail?.updated,
            createdAt: detail?.created,
            authorName: detail?.author?.name,
            authorId: detail?.author?.id,
            forumId: detail?.forum,
            allowComments: detail?.allow_comments,
            fileCount: Array.isArray(files) ? files.length : 0,
            entityType: "folio",
            route: "/folios/read",
            icon: "\ueb16",
          },
          actionId: "folio",
          renderComponentId: "folio",
        });
      } catch (e) {
        // One failing folio must not abort the rest of the pass.
        console.warn(`[Folio job] Failed to load folio ${entry.id}:`, e);
      }

      await delay(PER_ITEM_DELAY_MS);
    }

    console.debug(`[Folio job] Indexed ${items.length} folio entries.`);
    return items;
  },
  // Nothing is ever purged for this job.
  purge: (items) => items,
};
@@ -0,0 +1,109 @@
import type { IndexItem, Job } from "../types";
import { seqtaFetchPayload } from "../api";
import { extractTextFromValue } from "../extract";
import { delay } from "@/seqta/utils/delay";
/**
* Indexes student goals from `/seqta/student/load/goals`.
*
* The endpoint exposes `mode: "years"` which returns the list of available
* years and `mode: "list"` (per-year) which returns the actual goals. We
* gracefully degrade if the school has goals disabled (the years payload
* is empty in that case).
*/
/**
 * One goal as returned by `/seqta/student/load/goals` with `mode: "list"`.
 *
 * `goalsJob` uses `uuid` (falling back to `id`) as the stable key, skips
 * goals that have neither, and derives the item title from `title`, then
 * the first 80 characters of `description`.
 */
interface GoalEntry {
id?: number | string;
uuid?: string;
title?: string;
description?: string;
status?: string;
// Falls back to the requested year when absent.
year?: number | string;
created?: string;
updated?: string;
}
// Passed to `delay()` after each per-year goals request
// (presumably milliseconds, per the name - confirm against `delay`).
const PER_YEAR_DELAY_MS = 80;
/**
 * Indexer job for student goals.
 *
 * The available years are fetched first (`mode: "years"`), then the goals of
 * each year (`mode: "list"`). A year whose request throws is logged and
 * skipped. Goals are de-duplicated across years by `uuid`, falling back to
 * `id`.
 *
 * `dateAdded` comes from the first non-empty of `updated` and `created`.
 * The previous mix of `||` and `??` let an empty-string `updated` hide a
 * valid `created`.
 */
export const goalsJob: Job = {
  id: "goals",
  label: "Goals",
  renderComponentId: "goal",
  frequency: { type: "expiry", afterMs: 1000 * 60 * 60 * 24 * 3 }, // every 3 days
  boostCriteria: (_item, searchTerm) => {
    // Without a query, goals are pushed down the list.
    if (!searchTerm) return -40;
    return 0;
  },
  run: async (_ctx) => {
    const years = await seqtaFetchPayload<Array<string | number> | null>(
      "/seqta/student/load/goals",
      { mode: "years" },
    );
    if (!Array.isArray(years) || years.length === 0) {
      console.debug("[Goals job] No goal years available; skipping.");
      return [];
    }

    const items: IndexItem[] = [];
    const seen = new Set<string>();

    for (const year of years) {
      try {
        const yearGoals = await seqtaFetchPayload<GoalEntry[] | null>(
          "/seqta/student/load/goals",
          { mode: "list", year },
        );
        // Note: `continue` here also skips the delay below.
        if (!Array.isArray(yearGoals)) continue;

        for (const goal of yearGoals) {
          if (!goal) continue;
          const stableId = goal.uuid ?? goal.id;
          if (stableId === undefined || stableId === null) continue;

          const id = `goal-${stableId}`;
          if (seen.has(id)) continue;
          seen.add(id);

          const title =
            goal.title?.trim() || goal.description?.slice(0, 80) || `Goal ${stableId}`;

          // First non-empty timestamp wins; NaN or missing falls back to "now".
          const stamp = goal.updated || goal.created;
          const dateAdded = stamp
            ? new Date(stamp).getTime() || Date.now()
            : Date.now();

          items.push({
            id,
            text: title,
            category: "goals",
            content: extractTextFromValue(
              { description: goal.description, status: goal.status },
              { maxChars: 1000 },
            ),
            dateAdded,
            metadata: {
              goalId: goal.id,
              goalUuid: goal.uuid,
              status: goal.status,
              year: goal.year ?? year,
              createdAt: goal.created,
              updatedAt: goal.updated,
              entityType: "goal",
              route: `/goals/${year}`,
              icon: "\uea15",
            },
            actionId: "goal",
            renderComponentId: "goal",
          });
        }
      } catch (e) {
        // One failing year must not abort the remaining years.
        console.warn(`[Goals job] Failed to fetch goals for year ${year}:`, e);
      }

      await delay(PER_YEAR_DELAY_MS);
    }

    console.debug(`[Goals job] Indexed ${items.length} goal entries.`);
    return items;
  },
  // Nothing is ever purged for this job.
  purge: (items) => items,
};
@@ -0,0 +1,218 @@
import type { IndexItem, Job } from "../types";
import { seqtaFetchPayload } from "../api";
import { htmlToPlainText } from "../utils";
import { delay } from "@/seqta/utils/delay";
/**
* Indexes daily notices from `/seqta/student/load/notices`.
*
* SEQTA returns notices keyed by date, so we sweep a sliding window
* (default: 14 days back) the first time we run, then incrementally pull
* the most recent days on subsequent runs. Sensitive routes are excluded
* because notices are surfaced for the active student already.
*/
/**
 * One notice as returned by `/seqta/student/load/notices` for a given date.
 *
 * `noticesJob` skips records that have neither `id` nor `title`. The `date`
 * field is declared but not read: the job tags each item with the date it
 * requested instead.
 */
interface NoticeRecord {
id?: number | string;
title?: string;
// HTML body; converted to plain text and capped at 4000 characters.
contents?: string;
staff?: string;
staff_id?: number;
date?: string;
// Numeric label id, resolved to a title through the label lookup when
// `label_title` is absent.
label?: number;
label_title?: string;
colour?: string;
}
/**
 * Progress record loaded with `ctx.getProgress()` and saved with
 * `ctx.setProgress()` by `noticesJob`. Both fields hold `YYYY-MM-DD`
 * strings, or null before the first run.
 */
interface NoticesProgress {
// Earliest date among the notices indexed so far.
earliestDate: string | null;
// Oldest date the backward sweep has reached.
lastSweepBackTo: string | null;
}
// Number of most recent days (today included) fetched on every run.
const SWEEP_DAYS = 14;
// The backward sweep never goes further back than this many days before today.
const MAX_HISTORY_DAYS = 365;
// Passed to `delay()` after each per-date request
// (presumably milliseconds, per the name - confirm against `delay`).
const FETCH_DELAY_MS = 60;
/**
 * Formats a date as `YYYY-MM-DD` using its local-time year, month and day.
 * Month and day are zero-padded to two digits; the year is not padded.
 */
function formatYmd(date: Date): string {
  const twoDigits = (n: number): string => n.toString().padStart(2, "0");
  return [
    date.getFullYear(),
    twoDigits(date.getMonth() + 1),
    twoDigits(date.getDate()),
  ].join("-");
}
/**
 * Parses a strict `YYYY-MM-DD` string into a Date at local midnight.
 *
 * @returns null for null, undefined, empty, or differently formatted input.
 *   Field ranges are not validated, so out-of-range months or days roll over
 *   the way the `Date` constructor normalises them.
 */
function parseYmd(value: string | null | undefined): Date | null {
  if (!value) return null;

  const parts = /^(\d{4})-(\d{2})-(\d{2})$/.exec(value);
  if (parts === null) return null;

  const year = Number(parts[1]);
  const monthIndex = Number(parts[2]) - 1;
  const day = Number(parts[3]);
  return new Date(year, monthIndex, day);
}
/**
 * Fetches the notices for one `YYYY-MM-DD` date.
 *
 * The payload may be the notice array itself or an object wrapping it in a
 * `notices` property; any other shape yields an empty array.
 */
const fetchNoticesForDate = async (date: string): Promise<NoticeRecord[]> => {
  const payload = await seqtaFetchPayload<NoticeRecord[] | { notices?: NoticeRecord[] } | null>(
    "/seqta/student/load/notices",
    { date },
  );

  if (Array.isArray(payload)) return payload;

  const wrapped = payload ? (payload as any).notices : undefined;
  return Array.isArray(wrapped) ? wrapped : [];
};
/**
 * Fetches the notice labels (`mode: "labels"`) and returns them as a map
 * from numeric label id to title.
 *
 * Entries without a numeric `id` or with an empty title are ignored, and a
 * non-array payload yields an empty map.
 */
const fetchLabelLookup = async (): Promise<Map<number, string>> => {
  const payload = await seqtaFetchPayload<
    Array<{ id: number; title?: string }>
  >("/seqta/student/load/notices", { mode: "labels" });

  const labels = new Map<number, string>();
  if (!Array.isArray(payload)) return labels;

  for (const entry of payload) {
    if (!entry || typeof entry.id !== "number" || !entry.title) continue;
    labels.set(entry.id, entry.title);
  }
  return labels;
};
/**
 * Returns local midnight of the calendar day `days` days before `base`.
 * Uses calendar arithmetic rather than subtracting multiples of 24 hours,
 * so days with a DST change never skip or repeat a date.
 */
function noticeDayBefore(base: Date, days: number): Date {
  return new Date(base.getFullYear(), base.getMonth(), base.getDate() - days);
}

/**
 * Converts a notice date to epoch milliseconds. A bare `YYYY-MM-DD` is read
 * as LOCAL midnight, matching how the job generates its date strings; any
 * other string is handed to `Date` unchanged. Unparsable input yields NaN.
 */
function noticeDayStartMs(value: string): number {
  const isBareDate = /^\d{4}-\d{2}-\d{2}$/.test(value);
  return new Date(isBareDate ? `${value}T00:00:00` : value).getTime();
}

/**
 * Indexer job for daily notices.
 *
 * Every run fetches the most recent `SWEEP_DAYS` days. While the backward
 * sweep has not reached `MAX_HISTORY_DAYS`, each run also backfills one
 * further batch of older days and records how far it got in the job's
 * progress.
 *
 * Fixes over the previous implementation:
 * - The first backfill batch starts right after the recent window; every
 *   later batch starts the day before the previous stop. Previously each
 *   later batch began `SWEEP_DAYS` before the previous stop and left a
 *   13-day gap.
 * - Day arithmetic is calendar-based, so DST days cannot skip or repeat.
 * - Notice dates are read as local midnight, not UTC midnight, so today's
 *   notices get the recency boost in timezones ahead of UTC.
 */
export const noticesJob: Job = {
  id: "notices",
  label: "Notices",
  renderComponentId: "notice",
  frequency: { type: "expiry", afterMs: 1000 * 60 * 60 * 6 }, // 6 hours
  boostCriteria: (item, searchTerm) => {
    // Without a query, notices are pushed slightly down the list.
    if (!searchTerm) return -10;
    let score = 0;
    const ts = item.metadata?.timestamp;
    if (typeof ts === "string") {
      const ageDays =
        (Date.now() - noticeDayStartMs(ts)) / (1000 * 60 * 60 * 24);
      // Small boost for notices from the last week.
      if (ageDays >= 0 && ageDays <= 7) score += 0.05;
    }
    return score;
  },
  run: async (ctx) => {
    const stored = await ctx.getStoredItems("notices");
    const existingIds = new Set(stored.map((i) => i.id));
    const progress = (await ctx.getProgress<NoticesProgress>()) ?? {
      earliestDate: null,
      lastSweepBackTo: null,
    };

    const labelLookup = await fetchLabelLookup();

    const today = new Date();
    today.setHours(0, 0, 0, 0);

    const historyLimit = noticeDayBefore(today, MAX_HISTORY_DAYS);
    const historyLimitIso = formatYmd(historyLimit);

    // A Set keeps insertion order and guarantees each date is fetched once.
    const dateSet = new Set<string>();
    for (let offset = 0; offset < SWEEP_DAYS; offset++) {
      dateSet.add(formatYmd(noticeDayBefore(today, offset)));
    }

    if (
      !progress.lastSweepBackTo ||
      progress.lastSweepBackTo > historyLimitIso
    ) {
      // Backfill in bounded batches so one pass never fetches a whole year.
      const backfillBatchDays = 30;
      const previousStop = parseYmd(progress.lastSweepBackTo);
      const firstDay = previousStop
        ? noticeDayBefore(previousStop, 1)
        : noticeDayBefore(today, SWEEP_DAYS);
      const targetBack = noticeDayBefore(previousStop ?? today, backfillBatchDays);
      const stopBack = targetBack < historyLimit ? historyLimit : targetBack;

      for (
        let cursor = firstDay;
        cursor >= stopBack;
        cursor = noticeDayBefore(cursor, 1)
      ) {
        dateSet.add(formatYmd(cursor));
      }
      progress.lastSweepBackTo = formatYmd(stopBack);
    }

    const dates = [...dateSet];
    const items: IndexItem[] = [];
    const seen = new Set<string>();

    for (const date of dates) {
      try {
        const notices = await fetchNoticesForDate(date);
        for (const notice of notices) {
          if (!notice || (notice.id === undefined && !notice.title)) continue;

          const id = `notice-${date}-${notice.id ?? notice.title}`;
          if (seen.has(id)) continue;
          seen.add(id);

          const labelTitle =
            notice.label_title ??
            (typeof notice.label === "number"
              ? labelLookup.get(notice.label) ?? null
              : null);
          const bodyText = notice.contents
            ? htmlToPlainText(notice.contents)
            : "";

          items.push({
            id,
            text: notice.title?.trim() || `Notice ${notice.id ?? date}`,
            category: "notices",
            content: bodyText.slice(0, 4000),
            dateAdded: noticeDayStartMs(date),
            metadata: {
              noticeId: notice.id,
              date,
              author: notice.staff,
              authorId: notice.staff_id,
              label: labelTitle,
              labelId: notice.label,
              colour: notice.colour,
              timestamp: date,
              entityType: "notice",
              route: "/notices",
              icon: "\ueb24",
            },
            actionId: "notice",
            renderComponentId: "notice",
          });
        }
      } catch (e) {
        // One failing date must not abort the remaining dates.
        console.warn(`[Notices job] Failed to fetch notices for ${date}:`, e);
      }
      await delay(FETCH_DELAY_MS);
    }

    // Track the earliest notice date seen; ISO dates order lexicographically.
    let earliest: string | null = null;
    for (const item of items) {
      const itemDate: unknown = item.metadata?.date;
      if (typeof itemDate !== "string" || !itemDate) continue;
      if (earliest === null || itemDate < earliest) earliest = itemDate;
    }
    if (
      earliest !== null &&
      (!progress.earliestDate || earliest < progress.earliestDate)
    ) {
      progress.earliestDate = earliest;
    }

    await ctx.setProgress(progress);

    const newCount = items.filter((i) => !existingIds.has(i.id)).length;
    console.debug(
      `[Notices job] Indexed ${items.length} notices across ${dates.length} dates (${newCount} new).`,
    );
    return items;
  },
  purge: (items) => {
    // Drop notices older than one year.
    const oneYearAgo = Date.now() - 365 * 24 * 60 * 60 * 1000;
    return items.filter((i) => i.dateAdded >= oneYearAgo);
  },
};
@@ -0,0 +1,49 @@
import type { Job } from "../types";
/**
* Stub job for the passive-observer store.
*
* The passive observer (see `passiveObserver.ts`) writes captured items
* directly into IndexedDB via `getAll`/`put`. We still register a job here
* so the indexer:
* - Creates the `passive` object store on first use.
* - Picks up the right `renderComponentId` when materializing in-memory
* items in `loadAllStoredItems()`.
* - Applies a deterministic boost / purge policy to passive results.
*
* `run()` is a no-op: the passive observer has its own write path so it
* works whether or not an active indexing pass is running.
*/
/**
 * Registry entry for the passive-observer store. Its `run` produces no
 * items; the entry only supplies the store's id, render component, boost
 * policy and purge policy.
 */
export const passiveJob: Job = {
  id: "passive",
  label: "Recently viewed",
  renderComponentId: "passive",
  // Five-minute interval between runs.
  frequency: { type: "interval", ms: 1000 * 60 * 5 },
  boostCriteria: (item, searchTerm) => {
    // Strong penalty without a query; a tiny bonus for typed entities.
    if (!searchTerm) return -60;

    let bonus = 0;
    if (item.metadata?.entityType) bonus += 0.02;
    return bonus;
  },
  run: async () => {
    return [];
  },
  purge: (items) => {
    // Keep entries from the last 30 days, newest first, at most 500 of them.
    const thirtyDaysMs = 30 * 24 * 60 * 60 * 1000;
    const cutoff = Date.now() - thirtyDaysMs;

    const fresh = items.filter((entry) => entry.dateAdded >= cutoff);
    fresh.sort((left, right) => right.dateAdded - left.dateAdded);
    return fresh.slice(0, 500);
  },
};
@@ -0,0 +1,90 @@
import type { IndexItem, Job } from "../types";
import { seqtaFetchPayload } from "../api";
/**
* Indexes the user's external portal entries from `/seqta/student/load/portals`.
*
* Portals are user-facing tiles linking to third-party tools (Mathletics,
* Seesaw, Google Classroom, ...). We index their labels and external URLs
* so users can jump to them via the global search palette without scrolling
* the dashboard.
*/
/**
 * One portal entry from the `/seqta/student/load/portals` payload.
 *
 * `portalsJob` reads `id`, `uuid`, `label`, `url` and `is_power_portal`.
 * `icon`, `priority`, `contents` and `inherit_styles` are declared but not
 * read by the job.
 */
interface PortalPayload {
id: number | string;
label?: string;
// Normalised to an absolute http(s) URL before it is indexed.
url?: string;
// Preferred over `id` when building the item id.
uuid?: string;
icon?: string;
priority?: number;
is_power_portal?: boolean;
contents?: string;
inherit_styles?: boolean;
}
/**
 * Normalises a portal URL to an absolute one.
 *
 * Input is trimmed. A value that already starts with `http://` or
 * `https://` (case-insensitive) is returned as is; anything else has its
 * leading slashes removed and `https://` prepended.
 *
 * @returns The normalised URL, or undefined for missing or blank input.
 */
function normalizePortalUrl(raw: string | undefined): string | undefined {
  const candidate = raw ? raw.trim() : "";
  if (candidate === "") return undefined;

  const hasHttpScheme = /^https?:\/\//i.test(candidate);
  return hasHttpScheme
    ? candidate
    : `https://${candidate.replace(/^\/+/, "")}`;
}
/**
 * Job that indexes the portal tiles returned by
 * `/seqta/student/load/portals`.
 *
 * Each portal becomes one `IndexItem` whose text is the portal label and
 * whose content carries the normalized URL (plus a "Power Portal" marker).
 */
export const portalsJob: Job = {
  id: "portals",
  label: "Portals",
  renderComponentId: "portal",
  frequency: { type: "expiry", afterMs: 1000 * 60 * 60 * 24 * 7 }, // weekly
  boostCriteria: (_item, searchTerm) => {
    // Keep portals out of the way when the palette opens with no query.
    if (!searchTerm) return -50;
    return 0;
  },
  run: async (_ctx) => {
    const payload = await seqtaFetchPayload<PortalPayload[] | null>(
      "/seqta/student/load/portals",
      {},
    );
    if (!Array.isArray(payload)) return [];

    const items: IndexItem[] = [];
    const seen = new Set<string>();
    // One timestamp for the whole run so every portal from a pass matches.
    const indexedAt = Date.now();

    for (const portal of payload) {
      if (!portal) continue;

      // Prefer the uuid, but fall back to `id` when the uuid is missing OR
      // empty. `??` would keep an empty string and yield the id "portal-",
      // collapsing every such portal into a single entry via `seen`.
      const stableId = portal.uuid || portal.id;
      // Covers undefined, JSON `null`, and "" so we never emit ids such as
      // "portal-null" or "portal-".
      if (stableId == null || stableId === "") continue;

      const id = `portal-${stableId}`;
      if (seen.has(id)) continue;
      seen.add(id);

      const url = normalizePortalUrl(portal.url);
      // Label fallback keeps using `id` when present; only uuid-only
      // portals fall back to the uuid instead of "Portal undefined".
      const label = portal.label?.trim() || `Portal ${portal.id ?? stableId}`;

      const contentParts: string[] = [];
      if (url) contentParts.push(url);
      if (portal.is_power_portal) contentParts.push("Power Portal");

      items.push({
        id,
        text: label,
        category: "portals",
        content: contentParts.join(" \u2022 "),
        dateAdded: indexedAt,
        metadata: {
          portalId: portal.id,
          portalUuid: portal.uuid,
          url,
          isPowerPortal: !!portal.is_power_portal,
          entityType: "portal",
          icon: "\ueb01",
        },
        actionId: "portal",
        renderComponentId: "portal",
      });
    }

    console.debug(`[Portals job] Indexed ${items.length} portal entries.`);
    return items;
  },
  // Portals are never aged out; the weekly re-run replaces them.
  purge: (items) => items,
};
@@ -0,0 +1,97 @@
import type { IndexItem, Job } from "../types";
import { seqtaFetchPayload } from "../api";
/**
* Indexes report metadata from `/seqta/student/load/reports`.
*
* Reports are PDFs gated behind SEQTA's authenticated download endpoint, so
* we only index the human-readable metadata (year, term, title, file UUID)
* and a stable hash route so the search palette can deep-link straight
* into the reports page.
*/
interface ReportEntry {
// Identifier used for the item id only when `uuid` is absent; copied to `metadata.reportId`.
id?: number | string;
// Preferred identifier for the item id; copied to `metadata.reportUuid`.
uuid?: string;
// Becomes the item's `text` after trimming; falls back to `Report <id>`.
title?: string;
// Added to the searchable content when present.
description?: string;
// Parsed for `dateAdded`, added to the content, and copied to `metadata.publishedAt`.
date_published?: string;
// Copied to `metadata.createdAt` only.
date_created?: string;
// Added to the content as `Year <year>` when truthy.
year?: number | string;
// Added to the content as `Term <term>` when truthy.
term?: number | string;
// Copied to metadata unchanged.
metaclass?: number;
// Copied to metadata unchanged.
programme?: number;
// Copied to metadata unchanged.
filename?: string;
}
/**
 * Job that indexes report metadata returned by
 * `/seqta/student/load/reports`. One `IndexItem` is produced per distinct
 * report (keyed by uuid, falling back to id); the report file itself is
 * never fetched.
 */
export const reportsJob: Job = {
  id: "reports",
  label: "Reports",
  renderComponentId: "report",
  frequency: { type: "expiry", afterMs: 24 * 60 * 60 * 1000 }, // daily
  boostCriteria: (_item, searchTerm) => (searchTerm ? 0 : -25),
  run: async (_ctx) => {
    const reports = await seqtaFetchPayload<ReportEntry[] | null>(
      "/seqta/student/load/reports",
      {},
    );
    if (!Array.isArray(reports)) return [];

    // Insertion-ordered map doubles as the "already seen" check.
    const byItemId = new Map<string, IndexItem>();

    for (const entry of reports) {
      if (!entry) continue;

      const key = entry.uuid ?? entry.id;
      if (key == null) continue;

      const itemId = `report-${key}`;
      if (byItemId.has(itemId)) continue;

      // Unparseable (NaN) or zero timestamps fall back to "now".
      const publishedMs = entry.date_published
        ? new Date(entry.date_published).getTime()
        : NaN;

      const contentParts = [
        entry.description,
        entry.year ? `Year ${entry.year}` : undefined,
        entry.term ? `Term ${entry.term}` : undefined,
        entry.date_published,
      ].filter((part): part is string => Boolean(part));

      byItemId.set(itemId, {
        id: itemId,
        text: entry.title?.trim() || `Report ${key}`,
        category: "reports",
        content: contentParts.join(" \u2022 "),
        dateAdded: publishedMs || Date.now(),
        metadata: {
          reportId: entry.id,
          reportUuid: entry.uuid,
          year: entry.year,
          term: entry.term,
          metaclass: entry.metaclass,
          programme: entry.programme,
          publishedAt: entry.date_published,
          createdAt: entry.date_created,
          filename: entry.filename,
          entityType: "report",
          route: "/reports",
          icon: "\ueb70",
        },
        actionId: "report",
        renderComponentId: "report",
      });
    }

    const items = [...byItemId.values()];
    console.debug(`[Reports job] Indexed ${items.length} reports.`);
    return items;
  },
  // Nothing is aged out; each run returns the full current list.
  purge: (items) => items,
};
@@ -0,0 +1,583 @@
import type { IndexItem } from "./types";
import { put, getAll } from "./db";
import {
buildIndexItem,
extractTextFromValue,
pickId,
pickTitle,
} from "./extract";
import { isSensitiveSeqtaPath, normalizeSeqtaPath } from "./api";
import { loadAllStoredItems } from "./indexer";
import { loadDynamicItems } from "../utils/dynamicItems";
import { renderComponentMap } from "./renderComponents";
import { jobs } from "./jobs";
/**
* Passive network observer.
*
* Wraps the page's `fetch` (and best-effort `XMLHttpRequest`) so that any
* successful same-origin SEQTA JSON response observed while the user
* browses is opportunistically distilled into IndexItems and persisted to
* the `passive` object store.
*
* Hard guarantees:
* - Only same-origin requests under `/seqta/student/` are considered.
* - The shared sensitive-route denylist (login, save/*, settings, prefs,
* heartbeat, branding, themes, file streams, masquerade, ...) is checked
* before any persistence.
* - Response bodies are read via `Response.clone()` so we never consume the
* body the host page intends to use.
* - Sensitive keys/values are stripped via `redactSensitive` before the
* item is stored.
* - Binary file contents are never indexed (we only work on JSON responses
* served as `text/json` / `application/json`).
*/
// Store name passed to `put`/`getAll`; also reported as `jobId` in the
// "dynamic-items-updated" event.
const STORE_ID = "passive";
// Delay (ms) between the first persisted capture and the cache refresh.
const FLUSH_DEBOUNCE_MS = 1500;
// Upper bound on entities turned into items from a single response.
const MAX_ITEMS_PER_RESPONSE = 50;
// `maxChars` handed to `extractTextFromValue` for each entity.
const MAX_PER_RESPONSE_TEXT_CHARS = 1500;
// Set once by `installPassiveObserver`; guards against double patching.
let installed = false;
// Handle of the pending flush timer, or null when none is scheduled.
let pendingFlush: ReturnType<typeof setTimeout> | null = null;
// True when items were persisted since the last flush ran.
let pendingDirty = false;
/** Reports whether `installPassiveObserver` has already patched this page. */
export function isPassiveObserverInstalled(): boolean {
return installed;
}
/* ------------------------------------------------------------------ */
/* eligibility checks */
/* ------------------------------------------------------------------ */
/**
 * Checks that `url` (absolute or relative to the current page origin)
 * points at this origin and lives under `/seqta/student/`.
 *
 * @returns `false` for foreign origins, other paths, or unparseable URLs.
 */
function isSameOriginSeqtaUrl(url: string): boolean {
  try {
    const { origin, pathname } = new URL(url, location.origin);
    return origin === location.origin && pathname.startsWith("/seqta/student/");
  } catch {
    // Malformed URL (or no `location` available): treat as not eligible.
    return false;
  }
}
/**
 * True when a Content-Type header value mentions "json" in any letter case
 * (e.g. `application/json`, `text/json; charset=utf-8`). Missing or empty
 * headers are never JSON.
 */
function looksLikeJsonContentType(contentType: string | null): boolean {
  return contentType ? /json/i.test(contentType) : false;
}
/* ------------------------------------------------------------------ */
/* item synthesis */
/* ------------------------------------------------------------------ */
interface CapturedContext {
// Normalized request path (output of `normalizeSeqtaPath`).
route: string;
// Parsed request body when it was a string, otherwise undefined.
// Carried along for context; `synthesizeItems` does not read it.
requestBody: unknown;
// Capture time in epoch milliseconds; becomes each item's `dateAdded`.
observedAt: number;
}
/**
 * Derives an item category from a request route.
 *
 *   /seqta/student/load/courses -> "courses"
 *   /seqta/student/load/message -> "message"
 *
 * Routes with no path segment after the `/seqta/student/` prefix map to
 * "passive". The compose-view contact endpoint (`/load/message/people`)
 * maps to "messages-support" so it is treated as a low-priority support
 * record instead of a standalone "people" category.
 */
function categoryFromRoute(route: string): string {
  const segments = route
    .replace(/^\/seqta\/student\//, "")
    .split("/")
    .filter((segment) => segment.length > 0);

  const lastSegment = segments.pop();
  if (lastSegment === undefined) return "passive";

  return route.includes("/load/message/people")
    ? "messages-support"
    : lastSegment.toLowerCase();
}
/**
 * Filter for records from `/seqta/student/load/message/people`, the contact
 * dataset behind the messages compose view.
 *
 * A record qualifies only if it carries a human-readable name: either both
 * a first-name-like and a surname-like field, or a single display-name
 * field. Everything else is structural noise that would otherwise surface
 * with a raw API path as its title.
 */
function isPeopleEntityWorthIndexing(entity: unknown): boolean {
  if (typeof entity !== "object" || entity === null) return false;
  const record = entity as Record<string, unknown>;

  const givenName = stringField(record, [
    "preferredName",
    "preferred",
    "firstname",
    "firstName",
    "first_name",
    "given",
    "givenName",
  ]);
  const familyName = stringField(record, [
    "surname",
    "lastname",
    "lastName",
    "last_name",
    "familyName",
  ]);
  const displayName = stringField(record, ["displayName", "name", "fullName"]);

  const hasBothNameParts = Boolean(givenName && familyName);
  return hasBothNameParts || Boolean(displayName);
}
/**
 * Maps an API route to the SPA page a captured item should link back to.
 *
 * Rules are substring matches checked top to bottom; the first hit wins.
 *
 * @returns The page path, or `undefined` when no rule matches.
 */
function sourcePageForRoute(route: string): string | undefined {
  const rules: ReadonlyArray<readonly [fragment: string, page: string]> = [
    // Also covers `/load/message/people` and `/load/messages`.
    ["/load/message", "/messages"],
    ["/load/courses", "/courses"],
    ["/load/assessments", "/assessments/upcoming"],
    ["/load/notices", "/notices"],
    ["/load/documents", "/documents"],
    ["/folio", "/folios/read"],
    ["/load/forums", "/forums"],
    ["/load/goals", "/goals"],
    ["/load/reports", "/reports"],
    ["/load/portals", "/dashboard"],
  ];
  const hit = rules.find(([fragment]) => route.includes(fragment));
  return hit ? hit[1] : undefined;
}
/**
 * Extracts the list of candidate entities from a response payload.
 *
 * - An array payload is returned as-is.
 * - An object payload yields its first array-valued child among the known
 *   container keys (checked in the order listed below); when none is an
 *   array, the object itself is wrapped as a single entity.
 * - Anything else (primitives, null) yields an empty list.
 */
function entitiesFromPayload(payload: unknown): unknown[] {
  if (Array.isArray(payload)) return payload;
  if (payload === null || typeof payload !== "object") return [];

  const container = payload as Record<string, unknown>;
  const containerKeys = [
    "list",
    "items",
    "messages",
    "tasks",
    "pending",
    "forums",
    "docs",
  ];
  const arrayKey = containerKeys.find((key) => Array.isArray(container[key]));

  return arrayKey === undefined
    ? [payload]
    : (container[arrayKey] as unknown[]);
}
/**
 * Entity fields that are copied verbatim into item metadata so the
 * `passive` action handler has what it needs to deep-link into a SEQTA
 * page. Only these keys are ever hoisted; everything else on the entity
 * stays out of metadata.
 */
const DEEP_LINK_FIELDS = [
  "programme",
  "programmeId",
  "programmeID",
  "metaclass",
  "metaclassId",
  "metaclassID",
  "year",
  "uuid",
  "portalUuid",
  "forum",
  "forumId",
  "assessmentId",
  "assessmentID",
  "messageId",
] as const;

/**
 * Collects the whitelisted deep-link fields present on `entity`.
 *
 * Only finite numbers and non-empty strings are kept; other value types
 * (null, booleans, NaN, nested objects, empty strings) are skipped.
 *
 * @returns A fresh object; empty when `entity` is not a non-null object.
 */
function pickDeepLinkHints(
  entity: unknown,
): Record<string, string | number> {
  const hints: Record<string, string | number> = {};
  if (entity === null || typeof entity !== "object") return hints;

  const record = entity as Record<string, unknown>;
  for (const field of DEEP_LINK_FIELDS) {
    const candidate = record[field];
    switch (typeof candidate) {
      case "number":
        if (Number.isFinite(candidate)) hints[field] = candidate;
        break;
      case "string":
        if (candidate !== "") hints[field] = candidate;
        break;
      default:
        break;
    }
  }
  return hints;
}
/**
 * Returns the first value among `keys` that is a string with non-blank
 * content, trimmed. Keys are tried in the order given.
 *
 * @returns The trimmed string, or `undefined` when no key qualifies.
 */
function stringField(
  entity: Record<string, unknown>,
  keys: readonly string[],
): string | undefined {
  for (const key of keys) {
    const raw = entity[key];
    if (typeof raw !== "string") continue;
    const cleaned = raw.trim();
    if (cleaned) return cleaned;
  }
  return undefined;
}
/**
 * Chooses a display title (max 200 characters) for a captured entity.
 *
 * Resolution order:
 *  1. For `/load/message/people` records: the given and/or family name.
 *  2. Whatever `pickTitle` finds on the entity.
 *  3. The first non-blank line of the extracted content.
 *  4. `fallback`.
 *
 * @param route Normalized request route the entity came from.
 * @param entity The raw entity.
 * @param extractedContent Text already extracted from the entity.
 * @param fallback Title of last resort.
 */
function titleFromEndpoint(
  route: string,
  entity: unknown,
  extractedContent: string,
  fallback: string,
): string {
  const clip = (text: string): string => text.slice(0, 200);

  const isPeopleRecord =
    route.includes("/load/message/people") &&
    Boolean(entity) &&
    typeof entity === "object";

  if (isPeopleRecord) {
    const record = entity as Record<string, unknown>;
    const givenName = stringField(record, [
      "preferredName",
      "preferred",
      "firstname",
      "firstName",
      "first_name",
      "given",
      "givenName",
    ]);
    const familyName = stringField(record, [
      "surname",
      "lastname",
      "lastName",
      "last_name",
      "familyName",
    ]);
    const fullName = [givenName, familyName].filter(Boolean).join(" ").trim();
    if (fullName) return clip(fullName);
  }

  const picked = pickTitle(entity, "");
  if (picked) return clip(picked);

  // Prefer a readable content preview over a raw API path such as
  // `/seqta/student/load/message/people#20`.
  for (const line of extractedContent.split(/\r?\n/)) {
    const trimmed = line.trim();
    if (trimmed) return clip(trimmed);
  }
  return clip(fallback);
}
/**
 * Converts one captured response payload into index items.
 *
 * At most `MAX_ITEMS_PER_RESPONSE` entities are considered. An entity is
 * skipped when it is neither a non-null object nor a non-empty string,
 * when it is a nameless record from the compose-people endpoint, or when
 * it yields no content and no title beyond the positional fallback.
 *
 * @param ctx Route and capture time of the observed response.
 * @param payload The `payload` member of the response envelope.
 * @returns Items ready for persistence; possibly empty.
 */
function synthesizeItems(
  ctx: CapturedContext,
  payload: unknown,
): IndexItem[] {
  const entities = entitiesFromPayload(payload);
  if (entities.length === 0) return [];

  const { route, observedAt } = ctx;
  const category = categoryFromRoute(route);
  const isPeopleSupport = route.includes("/load/message/people");
  const routeSlug = route.replace(/\//g, "_");
  const sourcePage = sourcePageForRoute(route);
  const items: IndexItem[] = [];

  const candidates = entities.slice(0, MAX_ITEMS_PER_RESPONSE);
  for (const [index, entity] of candidates.entries()) {
    const usableObject = typeof entity === "object" && entity !== null;
    const usableString = typeof entity === "string" && entity !== "";
    if (!usableObject && !usableString) continue;

    // Compose-people records without a real human name are dropped so raw
    // entries like `/seqta/student/load/message/people#20` never become
    // titles; the remainder are routed to /messages as support records.
    if (isPeopleSupport && !isPeopleEntityWorthIndexing(entity)) continue;

    const fallbackId = `${route}#${index}`;
    const entityId = pickId(entity, fallbackId);
    const content = extractTextFromValue(entity, {
      maxChars: MAX_PER_RESPONSE_TEXT_CHARS,
    });
    const title = titleFromEndpoint(route, entity, content, fallbackId);

    // Neither content nor a meaningful title: structural noise (e.g. tiny
    // acknowledgement payloads).
    const hasRealTitle = Boolean(title) && title !== fallbackId;
    if (!content && !hasRealTitle) continue;

    const metadata = {
      route,
      source: "passive",
      observedAt: new Date(observedAt).toISOString(),
      entityType: category,
      entityId,
      icon: "\ueb71",
      sourcePage,
      // Flag compose-people records as low priority so the ranker can
      // place them below real messages, assessments, courses, etc.
      ...(isPeopleSupport ? { supportRecord: true, priority: "low" } : {}),
      ...pickDeepLinkHints(entity),
    };

    items.push(
      buildIndexItem({
        id: `passive-${routeSlug}-${entityId}`,
        text: title,
        category,
        contentOverride: content,
        metadata,
        actionId: "passive",
        renderComponentId: "passive",
        dateAdded: observedAt,
      }),
    );
  }

  return items;
}
/* ------------------------------------------------------------------ */
/* persistence */
/* ------------------------------------------------------------------ */
/**
 * Writes captured items to the passive store, one at a time, keyed by item
 * id so a newer observation overwrites an older one. A failed write is
 * logged and does not stop the remaining writes. Afterwards the cache
 * refresh is scheduled.
 */
async function persistItems(items: IndexItem[]): Promise<void> {
  if (!items.length) return;

  const writeOne = async (entry: IndexItem): Promise<void> => {
    try {
      await put(STORE_ID, entry, entry.id);
    } catch (e) {
      console.warn(
        `[Passive Observer] Failed to persist item ${entry.id}:`,
        e,
      );
    }
  };

  // Sequential on purpose: preserves the original write ordering.
  for (const entry of items) {
    await writeOne(entry);
  }

  pendingDirty = true;
  scheduleFlush();
}
/**
 * Arms a single timer that refreshes the dynamic-items cache after
 * `FLUSH_DEBOUNCE_MS`. Calls made while a timer is already pending are
 * ignored; the pending timer picks up their work via `pendingDirty`.
 */
function scheduleFlush() {
  if (pendingFlush) return;

  const onTimer = (): void => {
    pendingFlush = null;
    const hadWork = pendingDirty;
    pendingDirty = false;
    if (hadWork) void flushDynamicItems();
  };

  pendingFlush = setTimeout(onTimer, FLUSH_DEBOUNCE_MS);
}
/**
 * Reloads every stored item, attaches the render component each one
 * should use, hands the result to `loadDynamicItems`, and announces the
 * refresh with a "dynamic-items-updated" window event. Failures are logged
 * and never propagate.
 */
async function flushDynamicItems(): Promise<void> {
  try {
    const stored = await loadAllStoredItems();

    const decorated = stored.map((item) => {
      try {
        // Find the owning job by category key, then by job id, then by the
        // item's render-component id.
        const owningJob =
          jobs[item.category] ||
          Object.values(jobs).find((job) => job.id === item.category) ||
          jobs[item.renderComponentId];

        // The job's component id wins; otherwise use the item's own id.
        // If the map has no entry, keep whatever the item already carried.
        const componentKey = owningJob
          ? owningJob.renderComponentId
          : item.renderComponentId;
        const renderComponent =
          renderComponentMap[componentKey] || item.renderComponent;

        try {
          // JSON round-trip yields a plain copy of the stored record.
          const copy = JSON.parse(JSON.stringify(item));
          copy.renderComponent = renderComponent;
          return copy;
        } catch {
          return { ...item, renderComponent };
        }
      } catch {
        return item;
      }
    });

    loadDynamicItems(decorated);

    const detail = {
      incremental: true,
      jobId: STORE_ID,
      streaming: false,
    };
    window.dispatchEvent(new CustomEvent("dynamic-items-updated", { detail }));
  } catch (e) {
    console.warn("[Passive Observer] Failed to refresh dynamic items:", e);
  }
}
/* ------------------------------------------------------------------ */
/* fetch hook */
/* ------------------------------------------------------------------ */
/**
 * Inspects one observed fetch response and persists any items it yields.
 *
 * The response is ignored unless it is OK, targets a non-sensitive route,
 * is served as JSON, and parses to an envelope whose `status` (when
 * present) is "200" and whose `payload` is neither null nor undefined.
 *
 * @param response Response as returned to the host page; read via a clone.
 * @param url URL the request was made to.
 * @param requestBody Parsed request body, if one was captured.
 */
async function consumeResponse(
  response: Response,
  url: string,
  requestBody: unknown,
): Promise<void> {
  if (!response.ok) return;

  const route = normalizeSeqtaPath(url);
  if (isSensitiveSeqtaPath(route)) return;
  if (!looksLikeJsonContentType(response.headers.get("content-type"))) return;

  let envelope: any;
  try {
    // No `await` precedes this line, so `clone()` runs synchronously when
    // the function is called, before the caller hands the response on.
    envelope = await response.clone().json();
  } catch {
    return;
  }

  if (envelope === null || typeof envelope !== "object") return;
  if (envelope.status && envelope.status !== "200") return;

  const { payload } = envelope;
  if (payload == null) return;

  const context: CapturedContext = {
    route,
    requestBody,
    observedAt: Date.now(),
  };
  const items = synthesizeItems(context, payload);
  if (items.length === 0) return;

  await persistItems(items);
}
/**
 * Parses `value` as JSON when it is a string that contains valid JSON.
 * Non-strings and unparseable strings are returned unchanged.
 */
function tryParseJson(value: unknown): unknown {
  if (typeof value === "string") {
    try {
      return JSON.parse(value);
    } catch {
      // Not JSON: fall through and hand the raw string back.
    }
  }
  return value;
}
/**
 * Reads the JSON body of a completed XHR without ever throwing.
 *
 * `responseText` is only readable when `responseType` is "" or "text";
 * for any other type the getter throws. Requests made with
 * `responseType = "json"` expose the parsed body on `response` instead, so
 * that case is handled separately. Binary and document responses are
 * ignored.
 *
 * @returns The parsed body, or `undefined` when none can be obtained.
 */
function readXhrJsonBody(xhr: XMLHttpRequest): unknown {
  try {
    switch (xhr.responseType) {
      case "":
      case "text":
        return JSON.parse(xhr.responseText);
      case "json":
        // `response` is the very object the host page receives; round-trip
        // it so indexing always works on a private copy.
        return JSON.parse(JSON.stringify(xhr.response));
      default:
        return undefined;
    }
  } catch {
    return undefined;
  }
}

/**
 * Installs the passive observer once. Subsequent calls are no-ops.
 *
 * Patches `window.fetch` and, best-effort, `XMLHttpRequest.prototype.open`
 * / `send`. Eligible same-origin SEQTA JSON responses are turned into
 * index items and persisted. The hooks always forward to the originals and
 * swallow their own errors so the host page is never affected.
 *
 * Designed to be called from the global-search plugin bootstrap after
 * `mountSearchBar` succeeds so the observer is only active when the
 * plugin itself is enabled.
 */
export function installPassiveObserver(): void {
  if (installed) return;
  if (typeof window === "undefined" || typeof window.fetch !== "function") {
    return;
  }
  installed = true;

  const originalFetch = window.fetch.bind(window);
  window.fetch = async function patchedFetch(
    input: RequestInfo | URL,
    init?: RequestInit,
  ): Promise<Response> {
    const response = await originalFetch(input, init);
    try {
      const url =
        typeof input === "string"
          ? input
          : input instanceof URL
            ? input.toString()
            : input.url;
      if (isSameOriginSeqtaUrl(url)) {
        const body = init?.body;
        const parsed =
          body && typeof body === "string" ? tryParseJson(body) : undefined;
        // Fire-and-forget: never block the host page on indexing work.
        void consumeResponse(response, url, parsed);
      }
    } catch (e) {
      // Never let observer errors bubble up to the host page.
      console.debug("[Passive Observer] fetch hook error:", e);
    }
    return response;
  };

  // Best-effort XHR hook for the rare callers that bypass fetch.
  const ProtoXhr = (window as any).XMLHttpRequest?.prototype;
  if (ProtoXhr) {
    const originalOpen = ProtoXhr.open;
    const originalSend = ProtoXhr.send;

    ProtoXhr.open = function patchedOpen(
      this: XMLHttpRequest,
      method: string,
      url: string | URL,
      ...rest: any[]
    ) {
      try {
        // `open()` also accepts URL objects; remember the string form so
        // later string-based route handling sees a consistent type.
        (this as any).__bsplusUrl = typeof url === "string" ? url : String(url);
        (this as any).__bsplusMethod = method;
      } catch {
        /* ignore */
      }
      return originalOpen.call(this, method, url, ...rest);
    };

    ProtoXhr.send = function patchedSend(
      this: XMLHttpRequest,
      body?: any,
    ) {
      try {
        const url = (this as any).__bsplusUrl as string | undefined;
        if (url && isSameOriginSeqtaUrl(url)) {
          const parsed =
            typeof body === "string" ? tryParseJson(body) : undefined;
          this.addEventListener(
            "load",
            () => {
              try {
                if (this.status < 200 || this.status >= 300) return;
                const ct = this.getResponseHeader("content-type");
                if (!looksLikeJsonContentType(ct)) return;
                const route = normalizeSeqtaPath(url);
                if (isSensitiveSeqtaPath(route)) return;

                const json: any = readXhrJsonBody(this);
                if (!json || typeof json !== "object") return;
                if (json.status && json.status !== "200") return;
                const payload = json.payload;
                if (payload === undefined || payload === null) return;

                const items = synthesizeItems(
                  {
                    route,
                    requestBody: parsed,
                    observedAt: Date.now(),
                  },
                  payload,
                );
                if (items.length > 0) {
                  void persistItems(items);
                }
              } catch (e) {
                console.debug("[Passive Observer] xhr load error:", e);
              }
            },
            // A reused XHR object gets a fresh listener on every send();
            // `once` stops finished listeners from piling up and
            // re-processing later responses.
            { once: true },
          );
        }
      } catch {
        /* ignore */
      }
      return originalSend.call(this, body);
    };
  }

  console.debug("[Passive Observer] Installed.");
}
/**
 * Reads everything currently held in the passive store. Intended mainly
 * for diagnostics via `window.globalSearchDebug`.
 *
 * @returns The stored items, or an empty list when the read fails.
 */
export async function getStoredPassiveItems(): Promise<IndexItem[]> {
  let stored: IndexItem[] = [];
  try {
    stored = (await getAll(STORE_ID)) as IndexItem[];
  } catch {
    // Diagnostics helper: a failed read simply reports no items.
  }
  return stored;
}
@@ -2,10 +2,23 @@ import type { SvelteComponent } from "svelte";
import AssessmentItem from "../components/items/AssessmentItem.svelte"; import AssessmentItem from "../components/items/AssessmentItem.svelte";
import ForumItem from "../components/items/ForumItem.svelte"; import ForumItem from "../components/items/ForumItem.svelte";
import SubjectItem from "../components/items/SubjectItem.svelte"; import SubjectItem from "../components/items/SubjectItem.svelte";
import GenericItem from "../components/items/GenericItem.svelte";
export const renderComponentMap: Record<string, typeof SvelteComponent> = { export const renderComponentMap: Record<string, typeof SvelteComponent> = {
assessment: AssessmentItem as unknown as typeof SvelteComponent, assessment: AssessmentItem as unknown as typeof SvelteComponent,
message: AssessmentItem as unknown as typeof SvelteComponent, message: AssessmentItem as unknown as typeof SvelteComponent,
forum: ForumItem as unknown as typeof SvelteComponent, forum: ForumItem as unknown as typeof SvelteComponent,
subject: SubjectItem as unknown as typeof SvelteComponent, subject: SubjectItem as unknown as typeof SvelteComponent,
// New categories share a generic, category-aware row component to keep
// the palette consistent without bespoke layouts for every job. The
// component reads `item.metadata.icon` and the `category` to pick a
// sensible default visual treatment.
course: GenericItem as unknown as typeof SvelteComponent,
notice: GenericItem as unknown as typeof SvelteComponent,
document: GenericItem as unknown as typeof SvelteComponent,
folio: GenericItem as unknown as typeof SvelteComponent,
portal: GenericItem as unknown as typeof SvelteComponent,
report: GenericItem as unknown as typeof SvelteComponent,
goal: GenericItem as unknown as typeof SvelteComponent,
passive: GenericItem as unknown as typeof SvelteComponent,
}; };
@@ -0,0 +1,112 @@
import { SCHEMA_VERSION_KEY } from "./schemaVersion";
/**
* Hard-reset of all global-search persistence.
*
* This module is intentionally dependency-free (no imports from `db.ts`,
* the worker manager, embeddia, or any heavy bundle) so it can be
* statically imported from:
*
* - The always-loaded plugin shell (`lazy.ts`) for the manual
* "Reset Index" settings button. Statically importing means the button
* keeps working across extension updates — there's no chunk hash to
* chase via dynamic import, which previously produced
* `Failed to fetch dynamically imported module: .../assets/<chunk>.js`
* when an older settings page tried to load a chunk that the new build
* had already replaced.
*
* - The version-check path (`utils/versionCheck.ts`) for the auto-reset
* that fires whenever the extension's manifest version changes.
*
* The function:
* 1. Notifies in-process modules to drop in-memory caches and any open
* IndexedDB connections via custom DOM events (best effort).
* 2. Deletes the structured `betterseqta-index` and the vector
* `embeddiaDB` databases.
* 3. Clears version-tracking localStorage keys so the next indexing
* pass treats the world as fresh.
*
* It never throws on partial failure: each step is wrapped in try/catch
* so a stuck connection on one DB doesn't block the other.
*/
// Name of the structured index database deleted on reset.
const STRUCTURED_DB = "betterseqta-index";
// Name of the vector (embedding) database deleted on reset.
const VECTOR_DB = "embeddiaDB";
// localStorage key removed on reset, alongside SCHEMA_VERSION_KEY.
const STRUCTURED_VERSION_KEY = "betterseqta-index-version";
/**
 * Requests deletion of the IndexedDB database `name`.
 *
 * The returned promise always resolves and never rejects: on success, on
 * error (logged), when the delete cannot even be started (logged), or,
 * if the request is blocked, after one retry issued 600 ms later,
 * whatever that retry's outcome.
 */
function deleteIndexedDb(name: string): Promise<void> {
  return new Promise<void>((resolve) => {
    let settled = false;
    const settle = (): void => {
      if (!settled) {
        settled = true;
        resolve();
      }
    };

    // Second and final attempt; every outcome settles the promise.
    const retryOnce = (): void => {
      try {
        const second = indexedDB.deleteDatabase(name);
        second.onsuccess = () => settle();
        second.onerror = () => settle();
        second.onblocked = () => settle();
      } catch {
        settle();
      }
    };

    let request: IDBOpenDBRequest;
    try {
      request = indexedDB.deleteDatabase(name);
    } catch (e) {
      console.warn(`[Reset] Could not start delete of ${name}:`, e);
      settle();
      return;
    }

    request.onsuccess = () => settle();
    request.onerror = () => {
      console.warn(`[Reset] Error deleting ${name}:`, request.error);
      settle();
    };
    request.onblocked = () => {
      // Some connection to the database is still open. Wait briefly and
      // retry, then resolve regardless so the caller never hangs.
      console.warn(
        `[Reset] Delete of ${name} blocked; will retry then resolve.`,
      );
      setTimeout(retryOnce, 600);
    };
  });
}
/**
 * Wipes all global-search persistence. Never throws.
 *
 * Steps, in order:
 *  1. Broadcast three window events so in-process listeners can react.
 *  2. Wait 150 ms.
 *  3. Delete the structured and vector databases.
 *  4. Remove the two version-tracking localStorage keys.
 */
export async function resetSearchIndexes(): Promise<void> {
  const resetEvents = [
    "betterseqta-clear-search-cache",
    "betterseqta-clear-embedding-cache",
    "betterseqta-reset-search-index",
  ];
  try {
    if (typeof window !== "undefined") {
      for (const eventName of resetEvents) {
        window.dispatchEvent(new CustomEvent(eventName));
      }
    }
  } catch {
    /* ignore — events are best-effort */
  }

  // Give listeners a moment to close open IDB connections; otherwise the
  // delete requests below come back with `onblocked`.
  await new Promise<void>((resume) => {
    setTimeout(resume, 150);
  });

  await Promise.allSettled(
    [STRUCTURED_DB, VECTOR_DB].map((dbName) => deleteIndexedDb(dbName)),
  );

  try {
    for (const key of [STRUCTURED_VERSION_KEY, SCHEMA_VERSION_KEY]) {
      localStorage.removeItem(key);
    }
  } catch {
    /* ignore */
  }
}
@@ -0,0 +1,16 @@
/**
* Index schema version. Bump whenever the IndexItem shape, category set,
* or text construction changes in a way that should invalidate previously
* stored items (and their embeddings).
*
* On mismatch, both the structured IndexedDB store and the embeddiaDB are
* wiped before the next indexing pass so we don't serve stale results.
*
* Kept in its own file (with no imports) so very lightweight callers — the
* always-loaded plugin shell in `lazy.ts`, the version-check path — can
* pull it in without bringing the heavy indexer/worker bundle along.
*/
export const INDEX_SCHEMA_VERSION = 6;
/** Key used to track the schema version a previous run wrote out. */
export const SCHEMA_VERSION_KEY = "bsq-index-schema-version";
@@ -0,0 +1,328 @@
import {
isSensitiveKey,
looksLikeSecretValue,
redactSensitive,
extractTextFromValue,
pickTitle,
pickId,
buildIndexItem,
} from "./extract";
import { isSensitiveSeqtaPath, normalizeSeqtaPath } from "./api";
import {
coursesPayload,
documentsPayload,
folioEntryPayload,
noticesPayload,
portalsPayload,
settingsPayload,
subjectsListPayload,
} from "./__fixtures__/seqtaResponses";
/**
* Lightweight in-process self-tests for the global-search overhaul.
*
* The repository does not (yet) ship with a test runner, so we instead
* expose a deterministic suite of assertions over the pure helpers that
* back active jobs and the passive observer. This is intentionally
* dependency-free so it can run inside the extension page (`window.
* globalSearchDebug.runSelfTests()`) and from any future Vitest harness
* without modification.
*/
interface TestCase {
// Label reported for this case when it fails.
name: string;
// Test body; a thrown error or rejected promise counts as a failure.
run: () => void | Promise<void>;
}
class AssertionError extends Error {
constructor(message: string) {
super(message);
// Set explicitly: the self-test report formats failures as
// `${e.name}: ${e.message}`, so this is what appears in the output.
this.name = "AssertionError";
}
}
/**
 * Throws an `AssertionError` carrying `message` when `condition` is falsy;
 * narrows `condition` to truthy for the type checker otherwise.
 */
function assert(condition: unknown, message: string): asserts condition {
  if (condition) return;
  throw new AssertionError(message);
}
/**
 * Asserts strict (`===`) equality. On mismatch throws an `AssertionError`
 * whose message shows both values JSON-encoded, prefixed with `label`.
 */
function assertEqual<T>(actual: T, expected: T, label: string) {
  if (actual === expected) return;
  const wanted = JSON.stringify(expected);
  const received = JSON.stringify(actual);
  throw new AssertionError(
    `${label}: expected ${wanted} but got ${received}`,
  );
}
/**
 * Asserts that `needle` occurs somewhere in `haystack`; throws an
 * `AssertionError` prefixed with `label` when it does not.
 */
function assertContains(haystack: string, needle: string, label: string) {
  const found = haystack.includes(needle);
  if (found) return;
  throw new AssertionError(
    `${label}: expected "${haystack}" to contain "${needle}"`,
  );
}
/**
 * Asserts that `needle` does not occur in `haystack`; throws an
 * `AssertionError` prefixed with `label` when it does.
 */
function assertNotContains(haystack: string, needle: string, label: string) {
  const absent = !haystack.includes(needle);
  if (absent) return;
  throw new AssertionError(
    `${label}: expected "${haystack}" NOT to contain "${needle}"`,
  );
}
const cases: TestCase[] = [
{
name: "normalizeSeqtaPath strips query tokens",
run: () => {
assertEqual(
normalizeSeqtaPath("/seqta/student/load/messages?mokx3qef"),
"/seqta/student/load/messages",
"trailing token",
);
assertEqual(
normalizeSeqtaPath(
"https://learn.example.com/seqta/student/load/courses?abc123",
),
"/seqta/student/load/courses",
"absolute URL",
);
},
},
{
name: "isSensitiveSeqtaPath catches credential routes",
run: () => {
assert(
isSensitiveSeqtaPath("/seqta/student/login?xyz"),
"login is sensitive",
);
assert(
isSensitiveSeqtaPath("/seqta/student/save/message"),
"save/* is sensitive",
);
assert(
isSensitiveSeqtaPath("/seqta/student/load/settings"),
"settings is sensitive",
);
assert(
isSensitiveSeqtaPath("/seqta/student/load/prefs?z=1"),
"prefs is sensitive",
);
assert(
isSensitiveSeqtaPath("/seqta/ta/masquerade"),
"masquerade is sensitive",
);
assert(
!isSensitiveSeqtaPath("/seqta/student/load/messages"),
"messages is NOT sensitive",
);
assert(
!isSensitiveSeqtaPath("/seqta/student/load/courses"),
"courses is NOT sensitive",
);
},
},
{
name: "isSensitiveKey covers the credential vocabulary",
run: () => {
for (const key of [
"password",
"Password",
"client_secret",
"apiKey",
"X-API-Token",
"jwtSession",
"oauth_signature",
]) {
assert(isSensitiveKey(key), `expected ${key} to be sensitive`);
}
for (const key of ["title", "subject", "uuid", "metaclass"]) {
assert(!isSensitiveKey(key), `expected ${key} to be safe`);
}
},
},
{
name: "looksLikeSecretValue catches token-shaped strings",
run: () => {
assert(
looksLikeSecretValue(
"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjMifQ.abc123def456",
),
"JWT looks secret",
);
assert(
looksLikeSecretValue("a".repeat(40) + "b".repeat(40)),
"long base64-ish string looks secret",
);
assert(
!looksLikeSecretValue("Hello world"),
"short readable text is safe",
);
assert(
!looksLikeSecretValue("https://example.com/foo/bar"),
"URLs are not secrets",
);
assert(
!looksLikeSecretValue("3162189c-2052-4f83-ad83-a66c57460ea2"),
"UUIDs are useful and not secret",
);
},
},
{
name: "redactSensitive scrubs settings payloads",
run: () => {
const cleaned = redactSensitive(settingsPayload);
const json = JSON.stringify(cleaned);
assertNotContains(json, "global.dropbox.api.key", "dropbox key dropped");
assertNotContains(json, "xxx-do-not-index", "secret value dropped");
},
},
{
name: "extractTextFromValue distills HTML and skips secrets",
run: () => {
const text = extractTextFromValue({
title: "Hello",
body: "<p>Some <strong>HTML</strong> body.</p>",
password: "should-not-appear",
nested: { token: "leak-me-please" },
});
assertContains(text, "Hello", "title preserved");
assertContains(text, "HTML body", "html flattened");
assertNotContains(text, "should-not-appear", "password redacted");
assertNotContains(text, "leak-me-please", "nested token redacted");
},
},
{
name: "pickTitle / pickId prefer common SEQTA fields",
run: () => {
assertEqual(
pickTitle({ title: "Hello", name: "Other" }),
"Hello",
"title wins over name",
);
assertEqual(
pickTitle({ filename: "doc.pdf" }),
"doc.pdf",
"filename fallback",
);
assertEqual(pickId({ id: 42 }), "42", "numeric id stringified");
assertEqual(pickId({ uuid: "abc" }), "abc", "uuid id");
},
},
{
name: "buildIndexItem produces redacted, well-formed records",
run: () => {
const item = buildIndexItem({
id: "x-1",
text: "Test",
category: "passive",
rawForContent: {
title: "Test",
body: "<p>Hello</p>",
token: "should-be-stripped",
},
metadata: { route: "/seqta/student/load/whatever", apiKey: "drop" },
actionId: "passive",
renderComponentId: "passive",
});
assertEqual(item.id, "x-1", "id propagated");
assertContains(item.content, "Hello", "html distilled");
assertNotContains(item.content, "should-be-stripped", "token stripped");
assert(
!("apiKey" in (item.metadata as Record<string, unknown>)),
"apiKey metadata stripped",
);
assertEqual(item.category, "passive", "category passes through");
},
},
{
name: "courses fixture flattens lesson HTML",
run: () => {
// Verify that the structural shape we depend on still matches.
assert(Array.isArray(coursesPayload.w), "lesson grid present");
const lessonHtml = (coursesPayload.w[0]?.[1] as { l?: string })?.l ?? "";
assertContains(lessonHtml, "ed.ted.com", "lesson html link present");
},
},
{
name: "subjects fixture exposes programme/metaclass",
run: () => {
const subject = subjectsListPayload[0]?.subjects[0];
assert(subject, "fixture has at least one subject");
assert(
Number.isFinite(subject!.programme) &&
Number.isFinite(subject!.metaclass),
"programme & metaclass numeric",
);
},
},
{
name: "documents fixture exposes uuid + filename",
run: () => {
const doc = documentsPayload[0]?.docs[0];
assert(doc?.uuid && doc?.filename, "uuid + filename present");
},
},
{
name: "notices fixture is HTML-bearing",
run: () => {
assertContains(
noticesPayload[0]?.contents ?? "",
"<p>",
"notice html present",
);
},
},
{
name: "portals fixture has external url",
run: () => {
assert(portalsPayload[0]?.url?.includes("mathletics"), "portal url");
},
},
{
name: "folio entry contents passes html-flattening",
run: () => {
const distilled = extractTextFromValue(folioEntryPayload, {
maxChars: 4000,
});
assertContains(distilled, "reflection", "folio body extracted");
},
},
];
/** Summary of a single self-test run. */
export interface SelfTestReport {
  /** Count of cases that completed without throwing. */
  passed: number;
  /** Count of cases that threw. */
  failed: number;
  /** One record per failed case: the case name and the stringified error. */
  failures: { name: string; error: string }[];
}
/**
 * Executes each entry of `cases` in order and resolves with a tally.
 *
 * A case is counted as passed when awaiting its `run()` completes, and as
 * failed when it throws. Thrown values are converted to strings and recorded
 * instead of being re-thrown, so a failing case never rejects the returned
 * promise. The tally is also logged: `console.warn` (with the failure list)
 * when at least one case failed, `console.info` otherwise.
 *
 * Intended to be called from `window.globalSearchDebug.runSelfTests()` by
 * maintainers validating the indexing pipeline against a real SEQTA tab.
 *
 * @returns The pass/fail counts and the recorded failures.
 */
export async function runGlobalSearchSelfTests(): Promise<SelfTestReport> {
  const failures: SelfTestReport["failures"] = [];
  let passed = 0;

  for (const testCase of cases) {
    try {
      await testCase.run();
      passed += 1;
    } catch (thrown) {
      // Errors keep their name + message; anything else is stringified as-is.
      let error: string;
      if (thrown instanceof Error) {
        error = `${thrown.name}: ${thrown.message}`;
      } else {
        error = String(thrown);
      }
      failures.push({ name: testCase.name, error });
    }
  }

  const failed = failures.length;
  if (failed === 0) {
    console.info(`[Global Search Self-Tests] All ${passed} cases passed`);
  } else {
    console.warn(
      `[Global Search Self-Tests] ${failed} failed / ${passed} passed`,
      failures,
    );
  }

  return { passed, failed, failures };
}
@@ -2,6 +2,32 @@ import type { IndexItem } from "../indexing/types";
import type { CombinedResult } from "../core/types"; import type { CombinedResult } from "../core/types";
import { searchVectors, type VectorSearchResult } from "./vector/vectorSearch"; import { searchVectors, type VectorSearchResult } from "./vector/vectorSearch";
import { jobs } from "../indexing/jobs"; import { jobs } from "../indexing/jobs";
import {
getLexicalMatchQuality,
isStrongLexicalMatch,
STRONG_LEXICAL_THRESHOLD,
} from "./lexicalMatch";
/**
 * Type guard that narrows a result item to `IndexItem`.
 *
 * The only runtime check is that the item's `dateAdded` property is not
 * `undefined`.
 */
function isIndexItem(item: CombinedResult["item"]): item is IndexItem {
  const { dateAdded } = item as IndexItem;
  return dateAdded !== undefined;
}
/**
 * Decides whether a query is too short or too sparse for vector recall to be
 * trusted, in which case vector-only results should not be promoted above
 * lexical ones.
 *
 * A query is weak when it is shorter than 8 characters, or when fewer than
 * two of its whitespace-separated words are at least 3 characters long.
 *
 * This is deliberately stricter than the absolute >2 character cut-off used
 * for `hybridSearch`: recall on short single-token queries is noisy enough
 * that lexical results should stay dominant.
 *
 * @param trimmedQuery The user's query, already trimmed by the caller.
 * @returns `true` when the query should be treated as weak.
 */
function isWeakSemanticQuery(trimmedQuery: string): boolean {
  if (trimmedQuery.length < 8) {
    return true;
  }

  // Count words of 3+ characters; the query stops being weak at the second.
  let meaningfulCount = 0;
  for (const word of trimmedQuery.split(/\s+/)) {
    if (word.length < 3) continue;
    meaningfulCount += 1;
    if (meaningfulCount >= 2) {
      return false;
    }
  }
  return true;
}
/** /**
* Hybrid Search Implementation * Hybrid Search Implementation
@@ -36,14 +62,6 @@ const DEFAULT_OPTIONS: Required<HybridSearchOptions> = {
recencyWeight: 0.1, recencyWeight: 0.1,
}; };
/**
* Normalizes a score to 0-1 range
*/
function normalizeScore(score: number, min: number, max: number): number {
if (max === min) return 0.5;
return Math.max(0, Math.min(1, (score - min) / (max - min)));
}
/** /**
* Calculates recency boost based on item age * Calculates recency boost based on item age
*/ */
@@ -55,28 +73,56 @@ function calculateRecencyBoost(item: IndexItem, now: number): number {
} }
/** /**
* Calculates popularity boost (can be extended with click tracking, etc.) * Category-aware popularity / structure boost.
*
* High-confidence curated content (assignments, courses, subjects, forums)
* sits above noisier sources (notices, documents) which sit above the
* passive store. This keeps the most actionable hits at the top while
* still surfacing wide-recall semantic matches when relevant.
*/ */
function calculatePopularityBoost(item: IndexItem): number { function calculatePopularityBoost(item: IndexItem): number {
// For now, boost based on category and metadata
let boost = 0; let boost = 0;
// Boost assignments/assessments switch (item.category) {
if (item.category === "assignments") { case "assignments":
boost += 0.1; boost += 0.12;
break;
case "subjects":
case "courses":
boost += 0.08;
break;
case "forums":
case "messages":
boost += 0.06;
break;
case "notices":
case "folio":
case "reports":
case "goals":
boost += 0.04;
break;
case "documents":
boost += 0.03;
break;
case "portals":
boost += 0.02;
break;
case "passive":
boost -= 0.1;
break;
case "messages-support":
boost -= 0.18;
break;
} }
// Boost upcoming items if (item.metadata?.isUpcoming) boost += 0.12;
if (item.metadata?.isUpcoming) { if (item.metadata?.subjectCode) boost += 0.04;
boost += 0.15; if (item.metadata?.entityType === "course") boost += 0.02;
} if (item.metadata?.source === "passive") boost -= 0.08;
if (item.metadata?.supportRecord) boost -= 0.12;
if (item.metadata?.priority === "low") boost -= 0.05;
// Boost items with subject codes (more structured) return Math.max(-0.2, Math.min(boost, 0.3));
if (item.metadata?.subjectCode) {
boost += 0.05;
}
return Math.min(boost, 0.3); // Cap at 0.3
} }
/** /**
@@ -98,10 +144,6 @@ export async function hybridSearch(
// Limit BM25 results to top K // Limit BM25 results to top K
const topBm25Results = bm25Results.slice(0, opts.bm25TopK); const topBm25Results = bm25Results.slice(0, opts.bm25TopK);
// Get vector search results for reranking
// We'll search the full index and then filter to our BM25 results
let vectorResults: VectorSearchResult[] = [];
if (trimmedQuery.length > 2) { if (trimmedQuery.length > 2) {
try { try {
// Get more vector results than BM25 results to ensure coverage // Get more vector results than BM25 results to ensure coverage
@@ -121,59 +163,57 @@ export async function hybridSearch(
// Now rerank BM25 results with vector scores // Now rerank BM25 results with vector scores
const now = Date.now(); const now = Date.now();
const rerankedResults = topBm25Results.map(result => { const rerankedResults: CombinedResult[] = topBm25Results.map(result => {
const item = result.item; const item = result.item;
// Normalize BM25 score to 0-1 // Static command items don't have dateAdded/metadata/category to score
// Fuse.js scores: lower is better (0 = perfect match) // against — pass them through untouched so palette commands still
// We need to invert: higher score = better match // surface correctly.
// Result.score is typically 0-100, where higher = better if (!isIndexItem(item)) {
// So we normalize it to 0-1 return result;
}
// Normalize BM25 score to 0-1.
// Result.score is typically 0-100, where higher = better, so we
// clamp into the 0..1 range.
const normalizedBm25Score = Math.max(0, Math.min(1, result.score / 100)); const normalizedBm25Score = Math.max(0, Math.min(1, result.score / 100));
// Get vector similarity (0-1, already normalized) // Get vector similarity (0-1, already normalized). If item wasn't in
// If item wasn't in vector results, use a default low score // vector results, use a default mid-low score.
const vectorSimilarity = vectorMap.get(item.id) || 0.3; // Default to 0.3 if not found const vectorSimilarity = vectorMap.get(item.id) || 0.3;
// Calculate recency boost (0-1 range)
const recencyBoost = opts.recencyBoost const recencyBoost = opts.recencyBoost
? calculateRecencyBoost(item, now) * opts.recencyWeight ? calculateRecencyBoost(item, now) * opts.recencyWeight
: 0; : 0;
// Calculate popularity boost (0-1 range)
const popularityBoost = calculatePopularityBoost(item); const popularityBoost = calculatePopularityBoost(item);
// Apply job-specific boost if available
const job = jobs[item.category]; const job = jobs[item.category];
let jobBoost = 0; let jobBoost = 0;
if (job && typeof job.boostCriteria === 'function') { if (job && typeof job.boostCriteria === 'function') {
const boost = job.boostCriteria(item, trimmedQuery); const boost = job.boostCriteria(item, trimmedQuery);
if (boost) { if (boost) {
jobBoost = boost / 100; // Normalize boost to 0-1 jobBoost = boost / 100;
} }
} }
// Combine scores using weighted average // Lexical guardrail: a strong title match is worth a meaningful
// BM25 and vector are weighted, boosts are additive // bonus so vector reranking can't quietly drop an exact assessment
// title between adjacent keystrokes. Scale is roughly 0..0.18.
const lexicalQuality = getLexicalMatchQuality(item, trimmedQuery);
const lexicalBonus = lexicalQuality > 0 ? lexicalQuality / 80 : 0;
const hybridScore = const hybridScore =
(normalizedBm25Score * opts.bm25Weight) + (normalizedBm25Score * opts.bm25Weight) +
(vectorSimilarity * opts.vectorWeight) + (vectorSimilarity * opts.vectorWeight) +
recencyBoost + recencyBoost +
popularityBoost + popularityBoost +
jobBoost; jobBoost +
lexicalBonus;
return { return {
...result, ...result,
score: hybridScore * 100, // Scale back to 0-100 for consistency score: hybridScore * 100,
// Store component scores for debugging (optional, can be removed in production)
_hybridScores: {
bm25: normalizedBm25Score,
vector: vectorSimilarity,
recency: recencyBoost,
popularity: popularityBoost,
jobBoost: jobBoost,
final: hybridScore,
},
}; };
}); });
@@ -200,7 +240,7 @@ export async function hybridSearch(
export async function hybridSearchWithExpansion( export async function hybridSearchWithExpansion(
bm25Results: CombinedResult[], bm25Results: CombinedResult[],
query: string, query: string,
allItems: IndexItem[], _allItems: IndexItem[],
options: HybridSearchOptions = {}, options: HybridSearchOptions = {},
): Promise<CombinedResult[]> { ): Promise<CombinedResult[]> {
const opts = { ...DEFAULT_OPTIONS, ...options }; const opts = { ...DEFAULT_OPTIONS, ...options };
@@ -214,6 +254,13 @@ export async function hybridSearchWithExpansion(
return rerankedBm25; return rerankedBm25;
} }
// For short / single-token queries vector expansion brings in too much
// noise (and is the main reason results "flicker" between adjacent
// keystrokes). Keep semantic recall for longer queries.
if (isWeakSemanticQuery(trimmedQuery)) {
return rerankedBm25.slice(0, opts.finalLimit);
}
// Get vector search results // Get vector search results
let vectorResults: VectorSearchResult[] = []; let vectorResults: VectorSearchResult[] = [];
try { try {
@@ -229,8 +276,26 @@ export async function hybridSearchWithExpansion(
const now = Date.now(); const now = Date.now();
// Compute the cap applied to vector-only results. Despite its name,
// `strongLexicalFloor` ends up holding the HIGHEST score among the strong
// lexical matches in the reranked BM25 list. Vector-only results are then
// clamped to at most one point below that score, so they can never outrank
// the best strong lexical match.
let strongLexicalFloor = -Infinity;
for (const r of rerankedBm25) {
if (isIndexItem(r.item) && isStrongLexicalMatch(r.item, trimmedQuery)) {
if (r.score > strongLexicalFloor) {
strongLexicalFloor = r.score;
}
}
}
// Vector-only results may sit at most at this score:
const vectorOnlyCeiling = strongLexicalFloor === -Infinity
? Infinity
: strongLexicalFloor - 1;
vectorResults.forEach(v => { vectorResults.forEach(v => {
if (!bm25Ids.has(v.object.id)) { if (bm25Ids.has(v.object.id)) return;
// This is a semantic match that BM25 missed // This is a semantic match that BM25 missed
const item = v.object; const item = v.object;
@@ -240,8 +305,24 @@ export async function hybridSearchWithExpansion(
: 0; : 0;
const popularityBoost = calculatePopularityBoost(item); const popularityBoost = calculatePopularityBoost(item);
// Penalize vector-only matches that have no lexical content overlap.
// Vector recall on its own is fuzzy — without lexical confirmation we
// should rank these below curated keyword hits.
const lexicalQuality = getLexicalMatchQuality(item, trimmedQuery);
let vectorOnlyPenalty = 0;
if (lexicalQuality === 0) {
vectorOnlyPenalty -= 0.18;
}
// Passive captures without lexical confirmation are demoted further —
// they're often raw API records that should never lead the result list.
if (item.category === "passive" && lexicalQuality < STRONG_LEXICAL_THRESHOLD) {
vectorOnlyPenalty -= 0.12;
}
// Vector-only results get lower base score but high vector similarity // Vector-only results get lower base score but high vector similarity
const vectorScore = v.similarity * opts.vectorWeight + recencyBoost + popularityBoost; const vectorScore =
v.similarity * opts.vectorWeight + recencyBoost + popularityBoost + vectorOnlyPenalty;
// Apply job-specific boost if available // Apply job-specific boost if available
const job = jobs[item.category]; const job = jobs[item.category];
@@ -253,20 +334,15 @@ export async function hybridSearchWithExpansion(
} }
} }
let finalScore = (vectorScore + jobBoost) * 100;
if (finalScore > vectorOnlyCeiling) finalScore = vectorOnlyCeiling;
vectorOnlyResults.push({ vectorOnlyResults.push({
id: item.id, id: item.id,
type: "dynamic" as const, type: "dynamic" as const,
score: (vectorScore + jobBoost) * 100, score: finalScore,
item, item,
_hybridScores: {
bm25: 0,
vector: v.similarity,
recency: recencyBoost,
popularity: popularityBoost,
final: vectorScore + jobBoost,
},
}); });
}
}); });
// Combine reranked BM25 results with vector-only results // Combine reranked BM25 results with vector-only results
@@ -0,0 +1,118 @@
import type { IndexItem } from "../indexing/types";
/**
* Maximum bonus a strong lexical title match can contribute on top of the
* underlying Fuse / hybrid score. Tuned to outweigh small vector reranking
* deltas so a true assessment-title match cannot be displaced by a vector
* neighbour as the user types one more character.
*/
export const LEXICAL_TITLE_BONUS = 12;
/**
* Threshold at or above which a result counts as a "strong lexical match".
* Strong matches must always be surfaced and protected from vector reranking
* displacing them.
*/
export const STRONG_LEXICAL_THRESHOLD = 6;
const WORD_SPLIT_RE = /\s+/;
const NON_WORD_RE = /[^a-z0-9]+/gi;
/**
 * Lower-cases and trims a value for comparison.
 * Any falsy input (`undefined`, `null`, `""`) yields the empty string.
 */
function normalize(value: string | undefined | null): string {
  return value ? String(value).toLowerCase().trim() : "";
}
/**
 * Breaks a string into comparison tokens: the value is normalized, split on
 * whitespace, stripped of every character matched by `NON_WORD_RE`, and any
 * piece that ends up empty is dropped.
 */
function tokens(value: string): string[] {
  const words: string[] = [];
  for (const piece of normalize(value).split(WORD_SPLIT_RE)) {
    const cleaned = piece.replace(NON_WORD_RE, "");
    if (cleaned) {
      words.push(cleaned);
    }
  }
  return words;
}
/**
 * Scores how strongly `query` lexically matches an item's title (`item.text`),
 * with a fallback to its subject metadata. The result is a non-negative
 * number; 0 means no useful match.
 *
 * Both sides are lower-cased and trimmed first. The checks below run top to
 * bottom and the first one that hits decides the score:
 *
 *   0   query or title is empty
 *   12  title equals the query
 *   10  title starts with the query
 *   9   ordered token run starting at the first title token
 *   7   ordered token run starting at a later title token
 *         (a "token run": every query token equals the title token at the
 *          same offset, except the last query token, which only has to be a
 *          prefix — e.g. `world w` vs `World War 2 Essay`)
 *   8   title contains the query directly preceded or followed by a space
 *   3   some title token starts with the whole query
 *   5   `metadata.subjectName` or `metadata.subjectCode` starts with the query
 *   2   title contains the query anywhere
 *   0   none of the above
 *
 * Only plain string operations are used because the function is called for
 * every item in the candidate pool.
 */
export function getLexicalMatchQuality(item: IndexItem, query: string): number {
  const needle = normalize(query);
  if (!needle) return 0;

  const heading = normalize(item.text);
  if (!heading) return 0;

  if (heading === needle) return 12;
  if (heading.startsWith(needle)) return 10;

  const needleTokens = tokens(needle);
  const headingTokens = tokens(heading);

  if (needleTokens.length > 0) {
    const lastIdx = needleTokens.length - 1;
    // Slide the query tokens along the title tokens; the first offset where
    // they line up decides the score.
    for (
      let offset = 0;
      offset + needleTokens.length <= headingTokens.length;
      offset++
    ) {
      const aligned = needleTokens.every((queryToken, j) => {
        const headingToken = headingTokens[offset + j];
        return j === lastIdx
          ? headingToken.startsWith(queryToken)
          : headingToken === queryToken;
      });
      if (aligned) {
        return offset === 0 ? 9 : 7;
      }
    }
  }

  if (heading.includes(" " + needle) || heading.includes(needle + " ")) {
    return 8;
  }

  // Token starts-with anywhere
  if (headingTokens.some((token) => token.startsWith(needle))) {
    return 3;
  }

  // Subject / curated metadata title; non-string values are treated as absent.
  const meta = (item.metadata ?? {}) as Record<string, unknown>;
  const subjectFields = [meta.subjectName, meta.subjectCode].map((raw) =>
    normalize(typeof raw === "string" ? raw : ""),
  );
  for (const field of subjectFields) {
    if (field && field.startsWith(needle)) return 5;
  }

  return heading.includes(needle) ? 2 : 0;
}
/**
 * Reports whether an item's lexical match quality for `query` reaches
 * `STRONG_LEXICAL_THRESHOLD`.
 */
export function isStrongLexicalMatch(item: IndexItem, query: string): boolean {
  const quality = getLexicalMatchQuality(item, query);
  return quality >= STRONG_LEXICAL_THRESHOLD;
}
@@ -3,10 +3,12 @@ import { getStaticCommands, type StaticCommandItem } from "../core/commands";
import { getDynamicItems } from "../utils/dynamicItems"; import { getDynamicItems } from "../utils/dynamicItems";
import type { CombinedResult } from "../core/types"; import type { CombinedResult } from "../core/types";
import type { IndexItem } from "../indexing/types"; import type { IndexItem } from "../indexing/types";
import { searchVectors } from "./vector/vectorSearch";
import type { VectorSearchResult } from "./vector/vectorTypes";
import { jobs } from "../indexing/jobs";
import { hybridSearchWithExpansion } from "./hybridSearch"; import { hybridSearchWithExpansion } from "./hybridSearch";
import {
getLexicalMatchQuality,
isStrongLexicalMatch,
STRONG_LEXICAL_THRESHOLD,
} from "./lexicalMatch";
// Search result cache for better performance // Search result cache for better performance
const searchCache = new Map<string, { results: CombinedResult[]; timestamp: number }>(); const searchCache = new Map<string, { results: CombinedResult[]; timestamp: number }>();
@@ -25,8 +27,10 @@ function setCachedResults(query: string, results: CombinedResult[]) {
// Limit cache size // Limit cache size
if (searchCache.size >= MAX_CACHE_SIZE) { if (searchCache.size >= MAX_CACHE_SIZE) {
const firstKey = searchCache.keys().next().value; const firstKey = searchCache.keys().next().value;
if (firstKey !== undefined) {
searchCache.delete(firstKey); searchCache.delete(firstKey);
} }
}
searchCache.set(query, { results, timestamp: Date.now() }); searchCache.set(query, { results, timestamp: Date.now() });
} }
@@ -61,23 +65,40 @@ export function createSearchIndexes() {
findAllMatches: false, // Performance optimization findAllMatches: false, // Performance optimization
}; };
// Optimized dynamic content search options // Optimized dynamic content search options.
// The expanded corpus mixes structured entities (assessments, subjects)
// with free-form text (course content, notices, folio bodies, passive
// captures) so we list a broad set of metadata keys while keeping titles
// dominant in the ranking.
// NOTE: metadata.route is intentionally excluded. Raw API paths like
// `/seqta/student/load/message/people` should never influence ranking — they
// historically caused passive-capture support records to bubble up above
// real assessments when the user typed substrings that happened to appear in
// the path.
const dynamicOptions = { const dynamicOptions = {
keys: [ keys: [
{ name: "text", weight: 3 }, // Increased weight for title matches { name: "text", weight: 3 }, // Title is king
{ name: "content", weight: 1 }, { name: "content", weight: 1 },
{ name: "category", weight: 0.5 }, // Lower weight for category { name: "category", weight: 0.4 },
{ name: "metadata.subjectName", weight: 1.5 }, // Boost subject name matches { name: "metadata.subjectName", weight: 1.6 },
{ name: "metadata.subjectCode", weight: 1.5 }, // Boost subject code matches { name: "metadata.subjectCode", weight: 1.6 },
{ name: "metadata.subject", weight: 1.4 },
{ name: "metadata.courseCode", weight: 1.2 },
{ name: "metadata.filename", weight: 1.2 },
{ name: "metadata.author", weight: 0.8 },
{ name: "metadata.authorName", weight: 0.8 },
{ name: "metadata.label", weight: 0.6 },
{ name: "metadata.categoryName", weight: 0.6 },
{ name: "metadata.entityType", weight: 0.4 },
], ],
includeScore: true, includeScore: true,
includeMatches: true, includeMatches: true,
threshold: 0.5, // More permissive for better partial word matching (increased from 0.4) threshold: 0.5,
minMatchCharLength: 2, // Minimum 2 characters for Fuse.js matches (substring fallback handles shorter queries) minMatchCharLength: 2,
distance: 100, // Increased to allow matches across longer strings distance: 100,
useExtendedSearch: true, useExtendedSearch: true,
ignoreLocation: true, // Allow matches anywhere in the string for better partial word matching ignoreLocation: true,
findAllMatches: true, // Enable to find all matches for better partial word support findAllMatches: true,
shouldSort: true, shouldSort: true,
}; };
@@ -197,15 +218,24 @@ export function searchDynamicItems(
const recencyBoost = sortByRecent ? 1 / (ageInDays + 1) : 0; const recencyBoost = sortByRecent ? 1 / (ageInDays + 1) : 0;
score += recencyBoost; score += recencyBoost;
// Boost for exact text matches (especially at the start) // Lexical title bonus — sticky across adjacent keystrokes so a strong
const textLower = item.text.toLowerCase(); // title prefix match like `world wa` doesn't disappear from the top once
if (textLower.startsWith(queryLower)) { // vector reranking kicks in.
score += 5; // Strong boost for prefix matches const lexicalQuality = getLexicalMatchQuality(item, queryLower);
} else if (textLower.includes(queryLower)) { if (lexicalQuality > 0) {
score += 2; // Boost for substring matches score += lexicalQuality;
// Curated-content boost: assessments and assignments with a strong
// title match should be elevated further, since they are the items
// users are most often hunting for.
if (
lexicalQuality >= STRONG_LEXICAL_THRESHOLD &&
(item.category === "assignments" || item.category === "assessments")
) {
score += 4;
}
} }
// Boost for category matches // Category match (small nudge)
if (item.category.toLowerCase().includes(queryLower)) { if (item.category.toLowerCase().includes(queryLower)) {
score += 1; score += 1;
} }
@@ -221,17 +251,12 @@ export function searchDynamicItems(
// Add additional matches from simple substring search // Add additional matches from simple substring search
additionalMatches.forEach((item) => { additionalMatches.forEach((item) => {
// Check if already in results
if (!results.find(r => r.id === item.id)) { if (!results.find(r => r.id === item.id)) {
const textLower = item.text.toLowerCase();
let score = 5; // Base score for substring matches let score = 5; // Base score for substring matches
// Boost for prefix matches const lexicalQuality = getLexicalMatchQuality(item, queryLower);
if (textLower.startsWith(queryLower)) { score += lexicalQuality;
score += 5;
}
// Recency boost
const ageInDays = (now - item.dateAdded) / (1000 * 60 * 60 * 24); const ageInDays = (now - item.dateAdded) / (1000 * 60 * 60 * 24);
const recencyBoost = sortByRecent ? 1 / (ageInDays + 1) : 0; const recencyBoost = sortByRecent ? 1 / (ageInDays + 1) : 0;
score += recencyBoost; score += recencyBoost;
@@ -241,6 +266,7 @@ export function searchDynamicItems(
type: "dynamic" as const, type: "dynamic" as const,
score, score,
item, item,
matches: undefined,
}); });
} }
}); });
@@ -249,6 +275,7 @@ export function searchDynamicItems(
return results.sort((a, b) => b.score - a.score).slice(0, limit); return results.sort((a, b) => b.score - a.score).slice(0, limit);
} }
export async function performSearch( export async function performSearch(
query: string, query: string,
commandsFuse: Fuse<StaticCommandItem>, commandsFuse: Fuse<StaticCommandItem>,
@@ -286,12 +313,37 @@ export async function performSearch(
sortByRecent, sortByRecent,
); );
// Step 2b: Always include strong lexical title matches, even if Fuse
// missed them with the current threshold. This is the safety net that
// stops `world wa` from dropping a `World War 2 Essay` assessment that
// `world w` happily showed.
const allItems = Array.from(dynamicIdToItemMap.values());
const seen = new Set(bm25Results.map((r) => r.id));
const lexicalAdds: CombinedResult[] = [];
for (const item of allItems) {
if (seen.has(item.id)) continue;
if (!isStrongLexicalMatch(item, trimmedQuery)) continue;
const quality = getLexicalMatchQuality(item, trimmedQuery);
let score = 6 + quality;
if (item.category === "assignments" || item.category === "assessments") {
score += 4;
}
lexicalAdds.push({
id: item.id,
type: "dynamic" as const,
score,
item,
matches: undefined,
});
}
if (lexicalAdds.length > 0) {
bm25Results.push(...lexicalAdds);
bm25Results.sort((a, b) => b.score - a.score);
}
// Step 3: Apply hybrid search (BM25 + Vector reranking + boosting) // Step 3: Apply hybrid search (BM25 + Vector reranking + boosting)
if (trimmedQuery.length > 2 && bm25Results.length > 0) { if (trimmedQuery.length > 2 && bm25Results.length > 0) {
try { try {
// Get all items for expansion
const allItems = Array.from(dynamicIdToItemMap.values());
// Apply hybrid search with expansion // Apply hybrid search with expansion
dynamicResults = await hybridSearchWithExpansion( dynamicResults = await hybridSearchWithExpansion(
bm25Results, bm25Results,
@@ -40,7 +40,6 @@ export interface VectorSearchResult extends SearchResult {
// Cache for query embeddings to avoid recomputing // Cache for query embeddings to avoid recomputing
const embeddingCache = new Map<string, number[]>(); const embeddingCache = new Map<string, number[]>();
const EMBEDDING_CACHE_TTL = 1000 * 60 * 30; // 30 minutes
const MAX_EMBEDDING_CACHE_SIZE = 50; const MAX_EMBEDDING_CACHE_SIZE = 50;
function getCachedEmbedding(query: string): number[] | null { function getCachedEmbedding(query: string): number[] | null {
@@ -55,8 +54,10 @@ function setCachedEmbedding(query: string, embedding: number[]) {
// Limit cache size // Limit cache size
if (embeddingCache.size >= MAX_EMBEDDING_CACHE_SIZE) { if (embeddingCache.size >= MAX_EMBEDDING_CACHE_SIZE) {
const firstKey = embeddingCache.keys().next().value; const firstKey = embeddingCache.keys().next().value;
if (firstKey !== undefined) {
embeddingCache.delete(firstKey); embeddingCache.delete(firstKey);
} }
}
embeddingCache.set(query, embedding); embeddingCache.set(query, embedding);
} }
@@ -1,4 +1,5 @@
import browser from "webextension-polyfill"; import browser from "webextension-polyfill";
import { resetSearchIndexes } from "../indexing/resetIndexes";
const VERSION_STORAGE_KEY = "betterseqta-global-search-version"; const VERSION_STORAGE_KEY = "betterseqta-global-search-version";
const VERSION_CACHE_KEY = "betterseqta-global-search-cache-version"; const VERSION_CACHE_KEY = "betterseqta-global-search-cache-version";
@@ -40,32 +41,51 @@ export function storeVersion(version: string): void {
} }
/** /**
* Checks if the extension has been updated and clears caches if needed * Checks if the extension has been updated and clears caches + resets the
* Returns true if an update was detected * search index if needed.
*
* The reset is intentionally aggressive: every manifest version bump
* triggers a full IndexedDB wipe so changes to indexer extraction logic,
* job sets, or item shape can never serve stale results from an older
* build. The next indexing pass will repopulate from scratch in the
* background. Re-population is bounded by the per-job rate limits in
* `api.ts` so it can't hammer SEQTA after an update.
*
* Returns true if an update was detected.
*/ */
export async function checkAndHandleUpdate(): Promise<boolean> { export async function checkAndHandleUpdate(): Promise<boolean> {
const currentVersion = getCurrentVersion(); const currentVersion = getCurrentVersion();
const storedVersion = getStoredVersion(); const storedVersion = getStoredVersion();
// If no stored version, this is first run - store current version // First run: just remember the version, don't reset (the user likely
// just installed the extension; the index is already empty).
if (!storedVersion) { if (!storedVersion) {
console.debug(`[Version Check] First run detected, storing version ${currentVersion}`); console.debug(
`[Version Check] First run detected, storing version ${currentVersion}`,
);
storeVersion(currentVersion); storeVersion(currentVersion);
return false; return false;
} }
// If versions match, no update
if (storedVersion === currentVersion) { if (storedVersion === currentVersion) {
return false; return false;
} }
// Version mismatch detected - extension was updated console.log(
console.log(`[Version Check] Extension updated from ${storedVersion} to ${currentVersion}, clearing caches...`); `[Version Check] Extension updated from ${storedVersion} to ${currentVersion}, resetting search index...`,
);
// Clear all caches
await clearAllCaches(); await clearAllCaches();
// Store new version try {
await resetSearchIndexes();
console.log(
"[Version Check] Search index reset; next indexing pass will repopulate from scratch.",
);
} catch (e) {
console.warn("[Version Check] resetSearchIndexes failed:", e);
}
storeVersion(currentVersion); storeVersion(currentVersion);
return true; return true;