mirror of
https://github.com/BetterSEQTA/BetterSEQTA-Plus.git
synced 2026-06-06 11:44:40 +00:00
feat: early vector search testing
This commit is contained in:
@@ -6,7 +6,7 @@
|
||||
import { type StaticCommandItem } from './commands';
|
||||
import type { CombinedResult } from './types';
|
||||
import { createSearchIndexes, performSearch as doSearch } from './searchUtils';
|
||||
import { highlightMatch, highlightSnippet } from './highlightUtils';
|
||||
import { highlightMatch, highlightSnippet, stripHtmlButKeepHighlights } from './highlightUtils';
|
||||
import Fuse from 'fuse.js';
|
||||
import Calculator from './Calculator.svelte';
|
||||
import { actionMap } from './indexing/actions';
|
||||
@@ -105,14 +105,14 @@
|
||||
};
|
||||
});
|
||||
|
||||
const performSearch = () => {
|
||||
const performSearch = async () => {
|
||||
isLoading = true;
|
||||
selectedIndex = 0;
|
||||
|
||||
const term = searchTerm.trim().toLowerCase();
|
||||
|
||||
if (commandsFuse && dynamicContentFuse) {
|
||||
combinedResults = doSearch(
|
||||
combinedResults = await doSearch(
|
||||
term,
|
||||
commandsFuse,
|
||||
dynamicContentFuse,
|
||||
@@ -288,8 +288,9 @@
|
||||
onclick={() => executeItemAction(dynamicItem)}
|
||||
>
|
||||
<div class="flex items-center w-full">
|
||||
<div class="flex-none w-8 h-8 text-xl font-IconFamily flex items-center justify-center {isSelected ? 'text-zinc-900 dark:text-white' : 'text-zinc-600 dark:text-zinc-400'}">{dynamicItem.metadata?.icon || '\ue924'}</div>
|
||||
<span class="ml-4 text-lg truncate">
|
||||
{@html highlightMatch(dynamicItem.text, searchTerm, result.matches)}
|
||||
{@html stripHtmlButKeepHighlights(highlightMatch(dynamicItem.text, searchTerm, result.matches))}
|
||||
</span>
|
||||
<span class="flex-none ml-auto text-xs text-zinc-500 dark:text-zinc-400">
|
||||
{dynamicItem.category}
|
||||
@@ -297,7 +298,7 @@
|
||||
</div>
|
||||
{#if dynamicItem.content}
|
||||
<div class="mt-1 ml-12 text-sm text-zinc-600 dark:text-zinc-400 line-clamp-2 text-start">
|
||||
{@html highlightSnippet(dynamicItem.content, searchTerm, result.matches)}
|
||||
{@html stripHtmlButKeepHighlights(highlightSnippet(dynamicItem.content, searchTerm, result.matches))}
|
||||
</div>
|
||||
{/if}
|
||||
</button>
|
||||
|
||||
@@ -0,0 +1,597 @@
|
||||
# client-vector-search
|
||||
|
||||
A client side vector search library that can embed, search, and cache. Works on the browser and server side.
|
||||
|
||||
It outperforms OpenAI's text-embedding-ada-002 and is way faster than Pinecone and other VectorDBs.
|
||||
|
||||
I'm the founder of [searchbase.app](https://searchbase.app) and we needed this for our product and customers. We'll be using this library in production. You can be sure it'll be maintained and improved.
|
||||
|
||||
- Embed documents using transformers by default: gte-small (~30mb).
|
||||
- Calculate cosine similarity between embeddings.
|
||||
- Create an index and search on the client side
|
||||
- Cache vectors with browser caching support.
|
||||
|
||||
Lots of improvements are coming!
|
||||
|
||||
## Roadmap
|
||||
|
||||
Our goal is to build a super simple, fast vector search that works with a couple hundred to a few thousand vectors. ~1k vectors per user covers 99% of the use cases.
|
||||
|
||||
We'll initially keep things super simple and sub 100ms
|
||||
|
||||
### TODOs
|
||||
- [ ] add HNSW index that works on node and browser env, don't rely on hnsw binder libs
|
||||
- [ ] add a proper testing suite and ci/cd for the lib
|
||||
- [ ] simple health tests
|
||||
- [ ] mock the @xenova/transformers for jest, it's not happy with it
|
||||
- [ ] performance tests, recall, memory usage, cpu usage etc.
|
||||
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
npm i client-vector-search
|
||||
```
|
||||
|
||||
|
||||
## Quickstart
|
||||
|
||||
This library provides a plug-and-play solution for embedding and vector search. It's designed to be easy to use, efficient, and versatile. Here's a quick start guide:
|
||||
|
||||
|
||||
```ts
|
||||
import { getEmbedding, EmbeddingIndex } from 'client-vector-search';
|
||||
|
||||
// getEmbedding is an async function, so you need to use 'await' or '.then()' to get the result
|
||||
const embedding = await getEmbedding("Apple"); // Returns embedding as number[]
|
||||
|
||||
// Each object should have an 'embedding' property of type number[]
|
||||
const initialObjects = [
|
||||
{ id: 1, name: "Apple", embedding: embedding },
|
||||
{ id: 2, name: "Banana", embedding: await getEmbedding("Banana") },
|
||||
{ id: 3, name: "Cheddar", embedding: await getEmbedding("Cheddar")},
|
||||
{ id: 4, name: "Space", embedding: await getEmbedding("Space")},
|
||||
{ id: 5, name: "database", embedding: await getEmbedding("database")},
|
||||
];
|
||||
const index = new EmbeddingIndex(initialObjects); // Creates an index
|
||||
|
||||
// The query should be an embedding of type number[]
|
||||
const queryEmbedding = await getEmbedding('Fruit'); // Query embedding
|
||||
const results = await index.search(queryEmbedding, { topK: 5 }); // Returns top similar objects
|
||||
|
||||
// specify the storage type
|
||||
await index.saveIndex('indexedDB');
|
||||
const results = await index.search([1, 2, 3], {
|
||||
topK: 5,
|
||||
useStorage: 'indexedDB',
|
||||
// storageOptions: { // use only if you overrode the defaults
|
||||
// indexedDBName: 'clientVectorDB',
|
||||
// indexedDBObjectStoreName: 'ClientEmbeddingStore',
|
||||
// },
|
||||
});
|
||||
|
||||
console.log(results);
|
||||
|
||||
await index.deleteIndexedDB(); // if you overrode default, specify db name
|
||||
```
|
||||
|
||||
## Trouble-shooting
|
||||
|
||||
### NextJS
|
||||
To use it inside NextJS projects you'll need to update the `next.config.js` file to include the following:
|
||||
|
||||
```js
|
||||
module.exports = {
|
||||
// Override the default webpack configuration
|
||||
webpack: (config) => {
|
||||
// See https://webpack.js.org/configuration/resolve/#resolvealias
|
||||
config.resolve.alias = {
|
||||
...config.resolve.alias,
|
||||
sharp$: false,
|
||||
"onnxruntime-node$": false,
|
||||
};
|
||||
return config;
|
||||
},
|
||||
};
|
||||
```
|
||||
|
||||
#### Model load after page is loaded
|
||||
|
||||
You can initialize the model before using it to generate embeddings. This will ensure that the model is loaded before you use it and provide a better UX.
|
||||
|
||||
```js
|
||||
import { initializeModel } from "client-vector-search"
|
||||
...
|
||||
useEffect(() => {
|
||||
try {
|
||||
initializeModel();
|
||||
} catch (e) {
|
||||
console.log(e);
|
||||
}
|
||||
}, []);
|
||||
```
|
||||
|
||||
## Usage Guide
|
||||
|
||||
This guide provides a step-by-step walkthrough of the library's main features. It covers everything from generating embeddings for a string to performing operations on the index such as adding, updating, and removing objects. It also includes instructions on how to save the index to a database and perform search operations within it.
|
||||
|
||||
Until we have a reference documentation, you can find all the methods and their usage in this guide. Each step is accompanied by a code snippet to illustrate the usage of the method in question. Make sure to follow along and try out the examples in your own environment to get a better understanding of how everything works.
|
||||
|
||||
Let's get started!
|
||||
|
||||
### Step 1: Generate Embeddings for String
|
||||
Generate embeddings for a given string using the `getEmbedding` method.
|
||||
|
||||
```ts
|
||||
const embedding = await getEmbedding("Apple"); // Returns embedding as number[]
|
||||
```
|
||||
> **Note**: `getEmbedding` is asynchronous; make sure to use `await`.
|
||||
|
||||
---
|
||||
|
||||
### Step 2: Calculate Cosine Similarity
|
||||
Calculate the cosine similarity between two embeddings.
|
||||
|
||||
```ts
|
||||
const similarity = cosineSimilarity(embedding1, embedding2, 6);
|
||||
```
|
||||
> **Note**: Both embeddings should be of the same length.
|
||||
|
||||
---
|
||||
|
||||
### Step 3: Create an Index
|
||||
Create an index with an initial array of objects. Each object must have an 'embedding' property.
|
||||
|
||||
```ts
|
||||
const initialObjects = [...];
|
||||
const index = new EmbeddingIndex(initialObjects);
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Step 4: Add to Index
|
||||
Add an object to the index.
|
||||
|
||||
```ts
|
||||
const objectToAdd = { id: 6, name: 'Cat', embedding: await getEmbedding('Cat') };
|
||||
index.add(objectToAdd);
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Step 5: Update Index
|
||||
Update an existing object in the index.
|
||||
|
||||
```ts
|
||||
const vectorToUpdate = { id: 6, name: 'Dog', embedding: await getEmbedding('Dog') };
|
||||
index.update({ id: 6 }, vectorToUpdate);
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Step 6: Remove from Index
|
||||
Remove an object from the index.
|
||||
|
||||
```ts
|
||||
index.remove({ id: 6 });
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Step 7: Retrieve from Index
|
||||
Retrieve an object from the index.
|
||||
|
||||
```ts
|
||||
const vector = index.get({ id: 1 });
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Step 8: Search the Index
|
||||
Search the index with a query embedding.
|
||||
|
||||
```ts
|
||||
const queryEmbedding = await getEmbedding('Fruit');
|
||||
const results = await index.search(queryEmbedding, { topK: 5 });
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Step 9: Print the Index
|
||||
Print the entire index to the console.
|
||||
|
||||
```ts
|
||||
index.printIndex();
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Step 10: Save Index to IndexedDB (for browser)
|
||||
Save the index to a persistent IndexedDB database. Note that saving an empty index throws an error.
|
||||
|
||||
```ts
|
||||
await index.saveIndex("indexedDB", { DBName: "clientVectorDB", objectStoreName:"ClientEmbeddingStore"})
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Important: Search in indexedDB
|
||||
Perform a search operation in the IndexedDB.
|
||||
|
||||
```ts
|
||||
const results = await index.search(queryEmbedding, {
|
||||
topK: 5,
|
||||
useStorage: "indexedDB",
|
||||
storageOptions: { // only if you want to override the default options, defaults are below
|
||||
indexedDBName: 'clientVectorDB',
|
||||
indexedDBObjectStoreName: 'ClientEmbeddingStore'
|
||||
}
|
||||
});
```
|
||||
|
||||
---
|
||||
|
||||
### Delete Database
|
||||
To delete an entire database.
|
||||
|
||||
```ts
|
||||
await IndexedDbManager.deleteIndexedDB("clientVectorDB");
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Delete Object Store
|
||||
To delete an object store from a database.
|
||||
|
||||
```ts
|
||||
await IndexedDbManager.deleteIndexedDBObjectStore("clientVectorDB", "ClientEmbeddingStore");
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Retrieve All Objects
|
||||
To retrieve all objects from a specific object store.
|
||||
|
||||
```ts
|
||||
const allObjects = await IndexedDbManager.getAllObjectsFromIndexedDB("clientVectorDB", "ClientEmbeddingStore");
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
# THE MAIN INDEX.TS FILE THAT YOU ARE IMPORTING FROM
|
||||
```index.ts
|
||||
const DEFAULT_TOP_K = 3;
|
||||
|
||||
interface Filter {
|
||||
[key: string]: any;
|
||||
}
|
||||
|
||||
import Cache from './cache';
|
||||
import { IndexedDbManager } from './indexedDB';
|
||||
import { cosineSimilarity } from './utils';
|
||||
export { ExperimentalHNSWIndex } from './hnsw';
|
||||
|
||||
// uncomment if you want to test indexedDB implementation in node env for faster dev cycle
|
||||
// import { IDBFactory } from 'fake-indexeddb';
|
||||
// const indexedDB = new IDBFactory();
|
||||
|
||||
export interface SearchResult {
|
||||
similarity: number;
|
||||
object: any;
|
||||
}
|
||||
|
||||
type StorageOptions = 'indexedDB' | 'localStorage' | 'none';
|
||||
|
||||
/**
|
||||
* Interface for search options in the EmbeddingIndex class.
|
||||
* topK: The number of top similar items to return.
|
||||
* filter: An optional filter to apply to the objects before searching.
|
||||
* useStorage: A flag to indicate whether to use storage options like indexedDB or localStorage.
|
||||
*/
|
||||
interface SearchOptions {
|
||||
topK?: number;
|
||||
filter?: Filter;
|
||||
useStorage?: StorageOptions;
|
||||
storageOptions?: { indexedDBName: string; indexedDBObjectStoreName: string }; // TODO: generalize it to localStorage as well
|
||||
}
|
||||
|
||||
const cacheInstance = Cache.getInstance();
|
||||
|
||||
let pipe: any;
|
||||
let currentModel: string;
|
||||
|
||||
export const initializeModel = async (
|
||||
model: string = 'Xenova/gte-small',
|
||||
): Promise<void> => {
|
||||
if (model !== currentModel) {
|
||||
const transformersModule = await import('@xenova/transformers');
|
||||
const pipeline = transformersModule.pipeline;
|
||||
pipe = await pipeline('feature-extraction', model);
|
||||
currentModel = model;
|
||||
}
|
||||
};
|
||||
|
||||
export const getEmbedding = async (
|
||||
text: string,
|
||||
precision: number = 7,
|
||||
options = { pooling: 'mean', normalize: false },
|
||||
model = 'Xenova/gte-small',
|
||||
): Promise<number[]> => {
|
||||
const cachedEmbedding = cacheInstance.get(text);
|
||||
if (cachedEmbedding) {
|
||||
return Promise.resolve(cachedEmbedding);
|
||||
}
|
||||
|
||||
if (model !== currentModel) {
|
||||
await initializeModel(model);
|
||||
}
|
||||
|
||||
const output = await pipe(text, options);
|
||||
const roundedOutput = Array.from(output.data as number[]).map(
|
||||
(value: number) => parseFloat(value.toFixed(precision)),
|
||||
);
|
||||
cacheInstance.set(text, roundedOutput);
|
||||
return Array.from(roundedOutput);
|
||||
};
|
||||
|
||||
export class EmbeddingIndex {
|
||||
private objects: Filter[];
|
||||
private keys: string[];
|
||||
|
||||
constructor(initialObjects?: Filter[]) {
|
||||
// TODO: add support for options while creating index such as {... indexedDB: true, ...}
|
||||
this.objects = [];
|
||||
this.keys = [];
|
||||
if (initialObjects && initialObjects.length > 0) {
|
||||
initialObjects.forEach((obj) => this.validateAndAdd(obj));
|
||||
if (initialObjects[0]) {
|
||||
this.keys = Object.keys(initialObjects[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private findVectorIndex(filter: Filter): number {
|
||||
return this.objects.findIndex((object) =>
|
||||
Object.keys(filter).every((key) => object[key] === filter[key]),
|
||||
);
|
||||
}
|
||||
|
||||
private validateAndAdd(obj: Filter) {
|
||||
if (!Array.isArray(obj.embedding) || obj.embedding.some(isNaN)) {
|
||||
throw new Error(
|
||||
'Object must have an embedding property of type number[]',
|
||||
);
|
||||
}
|
||||
if (this.keys.length === 0) {
|
||||
this.keys = Object.keys(obj);
|
||||
} else if (!this.keys.every((key) => key in obj)) {
|
||||
throw new Error(
|
||||
'Object must have the same properties as the initial objects',
|
||||
);
|
||||
}
|
||||
this.objects.push(obj);
|
||||
}
|
||||
|
||||
add(obj: Filter) {
|
||||
this.validateAndAdd(obj);
|
||||
}
|
||||
|
||||
// Method to update an existing vector in the index
|
||||
update(filter: Filter, vector: Filter) {
|
||||
const index = this.findVectorIndex(filter);
|
||||
if (index === -1) {
|
||||
throw new Error('Vector not found');
|
||||
}
|
||||
if (vector.hasOwnProperty('embedding')) {
|
||||
// Validate and add the new vector
|
||||
this.validateAndAdd(vector);
|
||||
}
|
||||
// Replace the old vector with the new one
|
||||
this.objects[index] = Object.assign(this.objects[index] as Filter, vector);
|
||||
}
|
||||
|
||||
// Method to remove a vector from the index
|
||||
remove(filter: Filter) {
|
||||
const index = this.findVectorIndex(filter);
|
||||
if (index === -1) {
|
||||
throw new Error('Vector not found');
|
||||
}
|
||||
// Remove the vector from the index
|
||||
this.objects.splice(index, 1);
|
||||
}
|
||||
|
||||
// Method to remove multiple vectors from the index
|
||||
removeBatch(filters: Filter[]) {
|
||||
filters.forEach((filter) => {
|
||||
const index = this.findVectorIndex(filter);
|
||||
if (index !== -1) {
|
||||
// Remove the vector from the index
|
||||
this.objects.splice(index, 1);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Method to retrieve a vector from the index
|
||||
get(filter: Filter) {
|
||||
const vector = this.objects[this.findVectorIndex(filter)];
|
||||
return vector || null;
|
||||
}
|
||||
|
||||
size(): number {
|
||||
// Returns the size of the index
|
||||
return this.objects.length;
|
||||
}
|
||||
|
||||
clear() {
|
||||
this.objects = [];
|
||||
}
|
||||
|
||||
async search(
|
||||
queryEmbedding: number[],
|
||||
options: SearchOptions = {
|
||||
topK: 3,
|
||||
useStorage: 'none',
|
||||
storageOptions: {
|
||||
indexedDBName: 'clientVectorDB',
|
||||
indexedDBObjectStoreName: 'ClientEmbeddingStore',
|
||||
},
|
||||
},
|
||||
): Promise<SearchResult[]> {
|
||||
const topK = options.topK || DEFAULT_TOP_K;
|
||||
const filter = options.filter || {};
|
||||
const useStorage = options.useStorage || 'none';
|
||||
|
||||
if (useStorage === 'indexedDB') {
|
||||
const DBname = options.storageOptions?.indexedDBName || 'clientVectorDB';
|
||||
const objectStoreName =
|
||||
options.storageOptions?.indexedDBObjectStoreName ||
|
||||
'ClientEmbeddingStore';
|
||||
|
||||
if (typeof indexedDB === 'undefined') {
|
||||
console.error('IndexedDB is not supported');
|
||||
throw new Error('IndexedDB is not supported');
|
||||
}
|
||||
const results = await this.loadAndSearchFromIndexedDB(
|
||||
DBname,
|
||||
objectStoreName,
|
||||
queryEmbedding,
|
||||
topK,
|
||||
filter,
|
||||
);
|
||||
return results;
|
||||
} else {
|
||||
// Compute similarities
|
||||
const similarities = this.objects
|
||||
.filter((object) =>
|
||||
Object.keys(filter).every((key) => object[key] === filter[key]),
|
||||
)
|
||||
.map((obj) => ({
|
||||
similarity: cosineSimilarity(queryEmbedding, obj.embedding),
|
||||
object: obj,
|
||||
}));
|
||||
|
||||
// Sort by similarity and return topK results
|
||||
return similarities
|
||||
.sort((a, b) => b.similarity - a.similarity)
|
||||
.slice(0, topK);
|
||||
}
|
||||
}
|
||||
|
||||
printIndex() {
|
||||
console.log('Index Content:');
|
||||
this.objects.forEach((obj, idx) => {
|
||||
console.log(`Item ${idx + 1}:`, obj);
|
||||
});
|
||||
}
|
||||
|
||||
async saveIndex(
|
||||
storageType: string,
|
||||
options: { DBName: string; objectStoreName: string } = {
|
||||
DBName: 'clientVectorDB',
|
||||
objectStoreName: 'ClientEmbeddingStore',
|
||||
},
|
||||
) {
|
||||
if (storageType === 'indexedDB') {
|
||||
await this.saveToIndexedDB(options.DBName, options.objectStoreName);
|
||||
} else {
|
||||
throw new Error(
|
||||
`Unsupported storage type: ${storageType} \n Supported storage types: "indexedDB"`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
async saveToIndexedDB(
|
||||
DBname: string = 'clientVectorDB',
|
||||
objectStoreName: string = 'ClientEmbeddingStore',
|
||||
): Promise<void> {
|
||||
if (typeof indexedDB === 'undefined') {
|
||||
console.error('IndexedDB is not defined');
|
||||
throw new Error('IndexedDB is not supported');
|
||||
}
|
||||
|
||||
if (!this.objects || this.objects.length === 0) {
|
||||
throw new Error('Index is empty. Nothing to save');
|
||||
}
|
||||
|
||||
try {
|
||||
const db = await IndexedDbManager.create(DBname, objectStoreName);
|
||||
await db.addToIndexedDB(this.objects);
|
||||
console.log(
|
||||
`Index saved to database '${DBname}' object store '${objectStoreName}'`,
|
||||
);
|
||||
} catch (error) {
|
||||
console.error('Error saving index to database:', error);
|
||||
throw new Error('Error saving index to database');
|
||||
}
|
||||
}
|
||||
|
||||
async loadAndSearchFromIndexedDB(
|
||||
DBname: string = 'clientVectorDB',
|
||||
objectStoreName: string = 'ClientEmbeddingStore',
|
||||
queryEmbedding: number[],
|
||||
topK: number,
|
||||
filter: { [key: string]: any },
|
||||
): Promise<SearchResult[]> {
|
||||
const db = await IndexedDbManager.create(DBname, objectStoreName);
|
||||
const generator = db.dbGenerator();
|
||||
const results: { similarity: number; object: any }[] = [];
|
||||
|
||||
for await (const record of generator) {
|
||||
if (Object.keys(filter).every((key) => record[key] === filter[key])) {
|
||||
const similarity = cosineSimilarity(queryEmbedding, record.embedding);
|
||||
results.push({ similarity, object: record });
|
||||
}
|
||||
}
|
||||
results.sort((a, b) => b.similarity - a.similarity);
|
||||
return results.slice(0, topK);
|
||||
}
|
||||
|
||||
async deleteIndexedDB(DBname: string = 'clientVectorDB'): Promise<void> {
|
||||
if (typeof indexedDB === 'undefined') {
|
||||
console.error('IndexedDB is not defined');
|
||||
throw new Error('IndexedDB is not supported');
|
||||
}
|
||||
return new Promise((resolve, reject) => {
|
||||
const request = indexedDB.deleteDatabase(DBname);
|
||||
|
||||
request.onsuccess = () => {
|
||||
console.log(`Database '${DBname}' deleted`);
|
||||
resolve();
|
||||
};
|
||||
request.onerror = (event) => {
|
||||
console.error('Failed to delete database', event);
|
||||
reject(new Error('Failed to delete database'));
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
async deleteIndexedDBObjectStore(
|
||||
DBname: string = 'clientVectorDB',
|
||||
objectStoreName: string = 'ClientEmbeddingStore',
|
||||
): Promise<void> {
|
||||
const db = await IndexedDbManager.create(DBname, objectStoreName);
|
||||
|
||||
try {
|
||||
await db.deleteIndexedDBObjectStoreFromDB(DBname, objectStoreName);
|
||||
console.log(
|
||||
`Object store '${objectStoreName}' deleted from database '${DBname}'`,
|
||||
);
|
||||
} catch (error) {
|
||||
console.error('Error deleting object store:', error);
|
||||
throw new Error('Error deleting object store');
|
||||
}
|
||||
}
|
||||
|
||||
async getAllObjectsFromIndexedDB(
|
||||
DBname: string = 'clientVectorDB',
|
||||
objectStoreName: string = 'ClientEmbeddingStore',
|
||||
): Promise<any[]> {
|
||||
const db = await IndexedDbManager.create(DBname, objectStoreName);
|
||||
const objects: any[] = [];
|
||||
for await (const record of db.dbGenerator()) {
|
||||
objects.push(record);
|
||||
}
|
||||
return objects;
|
||||
}
|
||||
}
|
||||
```
|
||||
@@ -28,7 +28,7 @@ const staticCommands: StaticCommandItem[] = [
|
||||
window.location.hash = '?page=/home';
|
||||
loadHomePage();
|
||||
},
|
||||
priority: 10
|
||||
priority: 4
|
||||
},
|
||||
{
|
||||
id: 'messages',
|
||||
@@ -40,7 +40,7 @@ const staticCommands: StaticCommandItem[] = [
|
||||
action: () => {
|
||||
window.location.hash = '?page=/messages';
|
||||
},
|
||||
priority: 10
|
||||
priority: 4
|
||||
},
|
||||
{
|
||||
id: 'timetable',
|
||||
@@ -52,7 +52,7 @@ const staticCommands: StaticCommandItem[] = [
|
||||
action: () => {
|
||||
window.location.hash = '?page=/timetable';
|
||||
},
|
||||
priority: 10
|
||||
priority: 4
|
||||
},
|
||||
{
|
||||
id: 'assessments',
|
||||
@@ -64,7 +64,7 @@ const staticCommands: StaticCommandItem[] = [
|
||||
action: () => {
|
||||
window.location.hash = '?page=/assessments';
|
||||
},
|
||||
priority: 10
|
||||
priority: 4
|
||||
},
|
||||
{
|
||||
id: 'toggle-dark-mode',
|
||||
@@ -72,7 +72,7 @@ const staticCommands: StaticCommandItem[] = [
|
||||
category: 'action',
|
||||
text: 'Toggle Dark Mode',
|
||||
action: () => settingsState.DarkMode = !settingsState.DarkMode,
|
||||
priority: 5,
|
||||
priority: 2,
|
||||
keywords: ['theme', 'appearance']
|
||||
}
|
||||
];
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
<script lang="ts">
|
||||
import { highlightMatch, highlightSnippet } from '../highlightUtils';
|
||||
import { highlightMatch, highlightSnippet, stripHtmlButKeepHighlights } from '../highlightUtils';
|
||||
import type { DynamicContentItem } from '../dynamicSearch';
|
||||
import type { FuseResultMatch } from '../types';
|
||||
|
||||
const { item, isSelected, searchTerm, result } = $props<{
|
||||
const { item, isSelected, searchTerm, matches } = $props<{
|
||||
item: DynamicContentItem;
|
||||
isSelected: boolean;
|
||||
searchTerm: string;
|
||||
result: { matches: string[] };
|
||||
matches?: readonly FuseResultMatch[];
|
||||
}>();
|
||||
|
||||
/* const dueDate = $derived(item.metadata?.dueDate
|
||||
@@ -28,12 +29,11 @@
|
||||
<button
|
||||
class="w-full flex flex-col px-2 py-1.5 rounded-lg select-none cursor-pointer group
|
||||
{isSelected ? 'bg-zinc-900/5 dark:bg-white/10 text-zinc-900 dark:text-white' : 'hover:bg-zinc-500/5 dark:hover:bg-white/5 text-zinc-800 dark:text-zinc-200'}"
|
||||
onclick={() => { item.action(); }}
|
||||
>
|
||||
<div class="flex items-center w-full">
|
||||
<div class="flex-none w-8 h-8 text-xl font-IconFamily flex items-center justify-center {isSelected ? 'text-zinc-900 dark:text-white' : 'text-zinc-600 dark:text-zinc-400'}">{item.icon}</div>
|
||||
<div class="flex-none w-8 h-8 text-xl font-IconFamily flex items-center justify-center {isSelected ? 'text-zinc-900 dark:text-white' : 'text-zinc-600 dark:text-zinc-400'}">{item.metadata?.icon || '\ue924'}</div>
|
||||
<span class="ml-4 text-lg truncate">
|
||||
{@html highlightMatch(item.text, searchTerm, result.matches)}
|
||||
{@html stripHtmlButKeepHighlights(highlightMatch(item.text, searchTerm, matches))}
|
||||
</span>
|
||||
<span class="flex-none ml-auto text-xs text-zinc-500 dark:text-zinc-400">
|
||||
{item.category}
|
||||
@@ -41,16 +41,22 @@
|
||||
</div>
|
||||
{#if item.content}
|
||||
<div class="mt-1 ml-12 text-sm text-zinc-600 dark:text-zinc-400 line-clamp-2 text-start">
|
||||
{@html highlightSnippet(item.content, searchTerm, result.matches)}
|
||||
{@html stripHtmlButKeepHighlights(highlightSnippet(item.content, searchTerm, matches))}
|
||||
</div>
|
||||
{/if}
|
||||
</button>
|
||||
|
||||
<style>
|
||||
.highlight {
|
||||
:global(.highlight) {
|
||||
background-color: rgba(255, 213, 0, 0.3);
|
||||
font-weight: 500;
|
||||
border-radius: 2px;
|
||||
padding: 0 1px;
|
||||
margin: 0 -1px;
|
||||
}
|
||||
|
||||
.dark :global(.highlight) {
|
||||
background-color: rgba(255, 230, 100, 0.4);
|
||||
}
|
||||
|
||||
.due-badge {
|
||||
|
||||
@@ -1,5 +1,48 @@
|
||||
import type { FuseResultMatch, MatchIndices } from './types';
|
||||
|
||||
/**
 * Removes HTML tags from a string and flattens line breaks into spaces.
 *
 * Every `<...>` sequence is deleted and every line break is replaced by a
 * single space, so the result can be shown on one line. This is a lightweight
 * text-extraction helper, not an HTML sanitizer.
 *
 * @param html - Markup to strip. Falsy input yields an empty string.
 * @returns The input with all tags removed and line breaks replaced by spaces.
 */
export function stripHtmlTags(html: string): string {
  if (!html) return '';
  // A string pattern passed to replace() only touches the first occurrence,
  // so a global regex is required to flatten every line break.
  return html.replace(/<[^>]*>/g, '').replace(/\r?\n/g, ' ');
}
|
||||
|
||||
/**
 * Removes HTML tags from a string, but preserves `<span class="highlight">`
 * wrappers together with their matching `</span>`.
 *
 * Tags are processed in a single pass while tracking which `<span>` elements
 * are currently open. A closing `</span>` is kept only when it closes a
 * highlight span, so the returned markup is always balanced:
 * - closing tags of unrelated spans are dropped,
 * - a highlight that wraps other spans still receives its closing tag,
 * - a highlight left unclosed in the input is closed at the end.
 *
 * @param html - Markup that may contain highlight spans. Falsy input yields ''.
 * @returns The text content of `html` with only highlight spans retained.
 */
export function stripHtmlButKeepHighlights(html: string): string {
  if (!html) return '';

  const highlightOpenTag = '<span class="highlight">';
  const highlightCloseTag = '</span>';

  // One entry per currently open <span>: true for a highlight span,
  // false for any other span (whose tags are stripped).
  const openSpans: boolean[] = [];

  const stripped = html.replace(/<[^>]*>/g, (tag) => {
    if (tag === highlightOpenTag) {
      openSpans.push(true);
      return highlightOpenTag;
    }
    if (/^<span[\s/>]/i.test(tag)) {
      // Remember unrelated spans so their closing tag is not mistaken
      // for the end of a highlight.
      openSpans.push(false);
      return '';
    }
    if (/^<\/span\s*>$/i.test(tag)) {
      // An orphan </span> (empty stack) is simply dropped.
      return openSpans.pop() === true ? highlightCloseTag : '';
    }
    // Any other tag is removed.
    return '';
  });

  // Close highlights that were never closed in the input so the fragment
  // cannot leak highlight styling into following markup.
  const unclosedHighlights = openSpans.filter(Boolean).length;
  return stripped + highlightCloseTag.repeat(unclosedHighlights);
}
|
||||
|
||||
export function highlightMatch(
|
||||
text: string,
|
||||
term: string,
|
||||
|
||||
@@ -2,6 +2,7 @@ import { getAll, put, clear, remove } from './db';
|
||||
import { jobs } from './jobs';
|
||||
import { renderComponentMap } from './renderComponents';
|
||||
import type { IndexItem, HydratedIndexItem, Job, JobContext } from './types';
|
||||
import { processItems } from '../vectorSearch';
|
||||
|
||||
const META_STORE = 'meta';
|
||||
const LOCK_KEY = 'bsq-indexer-lock';
|
||||
@@ -94,6 +95,8 @@ export async function runIndexing(): Promise<void> {
|
||||
let completedJobs = 0;
|
||||
dispatchProgress(completedJobs, jobIds.length, true);
|
||||
|
||||
const allNewItems: HydratedIndexItem[] = [];
|
||||
|
||||
for (const jobId of jobIds) {
|
||||
const job = jobs[jobId];
|
||||
const lastRun = await getLastRunMeta(jobId);
|
||||
@@ -136,6 +139,13 @@ export async function runIndexing(): Promise<void> {
|
||||
await setStoredItems(merged);
|
||||
await updateLastRunMeta(jobId);
|
||||
|
||||
// Add to our collection of new items for vector processing
|
||||
const hydratedItems = merged.map(item => ({
|
||||
...item,
|
||||
renderComponent: renderComponentMap[job.renderComponentId]
|
||||
}));
|
||||
allNewItems.push(...hydratedItems);
|
||||
|
||||
console.debug(`%c[Indexer] ✅ ${job.label}: ${newItems.length} items indexed`, 'color: #00c46f');
|
||||
} catch (err) {
|
||||
console.debug(`%c[Indexer] ❌ ${job.label} failed:`, 'color: red');
|
||||
@@ -146,6 +156,12 @@ export async function runIndexing(): Promise<void> {
|
||||
dispatchProgress(completedJobs, jobIds.length, true);
|
||||
}
|
||||
|
||||
// Process all new items through vector search
|
||||
if (allNewItems.length > 0) {
|
||||
console.debug(`%c[Indexer] Processing ${allNewItems.length} items for vector search...`, 'color: #4ea1ff');
|
||||
await processItems(allNewItems);
|
||||
}
|
||||
|
||||
stopHeartbeat();
|
||||
dispatchProgress(completedJobs, jobIds.length, false);
|
||||
}
|
||||
|
||||
@@ -228,14 +228,6 @@ export const jobs: Record<string, Job> = {
|
||||
}
|
||||
|
||||
offset += limit;
|
||||
|
||||
// If we've processed 500 messages and haven't found any existing ones,
|
||||
// assume these are all new (first run) and stop here to avoid overwhelming
|
||||
if (offset >= 500 && consecutiveExisting === 0) {
|
||||
console.debug('[Messages Job] Processed 500 new messages, stopping for now');
|
||||
hasMore = false;
|
||||
break;
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error fetching messages:', error);
|
||||
break;
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
import Fuse, { type FuseResult } from 'fuse.js';
|
||||
import { getStaticCommands, type StaticCommandItem } from './commands';
|
||||
import { type DynamicContentItem, getDynamicItems } from './dynamicSearch';
|
||||
import { getDynamicItems } from './dynamicSearch';
|
||||
import type { CombinedResult } from './types';
|
||||
import type { HydratedIndexItem } from './indexing/types';
|
||||
import { searchVectors, type VectorSearchResult } from './vectorSearch';
|
||||
|
||||
// This function is likely no longer needed as items are pre-processed by the indexer
|
||||
/* export function prepareDynamicItems(items: DynamicContentItem[]): DynamicContentItem[] {
|
||||
@@ -47,9 +48,9 @@ export function createSearchIndexes() {
|
||||
includeScore: true,
|
||||
includeMatches: true,
|
||||
threshold: 0.6,
|
||||
minMatchCharLength: 1,
|
||||
ignoreLocation: true,
|
||||
useExtendedSearch: false
|
||||
minMatchCharLength: 3,
|
||||
distance: 50,
|
||||
useExtendedSearch: false,
|
||||
};
|
||||
|
||||
return {
|
||||
@@ -141,18 +142,74 @@ export function searchDynamicItems(
|
||||
});
|
||||
}
|
||||
|
||||
export function performSearch(
|
||||
export async function performSearch(
|
||||
query: string,
|
||||
commandsFuse: Fuse<StaticCommandItem>,
|
||||
dynamicContentFuse: Fuse<HydratedIndexItem>,
|
||||
commandIdToItemMap: Map<string, StaticCommandItem>,
|
||||
dynamicIdToItemMap: Map<string, HydratedIndexItem>,
|
||||
showRecentFirst: boolean // Pass sorting preference
|
||||
): CombinedResult[] {
|
||||
showRecentFirst: boolean
|
||||
): Promise<CombinedResult[]> {
|
||||
const startTime = performance.now();
|
||||
|
||||
// Get all results first
|
||||
const commandResults = searchCommands(commandsFuse, query, commandIdToItemMap);
|
||||
const commandEndTime = performance.now();
|
||||
const dynamicResults = searchDynamicItems(dynamicContentFuse, query, dynamicIdToItemMap, 10, showRecentFirst);
|
||||
const fuseEndTime = performance.now();
|
||||
|
||||
const results = [...commandResults, ...dynamicResults];
|
||||
// Get vector results in parallel
|
||||
const vectorResults = await searchVectors(query, 10);
|
||||
const vectorEndTime = performance.now();
|
||||
|
||||
console.log('Vector results:', vectorResults);
|
||||
|
||||
// Log timings
|
||||
console.log(`Command search took ${commandEndTime - startTime} milliseconds`);
|
||||
console.log(`Dynamic search took ${fuseEndTime - commandEndTime} milliseconds`);
|
||||
console.log(`Vector search took ${vectorEndTime - fuseEndTime} milliseconds`);
|
||||
|
||||
// Create a map to store our final results, using ID as key to avoid duplicates
|
||||
const resultMap = new Map<string, CombinedResult>();
|
||||
|
||||
// Add command results first (they keep their original scores)
|
||||
commandResults.forEach(r => resultMap.set(r.id, r));
|
||||
|
||||
// Process dynamic results and vector results together
|
||||
const seenIds = new Set<string>();
|
||||
|
||||
// Add dynamic results first
|
||||
dynamicResults.forEach(r => {
|
||||
seenIds.add(r.id);
|
||||
const vectorMatch = vectorResults.find(v => v.object.id === r.id);
|
||||
if (vectorMatch) {
|
||||
// If we found it in both searches, combine the scores
|
||||
resultMap.set(r.id, {
|
||||
...r,
|
||||
score: r.score + (vectorMatch.similarity * 0.6) // Boost exact matches
|
||||
});
|
||||
} else {
|
||||
// If only in Fuse results, keep as is
|
||||
resultMap.set(r.id, r);
|
||||
}
|
||||
});
|
||||
|
||||
// Now add any vector results we haven't seen yet
|
||||
vectorResults.forEach(v => {
|
||||
const id = v.object.id;
|
||||
if (!seenIds.has(id)) {
|
||||
// This is a semantic match that Fuse missed - add it with the vector similarity as score
|
||||
resultMap.set(id, {
|
||||
id,
|
||||
type: 'dynamic' as const,
|
||||
score: v.similarity * 0.9, // High base score for semantic matches
|
||||
item: v.object
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// Convert to array and sort by score
|
||||
const results = Array.from(resultMap.values());
|
||||
results.sort((a, b) => b.score - a.score);
|
||||
|
||||
return results;
|
||||
|
||||
@@ -0,0 +1,86 @@
|
||||
import { getEmbedding, EmbeddingIndex, initializeModel } from 'client-vector-search';
|
||||
import type { HydratedIndexItem } from './indexing/types';
|
||||
import type { SearchResult } from 'client-vector-search';
|
||||
|
||||
let vectorIndex: EmbeddingIndex | null = null;
|
||||
|
||||
/**
 * Initialises the embedding model and the module-level vector index, then
 * re-populates the index with every object previously persisted to IndexedDB.
 *
 * Any failure along the way is handled as best-effort: the error is logged and
 * the module falls back to a fresh, empty index so callers can still proceed.
 */
export async function initVectorSearch(): Promise<void> {
  try {
    await initializeModel();

    // Bind the new index to a local before publishing it. The load below
    // awaits, and if another initialisation reassigns `vectorIndex` in the
    // meantime, the stored objects must still go into *this* instance rather
    // than being appended a second time to the other one.
    const index = new EmbeddingIndex([]);
    vectorIndex = index;

    // Load existing items from IndexedDB
    const stored = await index.getAllObjectsFromIndexedDB();
    if (stored.length > 0) {
      for (const item of stored) {
        index.add(item);
      }
      console.debug('Vector index loaded from IndexedDB');
    }
  } catch (error) {
    // Report the cause instead of swallowing it; start over with an empty index.
    console.debug('Creating new vector index', error);
    vectorIndex = new EmbeddingIndex([]);
  }
}
|
||||
|
||||
/**
 * Computes an embedding for a single index item.
 *
 * The text that gets embedded is the space-separated concatenation of the
 * item's text, content, category and the `author` / `subject` metadata fields;
 * any of those that are falsy are left out.
 *
 * @param item - The item to embed.
 * @returns A shallow copy of `item` with the computed `embedding` attached.
 */
export async function vectorizeItem(item: HydratedIndexItem): Promise<HydratedIndexItem & { embedding: number[] }> {
  const meta = item.metadata;
  const fragments = [item.text, item.content, item.category, meta?.author, meta?.subject];
  const textToEmbed = fragments.filter(Boolean).join(' ');

  const embedding = await getEmbedding(textToEmbed);

  return { ...item, embedding };
}
|
||||
|
||||
/**
 * Embeds the given items and adds them to the vector index, skipping items
 * whose id is already present in the index.
 *
 * Items are embedded in batches of five; the index is saved to IndexedDB after
 * every batch so progress made so far is persisted.
 *
 * @param items - Candidate items; those already indexed are ignored.
 */
export async function processItems(items: HydratedIndexItem[]) {
  if (!vectorIndex) {
    await initVectorSearch();
  }

  // An item counts as "missing" when the lookup finds nothing or throws.
  const isMissingFromIndex = (candidate: HydratedIndexItem): boolean => {
    try {
      return !vectorIndex!.get({ id: candidate.id });
    } catch {
      return true;
    }
  };

  const pending = items.filter(isMissingFromIndex);
  const total = pending.length;

  if (total === 0) {
    console.debug('No new items to vectorize');
    return;
  }

  console.debug(`Vectorizing ${total} new items...`);

  // Work through the pending items one batch at a time.
  const BATCH_SIZE = 5;
  let start = 0;
  while (start < total) {
    const end = start + BATCH_SIZE;
    const embedded = await Promise.all(pending.slice(start, end).map(vectorizeItem));

    embedded.forEach(entry => {
      vectorIndex!.add(entry);
    });

    // Persist after each batch so progress is not lost.
    await vectorIndex!.saveIndex('indexedDB');

    // Report progress
    console.debug(`Vectorized ${Math.min(end, total)}/${total} items`);

    start = end;
  }
}
|
||||
|
||||
/**
 * A `SearchResult` from `client-vector-search` whose `object` payload is
 * narrowed to an index item that carries its embedding vector.
 */
export interface VectorSearchResult extends SearchResult {
  /** The matched index item together with the embedding stored for it. */
  object: HydratedIndexItem & { embedding: number[] };
}
|
||||
|
||||
/**
 * Runs a semantic (embedding-based) search over the vector index.
 *
 * The index is initialised lazily on first use. The query is embedded and the
 * index is searched with IndexedDB as the storage backend.
 *
 * @param query - Free-text search query.
 * @param topK - Maximum number of results to request (defaults to 10).
 * @returns The matches reported by the index, or an empty array when the query
 *   is blank or no index is available.
 */
export async function searchVectors(query: string, topK: number = 10): Promise<VectorSearchResult[]> {
  // A blank query carries nothing to match on; embedding it would only yield
  // arbitrary nearest neighbours, so skip the model and the index entirely.
  if (query.trim().length === 0) {
    return [];
  }

  if (!vectorIndex) await initVectorSearch();

  // Guard instead of asserting non-null so a failed initialisation cannot
  // turn into a null dereference here.
  const index = vectorIndex;
  if (!index) {
    return [];
  }

  const queryEmbedding = await getEmbedding(query);
  const results = await index.search(queryEmbedding, {
    topK,
    useStorage: 'indexedDB'
  });

  return results as VectorSearchResult[];
}
|
||||
Reference in New Issue
Block a user