mirror of
https://github.com/BetterSEQTA/BetterSEQTA-Plus.git
synced 2026-06-05 19:24:39 +00:00
feat: early vector search testing
This commit is contained in:
@@ -75,6 +75,7 @@
|
|||||||
"@uiw/codemirror-extensions-color": "^4.23.10",
|
"@uiw/codemirror-extensions-color": "^4.23.10",
|
||||||
"@uiw/codemirror-theme-github": "^4.23.10",
|
"@uiw/codemirror-theme-github": "^4.23.10",
|
||||||
"autoprefixer": "^10.4.21",
|
"autoprefixer": "^10.4.21",
|
||||||
|
"client-vector-search": "^0.2.0",
|
||||||
"codemirror": "^6.0.1",
|
"codemirror": "^6.0.1",
|
||||||
"color": "^5.0.0",
|
"color": "^5.0.0",
|
||||||
"dompurify": "^3.2.4",
|
"dompurify": "^3.2.4",
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
import { type StaticCommandItem } from './commands';
|
import { type StaticCommandItem } from './commands';
|
||||||
import type { CombinedResult } from './types';
|
import type { CombinedResult } from './types';
|
||||||
import { createSearchIndexes, performSearch as doSearch } from './searchUtils';
|
import { createSearchIndexes, performSearch as doSearch } from './searchUtils';
|
||||||
import { highlightMatch, highlightSnippet } from './highlightUtils';
|
import { highlightMatch, highlightSnippet, stripHtmlButKeepHighlights } from './highlightUtils';
|
||||||
import Fuse from 'fuse.js';
|
import Fuse from 'fuse.js';
|
||||||
import Calculator from './Calculator.svelte';
|
import Calculator from './Calculator.svelte';
|
||||||
import { actionMap } from './indexing/actions';
|
import { actionMap } from './indexing/actions';
|
||||||
@@ -105,14 +105,14 @@
|
|||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
const performSearch = () => {
|
const performSearch = async () => {
|
||||||
isLoading = true;
|
isLoading = true;
|
||||||
selectedIndex = 0;
|
selectedIndex = 0;
|
||||||
|
|
||||||
const term = searchTerm.trim().toLowerCase();
|
const term = searchTerm.trim().toLowerCase();
|
||||||
|
|
||||||
if (commandsFuse && dynamicContentFuse) {
|
if (commandsFuse && dynamicContentFuse) {
|
||||||
combinedResults = doSearch(
|
combinedResults = await doSearch(
|
||||||
term,
|
term,
|
||||||
commandsFuse,
|
commandsFuse,
|
||||||
dynamicContentFuse,
|
dynamicContentFuse,
|
||||||
@@ -288,8 +288,9 @@
|
|||||||
onclick={() => executeItemAction(dynamicItem)}
|
onclick={() => executeItemAction(dynamicItem)}
|
||||||
>
|
>
|
||||||
<div class="flex items-center w-full">
|
<div class="flex items-center w-full">
|
||||||
|
<div class="flex-none w-8 h-8 text-xl font-IconFamily flex items-center justify-center {isSelected ? 'text-zinc-900 dark:text-white' : 'text-zinc-600 dark:text-zinc-400'}">{dynamicItem.metadata?.icon || '\ue924'}</div>
|
||||||
<span class="ml-4 text-lg truncate">
|
<span class="ml-4 text-lg truncate">
|
||||||
{@html highlightMatch(dynamicItem.text, searchTerm, result.matches)}
|
{@html stripHtmlButKeepHighlights(highlightMatch(dynamicItem.text, searchTerm, result.matches))}
|
||||||
</span>
|
</span>
|
||||||
<span class="flex-none ml-auto text-xs text-zinc-500 dark:text-zinc-400">
|
<span class="flex-none ml-auto text-xs text-zinc-500 dark:text-zinc-400">
|
||||||
{dynamicItem.category}
|
{dynamicItem.category}
|
||||||
@@ -297,7 +298,7 @@
|
|||||||
</div>
|
</div>
|
||||||
{#if dynamicItem.content}
|
{#if dynamicItem.content}
|
||||||
<div class="mt-1 ml-12 text-sm text-zinc-600 dark:text-zinc-400 line-clamp-2 text-start">
|
<div class="mt-1 ml-12 text-sm text-zinc-600 dark:text-zinc-400 line-clamp-2 text-start">
|
||||||
{@html highlightSnippet(dynamicItem.content, searchTerm, result.matches)}
|
{@html stripHtmlButKeepHighlights(highlightSnippet(dynamicItem.content, searchTerm, result.matches))}
|
||||||
</div>
|
</div>
|
||||||
{/if}
|
{/if}
|
||||||
</button>
|
</button>
|
||||||
|
|||||||
@@ -0,0 +1,597 @@
|
|||||||
|
# client-vector-search
|
||||||
|
|
||||||
|
A client side vector search library that can embed, search, and cache. Works on the browser and server side.
|
||||||
|
|
||||||
|
It outperforms OpenAI's text-embedding-ada-002 and is way faster than Pinecone and other VectorDBs.
|
||||||
|
|
||||||
|
I'm the founder of [searchbase.app](https://searchbase.app) and we needed this for our product and customers. We'll be using this library in production. You can be sure it'll be maintained and improved.
|
||||||
|
|
||||||
|
- Embed documents using transformers by default: gte-small (~30mb).
|
||||||
|
- Calculate cosine similarity between embeddings.
|
||||||
|
- Create an index and search on the client side
|
||||||
|
- Cache vectors with browser caching support.
|
||||||
|
|
||||||
|
Lots of improvements are coming!
|
||||||
|
|
||||||
|
## Roadmap
|
||||||
|
|
||||||
|
Our goal is to build a super simple, fast vector search that works with a couple hundred to a few thousand vectors. ~1k vectors per user covers 99% of the use cases.
|
||||||
|
|
||||||
|
We'll initially keep things super simple and sub-100ms.
|
||||||
|
|
||||||
|
### TODOs
|
||||||
|
- [ ] add HNSW index that works on node and browser env, don't rely on hnsw binder libs
|
||||||
|
- [ ] add a proper testing suite and ci/cd for the lib
|
||||||
|
- [ ] simple health tests
|
||||||
|
- [ ] mock the @xenova/transformers for jest, it's not happy with it
|
||||||
|
- [ ] performance tests, recall, memory usage, cpu usage etc.
|
||||||
|
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm i client-vector-search
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Quickstart
|
||||||
|
|
||||||
|
This library provides a plug-and-play solution for embedding and vector search. It's designed to be easy to use, efficient, and versatile. Here's a quick start guide:
|
||||||
|
|
||||||
|
|
||||||
|
```ts
|
||||||
|
import { getEmbedding, EmbeddingIndex } from 'client-vector-search';
|
||||||
|
|
||||||
|
// getEmbedding is an async function, so you need to use 'await' or '.then()' to get the result
|
||||||
|
const embedding = await getEmbedding("Apple"); // Returns embedding as number[]
|
||||||
|
|
||||||
|
// Each object should have an 'embedding' property of type number[]
|
||||||
|
const initialObjects = [
|
||||||
|
{ id: 1, name: "Apple", embedding: embedding },
|
||||||
|
{ id: 2, name: "Banana", embedding: await getEmbedding("Banana") },
|
||||||
|
{ id: 3, name: "Cheddar", embedding: await getEmbedding("Cheddar")},
|
||||||
|
{ id: 4, name: "Space", embedding: await getEmbedding("Space")},
|
||||||
|
{ id: 5, name: "database", embedding: await getEmbedding("database")},
|
||||||
|
];
|
||||||
|
const index = new EmbeddingIndex(initialObjects); // Creates an index
|
||||||
|
|
||||||
|
// The query should be an embedding of type number[]
|
||||||
|
const queryEmbedding = await getEmbedding('Fruit'); // Query embedding
|
||||||
|
const results = await index.search(queryEmbedding, { topK: 5 }); // Returns top similar objects
|
||||||
|
|
||||||
|
// specify the storage type
|
||||||
|
await index.saveIndex('indexedDB');
|
||||||
|
const results = await index.search([1, 2, 3], {
|
||||||
|
topK: 5,
|
||||||
|
useStorage: 'indexedDB',
|
||||||
|
// storageOptions: { // use only if you overrode the defaults
|
||||||
|
// indexedDBName: 'clientVectorDB',
|
||||||
|
// indexedDBObjectStoreName: 'ClientEmbeddingStore',
|
||||||
|
// },
|
||||||
|
});
|
||||||
|
|
||||||
|
console.log(results);
|
||||||
|
|
||||||
|
await index.deleteIndexedDB(); // if you overrode default, specify db name
|
||||||
|
```
|
||||||
|
|
||||||
|
## Trouble-shooting
|
||||||
|
|
||||||
|
### NextJS
|
||||||
|
To use it inside NextJS projects you'll need to update the `next.config.js` file to include the following:
|
||||||
|
|
||||||
|
```js
|
||||||
|
module.exports = {
|
||||||
|
// Override the default webpack configuration
|
||||||
|
webpack: (config) => {
|
||||||
|
// See https://webpack.js.org/configuration/resolve/#resolvealias
|
||||||
|
config.resolve.alias = {
|
||||||
|
...config.resolve.alias,
|
||||||
|
sharp$: false,
|
||||||
|
"onnxruntime-node$": false,
|
||||||
|
};
|
||||||
|
return config;
|
||||||
|
},
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Model load after page is loaded
|
||||||
|
|
||||||
|
You can initialize the model before using it to generate embeddings. This will ensure that the model is loaded before you use it and provide a better UX.
|
||||||
|
|
||||||
|
```js
|
||||||
|
import { initializeModel } from "client-vector-search"
|
||||||
|
...
|
||||||
|
useEffect(() => {
|
||||||
|
try {
|
||||||
|
initializeModel();
|
||||||
|
} catch (e) {
|
||||||
|
console.log(e);
|
||||||
|
}
|
||||||
|
}, []);
|
||||||
|
```
|
||||||
|
|
||||||
|
## Usage Guide
|
||||||
|
|
||||||
|
This guide provides a step-by-step walkthrough of the library's main features. It covers everything from generating embeddings for a string to performing operations on the index such as adding, updating, and removing objects. It also includes instructions on how to save the index to a database and perform search operations within it.
|
||||||
|
|
||||||
|
Until we have a reference documentation, you can find all the methods and their usage in this guide. Each step is accompanied by a code snippet to illustrate the usage of the method in question. Make sure to follow along and try out the examples in your own environment to get a better understanding of how everything works.
|
||||||
|
|
||||||
|
Let's get started!
|
||||||
|
|
||||||
|
### Step 1: Generate Embeddings for String
|
||||||
|
Generate embeddings for a given string using the `getEmbedding` method.
|
||||||
|
|
||||||
|
```ts
|
||||||
|
const embedding = await getEmbedding("Apple"); // Returns embedding as number[]
|
||||||
|
```
|
||||||
|
> **Note**: `getEmbedding` is asynchronous; make sure to use `await`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Step 2: Calculate Cosine Similarity
|
||||||
|
Calculate the cosine similarity between two embeddings.
|
||||||
|
|
||||||
|
```ts
|
||||||
|
const similarity = cosineSimilarity(embedding1, embedding2, 6);
|
||||||
|
```
|
||||||
|
> **Note**: Both embeddings should be of the same length.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Step 3: Create an Index
|
||||||
|
Create an index with an initial array of objects. Each object must have an 'embedding' property.
|
||||||
|
|
||||||
|
```ts
|
||||||
|
const initialObjects = [...];
|
||||||
|
const index = new EmbeddingIndex(initialObjects);
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Step 4: Add to Index
|
||||||
|
Add an object to the index.
|
||||||
|
|
||||||
|
```ts
|
||||||
|
const objectToAdd = { id: 6, name: 'Cat', embedding: await getEmbedding('Cat') };
|
||||||
|
index.add(objectToAdd);
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Step 5: Update Index
|
||||||
|
Update an existing object in the index.
|
||||||
|
|
||||||
|
```ts
|
||||||
|
const vectorToUpdate = { id: 6, name: 'Dog', embedding: await getEmbedding('Dog') };
|
||||||
|
index.update({ id: 6 }, vectorToUpdate);
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Step 6: Remove from Index
|
||||||
|
Remove an object from the index.
|
||||||
|
|
||||||
|
```ts
|
||||||
|
index.remove({ id: 6 });
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Step 7: Retrieve from Index
|
||||||
|
Retrieve an object from the index.
|
||||||
|
|
||||||
|
```ts
|
||||||
|
const vector = index.get({ id: 1 });
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Step 8: Search the Index
|
||||||
|
Search the index with a query embedding.
|
||||||
|
|
||||||
|
```ts
|
||||||
|
const queryEmbedding = await getEmbedding('Fruit');
|
||||||
|
const results = await index.search(queryEmbedding, { topK: 5 });
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Step 9: Print the Index
|
||||||
|
Print the entire index to the console.
|
||||||
|
|
||||||
|
```ts
|
||||||
|
index.printIndex();
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Step 10: Save Index to IndexedDB (for browser)
|
||||||
|
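Save the index to a persistent IndexedDB database. Note that this requires an environment where `indexedDB` is defined (such as a browser); otherwise an "IndexedDB is not supported" error is thrown.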
|
||||||
|
|
||||||
|
```ts
|
||||||
|
await index.saveIndex("indexedDB", { DBName: "clientVectorDB", objectStoreName:"ClientEmbeddingStore"})
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Important: Search in indexedDB
|
||||||
|
Perform a search operation in the IndexedDB.
|
||||||
|
|
||||||
|
```ts
|
||||||
|
const results = await index.search(queryEmbedding, {
|
||||||
|
topK: 5,
|
||||||
|
useStorage: "indexedDB",
|
||||||
|
storageOptions: { // only if you want to override the default options, defaults are below
|
||||||
|
indexedDBName: 'clientVectorDB',
|
||||||
|
indexedDBObjectStoreName: 'ClientEmbeddingStore'
|
||||||
|
}
|
||||||
|
});
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Delete Database
|
||||||
|
To delete an entire database.
|
||||||
|
|
||||||
|
```ts
|
||||||
|
await IndexedDbManager.deleteIndexedDB("clientVectorDB");
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Delete Object Store
|
||||||
|
To delete an object store from a database.
|
||||||
|
|
||||||
|
```ts
|
||||||
|
await IndexedDbManager.deleteIndexedDBObjectStore("clientVectorDB", "ClientEmbeddingStore");
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Retrieve All Objects
|
||||||
|
To retrieve all objects from a specific object store.
|
||||||
|
|
||||||
|
```ts
|
||||||
|
const allObjects = await IndexedDbManager.getAllObjectsFromIndexedDB("clientVectorDB", "ClientEmbeddingStore");
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# THE MAIN INDEX.TS FILE THAT YOU ARE IMPORTING FROM
|
||||||
|
```ts
|
||||||
|
const DEFAULT_TOP_K = 3;
|
||||||
|
|
||||||
|
interface Filter {
|
||||||
|
[key: string]: any;
|
||||||
|
}
|
||||||
|
|
||||||
|
import Cache from './cache';
|
||||||
|
import { IndexedDbManager } from './indexedDB';
|
||||||
|
import { cosineSimilarity } from './utils';
|
||||||
|
export { ExperimentalHNSWIndex } from './hnsw';
|
||||||
|
|
||||||
|
// uncomment if you want to test indexedDB implementation in node env for faster dev cycle
|
||||||
|
// import { IDBFactory } from 'fake-indexeddb';
|
||||||
|
// const indexedDB = new IDBFactory();
|
||||||
|
|
||||||
|
export interface SearchResult {
|
||||||
|
similarity: number;
|
||||||
|
object: any;
|
||||||
|
}
|
||||||
|
|
||||||
|
type StorageOptions = 'indexedDB' | 'localStorage' | 'none';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Interface for search options in the EmbeddingIndex class.
|
||||||
|
* topK: The number of top similar items to return.
|
||||||
|
* filter: An optional filter to apply to the objects before searching.
|
||||||
|
* useStorage: A flag to indicate whether to use storage options like indexedDB or localStorage.
|
||||||
|
*/
|
||||||
|
interface SearchOptions {
|
||||||
|
topK?: number;
|
||||||
|
filter?: Filter;
|
||||||
|
useStorage?: StorageOptions;
|
||||||
|
storageOptions?: { indexedDBName: string; indexedDBObjectStoreName: string }; // TODO: generalize it to localStorage as well
|
||||||
|
}
|
||||||
|
|
||||||
|
const cacheInstance = Cache.getInstance();
|
||||||
|
|
||||||
|
let pipe: any;
|
||||||
|
let currentModel: string;
|
||||||
|
|
||||||
|
export const initializeModel = async (
|
||||||
|
model: string = 'Xenova/gte-small',
|
||||||
|
): Promise<void> => {
|
||||||
|
if (model !== currentModel) {
|
||||||
|
const transformersModule = await import('@xenova/transformers');
|
||||||
|
const pipeline = transformersModule.pipeline;
|
||||||
|
pipe = await pipeline('feature-extraction', model);
|
||||||
|
currentModel = model;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
export const getEmbedding = async (
|
||||||
|
text: string,
|
||||||
|
precision: number = 7,
|
||||||
|
options = { pooling: 'mean', normalize: false },
|
||||||
|
model = 'Xenova/gte-small',
|
||||||
|
): Promise<number[]> => {
|
||||||
|
const cachedEmbedding = cacheInstance.get(text);
|
||||||
|
if (cachedEmbedding) {
|
||||||
|
return Promise.resolve(cachedEmbedding);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (model !== currentModel) {
|
||||||
|
await initializeModel(model);
|
||||||
|
}
|
||||||
|
|
||||||
|
const output = await pipe(text, options);
|
||||||
|
const roundedOutput = Array.from(output.data as number[]).map(
|
||||||
|
(value: number) => parseFloat(value.toFixed(precision)),
|
||||||
|
);
|
||||||
|
cacheInstance.set(text, roundedOutput);
|
||||||
|
return Array.from(roundedOutput);
|
||||||
|
};
|
||||||
|
|
||||||
|
export class EmbeddingIndex {
|
||||||
|
private objects: Filter[];
|
||||||
|
private keys: string[];
|
||||||
|
|
||||||
|
constructor(initialObjects?: Filter[]) {
|
||||||
|
// TODO: add support for options while creating index such as {... indexedDB: true, ...}
|
||||||
|
this.objects = [];
|
||||||
|
this.keys = [];
|
||||||
|
if (initialObjects && initialObjects.length > 0) {
|
||||||
|
initialObjects.forEach((obj) => this.validateAndAdd(obj));
|
||||||
|
if (initialObjects[0]) {
|
||||||
|
this.keys = Object.keys(initialObjects[0]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private findVectorIndex(filter: Filter): number {
|
||||||
|
return this.objects.findIndex((object) =>
|
||||||
|
Object.keys(filter).every((key) => object[key] === filter[key]),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
private validateAndAdd(obj: Filter) {
|
||||||
|
if (!Array.isArray(obj.embedding) || obj.embedding.some(isNaN)) {
|
||||||
|
throw new Error(
|
||||||
|
'Object must have an embedding property of type number[]',
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if (this.keys.length === 0) {
|
||||||
|
this.keys = Object.keys(obj);
|
||||||
|
} else if (!this.keys.every((key) => key in obj)) {
|
||||||
|
throw new Error(
|
||||||
|
'Object must have the same properties as the initial objects',
|
||||||
|
);
|
||||||
|
}
|
||||||
|
this.objects.push(obj);
|
||||||
|
}
|
||||||
|
|
||||||
|
add(obj: Filter) {
|
||||||
|
this.validateAndAdd(obj);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Method to update an existing vector in the index
|
||||||
|
update(filter: Filter, vector: Filter) {
|
||||||
|
const index = this.findVectorIndex(filter);
|
||||||
|
if (index === -1) {
|
||||||
|
throw new Error('Vector not found');
|
||||||
|
}
|
||||||
|
if (vector.hasOwnProperty('embedding')) {
|
||||||
|
// Validate and add the new vector
|
||||||
|
this.validateAndAdd(vector);
|
||||||
|
}
|
||||||
|
// Replace the old vector with the new one
|
||||||
|
this.objects[index] = Object.assign(this.objects[index] as Filter, vector);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Method to remove a vector from the index
|
||||||
|
remove(filter: Filter) {
|
||||||
|
const index = this.findVectorIndex(filter);
|
||||||
|
if (index === -1) {
|
||||||
|
throw new Error('Vector not found');
|
||||||
|
}
|
||||||
|
// Remove the vector from the index
|
||||||
|
this.objects.splice(index, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Method to remove multiple vectors from the index
|
||||||
|
removeBatch(filters: Filter[]) {
|
||||||
|
filters.forEach((filter) => {
|
||||||
|
const index = this.findVectorIndex(filter);
|
||||||
|
if (index !== -1) {
|
||||||
|
// Remove the vector from the index
|
||||||
|
this.objects.splice(index, 1);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Method to retrieve a vector from the index
|
||||||
|
get(filter: Filter) {
|
||||||
|
const vector = this.objects[this.findVectorIndex(filter)];
|
||||||
|
return vector || null;
|
||||||
|
}
|
||||||
|
|
||||||
|
size(): number {
|
||||||
|
// Returns the size of the index
|
||||||
|
return this.objects.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
clear() {
|
||||||
|
this.objects = [];
|
||||||
|
}
|
||||||
|
|
||||||
|
async search(
|
||||||
|
queryEmbedding: number[],
|
||||||
|
options: SearchOptions = {
|
||||||
|
topK: 3,
|
||||||
|
useStorage: 'none',
|
||||||
|
storageOptions: {
|
||||||
|
indexedDBName: 'clientVectorDB',
|
||||||
|
indexedDBObjectStoreName: 'ClientEmbeddingStore',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
): Promise<SearchResult[]> {
|
||||||
|
const topK = options.topK || DEFAULT_TOP_K;
|
||||||
|
const filter = options.filter || {};
|
||||||
|
const useStorage = options.useStorage || 'none';
|
||||||
|
|
||||||
|
if (useStorage === 'indexedDB') {
|
||||||
|
const DBname = options.storageOptions?.indexedDBName || 'clientVectorDB';
|
||||||
|
const objectStoreName =
|
||||||
|
options.storageOptions?.indexedDBObjectStoreName ||
|
||||||
|
'ClientEmbeddingStore';
|
||||||
|
|
||||||
|
if (typeof indexedDB === 'undefined') {
|
||||||
|
console.error('IndexedDB is not supported');
|
||||||
|
throw new Error('IndexedDB is not supported');
|
||||||
|
}
|
||||||
|
const results = await this.loadAndSearchFromIndexedDB(
|
||||||
|
DBname,
|
||||||
|
objectStoreName,
|
||||||
|
queryEmbedding,
|
||||||
|
topK,
|
||||||
|
filter,
|
||||||
|
);
|
||||||
|
return results;
|
||||||
|
} else {
|
||||||
|
// Compute similarities
|
||||||
|
const similarities = this.objects
|
||||||
|
.filter((object) =>
|
||||||
|
Object.keys(filter).every((key) => object[key] === filter[key]),
|
||||||
|
)
|
||||||
|
.map((obj) => ({
|
||||||
|
similarity: cosineSimilarity(queryEmbedding, obj.embedding),
|
||||||
|
object: obj,
|
||||||
|
}));
|
||||||
|
|
||||||
|
// Sort by similarity and return topK results
|
||||||
|
return similarities
|
||||||
|
.sort((a, b) => b.similarity - a.similarity)
|
||||||
|
.slice(0, topK);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printIndex() {
|
||||||
|
console.log('Index Content:');
|
||||||
|
this.objects.forEach((obj, idx) => {
|
||||||
|
console.log(`Item ${idx + 1}:`, obj);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
async saveIndex(
|
||||||
|
storageType: string,
|
||||||
|
options: { DBName: string; objectStoreName: string } = {
|
||||||
|
DBName: 'clientVectorDB',
|
||||||
|
objectStoreName: 'ClientEmbeddingStore',
|
||||||
|
},
|
||||||
|
) {
|
||||||
|
if (storageType === 'indexedDB') {
|
||||||
|
await this.saveToIndexedDB(options.DBName, options.objectStoreName);
|
||||||
|
} else {
|
||||||
|
throw new Error(
|
||||||
|
`Unsupported storage type: ${storageType} \n Supported storage types: "indexedDB"`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async saveToIndexedDB(
|
||||||
|
DBname: string = 'clientVectorDB',
|
||||||
|
objectStoreName: string = 'ClientEmbeddingStore',
|
||||||
|
): Promise<void> {
|
||||||
|
if (typeof indexedDB === 'undefined') {
|
||||||
|
console.error('IndexedDB is not defined');
|
||||||
|
throw new Error('IndexedDB is not supported');
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!this.objects || this.objects.length === 0) {
|
||||||
|
throw new Error('Index is empty. Nothing to save');
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const db = await IndexedDbManager.create(DBname, objectStoreName);
|
||||||
|
await db.addToIndexedDB(this.objects);
|
||||||
|
console.log(
|
||||||
|
`Index saved to database '${DBname}' object store '${objectStoreName}'`,
|
||||||
|
);
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Error saving index to database:', error);
|
||||||
|
throw new Error('Error saving index to database');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async loadAndSearchFromIndexedDB(
|
||||||
|
DBname: string = 'clientVectorDB',
|
||||||
|
objectStoreName: string = 'ClientEmbeddingStore',
|
||||||
|
queryEmbedding: number[],
|
||||||
|
topK: number,
|
||||||
|
filter: { [key: string]: any },
|
||||||
|
): Promise<SearchResult[]> {
|
||||||
|
const db = await IndexedDbManager.create(DBname, objectStoreName);
|
||||||
|
const generator = db.dbGenerator();
|
||||||
|
const results: { similarity: number; object: any }[] = [];
|
||||||
|
|
||||||
|
for await (const record of generator) {
|
||||||
|
if (Object.keys(filter).every((key) => record[key] === filter[key])) {
|
||||||
|
const similarity = cosineSimilarity(queryEmbedding, record.embedding);
|
||||||
|
results.push({ similarity, object: record });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
results.sort((a, b) => b.similarity - a.similarity);
|
||||||
|
return results.slice(0, topK);
|
||||||
|
}
|
||||||
|
|
||||||
|
async deleteIndexedDB(DBname: string = 'clientVectorDB'): Promise<void> {
|
||||||
|
if (typeof indexedDB === 'undefined') {
|
||||||
|
console.error('IndexedDB is not defined');
|
||||||
|
throw new Error('IndexedDB is not supported');
|
||||||
|
}
|
||||||
|
return new Promise((resolve, reject) => {
|
||||||
|
const request = indexedDB.deleteDatabase(DBname);
|
||||||
|
|
||||||
|
request.onsuccess = () => {
|
||||||
|
console.log(`Database '${DBname}' deleted`);
|
||||||
|
resolve();
|
||||||
|
};
|
||||||
|
request.onerror = (event) => {
|
||||||
|
console.error('Failed to delete database', event);
|
||||||
|
reject(new Error('Failed to delete database'));
|
||||||
|
};
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
async deleteIndexedDBObjectStore(
|
||||||
|
DBname: string = 'clientVectorDB',
|
||||||
|
objectStoreName: string = 'ClientEmbeddingStore',
|
||||||
|
): Promise<void> {
|
||||||
|
const db = await IndexedDbManager.create(DBname, objectStoreName);
|
||||||
|
|
||||||
|
try {
|
||||||
|
await db.deleteIndexedDBObjectStoreFromDB(DBname, objectStoreName);
|
||||||
|
console.log(
|
||||||
|
`Object store '${objectStoreName}' deleted from database '${DBname}'`,
|
||||||
|
);
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Error deleting object store:', error);
|
||||||
|
throw new Error('Error deleting object store');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async getAllObjectsFromIndexedDB(
|
||||||
|
DBname: string = 'clientVectorDB',
|
||||||
|
objectStoreName: string = 'ClientEmbeddingStore',
|
||||||
|
): Promise<any[]> {
|
||||||
|
const db = await IndexedDbManager.create(DBname, objectStoreName);
|
||||||
|
const objects: any[] = [];
|
||||||
|
for await (const record of db.dbGenerator()) {
|
||||||
|
objects.push(record);
|
||||||
|
}
|
||||||
|
return objects;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
@@ -28,7 +28,7 @@ const staticCommands: StaticCommandItem[] = [
|
|||||||
window.location.hash = '?page=/home';
|
window.location.hash = '?page=/home';
|
||||||
loadHomePage();
|
loadHomePage();
|
||||||
},
|
},
|
||||||
priority: 10
|
priority: 4
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
id: 'messages',
|
id: 'messages',
|
||||||
@@ -40,7 +40,7 @@ const staticCommands: StaticCommandItem[] = [
|
|||||||
action: () => {
|
action: () => {
|
||||||
window.location.hash = '?page=/messages';
|
window.location.hash = '?page=/messages';
|
||||||
},
|
},
|
||||||
priority: 10
|
priority: 4
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
id: 'timetable',
|
id: 'timetable',
|
||||||
@@ -52,7 +52,7 @@ const staticCommands: StaticCommandItem[] = [
|
|||||||
action: () => {
|
action: () => {
|
||||||
window.location.hash = '?page=/timetable';
|
window.location.hash = '?page=/timetable';
|
||||||
},
|
},
|
||||||
priority: 10
|
priority: 4
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
id: 'assessments',
|
id: 'assessments',
|
||||||
@@ -64,7 +64,7 @@ const staticCommands: StaticCommandItem[] = [
|
|||||||
action: () => {
|
action: () => {
|
||||||
window.location.hash = '?page=/assessments';
|
window.location.hash = '?page=/assessments';
|
||||||
},
|
},
|
||||||
priority: 10
|
priority: 4
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
id: 'toggle-dark-mode',
|
id: 'toggle-dark-mode',
|
||||||
@@ -72,7 +72,7 @@ const staticCommands: StaticCommandItem[] = [
|
|||||||
category: 'action',
|
category: 'action',
|
||||||
text: 'Toggle Dark Mode',
|
text: 'Toggle Dark Mode',
|
||||||
action: () => settingsState.DarkMode = !settingsState.DarkMode,
|
action: () => settingsState.DarkMode = !settingsState.DarkMode,
|
||||||
priority: 5,
|
priority: 2,
|
||||||
keywords: ['theme', 'appearance']
|
keywords: ['theme', 'appearance']
|
||||||
}
|
}
|
||||||
];
|
];
|
||||||
|
|||||||
@@ -1,12 +1,13 @@
|
|||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import { highlightMatch, highlightSnippet } from '../highlightUtils';
|
import { highlightMatch, highlightSnippet, stripHtmlButKeepHighlights } from '../highlightUtils';
|
||||||
import type { DynamicContentItem } from '../dynamicSearch';
|
import type { DynamicContentItem } from '../dynamicSearch';
|
||||||
|
import type { FuseResultMatch } from '../types';
|
||||||
|
|
||||||
const { item, isSelected, searchTerm, result } = $props<{
|
const { item, isSelected, searchTerm, matches } = $props<{
|
||||||
item: DynamicContentItem;
|
item: DynamicContentItem;
|
||||||
isSelected: boolean;
|
isSelected: boolean;
|
||||||
searchTerm: string;
|
searchTerm: string;
|
||||||
result: { matches: string[] };
|
matches?: readonly FuseResultMatch[];
|
||||||
}>();
|
}>();
|
||||||
|
|
||||||
/* const dueDate = $derived(item.metadata?.dueDate
|
/* const dueDate = $derived(item.metadata?.dueDate
|
||||||
@@ -28,12 +29,11 @@
|
|||||||
<button
|
<button
|
||||||
class="w-full flex flex-col px-2 py-1.5 rounded-lg select-none cursor-pointer group
|
class="w-full flex flex-col px-2 py-1.5 rounded-lg select-none cursor-pointer group
|
||||||
{isSelected ? 'bg-zinc-900/5 dark:bg-white/10 text-zinc-900 dark:text-white' : 'hover:bg-zinc-500/5 dark:hover:bg-white/5 text-zinc-800 dark:text-zinc-200'}"
|
{isSelected ? 'bg-zinc-900/5 dark:bg-white/10 text-zinc-900 dark:text-white' : 'hover:bg-zinc-500/5 dark:hover:bg-white/5 text-zinc-800 dark:text-zinc-200'}"
|
||||||
onclick={() => { item.action(); }}
|
|
||||||
>
|
>
|
||||||
<div class="flex items-center w-full">
|
<div class="flex items-center w-full">
|
||||||
<div class="flex-none w-8 h-8 text-xl font-IconFamily flex items-center justify-center {isSelected ? 'text-zinc-900 dark:text-white' : 'text-zinc-600 dark:text-zinc-400'}">{item.icon}</div>
|
<div class="flex-none w-8 h-8 text-xl font-IconFamily flex items-center justify-center {isSelected ? 'text-zinc-900 dark:text-white' : 'text-zinc-600 dark:text-zinc-400'}">{item.metadata?.icon || '\ue924'}</div>
|
||||||
<span class="ml-4 text-lg truncate">
|
<span class="ml-4 text-lg truncate">
|
||||||
{@html highlightMatch(item.text, searchTerm, result.matches)}
|
{@html stripHtmlButKeepHighlights(highlightMatch(item.text, searchTerm, matches))}
|
||||||
</span>
|
</span>
|
||||||
<span class="flex-none ml-auto text-xs text-zinc-500 dark:text-zinc-400">
|
<span class="flex-none ml-auto text-xs text-zinc-500 dark:text-zinc-400">
|
||||||
{item.category}
|
{item.category}
|
||||||
@@ -41,16 +41,22 @@
|
|||||||
</div>
|
</div>
|
||||||
{#if item.content}
|
{#if item.content}
|
||||||
<div class="mt-1 ml-12 text-sm text-zinc-600 dark:text-zinc-400 line-clamp-2 text-start">
|
<div class="mt-1 ml-12 text-sm text-zinc-600 dark:text-zinc-400 line-clamp-2 text-start">
|
||||||
{@html highlightSnippet(item.content, searchTerm, result.matches)}
|
{@html stripHtmlButKeepHighlights(highlightSnippet(item.content, searchTerm, matches))}
|
||||||
</div>
|
</div>
|
||||||
{/if}
|
{/if}
|
||||||
</button>
|
</button>
|
||||||
|
|
||||||
<style>
|
<style>
|
||||||
.highlight {
|
:global(.highlight) {
|
||||||
background-color: rgba(255, 213, 0, 0.3);
|
background-color: rgba(255, 213, 0, 0.3);
|
||||||
font-weight: 500;
|
font-weight: 500;
|
||||||
border-radius: 2px;
|
border-radius: 2px;
|
||||||
|
padding: 0 1px;
|
||||||
|
margin: 0 -1px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.dark :global(.highlight) {
|
||||||
|
background-color: rgba(255, 230, 100, 0.4);
|
||||||
}
|
}
|
||||||
|
|
||||||
.due-badge {
|
.due-badge {
|
||||||
|
|||||||
@@ -1,5 +1,48 @@
|
|||||||
import type { FuseResultMatch, MatchIndices } from './types';
|
import type { FuseResultMatch, MatchIndices } from './types';
|
||||||
|
|
||||||
|
/**
 * Removes every HTML tag from a string and flattens it onto a single line.
 *
 * Anything matching `<...>` is dropped, then each newline character is
 * replaced with a single space.
 *
 * @param html - Markup (or plain text) to clean. Falsy input yields `''`.
 * @returns The text content with tags removed and newlines turned into spaces.
 */
export function stripHtmlTags(html: string): string {
  if (!html) return '';
  // A string pattern passed to `replace` only touches the first occurrence,
  // so a global regex is required to flatten *all* newlines.
  return html.replace(/<[^>]*>/g, '').replace(/\n/g, ' ');
}
|
||||||
|
|
||||||
|
/**
 * Removes HTML tags from a string, but preserves `<span class="highlight">`
 * wrappers together with the `</span>` that actually closes them.
 *
 * The markup is scanned once, tag by tag, while a stack records which `<span>`
 * elements are currently open. That lets each `</span>` be attributed to the
 * span it really closes, so:
 * - closers of unrelated spans (and stray closers) are stripped,
 * - a highlight that contains other spans still keeps its own closer,
 * - plain text is never rewritten (no placeholder substitution is involved).
 *
 * Only the exact opening tag `<span class="highlight">` is treated as a
 * highlight; every other tag is removed.
 *
 * @param html - Markup that may contain highlight spans. A falsy value yields `''`.
 * @returns The text with only highlight spans left in place. Highlights that
 *   were never closed in the input are closed at the end of the output.
 */
export function stripHtmlButKeepHighlights(html: string): string {
  if (!html) return '';

  const HIGHLIGHT_OPEN = '<span class="highlight">';
  const SPAN_CLOSE = '</span>';

  // One entry per currently open <span>; `true` marks a highlight span.
  const openSpans: boolean[] = [];

  const stripped = html.replace(/<[^>]*>/g, (tag) => {
    if (tag === HIGHLIGHT_OPEN) {
      openSpans.push(true);
      return tag;
    }
    if (/^<span[\s>]/i.test(tag)) {
      // Some other span: drop the tag but remember it so that its closer is
      // not mistaken for the closer of an enclosing highlight.
      openSpans.push(false);
      return '';
    }
    if (/^<\/span\s*>$/i.test(tag)) {
      // Keep the closer only when it ends a highlight span. A stray closer
      // (empty stack) pops `undefined` and is stripped.
      return openSpans.pop() === true ? SPAN_CLOSE : '';
    }
    // Any other tag is removed.
    return '';
  });

  // Close highlights the input left open so the output stays well-formed.
  const unclosedHighlights = openSpans.filter(Boolean).length;
  return stripped + SPAN_CLOSE.repeat(unclosedHighlights);
}
|
||||||
|
|
||||||
export function highlightMatch(
|
export function highlightMatch(
|
||||||
text: string,
|
text: string,
|
||||||
term: string,
|
term: string,
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ import { getAll, put, clear, remove } from './db';
|
|||||||
import { jobs } from './jobs';
|
import { jobs } from './jobs';
|
||||||
import { renderComponentMap } from './renderComponents';
|
import { renderComponentMap } from './renderComponents';
|
||||||
import type { IndexItem, HydratedIndexItem, Job, JobContext } from './types';
|
import type { IndexItem, HydratedIndexItem, Job, JobContext } from './types';
|
||||||
|
import { processItems } from '../vectorSearch';
|
||||||
|
|
||||||
const META_STORE = 'meta';
|
const META_STORE = 'meta';
|
||||||
const LOCK_KEY = 'bsq-indexer-lock';
|
const LOCK_KEY = 'bsq-indexer-lock';
|
||||||
@@ -94,6 +95,8 @@ export async function runIndexing(): Promise<void> {
|
|||||||
let completedJobs = 0;
|
let completedJobs = 0;
|
||||||
dispatchProgress(completedJobs, jobIds.length, true);
|
dispatchProgress(completedJobs, jobIds.length, true);
|
||||||
|
|
||||||
|
const allNewItems: HydratedIndexItem[] = [];
|
||||||
|
|
||||||
for (const jobId of jobIds) {
|
for (const jobId of jobIds) {
|
||||||
const job = jobs[jobId];
|
const job = jobs[jobId];
|
||||||
const lastRun = await getLastRunMeta(jobId);
|
const lastRun = await getLastRunMeta(jobId);
|
||||||
@@ -136,6 +139,13 @@ export async function runIndexing(): Promise<void> {
|
|||||||
await setStoredItems(merged);
|
await setStoredItems(merged);
|
||||||
await updateLastRunMeta(jobId);
|
await updateLastRunMeta(jobId);
|
||||||
|
|
||||||
|
// Add to our collection of new items for vector processing
|
||||||
|
const hydratedItems = merged.map(item => ({
|
||||||
|
...item,
|
||||||
|
renderComponent: renderComponentMap[job.renderComponentId]
|
||||||
|
}));
|
||||||
|
allNewItems.push(...hydratedItems);
|
||||||
|
|
||||||
console.debug(`%c[Indexer] ✅ ${job.label}: ${newItems.length} items indexed`, 'color: #00c46f');
|
console.debug(`%c[Indexer] ✅ ${job.label}: ${newItems.length} items indexed`, 'color: #00c46f');
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.debug(`%c[Indexer] ❌ ${job.label} failed:`, 'color: red');
|
console.debug(`%c[Indexer] ❌ ${job.label} failed:`, 'color: red');
|
||||||
@@ -146,6 +156,12 @@ export async function runIndexing(): Promise<void> {
|
|||||||
dispatchProgress(completedJobs, jobIds.length, true);
|
dispatchProgress(completedJobs, jobIds.length, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Process all new items through vector search
|
||||||
|
if (allNewItems.length > 0) {
|
||||||
|
console.debug(`%c[Indexer] Processing ${allNewItems.length} items for vector search...`, 'color: #4ea1ff');
|
||||||
|
await processItems(allNewItems);
|
||||||
|
}
|
||||||
|
|
||||||
stopHeartbeat();
|
stopHeartbeat();
|
||||||
dispatchProgress(completedJobs, jobIds.length, false);
|
dispatchProgress(completedJobs, jobIds.length, false);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -228,14 +228,6 @@ export const jobs: Record<string, Job> = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
offset += limit;
|
offset += limit;
|
||||||
|
|
||||||
// If we've processed 500 messages and haven't found any existing ones,
|
|
||||||
// assume these are all new (first run) and stop here to avoid overwhelming
|
|
||||||
if (offset >= 500 && consecutiveExisting === 0) {
|
|
||||||
console.debug('[Messages Job] Processed 500 new messages, stopping for now');
|
|
||||||
hasMore = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error fetching messages:', error);
|
console.error('Error fetching messages:', error);
|
||||||
break;
|
break;
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
import Fuse, { type FuseResult } from 'fuse.js';
|
import Fuse, { type FuseResult } from 'fuse.js';
|
||||||
import { getStaticCommands, type StaticCommandItem } from './commands';
|
import { getStaticCommands, type StaticCommandItem } from './commands';
|
||||||
import { type DynamicContentItem, getDynamicItems } from './dynamicSearch';
|
import { getDynamicItems } from './dynamicSearch';
|
||||||
import type { CombinedResult } from './types';
|
import type { CombinedResult } from './types';
|
||||||
import type { HydratedIndexItem } from './indexing/types';
|
import type { HydratedIndexItem } from './indexing/types';
|
||||||
|
import { searchVectors, type VectorSearchResult } from './vectorSearch';
|
||||||
|
|
||||||
// This function is likely no longer needed as items are pre-processed by the indexer
|
// This function is likely no longer needed as items are pre-processed by the indexer
|
||||||
/* export function prepareDynamicItems(items: DynamicContentItem[]): DynamicContentItem[] {
|
/* export function prepareDynamicItems(items: DynamicContentItem[]): DynamicContentItem[] {
|
||||||
@@ -47,9 +48,9 @@ export function createSearchIndexes() {
|
|||||||
includeScore: true,
|
includeScore: true,
|
||||||
includeMatches: true,
|
includeMatches: true,
|
||||||
threshold: 0.6,
|
threshold: 0.6,
|
||||||
minMatchCharLength: 1,
|
minMatchCharLength: 3,
|
||||||
ignoreLocation: true,
|
distance: 50,
|
||||||
useExtendedSearch: false
|
useExtendedSearch: false,
|
||||||
};
|
};
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@@ -141,18 +142,74 @@ export function searchDynamicItems(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
export function performSearch(
|
export async function performSearch(
|
||||||
query: string,
|
query: string,
|
||||||
commandsFuse: Fuse<StaticCommandItem>,
|
commandsFuse: Fuse<StaticCommandItem>,
|
||||||
dynamicContentFuse: Fuse<HydratedIndexItem>,
|
dynamicContentFuse: Fuse<HydratedIndexItem>,
|
||||||
commandIdToItemMap: Map<string, StaticCommandItem>,
|
commandIdToItemMap: Map<string, StaticCommandItem>,
|
||||||
dynamicIdToItemMap: Map<string, HydratedIndexItem>,
|
dynamicIdToItemMap: Map<string, HydratedIndexItem>,
|
||||||
showRecentFirst: boolean // Pass sorting preference
|
showRecentFirst: boolean
|
||||||
): CombinedResult[] {
|
): Promise<CombinedResult[]> {
|
||||||
const commandResults = searchCommands(commandsFuse, query, commandIdToItemMap);
|
const startTime = performance.now();
|
||||||
const dynamicResults = searchDynamicItems(dynamicContentFuse, query, dynamicIdToItemMap, 10, showRecentFirst);
|
|
||||||
|
|
||||||
const results = [...commandResults, ...dynamicResults];
|
// Get all results first
|
||||||
|
const commandResults = searchCommands(commandsFuse, query, commandIdToItemMap);
|
||||||
|
const commandEndTime = performance.now();
|
||||||
|
const dynamicResults = searchDynamicItems(dynamicContentFuse, query, dynamicIdToItemMap, 10, showRecentFirst);
|
||||||
|
const fuseEndTime = performance.now();
|
||||||
|
|
||||||
|
// Get vector results in parallel
|
||||||
|
const vectorResults = await searchVectors(query, 10);
|
||||||
|
const vectorEndTime = performance.now();
|
||||||
|
|
||||||
|
console.log('Vector results:', vectorResults);
|
||||||
|
|
||||||
|
// Log timings
|
||||||
|
console.log(`Command search took ${commandEndTime - startTime} milliseconds`);
|
||||||
|
console.log(`Dynamic search took ${fuseEndTime - commandEndTime} milliseconds`);
|
||||||
|
console.log(`Vector search took ${vectorEndTime - fuseEndTime} milliseconds`);
|
||||||
|
|
||||||
|
// Create a map to store our final results, using ID as key to avoid duplicates
|
||||||
|
const resultMap = new Map<string, CombinedResult>();
|
||||||
|
|
||||||
|
// Add command results first (they keep their original scores)
|
||||||
|
commandResults.forEach(r => resultMap.set(r.id, r));
|
||||||
|
|
||||||
|
// Process dynamic results and vector results together
|
||||||
|
const seenIds = new Set<string>();
|
||||||
|
|
||||||
|
// Add dynamic results first
|
||||||
|
dynamicResults.forEach(r => {
|
||||||
|
seenIds.add(r.id);
|
||||||
|
const vectorMatch = vectorResults.find(v => v.object.id === r.id);
|
||||||
|
if (vectorMatch) {
|
||||||
|
// If we found it in both searches, combine the scores
|
||||||
|
resultMap.set(r.id, {
|
||||||
|
...r,
|
||||||
|
score: r.score + (vectorMatch.similarity * 0.6) // Boost exact matches
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
// If only in Fuse results, keep as is
|
||||||
|
resultMap.set(r.id, r);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Now add any vector results we haven't seen yet
|
||||||
|
vectorResults.forEach(v => {
|
||||||
|
const id = v.object.id;
|
||||||
|
if (!seenIds.has(id)) {
|
||||||
|
// This is a semantic match that Fuse missed - add it with the vector similarity as score
|
||||||
|
resultMap.set(id, {
|
||||||
|
id,
|
||||||
|
type: 'dynamic' as const,
|
||||||
|
score: v.similarity * 0.9, // High base score for semantic matches
|
||||||
|
item: v.object
|
||||||
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Convert to array and sort by score
|
||||||
|
const results = Array.from(resultMap.values());
|
||||||
results.sort((a, b) => b.score - a.score);
|
results.sort((a, b) => b.score - a.score);
|
||||||
|
|
||||||
return results;
|
return results;
|
||||||
|
|||||||
@@ -0,0 +1,86 @@
|
|||||||
|
import { getEmbedding, EmbeddingIndex, initializeModel } from 'client-vector-search';
|
||||||
|
import type { HydratedIndexItem } from './indexing/types';
|
||||||
|
import type { SearchResult } from 'client-vector-search';
|
||||||
|
|
||||||
|
let vectorIndex: EmbeddingIndex | null = null;
|
||||||
|
|
||||||
|
/**
 * Initializes the embedding model and (re)creates the module-level vector index.
 *
 * After `initializeModel()` resolves, a fresh empty `EmbeddingIndex` is
 * published as `vectorIndex` and every object returned by
 * `getAllObjectsFromIndexedDB()` is added to it.
 *
 * Initialization is best-effort: if any step throws, the error is logged and
 * `vectorIndex` is replaced with a new empty index, so callers always find an
 * index instance afterwards (unless constructing the index itself throws).
 */
export async function initVectorSearch(): Promise<void> {
  try {
    await initializeModel();

    const index = new EmbeddingIndex([]);
    // Publish the index before loading, matching the order callers rely on.
    vectorIndex = index;

    // Load existing items from IndexedDB
    const stored = await index.getAllObjectsFromIndexedDB();
    if (stored.length > 0) {
      stored.forEach(item => index.add(item));
      console.debug('Vector index loaded from IndexedDB');
    }
  } catch (e) {
    // Keep the fallback, but surface the cause: previously a failing model
    // load looked identical to "nothing stored yet".
    console.debug('Creating new vector index', e);
    vectorIndex = new EmbeddingIndex([]);
  }
}
|
||||||
|
|
||||||
|
/**
 * Computes an embedding for a single index item.
 *
 * The item's `text`, `content`, `category`, `metadata.author` and
 * `metadata.subject` are joined with single spaces (falsy fields are skipped)
 * and the resulting string is passed to `getEmbedding`.
 *
 * @param item - The index item to embed.
 * @returns A copy of `item` with the computed `embedding` added.
 */
export async function vectorizeItem(item: HydratedIndexItem): Promise<HydratedIndexItem & { embedding: number[] }> {
  const searchableFields = [
    item.text,
    item.content,
    item.category,
    item.metadata?.author,
    item.metadata?.subject,
  ];

  // Drop missing/empty fields so they do not add stray separators.
  const textToEmbed = searchableFields.filter(field => Boolean(field)).join(' ');

  const embedding = await getEmbedding(textToEmbed);
  const vectorized = { ...item, embedding };
  return vectorized;
}
|
||||||
|
|
||||||
|
/**
 * Embeds the items that are not yet in the vector index and saves the index.
 *
 * The index is initialized first when it does not exist yet. An item is
 * considered unprocessed when `vectorIndex.get({ id })` returns a falsy value
 * or throws. Unprocessed items are embedded in batches of `BATCH_SIZE`; after
 * each batch the new entries are added and `saveIndex('indexedDB')` is called,
 * so progress is saved batch by batch.
 *
 * @param items - Candidate items; those already present in the index are skipped.
 */
export async function processItems(items: HydratedIndexItem[]) {
  if (!vectorIndex) await initVectorSearch();

  // A failing lookup is treated the same as "not indexed yet".
  const isMissingFromIndex = (candidate: HydratedIndexItem): boolean => {
    try {
      return !vectorIndex!.get({ id: candidate.id });
    } catch {
      return true;
    }
  };

  const pending = items.filter(isMissingFromIndex);
  const total = pending.length;

  if (total === 0) {
    console.debug('No new items to vectorize');
    return;
  }

  console.debug(`Vectorizing ${total} new items...`);

  // Embed a few items at a time rather than all at once.
  const BATCH_SIZE = 5;
  let start = 0;
  while (start < total) {
    const end = start + BATCH_SIZE;
    const embedded = await Promise.all(pending.slice(start, end).map(vectorizeItem));

    embedded.forEach(entry => {
      vectorIndex!.add(entry);
    });

    // Save after every batch so earlier batches survive a later failure.
    await vectorIndex!.saveIndex('indexedDB');

    console.debug(`Vectorized ${Math.min(end, total)}/${total} items`);
    start = end;
  }
}
|
||||||
|
|
||||||
|
/**
 * A `SearchResult` whose `object` is narrowed to an index item carrying its
 * embedding vector — the shape produced by `vectorizeItem`.
 */
export interface VectorSearchResult extends SearchResult {
  /** The matched index item, including the `embedding` it was stored with. */
  object: HydratedIndexItem & { embedding: number[] };
}
|
||||||
|
|
||||||
|
/**
 * Runs a similarity search for `query` against the vector index.
 *
 * The index is initialized first when it does not exist yet. The query is
 * embedded with `getEmbedding` and passed to `vectorIndex.search` together
 * with `topK` and `useStorage: 'indexedDB'`.
 * NOTE(review): `useStorage: 'indexedDB'` presumably makes the library search
 * the persisted copy rather than the in-memory index — confirm in its docs.
 *
 * @param query - Free-text search query.
 * @param topK - Maximum number of matches requested from the index (default 10).
 * @returns The matches reported by the index, typed as `VectorSearchResult`.
 */
export async function searchVectors(query: string, topK: number = 10): Promise<VectorSearchResult[]> {
  if (!vectorIndex) {
    await initVectorSearch();
  }

  const embeddedQuery = await getEmbedding(query);
  const matches = await vectorIndex!.search(embeddedQuery, { topK, useStorage: 'indexedDB' });

  return matches as VectorSearchResult[];
}
|
||||||
Reference in New Issue
Block a user