feat: safer text highlighting

This commit is contained in:
SethBurkart123
2025-05-20 20:43:16 +10:00
parent 281842ea48
commit 25623339f8
9 changed files with 109 additions and 342 deletions
@@ -6,13 +6,13 @@
import { type StaticCommandItem } from '../core/commands';
import type { CombinedResult } from '../core/types';
import { createSearchIndexes, performSearch as doSearch } from '../search/searchUtils';
import { highlightMatch, highlightSnippet, stripHtmlButKeepHighlights } from '../utils/highlight';
import Fuse from 'fuse.js';
import Calculator from './Calculator.svelte';
import { actionMap } from '../indexing/actions';
import type { IndexItem } from '../indexing/types';
import debounce from 'lodash/debounce';
import { renderComponentMap } from '../indexing/renderComponents';
import HighlightedText from '../utils/HighlightedText.svelte';
const {
transparencyEffects,
@@ -279,7 +279,7 @@
>
<div class="flex-none w-8 h-8 text-xl font-IconFamily flex items-center justify-center {isSelected ? 'text-zinc-900 dark:text-white' : 'text-zinc-600 dark:text-zinc-400'}">{staticItem.icon}</div>
<span class="ml-4 text-lg truncate">
{@html highlightMatch(staticItem.text, searchTerm, result.matches)}
<HighlightedText text={staticItem.text} term={searchTerm} matches={result.matches} />
</span>
{#if staticItem.keybindLabel}
<div class="flex-none ml-auto">
@@ -310,7 +310,7 @@
<div class="flex items-center w-full">
<div class="flex-none w-8 h-8 text-xl font-IconFamily flex items-center justify-center {isSelected ? 'text-zinc-900 dark:text-white' : 'text-zinc-600 dark:text-zinc-400'}">{dynamicItem.metadata?.icon || '\ue924'}</div>
<span class="ml-4 text-lg truncate">
{@html stripHtmlButKeepHighlights(highlightMatch(dynamicItem.text, searchTerm, result.matches))}
<HighlightedText text={dynamicItem.text} term={searchTerm} matches={result.matches} />
</span>
<span class="flex-none ml-auto text-xs text-zinc-500 dark:text-zinc-400">
{dynamicItem.category}
@@ -318,7 +318,7 @@
</div>
{#if dynamicItem.content}
<div class="mt-1 ml-12 text-sm text-zinc-600 dark:text-zinc-400 line-clamp-2 text-start">
{@html stripHtmlButKeepHighlights(highlightSnippet(dynamicItem.content, searchTerm, result.matches))}
<HighlightedText text={dynamicItem.content} term={searchTerm} matches={result.matches} />
</div>
{/if}
</button>
@@ -1,5 +1,5 @@
<script lang="ts">
import { highlightMatch, highlightSnippet, stripHtmlButKeepHighlights } from '../../utils/highlight';
import HighlightedText from '../../utils/HighlightedText.svelte';
import type { DynamicContentItem } from '../../utils/dynamicItems';
import type { FuseResultMatch } from '../../core/types';
@@ -20,15 +20,15 @@
<div class="flex items-center w-full">
<div class="flex-none w-8 h-8 text-xl font-IconFamily flex items-center justify-center {isSelected ? 'text-zinc-900 dark:text-white' : 'text-zinc-600 dark:text-zinc-400'}">{item.metadata?.icon || '\uebee'}</div>
<span class="ml-4 text-lg truncate">
{@html stripHtmlButKeepHighlights(highlightMatch(item.text, searchTerm, matches))}
<HighlightedText text={item.text} term={searchTerm} matches={matches} />
</span>
<span class="flex-none ml-auto text-xs text-zinc-500 dark:text-zinc-400">
{item.category}
{item.category}
</span>
</div>
{#if item.content}
<div class="mt-1 ml-12 text-sm text-zinc-600 dark:text-zinc-400 line-clamp-2 text-start">
{@html stripHtmlButKeepHighlights(highlightSnippet(item.content, searchTerm, matches))}
<HighlightedText text={item.content} term={searchTerm} matches={matches} />
</div>
{/if}
</button>
@@ -1,5 +1,5 @@
<script lang="ts">
import { highlightMatch, stripHtmlButKeepHighlights } from '../../utils/highlight';
import HighlightedText from '../../utils/HighlightedText.svelte';
import type { DynamicContentItem } from '../../utils/dynamicItems';
import type { FuseResultMatch } from '../../core/types';
@@ -20,7 +20,7 @@
<div class="flex items-center w-full">
<div class="flex-none w-8 h-8 text-xl font-IconFamily flex items-center justify-center {isSelected ? 'text-zinc-900 dark:text-white' : 'text-zinc-600 dark:text-zinc-400'}">{item.metadata?.icon || '\uebe7'}</div>
<span class="ml-4 text-lg truncate">
{@html stripHtmlButKeepHighlights(highlightMatch(item.text, searchTerm, matches))}
<HighlightedText text={item.text} term={searchTerm} matches={matches} />
</span>
<span class="flex-none ml-auto text-xs text-zinc-500 dark:text-zinc-400">
{item.category}
@@ -1,6 +1,6 @@
<script lang="ts">
import { highlightMatch, stripHtmlButKeepHighlights } from '../../utils/highlight';
import type { IndexItem } from '../../indexing/types';
import HighlightedText from '../../utils/HighlightedText.svelte';
import type { IndexItem } from '../../indexing/types';
import type { FuseResultMatch } from '../../core/types';
export let item: IndexItem;
@@ -9,47 +9,20 @@
export let matches: readonly FuseResultMatch[] | undefined;
export let onclick: (() => void) | undefined;
/**
 * Navigates to the page for the clicked subject item.
 *
 * Items whose `metadata.type` is `'assessments'` open the assessments page;
 * every other type opens the course page. Nothing happens unless both
 * `programme` and `subjectId` are present in the item's metadata.
 */
function handleClick() {
  // Optional chaining mirrors the markup, which also reads `item.metadata?.…`.
  const type = item.metadata?.type;
  const subjectId = item.metadata?.subjectId;
  const programme = item.metadata?.programme;
  if (!programme || !subjectId) return;

  const section = type === 'assessments' ? 'assessments' : 'courses';
  const url = `/#?page=/${section}/${programme}:${subjectId}`;
  // `location.hash` includes the leading '#', so keep it in the expected value;
  // stripping it made the fallback below fire on every click.
  const expectedHash = url.slice(url.indexOf('#'));

  try {
    window.location.assign(url);
    // Fallback in case assign is blocked: retry via href only if the hash did not change.
    setTimeout(() => {
      if (window.location.hash !== expectedHash) {
        window.location.href = url;
      }
    }, 200);
  } catch (e) {
    window.location.href = url;
  }
}
</script>
<button
class="w-full flex flex-col px-2 py-1.5 rounded-lg select-none cursor-pointer group transition-colors duration-100
{isSelected ? 'bg-zinc-900/5 dark:bg-white/10 text-zinc-900 dark:text-white' : 'hover:bg-zinc-500/5 dark:hover:bg-white/5 text-zinc-800 dark:text-zinc-200'}"
onclick={() => { handleClick(); if (typeof onclick === 'function') onclick(); }}
onclick={onclick}
>
<div class="flex items-center w-full">
<div class="flex-none w-8 h-8 text-xl font-IconFamily flex items-center justify-center {isSelected ? 'text-zinc-900 dark:text-white' : 'text-zinc-600 dark:text-zinc-400'}">
{item.metadata?.type === 'assessments' ? '\ueac3' : '\ueb4d'}
</div>
<span class="ml-4 text-lg truncate">
{@html stripHtmlButKeepHighlights(highlightMatch(item.text, searchTerm, matches))}
<HighlightedText text={item.text} term={searchTerm} matches={matches} />
</span>
<span class="flex-none ml-auto text-xs text-zinc-500 dark:text-zinc-400">
{item.metadata?.subjectCode}
@@ -57,7 +30,7 @@
</div>
{#if item.content}
<div class="mt-1 ml-12 text-sm text-zinc-600 dark:text-zinc-400 line-clamp-2 text-start">
{@html stripHtmlButKeepHighlights(highlightMatch(item.content, searchTerm, matches))}
<HighlightedText text={item.content} term={searchTerm} matches={matches} />
</div>
{/if}
</button>
@@ -28,11 +28,10 @@ export const subjectsJob: Job = {
// Boost for active subjects
if (item.metadata?.isActive) {
score += 15; // Boost for active subjects
console.log("active subject:", item.metadata.subjectName);
} else {
console.log("inactive subject:", item.metadata.subjectName);
}
// Boost for year level
const yearLevel = item.metadata?.yearLevel || 0;
score += yearLevel;
return score;
},
@@ -81,9 +80,7 @@ export const subjectsJob: Job = {
const id = `${semester.code}-${subject.code}-${subject.metaclass}`;
if (existingIds.has(id)) continue;
// Extract year level from subject code (assuming format like "YEAR10" or "10ENG")
const yearLevel = subject.code.match(/^YEAR(\d+)|^(\d+)/i)?.[1] || subject.code.match(/^(\d+)/)?.[1] || 0;
const isActive = subject.active === 1;
const isActive = semester.active === 1;
// Create two items for each subject - one for assessments and one for course
items.push(
@@ -101,7 +98,6 @@ export const subjectsJob: Job = {
semesterCode: semester.code,
semesterDescription: semester.description,
type: "assessments",
yearLevel: yearLevel ? Number(yearLevel) : 0,
isActive
},
actionId: "subjectassessment",
@@ -121,7 +117,6 @@ export const subjectsJob: Job = {
semesterCode: semester.code,
semesterDescription: semester.description,
type: "course",
yearLevel: yearLevel ? Number(yearLevel) : 0,
isActive
},
actionId: "subjectcourse",
@@ -2,15 +2,10 @@ import type { SvelteComponent } from "svelte";
import AssessmentItem from "../components/items/AssessmentItem.svelte";
import ForumItem from "../components/items/ForumItem.svelte";
import SubjectItem from "../components/items/SubjectItem.svelte";
import type { IndexItem } from "./types";
import { highlightMatch } from "../utils/highlight";
// import other components as needed
// Each imported item component is coerced once to the generic Svelte component
// constructor type and then registered under one or more string keys.
const assessmentComponent = AssessmentItem as unknown as typeof SvelteComponent;
const forumComponent = ForumItem as unknown as typeof SvelteComponent;
const subjectComponent = SubjectItem as unknown as typeof SvelteComponent;

/**
 * Lookup table from a string key to the Svelte component registered for it.
 * "assessment" and "message" share the same component.
 * Register further components here as needed.
 */
export const renderComponentMap: Record<string, typeof SvelteComponent> = {
  assessment: assessmentComponent,
  message: assessmentComponent,
  forum: forumComponent,
  subject: subjectComponent,
};
@@ -25,10 +25,6 @@ export function createSearchIndexes() {
{ name: "text", weight: 2 },
{ name: "content", weight: 1 },
{ name: "category", weight: 1 },
{ name: "metadata.subjectName", weight: 3 },
{ name: "metadata.subjectCode", weight: 2.5 },
{ name: "metadata.semesterDescription", weight: 1 },
{ name: "metadata.yearLevel", weight: 1.5 }
],
includeScore: true,
includeMatches: true,
@@ -117,12 +113,6 @@ export function searchDynamicItems(
let score = fuseScore;
// apply boost criteria if it exists
const boost = jobs[item.category].boostCriteria?.(item, query);
if (boost) {
score += boost;
}
const ageInDays = (now - item.dateAdded) / (1000 * 60 * 60 * 24);
const recencyBoost = sortByRecent ? 1 / (ageInDays + 1) : 0;
score += recencyBoost;
@@ -196,10 +186,18 @@ export async function performSearch(
if (!seenIds.has(id)) {
// This is a semantic match that Fuse missed - add it with the vector similarity as score
let score = v.similarity * 0.5; // High base score for semantic matches
const job = jobs[v.object.category];
if (job && typeof job.boostCriteria === 'function') {
const boost = job.boostCriteria(v.object, query);
if (boost) {
score += boost;
}
}
resultMap.set(id, {
id,
type: "dynamic" as const,
score: v.similarity * 0.9, // High base score for semantic matches
score,
item: v.object,
});
}
@@ -0,0 +1,80 @@
<script lang="ts">
// This component is imported elsewhere as `utils/HighlightedText.svelte`, so the
// shared types are one directory up (`../core/types`), not two.
import type { FuseResultMatch } from '../core/types';
// Component props: the string to render, the active search term, and the
// optional match records belonging to the search result being displayed.
const { text, term, matches } = $props<{
  text: string;
  term: string;
  matches?: readonly FuseResultMatch[];
}>();
// Segment list derived from the props via getSegments; the markup below iterates it.
const segments = $derived(getSegments(text, term, matches));
/**
 * Splits `text` into consecutive plain / highlighted segments so the markup can
 * render highlights as real elements instead of injecting an HTML string.
 *
 * Match selection:
 *  1. Prefer a match whose `value` is exactly `text`. The component renders
 *     both titles and content bodies, so this keeps a title match's offsets
 *     from being painted onto the content string (and vice versa).
 *  2. Otherwise fall back to a `text` match that carries no `value`, or to an
 *     `allContent` match whose value contains `text`; `allContent` indices are
 *     shifted so they become relative to `text`.
 *
 * @param text    The string being rendered.
 * @param term    The current search term; a blank term disables highlighting.
 * @param matches Match records for the result, if any.
 * @returns Ordered segments whose concatenated `text` equals the input `text`
 *          (a single non-highlighted segment when nothing applies).
 */
function getSegments(
  text: string,
  term: string,
  matches?: readonly FuseResultMatch[],
): { text: string; highlight: boolean }[] {
  if (!term.trim() || !matches || matches.length === 0) return [{ text, highlight: false }];
  try {
    const fieldMatches =
      matches.find((match) => match.key !== 'allContent' && match.value === text) ??
      matches.find(
        (match) =>
          (match.key === 'text' && match.value == null) ||
          (match.key === 'allContent' && match.value?.includes(text)),
      );
    if (!fieldMatches || !fieldMatches.indices || fieldMatches.indices.length === 0) {
      return [{ text, highlight: false }];
    }

    const isAllContent = fieldMatches.key === 'allContent';
    // Offset of `text` inside the combined field; only meaningful for allContent.
    const textPos = isAllContent ? (fieldMatches.value?.indexOf(text) ?? -1) : 0;

    const highlightMap: boolean[] = new Array(text.length).fill(false);
    fieldMatches.indices.forEach((indices) => {
      const start = indices[0];
      const end = indices[1];
      if (isAllContent) {
        if (textPos < 0) return;
        const relStart = start - textPos;
        const relEnd = end - textPos;
        // Keep only the part of the match that overlaps `text`.
        if (relEnd >= 0 && relStart < text.length) {
          for (let i = Math.max(0, relStart); i <= Math.min(text.length - 1, relEnd); i++) {
            highlightMap[i] = true;
          }
        }
      } else if (start >= 0 && end < text.length) {
        // Inclusive [start, end] range; out-of-range matches are ignored entirely.
        for (let i = start; i <= end; i++) {
          highlightMap[i] = true;
        }
      }
    });

    // Collapse the per-character map into runs that share the same highlight state.
    const segments: { text: string; highlight: boolean }[] = [];
    let current = '';
    let currentHighlight = highlightMap[0] || false;
    for (let i = 0; i < text.length; i++) {
      const isHighlight = highlightMap[i] || false;
      if (isHighlight !== currentHighlight) {
        segments.push({ text: current, highlight: currentHighlight });
        current = '';
        currentHighlight = isHighlight;
      }
      current += text[i];
    }
    if (current) {
      segments.push({ text: current, highlight: currentHighlight });
    }
    return segments;
  } catch (e) {
    // Deliberate best-effort: malformed match data degrades to plain text.
    return [{ text, highlight: false }];
  }
}
</script>
<span>
{#each segments as segment}
{#if segment.highlight}
<span class="highlight">{segment.text}</span>
{:else}
{segment.text}
{/if}
{/each}
</span>
@@ -1,274 +0,0 @@
import type { FuseResultMatch, MatchIndices } from "../core/types";
/**
 * Removes HTML tags from a string and flattens it onto a single line.
 *
 * Anything of the form `<...>` is dropped, then every newline is replaced by a
 * space (a string pattern passed to `replace` would only touch the first one).
 *
 * @param html Markup to clean; falsy input yields an empty string.
 * @returns The tag-free, newline-free text.
 */
export function stripHtmlTags(html: string): string {
  if (!html) return "";
  return html.replace(/<[^>]*>/g, "").replace(/\n/g, " ");
}
/**
 * Strips every HTML tag from `html` except the `<span class="highlight">`
 * wrappers (and the `</span>` tags judged to close them).
 *
 * Works by swapping the highlight tags for text placeholders, deleting all
 * remaining tags, and then turning the placeholders back into tags.
 *
 * @param html Markup that may contain highlight spans; falsy input yields "".
 * @returns The text with only highlight spans left as markup.
 */
export function stripHtmlButKeepHighlights(html: string): string {
  if (!html) return "";

  const openToken = "__HIGHLIGHT_OPEN__";
  const closeToken = "__HIGHLIGHT_CLOSE__";

  const openingsMasked = html.replace(/<span class="highlight">/g, openToken);

  // A </span> is treated as a highlight closer only when an opening token
  // precedes it and no other <span starts in between. This is a heuristic:
  // unrelated closing spans are left alone and get stripped with other tags.
  const closingsMasked = openingsMasked.replace(
    /<\/span>/g,
    (closingTag: string, position: number, whole: string) => {
      const nearestOpen = whole.lastIndexOf(openToken, position);
      if (nearestOpen === -1) return closingTag;
      const between = whole.substring(nearestOpen + openToken.length, position);
      return /<span/i.test(between) ? closingTag : closeToken;
    },
  );

  // Everything still shaped like a tag goes away.
  const tagless = closingsMasked.replace(/<[^>]*>/g, "");

  // Put the real highlight markup back in place of the tokens.
  return tagless
    .split(openToken)
    .join('<span class="highlight">')
    .split(closeToken)
    .join("</span>");
}
/**
 * Returns `text` with matched character ranges wrapped in
 * `<span class="highlight">…</span>`.
 *
 * The first match record keyed `text`, or keyed `allContent` with a value that
 * contains `text`, supplies the ranges. `allContent` ranges are translated to
 * be relative to `text` and clipped to it; `text` ranges that fall outside the
 * string are ignored. The text itself is emitted unescaped.
 *
 * @param text    String to decorate.
 * @param term    Current search term; blank disables highlighting.
 * @param matches Match records for the result, if any.
 * @returns The decorated string, or `text` unchanged when nothing applies or
 *          an error occurs.
 */
export function highlightMatch(
  text: string,
  term: string,
  matches?: readonly FuseResultMatch[],
): string {
  if (!term.trim() || !matches || matches.length === 0) return text;

  try {
    const selected = matches.find((candidate) => {
      if (candidate.key === "text") return true;
      return candidate.key === "allContent" && candidate.value?.includes(text);
    });
    if (!selected || !selected.indices || selected.indices.length === 0) {
      return text;
    }

    const fromAllContent = selected.key === "allContent";
    // Position of `text` within the combined field (unused for plain text matches).
    const offset = fromAllContent ? (selected.value?.indexOf(text) ?? -1) : 0;
    const lastIndex = text.length - 1;

    // One flag per character: true means the character is part of a match.
    const marked: boolean[] = new Array(text.length).fill(false);
    for (const range of selected.indices as readonly MatchIndices[]) {
      let from: number;
      let to: number;
      if (fromAllContent) {
        if (offset < 0) continue;
        const relStart = range[0] - offset;
        const relEnd = range[1] - offset;
        // Skip ranges that do not overlap `text` at all.
        if (!(relEnd >= 0 && relStart < text.length)) continue;
        from = Math.max(0, relStart);
        to = Math.min(lastIndex, relEnd);
      } else {
        // Ranges reaching outside the string are dropped, not clipped.
        if (!(range[0] >= 0 && range[1] < text.length)) continue;
        from = range[0];
        to = range[1];
      }
      for (let i = from; i <= to; i++) {
        marked[i] = true;
      }
    }

    // Emit characters, inserting a tag wherever the flag flips.
    const pieces: string[] = [];
    let open = false;
    for (let i = 0; i < text.length; i++) {
      const on = Boolean(marked[i]);
      if (on !== open) {
        pieces.push(on ? '<span class="highlight">' : "</span>");
        open = on;
      }
      pieces.push(text.charAt(i));
    }
    if (open) {
      pieces.push("</span>");
    }
    return pieces.join("");
  } catch (e) {
    console.error("Error highlighting match:", e);
    return text;
  }
}
/**
 * Extracts a window of up to 100 characters from `content`, centred on the
 * first match, and wraps matched characters in `<span class="highlight">`.
 *
 * Ranges come from the first match record keyed `content`, or keyed
 * `allContent` with a value containing `content` (those ranges are shifted to
 * be relative to `content`). A `...` ellipsis is added on each side where the
 * window does not reach the edge of `content`. Highlights are clipped to the
 * window so the ellipsis characters are never highlighted.
 *
 * @param content Full content string to excerpt.
 * @param term    Current search term; blank returns `content` unchanged.
 * @param matches Match records for the result, if any.
 * @returns The highlighted snippet. When no usable match exists or an error
 *          occurs, the first 100 characters of `content` (plus `...` if
 *          truncated) are returned without highlighting.
 */
export function highlightSnippet(
  content: string,
  term: string,
  matches?: readonly FuseResultMatch[],
): string {
  if (!content || !term.trim() || !matches || matches.length === 0)
    return content;

  // Plain, unhighlighted excerpt shared by all "nothing to highlight" exits.
  const plainExcerpt = (): string =>
    content.length > 100 ? content.substring(0, 100) + "..." : content;

  try {
    // Find matches for the content field or an allContent that contains the content
    const contentMatches = matches.find(
      (match) =>
        match.key === "content" ||
        (match.key === "allContent" && match.value?.includes(content)),
    );

    if (
      !contentMatches ||
      !contentMatches.indices ||
      contentMatches.indices.length === 0
    ) {
      return plainExcerpt();
    }

    let allIndices: MatchIndices[] = contentMatches.indices as MatchIndices[];

    // allContent ranges are relative to the combined field; rebase onto `content`
    // and drop ranges that do not overlap it.
    if (contentMatches.key === "allContent") {
      const contentPos = contentMatches.value?.indexOf(content) ?? -1;
      if (contentPos >= 0) {
        allIndices = allIndices
          .map(
            (indices) =>
              [
                indices[0] - contentPos,
                indices[1] - contentPos,
              ] as MatchIndices,
          )
          .filter((indices) => indices[1] >= 0 && indices[0] < content.length);
      }
    }

    if (allIndices.length === 0) {
      return plainExcerpt();
    }

    // Centre the window on the midpoint of the first match.
    const firstMatch = allIndices[0];
    const matchCenter = Math.floor((firstMatch[0] + firstMatch[1]) / 2);
    const windowSize = 100;
    const start = Math.max(0, matchCenter - windowSize / 2);
    const end = Math.min(content.length, matchCenter + windowSize / 2); // exclusive

    const body = content.substring(start, end);
    const prefix = start > 0 ? "..." : "";
    const suffix = end < content.length ? "..." : "";

    // Flags are indexed relative to `body` only, so the ellipses can never be
    // marked; each match is clipped to the [start, end) content window.
    const highlightMap: boolean[] = new Array(body.length).fill(false);
    for (const indices of allIndices) {
      const from = Math.max(start, indices[0]);
      const to = Math.min(end - 1, indices[1]);
      for (let i = from; i <= to; i++) {
        highlightMap[i - start] = true;
      }
    }

    // Build the highlighted snippet, inserting a tag wherever the flag flips.
    let result = prefix;
    let inHighlight = false;
    for (let i = 0; i < body.length; i++) {
      if (highlightMap[i] && !inHighlight) {
        result += '<span class="highlight">';
        inHighlight = true;
      } else if (!highlightMap[i] && inHighlight) {
        result += "</span>";
        inHighlight = false;
      }
      result += body.charAt(i);
    }
    if (inHighlight) {
      result += "</span>";
    }
    return result + suffix;
  } catch (e) {
    console.error("Error highlighting snippet:", e);
    return plainExcerpt();
  }
}