From 2e7d705b90de5b6a5fdd14727adbaad97c3d420a Mon Sep 17 00:00:00 2001 From: noatdk Date: Wed, 3 Jun 2026 22:26:44 +0700 Subject: [PATCH 1/9] =?UTF-8?q?feat:=20Allow=20look=20up=20of=20fuseji=20(?= =?UTF-8?q?eg.=20=E3=83=9E=E2=97=8B=E3=83=89=E3=83=8A=E2=97=8B=E3=83=89)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ext/data/schemas/options-schema.json | 12 +- ext/js/background/backend.js | 4 + ext/js/data/options-util.js | 15 ++ ext/js/language/translator.js | 171 +++++++++++++++++- ext/settings.html | 18 ++ test/data/database-test-cases.json | 6 +- .../valid-dictionary1/term_bank_1.json | 6 +- test/fuseji.test.js | 135 ++++++++++++++ test/options-util.test.js | 6 +- test/utilities/translator.js | 4 + types/ext/settings.d.ts | 2 + types/ext/translation.d.ts | 8 + types/test/translator.d.ts | 2 + 13 files changed, 382 insertions(+), 7 deletions(-) create mode 100644 test/fuseji.test.js diff --git a/ext/data/schemas/options-schema.json b/ext/data/schemas/options-schema.json index 0360cdef61..be9dcdaaec 100644 --- a/ext/data/schemas/options-schema.json +++ b/ext/data/schemas/options-schema.json @@ -855,7 +855,9 @@ "type": "object", "required": [ "textReplacements", - "searchResolution" + "searchResolution", + "enableFusejiLookup", + "fusejiTriggers" ], "properties": { "searchResolution": { @@ -866,6 +868,14 @@ ], "default": "letter" }, + "enableFusejiLookup": { + "type": "boolean", + "default": false + }, + "fusejiTriggers": { + "type": "string", + "default": "◯○〇●" + }, "textReplacements": { "type": "object", "required": [ diff --git a/ext/js/background/backend.js b/ext/js/background/backend.js index 940fc09e48..e4846f0b02 100644 --- a/ext/js/background/backend.js +++ b/ext/js/background/backend.js @@ -2720,6 +2720,8 @@ export class Backend { translation: { textReplacements: textReplacementsOptions, searchResolution, + enableFusejiLookup, + fusejiTriggers, }, } = options; const textReplacements = 
this._getTranslatorTextReplacements(textReplacementsOptions); @@ -2745,6 +2747,8 @@ export class Backend { removeNonJapaneseCharacters: !alphanumeric, searchResolution, textReplacements, + enableFusejiLookup, + fusejiTriggers, enabledDictionaryMap, excludeDictionaryDefinitions, language, diff --git a/ext/js/data/options-util.js b/ext/js/data/options-util.js index f36bfee17b..644022abd9 100644 --- a/ext/js/data/options-util.js +++ b/ext/js/data/options-util.js @@ -362,6 +362,8 @@ export class OptionsUtil { convertHiraganaToKatakana: 'false', convertKatakanaToHiragana: 'variant', collapseEmphaticSequences: 'false', + enableFusejiLookup: false, + fusejiTriggers: '◯○〇●', }, dictionaries: {}, @@ -588,6 +590,7 @@ export class OptionsUtil { this._updateVersion74, this._updateVersion75, this._updateVersion76, + this._updateVersion77, ]; /* eslint-enable @typescript-eslint/unbound-method */ if (typeof targetVersion === 'number' && targetVersion < result.length) { @@ -1851,6 +1854,18 @@ export class OptionsUtil { } } + /** + * - Added translation.enableFusejiLookup. + * - Added translation.fusejiTriggers. + * @type {import('options-util').UpdateFunction} + */ + async _updateVersion77(options) { + for (const profile of options.profiles) { + profile.options.translation.enableFusejiLookup = false; + profile.options.translation.fusejiTriggers = '◯○〇●'; + } + } + /** * @param {string} url * @returns {Promise} diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js index 9cf0c2af2f..ab09b58bc6 100644 --- a/ext/js/language/translator.js +++ b/ext/js/language/translator.js @@ -26,6 +26,10 @@ import {MultiLanguageTransformer} from './multi-language-transformer.js'; import {MAX_PROCESS_VARIANTS} from './text-processors.js'; import {isCodePointChinese} from './zh/chinese.js'; +// For suffix lookup case (e.g. ○○ルド), each candidate suffix length is one database lookup, +// This caps those lookups to a plausible length. 
+const FUSEJI_MAX_SUFFIX_LOOKUP_LENGTH = 8; + /** * Class which finds term and kanji dictionary entries for text. */ @@ -86,8 +90,21 @@ export class Translator { async findTerms(mode, text, options) { safePerformance.mark('translator:findTerms:start'); const {enabledDictionaryMap, excludeDictionaryDefinitions, sortFrequencyDictionary, sortFrequencyDictionaryOrder, language, primaryReading, useAllFrequencyDictionaries} = options; + const fusejiTriggerSet = options.enableFusejiLookup ? new Set(options.fusejiTriggers ?? '') : null; const tagAggregator = new TranslatorTagAggregator(); - let {dictionaryEntries, originalTextLength} = await this._findTermsInternal(text, options, tagAggregator, primaryReading); + /** @type {import('translation-internal').TermDictionaryEntry[]} */ + let dictionaryEntries = []; + let originalTextLength = 0; + if (fusejiTriggerSet !== null && fusejiTriggerSet.size > 0) { + const fusejiDetails = this._getFusejiTriggerDetails(text, fusejiTriggerSet); + if (fusejiDetails.firstTriggerIndex >= 0) { + ({dictionaryEntries, originalTextLength} = await this._findFusejiTerms(text, options, fusejiDetails, fusejiTriggerSet, tagAggregator, primaryReading)); + } + } + + if (dictionaryEntries.length === 0) { + ({dictionaryEntries, originalTextLength} = await this._findTermsInternal(text, options, tagAggregator, primaryReading)); + } switch (mode) { case 'group': @@ -253,6 +270,158 @@ export class Translator { return this._getDictionaryEntries(deinflections, enabledDictionaryMap, tagAggregator, primaryReading); } + /** + * Splits the text into characters and locates the positions of the first and last + * fuseji (mask) characters, which bound the region that needs wildcard matching. + * @param {string} text + * @param {Set} triggerSet + * @returns {{characters: string[], firstTriggerIndex: number, lastTriggerIndex: number}} -1 for indexes if not present. 
+ */ + _getFusejiTriggerDetails(text, triggerSet) { + const characters = [...text]; + let firstTriggerIndex = -1; + let lastTriggerIndex = -1; + for (let i = 0; i < characters.length; ++i) { + if (!triggerSet.has(characters[i])) { continue; } + if (firstTriggerIndex < 0) { + firstTriggerIndex = i; + } + lastTriggerIndex = i; + } + return {characters, firstTriggerIndex, lastTriggerIndex}; + } + + /** + * Finds dictionary entries for masked text (伏せ字), where some characters are replaced by trigger + * symbols such as `◯`. The unmasked text around the triggers is used to query the database via a + * prefix lookup (when text precedes the first trigger) or a series of suffix lookups (when the text + * begins with a trigger), and the candidates are then filtered against the full masked pattern. + * @param {string} text + * @param {import('translation').FindTermsOptions} options + * @param {{characters: string[], firstTriggerIndex: number, lastTriggerIndex: number}} fusejiDetails The trigger information for `text`, as produced by {@link Translator._getFusejiTriggerDetails}. + * @param {Set} triggerSet The set of characters treated as single-character wildcards. 
+ * @param {TranslatorTagAggregator} tagAggregator + * @param {string} primaryReading + * @returns {Promise<{dictionaryEntries: import('translation-internal').TermDictionaryEntry[], originalTextLength: number}>} + */ + async _findFusejiTerms(text, options, fusejiDetails, triggerSet, tagAggregator, primaryReading) { + const {prefixLookupText, suffixLookupText} = this._getFusejiLookupTexts(fusejiDetails); + if (prefixLookupText.length === 0 && suffixLookupText.length === 0) { + return {dictionaryEntries: [], originalTextLength: 0}; + } + + /** @type {import('translation-internal').TermDictionaryEntry[]} */ + let dictionaryEntries = []; + if (prefixLookupText.length > 0) { + const lookupOptions = /** @type {import('translation').FindTermsOptions} */ ({...options, matchType: 'prefix', deinflect: false}); + ({dictionaryEntries} = await this._findTermsInternal(prefixLookupText, lookupOptions, tagAggregator, primaryReading)); + } else { + // fallback to normal lookup + const lookupOptions = /** @type {import('translation').FindTermsOptions} */ ({...options, matchType: 'suffix', deinflect: false}); + for (const lookupText of this._getFusejiSuffixLookupTexts(suffixLookupText)) { + ({dictionaryEntries} = await this._findTermsInternal(lookupText, lookupOptions, tagAggregator, primaryReading)); + if (dictionaryEntries.length > 0) { break; } + } + } + return this._filterFusejiDictionaryEntries(dictionaryEntries, text, triggerSet); + } + + /** + * Extracts everything before the first trigger character + * and everything after the last one. These anchors drive the database lookup. + * @param {{characters: string[], firstTriggerIndex: number, lastTriggerIndex: number}} fusejiDetails The trigger information, as produced by {@link Translator._getFusejiTriggerDetails}. 
+ * @returns {{prefixLookupText: string, suffixLookupText: string}} + */ + _getFusejiLookupTexts(fusejiDetails) { + const {characters, firstTriggerIndex, lastTriggerIndex} = fusejiDetails; + if (firstTriggerIndex < 0) { + return {prefixLookupText: '', suffixLookupText: ''}; + } + + const prefixLookupText = characters.slice(0, firstTriggerIndex).join(''); + const suffixLookupText = characters.slice(lastTriggerIndex + 1).join(''); + return {prefixLookupText, suffixLookupText}; + } + + /** + * Generates the candidate query strings for suffix lookup where only the + * trailing unmasked text is known (e.g. `◯◯ルド`). This returns progressively + * shorter prefixes of the unmasked suffix (longest first, capped at {@link FUSEJI_MAX_SUFFIX_LOOKUP_LENGTH}) + * to be tried in turn as database suffix lookups. + * @param {string} suffixLookupText The unmasked text following the last trigger character. + * @returns {string[]} + */ + _getFusejiSuffixLookupTexts(suffixLookupText) { + const suffixChars = [...suffixLookupText]; + const maxLength = Math.min(suffixChars.length, FUSEJI_MAX_SUFFIX_LOOKUP_LENGTH); + /** @type {string[]} */ + const results = []; + for (let i = maxLength; i > 0; --i) { + results.push(suffixChars.slice(0, i).join('')); + } + return results; + } + + /** + * Narrows a set of candidate entries down to those + * whose term or reading actually fits the masked pattern, treating trigger characters as wildcards. + * @param {import('translation-internal').TermDictionaryEntry[]} dictionaryEntries + * @param {string} patternText The original masked text, e.g. `マ◯ド◯ルド`. 
+ * @param {Set} triggerSet + * @returns {{dictionaryEntries: import('translation-internal').TermDictionaryEntry[], originalTextLength: number}} + */ + _filterFusejiDictionaryEntries(dictionaryEntries, patternText, triggerSet) { + const matcher = this._createFusejiPatternMatcher(patternText, triggerSet); + let originalTextLength = 0; + /** @type {import('translation-internal').TermDictionaryEntry[]} */ + const filteredDictionaryEntries = []; + for (const dictionaryEntry of dictionaryEntries) { + for (const headword of dictionaryEntry.headwords) { + const candidates = headword.reading.length === 0 ? [headword.term] : [headword.term, headword.reading]; + const matchLength = Math.max(...candidates.map(matcher)); + if (matchLength > 0) { + originalTextLength = Math.max(originalTextLength, matchLength); + filteredDictionaryEntries.push({...dictionaryEntry, maxOriginalTextLength: Math.max(dictionaryEntry.maxOriginalTextLength, matchLength)}); + break; + } + } + } + return {dictionaryEntries: filteredDictionaryEntries, originalTextLength}; + } + + /** + * Builds a matcher function that tests a candidate string against the masked pattern. + * Candidates shorter than the pattern are allowed to match a leading portion of it (partial match). + * @param {string} patternText The masked text to match against, e.g. `マ◯ド◯ルド`. + * @param {Set} triggerSet + * @returns {(text: string) => number} A function returning the matched length in code units, or `0` if the candidate does not fit the pattern. 
+ */ + _createFusejiPatternMatcher(patternText, triggerSet) { + const patternChars = [...patternText]; + const patternTextLengths = [0]; + for (const character of patternChars) { + patternTextLengths.push(patternTextLengths[patternTextLengths.length - 1] + character.length); + } + return (text) => { + const textChars = [...text]; + if (textChars.length > patternChars.length) { + return 0; + } + for (let i = 0; i < patternChars.length; ++i) { + if (i >= textChars.length) { + return patternTextLengths[textChars.length]; + } + if (triggerSet.has(patternChars[i])) { + continue; + } + if (patternChars[i] !== textChars[i]) { + return 0; + } + } + return patternTextLengths[textChars.length]; + }; + } + /** * @param {import('translation-internal').DatabaseDeinflection[]} deinflections * @param {import('translation').TermEnabledDictionaryMap} enabledDictionaryMap diff --git a/ext/settings.html b/ext/settings.html index 724f15946c..9f9d77af50 100644 --- a/ext/settings.html +++ b/ext/settings.html @@ -1796,6 +1796,24 @@

Yomitan Settings

+
+
+
Enable fuseji lookup
+
Look up masked words (伏せ字) containing trigger characters, such as マ◯ド◯ルド.
+
+
+ +
+
+
+
+
Fuseji trigger characters
+
Characters that act as one-character wildcards during fuseji lookup.
+
+
+ +
+
Configure custom text replacement patterns…
diff --git a/test/data/database-test-cases.json b/test/data/database-test-cases.json index 3d7bdc90a7..0b3471bf05 100644 --- a/test/data/database-test-cases.json +++ b/test/data/database-test-cases.json @@ -29,7 +29,7 @@ "ipa": 1 }, "terms": { - "total": 38 + "total": 42 } } }, @@ -38,7 +38,7 @@ { "kanji": 2, "kanjiMeta": 6, - "terms": 38, + "terms": 42, "termMeta": 40, "tagMeta": 15, "media": 6 @@ -47,7 +47,7 @@ "total": { "kanji": 2, "kanjiMeta": 6, - "terms": 38, + "terms": 42, "termMeta": 40, "tagMeta": 15, "media": 6 diff --git a/test/data/dictionaries/valid-dictionary1/term_bank_1.json b/test/data/dictionaries/valid-dictionary1/term_bank_1.json index f8746c6997..0a5c4b18d8 100644 --- a/test/data/dictionaries/valid-dictionary1/term_bank_1.json +++ b/test/data/dictionaries/valid-dictionary1/term_bank_1.json @@ -352,5 +352,9 @@ ["oppidum", "", "n", "n2s", 5, ["oppidum definition - town"], 26, ""], ["oppido", "oppidō", "adv", "adv", 5, ["oppido definition - very"], 27, ""], ["oppido", "", "", "", 1, [["oppidum", ["ablative"]]], 28, ""], - ["oppidorum", "", "", "", 1, [["oppidum", ["genitive plural"]]], 29, ""] + ["oppidorum", "", "", "", 1, [["oppidum", ["genitive plural"]]], 29, ""], + ["マ", "マ", "n", "n", 1, ["ma definition"], 200, ""], + ["マギ", "マギ", "n", "n", 1, ["magi definition"], 202, ""], + ["マキマ", "マキマ", "n", "n", 1, ["makima definition"], 203, ""], + ["マクドナルド", "マクドナルド", "n", "n", 1, ["makudonarudo definition"], 201, ""] ] diff --git a/test/fuseji.test.js b/test/fuseji.test.js new file mode 100644 index 0000000000..74f2b2d7f2 --- /dev/null +++ b/test/fuseji.test.js @@ -0,0 +1,135 @@ +/* + * Copyright (C) 2023-2026 Yomitan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +import {readFileSync} from 'fs'; +import {fileURLToPath} from 'node:url'; +import path from 'path'; +import {describe, expect} from 'vitest'; +import {parseJson} from '../dev/json.js'; +import {createTranslatorTest} from './fixtures/translator-test.js'; +import {setupStubs} from './utilities/database.js'; +import {createFindTermsOptions} from './utilities/translator.js'; + +setupStubs(); + +const dirname = path.dirname(fileURLToPath(import.meta.url)); +const dictionaryName = 'Test Dictionary 2'; +const translatorTest = await createTranslatorTest(void 0, path.join(dirname, 'data/dictionaries/valid-dictionary1'), dictionaryName); +/** @type {import('test/translator').TranslatorTestInputs} */ +const {optionsPresets} = parseJson(readFileSync(path.join(dirname, 'data/translator-test-inputs.json'), {encoding: 'utf8'})); + +describe('Fuseji lookup', () => { + translatorTest('does not find fuseji terms when disabled', async ({translator}) => { + const options = createFindTermsOptions(dictionaryName, optionsPresets, 'default'); + + const {dictionaryEntries} = await translator.findTerms('split', '打〇込む', options); + expect(dictionaryEntries.some(({headwords}) => headwords.some(({term}) => term === '打ち込む'))).toBe(false); + }); + + translatorTest('finds terms using custom fuseji trigger characters', async ({translator}) => { + const options = createFindTermsOptions(dictionaryName, optionsPresets, 'default'); + options.enableFusejiLookup = true; + options.fusejiTriggers = '〇●'; + + const {dictionaryEntries, originalTextLength} = await translator.findTerms('split', '打〇込む', options); + 
expect(originalTextLength).toBe(4); + expect(dictionaryEntries.some(({headwords}) => headwords.some(({term}) => term === '打ち込む'))).toBe(true); + }); + + translatorTest('does not treat circle variants as triggers unless configured', async ({translator}) => { + const options = createFindTermsOptions(dictionaryName, optionsPresets, 'default'); + options.enableFusejiLookup = true; + options.fusejiTriggers = '●'; + + const {dictionaryEntries} = await translator.findTerms('split', '打〇込む', options); + expect(dictionaryEntries.some(({headwords}) => headwords.some(({term}) => term === '打ち込む'))).toBe(false); + }); + + translatorTest('falls back to a normal lookup for an unmasked word preceding a later trigger', async ({translator}) => { + const options = createFindTermsOptions(dictionaryName, optionsPresets, 'default'); + options.enableFusejiLookup = true; + + const {dictionaryEntries, originalTextLength} = await translator.findTerms('split', '打ち込むマ○ド', options); + expect(originalTextLength).toBe(4); + expect(dictionaryEntries.some(({headwords}) => headwords.some(({term}) => term === '打ち込む'))).toBe(true); + }); + + translatorTest('finds fuseji terms at the start of scanned sentence text', async ({translator}) => { + const options = createFindTermsOptions(dictionaryName, optionsPresets, 'default'); + options.enableFusejiLookup = true; + + const {dictionaryEntries, originalTextLength} = await translator.findTerms('split', '打〇込むという', options); + expect(originalTextLength).toBe(4); + expect(dictionaryEntries.some(({headwords}) => headwords.some(({term}) => term === '打ち込む'))).toBe(true); + }); + + translatorTest('finds multi-mask katakana terms', async ({translator}) => { + const options = createFindTermsOptions(dictionaryName, optionsPresets, 'default'); + options.enableFusejiLookup = true; + + const {dictionaryEntries, originalTextLength} = await translator.findTerms('split', 'マ○ド○ルド', options); + expect(originalTextLength).toBe(6); + expect(dictionaryEntries.some(({headwords}) 
=> headwords.some(({term}) => term === 'マクドナルド'))).toBe(true); + }); + + translatorTest('finds multi-mask katakana terms at the start of scanned sentence text', async ({translator}) => { + const options = createFindTermsOptions(dictionaryName, optionsPresets, 'default'); + options.enableFusejiLookup = true; + + const {dictionaryEntries, originalTextLength} = await translator.findTerms('split', 'マ○ド○ルドに行きたい', options); + expect(originalTextLength).toBe(6); + expect(dictionaryEntries.some(({headwords}) => headwords.some(({term}) => term === 'マクドナルド'))).toBe(true); + }); + + translatorTest('sorts fuller fuseji matches before shorter partial matches', async ({translator}) => { + const options = createFindTermsOptions(dictionaryName, optionsPresets, 'default'); + options.enableFusejiLookup = true; + + const {dictionaryEntries, originalTextLength} = await translator.findTerms('split', 'マ○ドナ○ドに行きたい', options); + expect(originalTextLength).toBe(6); + expect(dictionaryEntries.length).toBeGreaterThan(1); + expect(dictionaryEntries[0].headwords.some(({term}) => term === 'マクドナルド')).toBe(true); + expect(dictionaryEntries.some(({headwords}) => headwords.some(({term}) => term === 'マギ'))).toBe(true); + }); + + translatorTest('finds terms with leading fuseji triggers', async ({translator}) => { + const options = createFindTermsOptions(dictionaryName, optionsPresets, 'default'); + options.enableFusejiLookup = true; + + const {dictionaryEntries, originalTextLength} = await translator.findTerms('split', '〇ち込む', options); + expect(originalTextLength).toBe(4); + expect(dictionaryEntries.some(({headwords}) => headwords.some(({term}) => term === '打ち込む'))).toBe(true); + }); + + translatorTest('finds leading-trigger fuseji terms at the start of scanned sentence text', async ({translator}) => { + const options = createFindTermsOptions(dictionaryName, optionsPresets, 'default'); + options.enableFusejiLookup = true; + + const {dictionaryEntries, originalTextLength} = await 
translator.findTerms('split', '〇ち込むという', options); + expect(originalTextLength).toBe(4); + expect(dictionaryEntries.some(({headwords}) => headwords.some(({term}) => term === '打ち込む'))).toBe(true); + }); + + translatorTest('finds terms with trailing fuseji triggers', async ({translator}) => { + const options = createFindTermsOptions(dictionaryName, optionsPresets, 'default'); + options.enableFusejiLookup = true; + + const {dictionaryEntries, originalTextLength} = await translator.findTerms('split', '打ち込〇', options); + expect(originalTextLength).toBe(4); + expect(dictionaryEntries.some(({headwords}) => headwords.some(({term}) => term === '打ち込む'))).toBe(true); + }); +}); diff --git a/test/options-util.test.js b/test/options-util.test.js index 8febf0fc4a..9a96d4c292 100644 --- a/test/options-util.test.js +++ b/test/options-util.test.js @@ -106,6 +106,8 @@ function createProfileOptionsTestData1() { convertHiraganaToKatakana: 'false', convertKatakanaToHiragana: 'variant', collapseEmphaticSequences: 'false', + enableFusejiLookup: false, + fusejiTriggers: '◯○〇●', }, dictionaries: { 'Test Dictionary 1': { @@ -467,6 +469,8 @@ function createProfileOptionsUpdatedTestData1() { }, translation: { searchResolution: 'letter', + enableFusejiLookup: false, + fusejiTriggers: '◯○〇●', textReplacements: { searchOriginal: true, groups: [], @@ -707,7 +711,7 @@ function createOptionsUpdatedTestData1() { }, ], profileCurrent: 0, - version: 76, + version: 77, global: { database: { prefixWildcardsSupported: false, diff --git a/test/utilities/translator.js b/test/utilities/translator.js index bd69aafb1c..fb6f7410a6 100644 --- a/test/utilities/translator.js +++ b/test/utilities/translator.js @@ -127,6 +127,8 @@ export function createFindTermsOptions(dictionaryName, optionsPresets, optionsAr primaryReading, excludeDictionaryDefinitions, searchResolution, + enableFusejiLookup, + fusejiTriggers, language, } = preset; @@ -142,6 +144,8 @@ export function createFindTermsOptions(dictionaryName, 
optionsPresets, optionsAr enabledDictionaryMap, excludeDictionaryDefinitions: Array.isArray(excludeDictionaryDefinitions) ? new Set(excludeDictionaryDefinitions) : null, searchResolution: typeof searchResolution !== 'undefined' ? searchResolution : 'letter', + enableFusejiLookup: typeof enableFusejiLookup !== 'undefined' ? enableFusejiLookup : false, + fusejiTriggers: typeof fusejiTriggers !== 'undefined' ? fusejiTriggers : '◯○〇●', language: typeof language !== 'undefined' ? language : 'ja', useAllFrequencyDictionaries: false, }; diff --git a/types/ext/settings.d.ts b/types/ext/settings.d.ts index 924875a7cc..4d85aac1cc 100644 --- a/types/ext/settings.d.ts +++ b/types/ext/settings.d.ts @@ -261,6 +261,8 @@ export type TranslationOptions = { collapseEmphaticSequences: TranslationCollapseEmphaticSequences; textReplacements: TranslationTextReplacementOptions; searchResolution: SearchResolution; + enableFusejiLookup: boolean; + fusejiTriggers: string; }; export type SearchResolution = 'letter' | 'word'; diff --git a/types/ext/translation.d.ts b/types/ext/translation.d.ts index 8617e714b4..a0918cbe64 100644 --- a/types/ext/translation.d.ts +++ b/types/ext/translation.d.ts @@ -64,6 +64,14 @@ export type FindTermsOptions = { * Whether or not deinflection should be performed. */ deinflect: boolean; + /** + * Whether or not masked lookup should be performed. + */ + enableFusejiLookup: boolean; + /** + * The set of characters which should be treated as gap markers in masked lookup. + */ + fusejiTriggers?: string; /** * The reading which will be sorted to the top of the results, if provided as a query parameter. 
*/ diff --git a/types/test/translator.d.ts b/types/test/translator.d.ts index 919bf1c427..ed363ab966 100644 --- a/types/test/translator.d.ts +++ b/types/test/translator.d.ts @@ -49,6 +49,8 @@ export type FindTermsOptionsPreset = { enabledDictionaryMap?: [key: string, value: FindTermDictionary][]; excludeDictionaryDefinitions?: string[] | null; searchResolution?: SearchResolution; + enableFusejiLookup?: boolean; + fusejiTriggers?: string; language?: string; }; From 6d9b214feec3e7a3329ea4bebd2a57397424b1f5 Mon Sep 17 00:00:00 2001 From: noatdk Date: Thu, 4 Jun 2026 00:01:02 +0700 Subject: [PATCH 2/9] refactor: Leaner db path for fuseji. Pass predicate to findTermsBulk to filter by key before materializing --- ext/js/dictionary/dictionary-database.js | 6 ++- ext/js/language/translator.js | 53 ++++++++++++++++++------ 2 files changed, 46 insertions(+), 13 deletions(-) diff --git a/ext/js/dictionary/dictionary-database.js b/ext/js/dictionary/dictionary-database.js index 6b3d083d7f..0a5e61034d 100644 --- a/ext/js/dictionary/dictionary-database.js +++ b/ext/js/dictionary/dictionary-database.js @@ -283,13 +283,17 @@ export class DictionaryDatabase { * @param {string[]} termList * @param {import('dictionary-database').DictionarySet} dictionaries * @param {import('dictionary-database').MatchType} matchType + * @param {?((term: string) => boolean)} [keyFilter] When provided, a record is kept only if its expression or reading + * satisfies this predicate. * @returns {Promise} */ - findTermsBulk(termList, dictionaries, matchType) { + findTermsBulk(termList, dictionaries, matchType, keyFilter = null) { const visited = new Set(); /** @type {import('dictionary-database').FindPredicate} */ const predicate = (row) => { if (!dictionaries.has(row.dictionary)) { return false; } + // The full record is already in hand, so match the forward expression/reading directly — no index-key reversal. 
+ if (keyFilter !== null && !keyFilter(row.expression) && !(row.reading.length > 0 && keyFilter(row.reading))) { return false; } const {id} = row; if (visited.has(id)) { return false; } visited.add(id); diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js index ab09b58bc6..e0215bae87 100644 --- a/ext/js/language/translator.js +++ b/ext/js/language/translator.js @@ -310,20 +310,51 @@ export class Translator { return {dictionaryEntries: [], originalTextLength: 0}; } + // The full masked pattern is pushed down to the database. + const matcher = this._createFusejiPatternMatcher(text, triggerSet); + const termKeyFilter = (/** @type {string} */ term) => matcher(term) > 0; + + // NOTE: inflected masked spans are unsupported. If the mask covers the stem (eg. ◯んでる), visible + // tail never matches a dictionary-form headword and deinflection doesnt help — the lookup text + // here is the stripped anchor, not the whole word. Supporting it means deinflecting the visible + // tail and reconstructing the pattern (◯んでる → ◯む) before lookup. + // Arguably not desirable since it pollutes the results? 
/** @type {import('translation-internal').TermDictionaryEntry[]} */ let dictionaryEntries = []; if (prefixLookupText.length > 0) { - const lookupOptions = /** @type {import('translation').FindTermsOptions} */ ({...options, matchType: 'prefix', deinflect: false}); - ({dictionaryEntries} = await this._findTermsInternal(prefixLookupText, lookupOptions, tagAggregator, primaryReading)); + ({dictionaryEntries} = await this._findFusejiTermsForAnchor(prefixLookupText, 'prefix', options, tagAggregator, primaryReading, termKeyFilter)); } else { - // fallback to normal lookup - const lookupOptions = /** @type {import('translation').FindTermsOptions} */ ({...options, matchType: 'suffix', deinflect: false}); for (const lookupText of this._getFusejiSuffixLookupTexts(suffixLookupText)) { - ({dictionaryEntries} = await this._findTermsInternal(lookupText, lookupOptions, tagAggregator, primaryReading)); + ({dictionaryEntries} = await this._findFusejiTermsForAnchor(lookupText, 'suffix', options, tagAggregator, primaryReading, termKeyFilter)); if (dictionaryEntries.length > 0) { break; } } } - return this._filterFusejiDictionaryEntries(dictionaryEntries, text, triggerSet); + return this._filterFusejiDictionaryEntries(dictionaryEntries, matcher); + } + + /** + * Looks up dictionary entries for a single fuseji anchor (the unmasked text before the first trigger, + * or after the last). Unlike {@link Translator._findTermsInternal} this skips the deinflection pipeline + * (algorithm deinflection and {@link Translator._getDictionaryDeinflections}). + * @param {string} anchorText The unmasked prefix/suffix to scan for. + * @param {import('dictionary').TermSourceMatchType} matchType `'prefix'` or `'suffix'`. + * @param {import('translation').FindTermsOptions} options + * @param {TranslatorTagAggregator} tagAggregator + * @param {string} primaryReading + * @param {(term: string) => boolean} termKeyFilter Keeps only records whose term/reading fits the masked pattern. 
+ * @returns {Promise<{dictionaryEntries: import('translation-internal').TermDictionaryEntry[], originalTextLength: number}>} + */ + async _findFusejiTermsForAnchor(anchorText, matchType, options, tagAggregator, primaryReading, termKeyFilter) { + const {enabledDictionaryMap} = options; + const databaseEntries = await this._database.findTermsBulk([anchorText], enabledDictionaryMap, matchType, termKeyFilter); + for (const entry of databaseEntries) { entry.definitions = entry.definitions.filter((d) => !Array.isArray(d)); } + const matchedEntries = databaseEntries.filter((entry) => entry.definitions.length > 0); + + if (matchedEntries.length === 0) { return {dictionaryEntries: [], originalTextLength: 0}; } + + const deinflection = this._createDeinflection(anchorText, anchorText, anchorText, 0, [], []); + deinflection.databaseEntries = matchedEntries; + return this._getDictionaryEntries([deinflection], enabledDictionaryMap, tagAggregator, primaryReading); } /** @@ -363,15 +394,13 @@ export class Translator { } /** - * Narrows a set of candidate entries down to those - * whose term or reading actually fits the masked pattern, treating trigger characters as wildcards. + * Narrows a set of candidate entries down to those whose term or reading fits the masked + * pattern, and records the matched source length on each survivor. * @param {import('translation-internal').TermDictionaryEntry[]} dictionaryEntries - * @param {string} patternText The original masked text, e.g. `マ◯ド◯ルド`. - * @param {Set} triggerSet + * @param {(text: string) => number} matcher The pattern matcher from {@link Translator._createFusejiPatternMatcher}. 
* @returns {{dictionaryEntries: import('translation-internal').TermDictionaryEntry[], originalTextLength: number}} */ - _filterFusejiDictionaryEntries(dictionaryEntries, patternText, triggerSet) { - const matcher = this._createFusejiPatternMatcher(patternText, triggerSet); + _filterFusejiDictionaryEntries(dictionaryEntries, matcher) { let originalTextLength = 0; /** @type {import('translation-internal').TermDictionaryEntry[]} */ const filteredDictionaryEntries = []; From 51489698cf814ecf509d2bd7a54daf913ef22f7d Mon Sep 17 00:00:00 2001 From: noatdk Date: Thu, 4 Jun 2026 00:12:30 +0700 Subject: [PATCH 3/9] refactor: Optimize fuseji pattern matcher --- ext/js/language/translator.js | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js index e0215bae87..de61baddd3 100644 --- a/ext/js/language/translator.js +++ b/ext/js/language/translator.js @@ -432,22 +432,18 @@ export class Translator { patternTextLengths.push(patternTextLengths[patternTextLengths.length - 1] + character.length); } return (text) => { - const textChars = [...text]; - if (textChars.length > patternChars.length) { - return 0; - } - for (let i = 0; i < patternChars.length; ++i) { - if (i >= textChars.length) { - return patternTextLengths[textChars.length]; - } - if (triggerSet.has(patternChars[i])) { - continue; + let matchedCount = 0; + for (const character of text) { + if (matchedCount >= patternChars.length) { + return 0; // candidate has more characters than the pattern } - if (patternChars[i] !== textChars[i]) { - return 0; + const patternCharacter = patternChars[matchedCount]; + if (patternCharacter !== character && !triggerSet.has(patternCharacter)) { + return 0; // literal mismatch } + ++matchedCount; } - return patternTextLengths[textChars.length]; + return patternTextLengths[matchedCount]; }; } From a6834c28910f305d5787808fbc06ff5dd97b902a Mon Sep 17 00:00:00 2001 From: noatdk Date: Thu, 4 Jun 
2026 13:40:02 +0700 Subject: [PATCH 4/9] perf: Skip-scan fuseji prefix lookups to avoid full anchor-range scan --- ext/js/data/database.js | 27 +++ ext/js/dictionary/dictionary-database.js | 213 ++++++++++++++++++++++- ext/js/language/translator.js | 30 ++-- types/ext/dictionary-database.d.ts | 6 + 4 files changed, 259 insertions(+), 17 deletions(-) diff --git a/ext/js/data/database.js b/ext/js/data/database.js index 8d37f8dcfb..5b062e3d19 100644 --- a/ext/js/data/database.js +++ b/ext/js/data/database.js @@ -216,6 +216,33 @@ export class Database { } } + /** + * Collects the primary keys whose index key satisfies `keyPredicate`. Reads only index keys (no record + * values), so it is cheaper than {@link Database.getAll} when most of the range is discarded. + * @param {IDBObjectStore|IDBIndex} objectStoreOrIndex + * @param {?IDBValidKey|IDBKeyRange} query + * @param {(key: IDBValidKey) => boolean} keyPredicate + * @param {(primaryKeys: IDBValidKey[]) => void} onSuccess + * @param {(reason?: unknown) => void} onError + */ + getPrimaryKeysWhere(objectStoreOrIndex, query, keyPredicate, onSuccess, onError) { + /** @type {IDBValidKey[]} */ + const results = []; + const request = objectStoreOrIndex.openKeyCursor(query, 'next'); + request.onerror = (e) => onError(/** @type {IDBRequest} */ (e.target).error); + request.onsuccess = (e) => { + const cursor = /** @type {IDBRequest} */ (e.target).result; + if (cursor) { + if (keyPredicate(cursor.key)) { + results.push(cursor.primaryKey); + } + cursor.continue(); + } else { + onSuccess(results); + } + }; + } + /** * @template [TPredicateArg=unknown] * @template [TResult=unknown] diff --git a/ext/js/dictionary/dictionary-database.js b/ext/js/dictionary/dictionary-database.js index 0a5e61034d..e44852fcf4 100644 --- a/ext/js/dictionary/dictionary-database.js +++ b/ext/js/dictionary/dictionary-database.js @@ -283,17 +283,13 @@ export class DictionaryDatabase { * @param {string[]} termList * @param 
{import('dictionary-database').DictionarySet} dictionaries * @param {import('dictionary-database').MatchType} matchType - * @param {?((term: string) => boolean)} [keyFilter] When provided, a record is kept only if its expression or reading - * satisfies this predicate. * @returns {Promise} */ - findTermsBulk(termList, dictionaries, matchType, keyFilter = null) { + findTermsBulk(termList, dictionaries, matchType) { const visited = new Set(); /** @type {import('dictionary-database').FindPredicate} */ const predicate = (row) => { if (!dictionaries.has(row.dictionary)) { return false; } - // The full record is already in hand, so match the forward expression/reading directly — no index-key reversal. - if (keyFilter !== null && !keyFilter(row.expression) && !(row.reading.length > 0 && keyFilter(row.reading))) { return false; } const {id} = row; if (visited.has(id)) { return false; } visited.add(id); @@ -317,6 +313,213 @@ export class DictionaryDatabase { return this._findMultiBulk('terms', indexNames, termList, createQuery, predicate, createResult); } + /** + * Finds term records for a fuseji (masked) lookup from one unmasked anchor; a record is kept when its + * expression OR reading fits the masked pattern. Prefix anchors use a skip-scan (jumps over the range + * via the pattern's literals); suffix anchors (already selective) use a plain key cursor. + * @param {string} anchor Unmasked prefix/suffix literal bounding the index range. + * @param {import('dictionary-database').MatchType} matchType + * @param {import('dictionary-database').DictionarySet} dictionaries + * @param {(term: string) => boolean} keyMatcher Tests a forward expression/reading against the pattern. + * @param {?import('dictionary-database').MaskedPattern} [pattern] Enables skip-scan when present. 
+ * @returns {Promise} + */ + findTermsByMaskedQueryBulk(anchor, matchType, dictionaries, keyMatcher, pattern = null) { + if (matchType === 'prefix' && pattern !== null) { + return this._fusejiFindViaSkipScan(anchor, dictionaries, pattern); + } + return this._fusejiFindViaCursor(anchor, matchType, dictionaries, keyMatcher); + } + + /** + * @param {string} anchor + * @param {import('dictionary-database').DictionarySet} dictionaries + * @param {import('dictionary-database').MaskedPattern} pattern + * @returns {Promise} + */ + _fusejiFindViaSkipScan(anchor, dictionaries, pattern) { + return new Promise((resolve, reject) => { + const indexNames = ['expression', 'reading']; + const transaction = this._db.transaction(['terms'], 'readonly'); + const objectStore = transaction.objectStore('terms'); + + // Survivor -> matching index; lower index wins so expression (0) beats reading (1) for matchSource. + /** @type {Map} */ + const primaryKeyToIndexIndex = new Map(); + let completed = 0; + + for (let j = 0; j < indexNames.length; ++j) { + const indexIndex = j; + this._fusejiSkipScanIndex(objectStore, indexNames[j], anchor, pattern, (primaryKeys) => { + for (const primaryKey of primaryKeys) { + const existing = primaryKeyToIndexIndex.get(primaryKey); + if (existing === void 0 || indexIndex < existing) { + primaryKeyToIndexIndex.set(primaryKey, indexIndex); + } + } + if (++completed >= indexNames.length) { + this._fusejiFetchAndBuild(objectStore, primaryKeyToIndexIndex, anchor, 'prefix', dictionaries, resolve, reject); + } + }, reject); + } + }); + } + + /** + * Skip-scan over one index: seeks to the required char at literal positions, lets the cursor enumerate + * only the chars present at mask positions, and skips non-matching subtrees. A match is any key no longer + * than the pattern that satisfies every literal it spans. + * @param {IDBObjectStore} objectStore + * @param {string} indexName + * @param {string} anchor Leading literal run; also the range bound. 
+ * @param {import('dictionary-database').MaskedPattern} pattern + * @param {(primaryKeys: IDBValidKey[]) => void} onComplete + * @param {(reason?: unknown) => void} onError + */ + _fusejiSkipScanIndex(objectStore, indexName, anchor, pattern, onComplete, onError) { + const {chars: patternChars, isMask} = pattern; + const patternLength = patternChars.length; + const index = objectStore.index(indexName); + const request = index.openKeyCursor(this._createBoundQuery1(anchor), 'next'); + /** @type {IDBValidKey[]} */ + const primaryKeys = []; + request.onerror = (e) => onError(/** @type {IDBRequest} */ (e.target).error); + request.onsuccess = (e) => { + const cursor = /** @type {IDBRequest} */ (e.target).result; + if (cursor === null) { + onComplete(primaryKeys); + return; + } + const keyChars = [...(/** @type {string} */ (cursor.key))]; + const keyLength = keyChars.length; + + // first pattern violation in the key, if any. + let violation = -1; + const checkLength = Math.min(keyLength, patternLength); + for (let i = 0; i < checkLength; ++i) { + if (!isMask[i] && patternChars[i] !== keyChars[i]) { + violation = i; + break; + } + } + + if (violation === -1) { + if (keyLength <= patternLength) { + // match: record, then step to the next key. + primaryKeys.push(cursor.primaryKey); + cursor.continue(); + } else { + // longer than the pattern: skip this subtree. + cursor.continue(keyChars.slice(0, patternLength).join('') + '\uffff'); + } + return; + } + + // literal below the required char: seek forward to it. + if (keyChars[violation] < patternChars[violation]) { + cursor.continue(keyChars.slice(0, violation).join('') + patternChars[violation]); + return; + } + + // literal already past the required char: carry to the nearest preceding mask and skip its subtree. 
+ let carry = -1; + for (let i = violation - 1; i >= 0; --i) { + if (isMask[i]) { + carry = i; + break; + } + } + if (carry === -1) { + onComplete(primaryKeys); + return; + } + cursor.continue(keyChars.slice(0, carry + 1).join('') + '\uffff'); + }; + } + + /** + * Plain key-cursor scan of the (reverse, for suffix) index, matching each key and keeping survivors. + * @param {string} anchor + * @param {import('dictionary-database').MatchType} matchType + * @param {import('dictionary-database').DictionarySet} dictionaries + * @param {(term: string) => boolean} keyMatcher + * @returns {Promise} + */ + _fusejiFindViaCursor(anchor, matchType, dictionaries, keyMatcher) { + return new Promise((resolve, reject) => { + const isSuffix = (matchType === 'suffix'); + const indexNames = isSuffix ? ['expressionReverse', 'readingReverse'] : ['expression', 'reading']; + const createQuery = isSuffix ? this._createBoundQuery2 : this._createBoundQuery1; + const query = createQuery(anchor); + + const transaction = this._db.transaction(['terms'], 'readonly'); + const objectStore = transaction.objectStore('terms'); + + /** @type {Map} */ + const primaryKeyToIndexIndex = new Map(); + let completedCursors = 0; + + for (let j = 0; j < indexNames.length; ++j) { + const indexIndex = j; + const index = objectStore.index(indexNames[j]); + /** @type {(key: IDBValidKey) => boolean} */ + const keyPredicate = (key) => { + const forward = isSuffix ? 
stringReverse(/** @type {string} */ (key)) : /** @type {string} */ (key); + return keyMatcher(forward); + }; + /** @type {(primaryKeys: IDBValidKey[]) => void} */ + const onKeys = (primaryKeys) => { + for (const primaryKey of primaryKeys) { + const existing = primaryKeyToIndexIndex.get(primaryKey); + if (existing === void 0 || indexIndex < existing) { + primaryKeyToIndexIndex.set(primaryKey, indexIndex); + } + } + if (++completedCursors >= indexNames.length) { + this._fusejiFetchAndBuild(objectStore, primaryKeyToIndexIndex, anchor, matchType, dictionaries, resolve, reject); + } + }; + this._db.getPrimaryKeysWhere(index, query, keyPredicate, onKeys, reject); + } + }); + } + + /** + * Fetches the survivor records, filters by dictionary, builds term entries, and resolves. + * @param {IDBObjectStore} objectStore + * @param {Map} primaryKeyToIndexIndex Survivor -> matching index (0 expression, 1 reading). + * @param {string} anchor + * @param {import('dictionary-database').MatchType} matchType + * @param {import('dictionary-database').DictionarySet} dictionaries + * @param {(results: import('dictionary-database').TermEntry[]) => void} resolve + * @param {(reason?: unknown) => void} reject + */ + _fusejiFetchAndBuild(objectStore, primaryKeyToIndexIndex, anchor, matchType, dictionaries, resolve, reject) { + const entries = [...primaryKeyToIndexIndex]; + if (entries.length === 0) { + resolve([]); + return; + } + /** @type {import('dictionary-database').TermEntry[]} */ + const results = []; + let remaining = entries.length; + for (const [primaryKey, indexIndex] of entries) { + const request = objectStore.get(primaryKey); + request.onerror = (e) => reject(/** @type {IDBRequest} */ (e.target).error); + request.onsuccess = (e) => { + const row = /** @type {IDBRequest} */ (e.target).result; + if (row !== null && row !== void 0 && dictionaries.has(row.dictionary)) { + /** @type {import('dictionary-database').FindMultiBulkData} */ + const data = {item: anchor, itemIndex: 0, 
indexIndex}; + results.push(this._createTermGeneric(matchType, row, data)); + } + if (--remaining === 0) { + resolve(results); + } + }; + } + } + /** * @param {import('dictionary-database').TermExactRequest[]} termList * @param {import('dictionary-database').DictionarySet} dictionaries diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js index de61baddd3..372f465e7a 100644 --- a/ext/js/language/translator.js +++ b/ext/js/language/translator.js @@ -310,9 +310,12 @@ export class Translator { return {dictionaryEntries: [], originalTextLength: 0}; } - // The full masked pattern is pushed down to the database. const matcher = this._createFusejiPatternMatcher(text, triggerSet); const termKeyFilter = (/** @type {string} */ term) => matcher(term) > 0; + // Same pattern in structured form, used to drive the database skip-scan. + /** @type {import('dictionary-database').MaskedPattern} */ + const pattern = {chars: fusejiDetails.characters, isMask: fusejiDetails.characters.map((c) => triggerSet.has(c))}; + // NOTE: inflected masked spans are unsupported. If the mask covers the stem (eg. 
◯んでる), visible // tail never matches a dictionary-form headword and deinflection doesnt help — the lookup text @@ -322,14 +325,14 @@ export class Translator { /** @type {import('translation-internal').TermDictionaryEntry[]} */ let dictionaryEntries = []; if (prefixLookupText.length > 0) { - ({dictionaryEntries} = await this._findFusejiTermsForAnchor(prefixLookupText, 'prefix', options, tagAggregator, primaryReading, termKeyFilter)); + ({dictionaryEntries} = await this._findFusejiTermsForAnchor(prefixLookupText, 'prefix', options, tagAggregator, primaryReading, termKeyFilter, pattern)); } else { for (const lookupText of this._getFusejiSuffixLookupTexts(suffixLookupText)) { - ({dictionaryEntries} = await this._findFusejiTermsForAnchor(lookupText, 'suffix', options, tagAggregator, primaryReading, termKeyFilter)); + ({dictionaryEntries} = await this._findFusejiTermsForAnchor(lookupText, 'suffix', options, tagAggregator, primaryReading, termKeyFilter, null)); if (dictionaryEntries.length > 0) { break; } } } - return this._filterFusejiDictionaryEntries(dictionaryEntries, matcher); + return this._applyFusejiMatchLengths(dictionaryEntries, matcher); } /** @@ -342,11 +345,12 @@ export class Translator { * @param {TranslatorTagAggregator} tagAggregator * @param {string} primaryReading * @param {(term: string) => boolean} termKeyFilter Keeps only records whose term/reading fits the masked pattern. + * @param {?import('dictionary-database').MaskedPattern} pattern The forward masked pattern, enabling skip-scan for prefix anchors. 
* @returns {Promise<{dictionaryEntries: import('translation-internal').TermDictionaryEntry[], originalTextLength: number}>} */ - async _findFusejiTermsForAnchor(anchorText, matchType, options, tagAggregator, primaryReading, termKeyFilter) { + async _findFusejiTermsForAnchor(anchorText, matchType, options, tagAggregator, primaryReading, termKeyFilter, pattern) { const {enabledDictionaryMap} = options; - const databaseEntries = await this._database.findTermsBulk([anchorText], enabledDictionaryMap, matchType, termKeyFilter); + const databaseEntries = await this._database.findTermsByMaskedQueryBulk(anchorText, matchType, enabledDictionaryMap, termKeyFilter, pattern); for (const entry of databaseEntries) { entry.definitions = entry.definitions.filter((d) => !Array.isArray(d)); } const matchedEntries = databaseEntries.filter((entry) => entry.definitions.length > 0); @@ -394,28 +398,30 @@ export class Translator { } /** - * Narrows a set of candidate entries down to those whose term or reading fits the masked - * pattern, and records the matched source length on each survivor. + * Records the matched source length on each fuseji entry and returns the overall original text length. + * Entries are already confirmed against the pattern at the database level (see + * {@link Translator._findFusejiTermsForAnchor}), so this does not filter — the `matchLength > 0` check is + * a defensive guard, not a meaningful narrowing. * @param {import('translation-internal').TermDictionaryEntry[]} dictionaryEntries * @param {(text: string) => number} matcher The pattern matcher from {@link Translator._createFusejiPatternMatcher}. 
* @returns {{dictionaryEntries: import('translation-internal').TermDictionaryEntry[], originalTextLength: number}} */ - _filterFusejiDictionaryEntries(dictionaryEntries, matcher) { + _applyFusejiMatchLengths(dictionaryEntries, matcher) { let originalTextLength = 0; /** @type {import('translation-internal').TermDictionaryEntry[]} */ - const filteredDictionaryEntries = []; + const matchedDictionaryEntries = []; for (const dictionaryEntry of dictionaryEntries) { for (const headword of dictionaryEntry.headwords) { const candidates = headword.reading.length === 0 ? [headword.term] : [headword.term, headword.reading]; const matchLength = Math.max(...candidates.map(matcher)); if (matchLength > 0) { originalTextLength = Math.max(originalTextLength, matchLength); - filteredDictionaryEntries.push({...dictionaryEntry, maxOriginalTextLength: Math.max(dictionaryEntry.maxOriginalTextLength, matchLength)}); + matchedDictionaryEntries.push({...dictionaryEntry, maxOriginalTextLength: Math.max(dictionaryEntry.maxOriginalTextLength, matchLength)}); break; } } } - return {dictionaryEntries: filteredDictionaryEntries, originalTextLength}; + return {dictionaryEntries: matchedDictionaryEntries, originalTextLength}; } /** diff --git a/types/ext/dictionary-database.d.ts b/types/ext/dictionary-database.d.ts index 28918b9fec..7a571751ff 100644 --- a/types/ext/dictionary-database.d.ts +++ b/types/ext/dictionary-database.d.ts @@ -229,6 +229,12 @@ export type DeleteDictionaryProgressCallback = (data: DeleteDictionaryProgressDa export type MatchType = Dictionary.TermSourceMatchType; +/** Forward fuseji pattern: `chars` are the code points; `isMask[i]` marks a wildcard position. 
*/ +export type MaskedPattern = { + chars: string[]; + isMask: boolean[]; +}; + export type MatchSource = Dictionary.TermSourceMatchSource; export type DictionaryAndQueryRequest = { From 7c4ee520f20a6c60b5a7a4b3a194f25b7713c6a5 Mon Sep 17 00:00:00 2001 From: noatdk Date: Thu, 4 Jun 2026 14:10:59 +0700 Subject: [PATCH 5/9] refactor: Fold termKeyFilter into the new MaskedPattern --- ext/js/language/translator.js | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js index 372f465e7a..536f7cda56 100644 --- a/ext/js/language/translator.js +++ b/ext/js/language/translator.js @@ -310,12 +310,11 @@ export class Translator { return {dictionaryEntries: [], originalTextLength: 0}; } - const matcher = this._createFusejiPatternMatcher(text, triggerSet); - const termKeyFilter = (/** @type {string} */ term) => matcher(term) > 0; - // Same pattern in structured form, used to drive the database skip-scan. + // Single decomposition of the masked pattern, shared by the in-memory matcher and the database skip-scan. /** @type {import('dictionary-database').MaskedPattern} */ const pattern = {chars: fusejiDetails.characters, isMask: fusejiDetails.characters.map((c) => triggerSet.has(c))}; - + const matcher = this._createFusejiPatternMatcher(pattern); + const termKeyFilter = (/** @type {string} */ term) => matcher(term) > 0; // NOTE: inflected masked spans are unsupported. If the mask covers the stem (eg. ◯んでる), visible // tail never matches a dictionary-form headword and deinflection doesnt help — the lookup text @@ -425,14 +424,13 @@ export class Translator { } /** - * Builds a matcher function that tests a candidate string against the masked pattern. + * Builds a matcher function that tests a candidate string against a masked pattern. * Candidates shorter than the pattern are allowed to match a leading portion of it (partial match). 
- * @param {string} patternText The masked text to match against, e.g. `マ◯ド◯ルド`. - * @param {Set} triggerSet + * @param {import('dictionary-database').MaskedPattern} pattern The decomposed masked pattern, e.g. `マ◯ド◯ルド`. * @returns {(text: string) => number} A function returning the matched length in code units, or `0` if the candidate does not fit the pattern. */ - _createFusejiPatternMatcher(patternText, triggerSet) { - const patternChars = [...patternText]; + _createFusejiPatternMatcher(pattern) { + const {chars: patternChars, isMask} = pattern; const patternTextLengths = [0]; for (const character of patternChars) { patternTextLengths.push(patternTextLengths[patternTextLengths.length - 1] + character.length); @@ -443,8 +441,7 @@ export class Translator { if (matchedCount >= patternChars.length) { return 0; // candidate has more characters than the pattern } - const patternCharacter = patternChars[matchedCount]; - if (patternCharacter !== character && !triggerSet.has(patternCharacter)) { + if (!isMask[matchedCount] && patternChars[matchedCount] !== character) { return 0; // literal mismatch } ++matchedCount; From 3f4899540fac93477e4fdb8e8c2d811232987a30 Mon Sep 17 00:00:00 2001 From: noatdk Date: Thu, 4 Jun 2026 16:44:31 +0700 Subject: [PATCH 6/9] feat: Test findTermsByMaskedQueryBulk --- test/data/database-test-cases.json | 57 ++++++++++++++++++++++++++ test/database.test.js | 66 ++++++++++++++++++++++++++++++ types/test/database.d.ts | 20 ++++++++- 3 files changed, 142 insertions(+), 1 deletion(-) diff --git a/test/data/database-test-cases.json b/test/data/database-test-cases.json index 0b3471bf05..4705f2d4b3 100644 --- a/test/data/database-test-cases.json +++ b/test/data/database-test-cases.json @@ -185,6 +185,63 @@ } } ], + "findTermsByMaskedQueryBulk": [ + { + "inputs": [ + {"maskedText": "マ○ド○ルド", "triggers": "○", "anchor": "マ", "matchType": "prefix", "usePattern": true}, + {"maskedText": "マ○ド○ルド", "triggers": "○", "anchor": "マ", "matchType": "prefix", 
"usePattern": false} + ], + "expectedResults": { + "total": 3, + "terms": [["マ", 1], ["マギ", 1], ["マクドナルド", 1], ["マキマ", 0]], + "readings": [], + "matchTypes": [["exact", 1], ["prefix", 2]], + "matchSources": [["term", 3]] + } + }, + { + "inputs": [ + {"maskedText": "マ○", "triggers": "○", "anchor": "マ", "matchType": "prefix", "usePattern": true} + ], + "expectedResults": { + "total": 2, + "terms": [["マ", 1], ["マギ", 1], ["マキマ", 0], ["マクドナルド", 0]], + "readings": [] + } + }, + { + "inputs": [ + {"maskedText": "う○こむ", "triggers": "○", "anchor": "う", "matchType": "prefix", "usePattern": true} + ], + "expectedResults": { + "total": 4, + "terms": [["打ち込む", 2], ["打つ", 2]], + "readings": [["うちこむ", 2], ["うつ", 2]], + "matchSources": [["reading", 4], ["term", 0]] + } + }, + { + "inputs": [ + {"maskedText": "○○○ナルド", "triggers": "○", "anchor": "ナルド", "matchType": "suffix", "usePattern": false} + ], + "expectedResults": { + "total": 1, + "terms": [["マクドナルド", 1]], + "readings": [], + "matchTypes": [["suffix", 1]] + } + }, + { + "inputs": [ + {"maskedText": "ゑ○", "triggers": "○", "anchor": "ゑ", "matchType": "prefix", "usePattern": true} + ], + "expectedResults": { + "total": 0, + "terms": [], + "readings": [] + } + } + ], "findTermsExactBulk": [ { "inputs": [ diff --git a/test/database.test.js b/test/database.test.js index 418afe97d1..e2dabb1268 100644 --- a/test/database.test.js +++ b/test/database.test.js @@ -103,6 +103,51 @@ function countKanjiWithCharacter(kanji, character) { return i; } +/** + * @param {import('dictionary-database').TermEntry[]} dictionaryDatabaseEntries + * @param {import('dictionary-database').MatchType} matchType + * @returns {number} + */ +function countDictionaryDatabaseEntriesWithMatchType(dictionaryDatabaseEntries, matchType) { + return dictionaryDatabaseEntries.reduce((i, v) => (i + (v.matchType === matchType ? 
1 : 0)), 0); +} + +/** + * @param {import('dictionary-database').TermEntry[]} dictionaryDatabaseEntries + * @param {import('dictionary-database').MatchSource} matchSource + * @returns {number} + */ +function countDictionaryDatabaseEntriesWithMatchSource(dictionaryDatabaseEntries, matchSource) { + return dictionaryDatabaseEntries.reduce((i, v) => (i + (v.matchSource === matchSource ? 1 : 0)), 0); +} + +/** + * Builds the `(pattern, keyMatcher)` pair that {@link DictionaryDatabase.findTermsByMaskedQueryBulk} expects + * from a masked query string, mirroring what the translator constructs. Characters in `triggers` act as + * single-character wildcards; a candidate matches if it is no longer than the pattern and every literal it + * spans is equal (leading-portion / partial match). + * @param {string} maskedText + * @param {string} triggers + * @returns {{pattern: import('dictionary-database').MaskedPattern, keyMatcher: (term: string) => boolean}} + */ +function createMaskedQuery(maskedText, triggers) { + const triggerSet = new Set(triggers); + const chars = [...maskedText]; + const isMask = chars.map((character) => triggerSet.has(character)); + /** @type {import('dictionary-database').MaskedPattern} */ + const pattern = {chars, isMask}; + /** @type {(term: string) => boolean} */ + const keyMatcher = (term) => { + const termChars = [...term]; + if (termChars.length > chars.length) { return false; } + for (let i = 0; i < termChars.length; ++i) { + if (!isMask[i] && chars[i] !== termChars[i]) { return false; } + } + return true; + }; + return {pattern, keyMatcher}; +} + /** */ describe('Database', () => { @@ -272,6 +317,27 @@ describe('Database', () => { } } + // Test findTermsByMaskedQueryBulk + for (const {inputs, expectedResults} of testData.tests.findTermsByMaskedQueryBulk) { + for (const {maskedText, triggers, anchor, matchType, usePattern} of inputs) { + const {pattern, keyMatcher} = createMaskedQuery(maskedText, triggers); + const results = await 
dictionaryDatabase.findTermsByMaskedQueryBulk(anchor, matchType, titles, keyMatcher, usePattern ? pattern : null); + expect.soft(results.length).toStrictEqual(expectedResults.total); + for (const [term, count] of expectedResults.terms) { + expect.soft(countDictionaryDatabaseEntriesWithTerm(results, term)).toStrictEqual(count); + } + for (const [reading, count] of expectedResults.readings) { + expect.soft(countDictionaryDatabaseEntriesWithReading(results, reading)).toStrictEqual(count); + } + for (const [type, count] of expectedResults.matchTypes ?? []) { + expect.soft(countDictionaryDatabaseEntriesWithMatchType(results, type)).toStrictEqual(count); + } + for (const [source, count] of expectedResults.matchSources ?? []) { + expect.soft(countDictionaryDatabaseEntriesWithMatchSource(results, source)).toStrictEqual(count); + } + } + } + // Test findKanjiBulk for (const {inputs, expectedResults} of testData.tests.findKanjiBulk) { for (const {kanjiList} of inputs) { diff --git a/types/test/database.d.ts b/types/test/database.d.ts index 79b8c5ae72..5d53a91afe 100644 --- a/types/test/database.d.ts +++ b/types/test/database.d.ts @@ -16,7 +16,7 @@ */ import type {Summary} from '../ext/dictionary-importer'; -import type {Tag, MatchType, TermMetaType, KanjiMetaType, TermExactRequest, DictionaryCounts} from '../ext/dictionary-database'; +import type {Tag, MatchType, MatchSource, TermMetaType, KanjiMetaType, TermExactRequest, DictionaryCounts} from '../ext/dictionary-database'; export type DatabaseTestData = { expectedSummary: Summary; @@ -26,6 +26,7 @@ export type DatabaseTestData = { findTermsExactBulk: FindTermsExactBulkTestCase[]; findTermsBySequenceBulk: FindTermsBySequenceBulkTestCase[]; findTermMetaBulk: FindTermMetaBulkTestCase[]; + findTermsByMaskedQueryBulk: FindTermsByMaskedQueryBulkTestCase[]; findKanjiBulk: FindKanjiBulkTestCase[]; findKanjiMetaBulk: FindKanjiMetaBulkTestCase[]; findTagForTitle: FindTagForTitleTestCase[]; @@ -78,6 +79,23 @@ export type 
FindTermMetaBulkTestCase = { }; }; +export type FindTermsByMaskedQueryBulkTestCase = { + inputs: { + maskedText: string; + triggers: string; + anchor: string; + matchType: MatchType; + usePattern: boolean; + }[]; + expectedResults: { + total: number; + terms: ItemCount[]; + readings: ItemCount[]; + matchTypes?: ItemCount[]; + matchSources?: ItemCount[]; + }; +}; + export type FindKanjiBulkTestCase = { inputs: { kanjiList: string[]; From 98ad6975826dca4067ec2af27cac2d8330fda388 Mon Sep 17 00:00:00 2001 From: noatdk Date: Thu, 4 Jun 2026 18:30:56 +0700 Subject: [PATCH 7/9] perf: Skip-scan fuseji suffix lookups via the reverse index --- ext/js/dictionary/dictionary-database.js | 159 +++++++++++++---------- test/data/database-test-cases.json | 3 +- 2 files changed, 90 insertions(+), 72 deletions(-) diff --git a/ext/js/dictionary/dictionary-database.js b/ext/js/dictionary/dictionary-database.js index e44852fcf4..79270a6203 100644 --- a/ext/js/dictionary/dictionary-database.js +++ b/ext/js/dictionary/dictionary-database.js @@ -314,43 +314,104 @@ export class DictionaryDatabase { } /** - * Finds term records for a fuseji (masked) lookup from one unmasked anchor; a record is kept when its - * expression OR reading fits the masked pattern. Prefix anchors use a skip-scan (jumps over the range - * via the pattern's literals); suffix anchors (already selective) use a plain key cursor. + * Finds fuseji (masked) term records from one unmasked anchor, keeping records whose expression OR reading + * fits the masked `pattern`. With a `pattern`, skip-scans the anchor range via the pattern's literals; + * without one, falls back to a plain key-cursor scan (the reference path). + * + * Suffix anchors scan the reverse indices, where the sub-pattern (masks + anchor) is reversed so the same + * skip-scan applies; `keyMatcher` then guards each hit, as the reverse range over-includes other lengths. * @param {string} anchor Unmasked prefix/suffix literal bounding the index range. 
* @param {import('dictionary-database').MatchType} matchType * @param {import('dictionary-database').DictionarySet} dictionaries * @param {(term: string) => boolean} keyMatcher Tests a forward expression/reading against the pattern. - * @param {?import('dictionary-database').MaskedPattern} [pattern] Enables skip-scan when present. + * @param {?import('dictionary-database').MaskedPattern} [pattern] Forward masked pattern; enables skip-scan. * @returns {Promise} */ findTermsByMaskedQueryBulk(anchor, matchType, dictionaries, keyMatcher, pattern = null) { - if (matchType === 'prefix' && pattern !== null) { - return this._fusejiFindViaSkipScan(anchor, dictionaries, pattern); + if (pattern === null) { + return this._fusejiFindViaCursor(anchor, matchType, dictionaries, keyMatcher); } - return this._fusejiFindViaCursor(anchor, matchType, dictionaries, keyMatcher); + return new Promise((resolve, reject) => { + const isSuffix = (matchType === 'suffix'); + const indexNames = isSuffix ? ['expressionReverse', 'readingReverse'] : ['expression', 'reading']; + const query = isSuffix ? this._createBoundQuery2(anchor) : this._createBoundQuery1(anchor); + // Reverse-index scan needs the pattern in reversed key space; guard hits with the forward matcher. + const scanPattern = isSuffix ? this._reverseMaskedSubPattern(pattern, anchor) : pattern; + /** @type {?(key: string) => boolean} */ + const recordGuard = isSuffix ? (key) => keyMatcher(stringReverse(key)) : null; + + const transaction = this._db.transaction(['terms'], 'readonly'); + const objectStore = transaction.objectStore('terms'); + + // Survivor -> matching index; lower index wins so expression (0) beats reading (1) for matchSource. 
+ /** @type {Map} */ + const primaryKeyToIndexIndex = new Map(); + let completed = 0; + + for (let j = 0; j < indexNames.length; ++j) { + const indexIndex = j; + this._fusejiSkipScanIndex(objectStore, indexNames[j], query, scanPattern, recordGuard, (primaryKeys) => { + for (const primaryKey of primaryKeys) { + const existing = primaryKeyToIndexIndex.get(primaryKey); + if (existing === void 0 || indexIndex < existing) { + primaryKeyToIndexIndex.set(primaryKey, indexIndex); + } + } + if (++completed >= indexNames.length) { + this._fusejiFetchAndBuild(objectStore, primaryKeyToIndexIndex, anchor, matchType, dictionaries, resolve, reject); + } + }, reject); + } + }); } /** + * Reverses the masks-plus-anchor slice (positions `0 .. lastMask + anchorLength`) of a forward pattern + * into reverse-index key space for a suffix scan. + * @param {import('dictionary-database').MaskedPattern} pattern Forward masked pattern. * @param {string} anchor + * @returns {import('dictionary-database').MaskedPattern} + */ + _reverseMaskedSubPattern(pattern, anchor) { + const {chars, isMask} = pattern; + let lastMask = -1; + for (let i = 0; i < isMask.length; ++i) { + if (isMask[i]) { lastMask = i; } + } + const subLength = lastMask + 1 + [...anchor].length; + return { + chars: chars.slice(0, subLength).reverse(), + isMask: isMask.slice(0, subLength).reverse(), + }; + } + + /** + * Reference path (no pattern): plain key-cursor scan of the anchor range, keeping every matching key. 
+ * @param {string} anchor + * @param {import('dictionary-database').MatchType} matchType * @param {import('dictionary-database').DictionarySet} dictionaries - * @param {import('dictionary-database').MaskedPattern} pattern + * @param {(term: string) => boolean} keyMatcher * @returns {Promise} */ - _fusejiFindViaSkipScan(anchor, dictionaries, pattern) { + _fusejiFindViaCursor(anchor, matchType, dictionaries, keyMatcher) { return new Promise((resolve, reject) => { - const indexNames = ['expression', 'reading']; + const isSuffix = (matchType === 'suffix'); + const indexNames = isSuffix ? ['expressionReverse', 'readingReverse'] : ['expression', 'reading']; + const query = isSuffix ? this._createBoundQuery2(anchor) : this._createBoundQuery1(anchor); + const transaction = this._db.transaction(['terms'], 'readonly'); const objectStore = transaction.objectStore('terms'); - // Survivor -> matching index; lower index wins so expression (0) beats reading (1) for matchSource. /** @type {Map} */ const primaryKeyToIndexIndex = new Map(); let completed = 0; for (let j = 0; j < indexNames.length; ++j) { const indexIndex = j; - this._fusejiSkipScanIndex(objectStore, indexNames[j], anchor, pattern, (primaryKeys) => { + /** @type {(key: IDBValidKey) => boolean} */ + const keyPredicate = (key) => keyMatcher(isSuffix ? 
stringReverse(/** @type {string} */ (key)) : /** @type {string} */ (key)); + /** @type {(primaryKeys: IDBValidKey[]) => void} */ + const onKeys = (primaryKeys) => { for (const primaryKey of primaryKeys) { const existing = primaryKeyToIndexIndex.get(primaryKey); if (existing === void 0 || indexIndex < existing) { @@ -358,29 +419,31 @@ export class DictionaryDatabase { } } if (++completed >= indexNames.length) { - this._fusejiFetchAndBuild(objectStore, primaryKeyToIndexIndex, anchor, 'prefix', dictionaries, resolve, reject); + this._fusejiFetchAndBuild(objectStore, primaryKeyToIndexIndex, anchor, matchType, dictionaries, resolve, reject); } - }, reject); + }; + this._db.getPrimaryKeysWhere(objectStore.index(indexNames[j]), query, keyPredicate, onKeys, reject); } }); } /** - * Skip-scan over one index: seeks to the required char at literal positions, lets the cursor enumerate - * only the chars present at mask positions, and skips non-matching subtrees. A match is any key no longer - * than the pattern that satisfies every literal it spans. + * Skip-scan over one index: seeks the required char at literal positions, enumerates the chars present at + * mask positions, and skips non-matching subtrees. A match is any key no longer than the pattern that + * satisfies every literal it spans (and `recordGuard`, if set). * @param {IDBObjectStore} objectStore * @param {string} indexName - * @param {string} anchor Leading literal run; also the range bound. - * @param {import('dictionary-database').MaskedPattern} pattern + * @param {?IDBValidKey|IDBKeyRange} query Anchor range bound (forward for prefix, reversed for suffix). + * @param {import('dictionary-database').MaskedPattern} pattern Pattern in this index's key space. + * @param {?(key: string) => boolean} recordGuard Extra filter on matched keys, or `null` to record all. 
* @param {(primaryKeys: IDBValidKey[]) => void} onComplete * @param {(reason?: unknown) => void} onError */ - _fusejiSkipScanIndex(objectStore, indexName, anchor, pattern, onComplete, onError) { + _fusejiSkipScanIndex(objectStore, indexName, query, pattern, recordGuard, onComplete, onError) { const {chars: patternChars, isMask} = pattern; const patternLength = patternChars.length; const index = objectStore.index(indexName); - const request = index.openKeyCursor(this._createBoundQuery1(anchor), 'next'); + const request = index.openKeyCursor(query, 'next'); /** @type {IDBValidKey[]} */ const primaryKeys = []; request.onerror = (e) => onError(/** @type {IDBRequest} */ (e.target).error); @@ -390,7 +453,8 @@ export class DictionaryDatabase { onComplete(primaryKeys); return; } - const keyChars = [...(/** @type {string} */ (cursor.key))]; + const key = /** @type {string} */ (cursor.key); + const keyChars = [...key]; const keyLength = keyChars.length; // first pattern violation in the key, if any. @@ -405,8 +469,8 @@ export class DictionaryDatabase { if (violation === -1) { if (keyLength <= patternLength) { - // match: record, then step to the next key. - primaryKeys.push(cursor.primaryKey); + // match: record (if it passes the guard), then step to the next key. + if (recordGuard === null || recordGuard(key)) { primaryKeys.push(cursor.primaryKey); } cursor.continue(); } else { // longer than the pattern: skip this subtree. @@ -437,53 +501,6 @@ export class DictionaryDatabase { }; } - /** - * Plain key-cursor scan of the (reverse, for suffix) index, matching each key and keeping survivors. 
- * @param {string} anchor - * @param {import('dictionary-database').MatchType} matchType - * @param {import('dictionary-database').DictionarySet} dictionaries - * @param {(term: string) => boolean} keyMatcher - * @returns {Promise} - */ - _fusejiFindViaCursor(anchor, matchType, dictionaries, keyMatcher) { - return new Promise((resolve, reject) => { - const isSuffix = (matchType === 'suffix'); - const indexNames = isSuffix ? ['expressionReverse', 'readingReverse'] : ['expression', 'reading']; - const createQuery = isSuffix ? this._createBoundQuery2 : this._createBoundQuery1; - const query = createQuery(anchor); - - const transaction = this._db.transaction(['terms'], 'readonly'); - const objectStore = transaction.objectStore('terms'); - - /** @type {Map} */ - const primaryKeyToIndexIndex = new Map(); - let completedCursors = 0; - - for (let j = 0; j < indexNames.length; ++j) { - const indexIndex = j; - const index = objectStore.index(indexNames[j]); - /** @type {(key: IDBValidKey) => boolean} */ - const keyPredicate = (key) => { - const forward = isSuffix ? stringReverse(/** @type {string} */ (key)) : /** @type {string} */ (key); - return keyMatcher(forward); - }; - /** @type {(primaryKeys: IDBValidKey[]) => void} */ - const onKeys = (primaryKeys) => { - for (const primaryKey of primaryKeys) { - const existing = primaryKeyToIndexIndex.get(primaryKey); - if (existing === void 0 || indexIndex < existing) { - primaryKeyToIndexIndex.set(primaryKey, indexIndex); - } - } - if (++completedCursors >= indexNames.length) { - this._fusejiFetchAndBuild(objectStore, primaryKeyToIndexIndex, anchor, matchType, dictionaries, resolve, reject); - } - }; - this._db.getPrimaryKeysWhere(index, query, keyPredicate, onKeys, reject); - } - }); - } - /** * Fetches the survivor records, filters by dictionary, builds term entries, and resolves. 
* @param {IDBObjectStore} objectStore diff --git a/test/data/database-test-cases.json b/test/data/database-test-cases.json index 4705f2d4b3..4c9c99384a 100644 --- a/test/data/database-test-cases.json +++ b/test/data/database-test-cases.json @@ -222,7 +222,8 @@ }, { "inputs": [ - {"maskedText": "○○○ナルド", "triggers": "○", "anchor": "ナルド", "matchType": "suffix", "usePattern": false} + {"maskedText": "○○○ナルド", "triggers": "○", "anchor": "ナルド", "matchType": "suffix", "usePattern": false}, + {"maskedText": "○○○ナルド", "triggers": "○", "anchor": "ナルド", "matchType": "suffix", "usePattern": true} ], "expectedResults": { "total": 1, From 57a2e1e9b2d7d246cff813f625dc9fce99f1defe Mon Sep 17 00:00:00 2001 From: noatdk Date: Thu, 4 Jun 2026 18:37:50 +0700 Subject: [PATCH 8/9] fix: Terminate fuseji span at the first non-word character --- ext/js/language/translator.js | 26 ++++++++++++++++---------- test/fuseji.test.js | 9 +++++++++ 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js index 536f7cda56..048ee8b0b6 100644 --- a/ext/js/language/translator.js +++ b/ext/js/language/translator.js @@ -18,7 +18,7 @@ import {safePerformance} from '../core/safe-performance.js'; import {applyTextReplacement} from '../general/regex-util.js'; -import {isCodePointJapanese} from './ja/japanese.js'; +import {isCodePointJapanese, isCodePointKana, isCodePointKanji} from './ja/japanese.js'; import {isCodePointKorean} from './ko/korean.js'; import {LanguageTransformer} from './language-transformer.js'; import {getAllLanguageReadingNormalizers, getAllLanguageTextProcessors} from './languages.js'; @@ -278,16 +278,22 @@ export class Translator { * @returns {{characters: string[], firstTriggerIndex: number, lastTriggerIndex: number}} -1 for indexes if not present. 
*/ _getFusejiTriggerDetails(text, triggerSet) { - const characters = [...text]; + const allCharacters = [...text]; let firstTriggerIndex = -1; let lastTriggerIndex = -1; - for (let i = 0; i < characters.length; ++i) { - if (!triggerSet.has(characters[i])) { continue; } - if (firstTriggerIndex < 0) { - firstTriggerIndex = i; - } - lastTriggerIndex = i; - } + let end = 0; + for (; end < allCharacters.length; ++end) { + const character = allCharacters[end]; + if (triggerSet.has(character)) { + if (firstTriggerIndex < 0) { firstTriggerIndex = end; } + lastTriggerIndex = end; + continue; + } + const codePoint = character.codePointAt(0) ?? 0; + // stops at functuations etc + if (!isCodePointKana(codePoint) && !isCodePointKanji(codePoint)) { break; } + } + const characters = allCharacters.slice(0, end); return {characters, firstTriggerIndex, lastTriggerIndex}; } @@ -327,7 +333,7 @@ export class Translator { ({dictionaryEntries} = await this._findFusejiTermsForAnchor(prefixLookupText, 'prefix', options, tagAggregator, primaryReading, termKeyFilter, pattern)); } else { for (const lookupText of this._getFusejiSuffixLookupTexts(suffixLookupText)) { - ({dictionaryEntries} = await this._findFusejiTermsForAnchor(lookupText, 'suffix', options, tagAggregator, primaryReading, termKeyFilter, null)); + ({dictionaryEntries} = await this._findFusejiTermsForAnchor(lookupText, 'suffix', options, tagAggregator, primaryReading, termKeyFilter, pattern)); if (dictionaryEntries.length > 0) { break; } } } diff --git a/test/fuseji.test.js b/test/fuseji.test.js index 74f2b2d7f2..92fb49a9f0 100644 --- a/test/fuseji.test.js +++ b/test/fuseji.test.js @@ -132,4 +132,13 @@ describe('Fuseji lookup', () => { expect(originalTextLength).toBe(4); expect(dictionaryEntries.some(({headwords}) => headwords.some(({term}) => term === '打ち込む'))).toBe(true); }); + + translatorTest('bounds the masked word to the first group when a later masked word is in range', async ({translator}) => { + const options = 
createFindTermsOptions(dictionaryName, optionsPresets, 'default'); + options.enableFusejiLookup = true; + + const {dictionaryEntries, originalTextLength} = await translator.findTerms('split', '〇ち込む」「マ○ド', options); + expect(originalTextLength).toBe(4); + expect(dictionaryEntries.some(({headwords}) => headwords.some(({term}) => term === '打ち込む'))).toBe(true); + }); }); From 2a09aec3e753ee1903b9ba3bd8b12cfbda2c5afb Mon Sep 17 00:00:00 2001 From: noatdk Date: Thu, 4 Jun 2026 19:51:52 +0700 Subject: [PATCH 9/9] fix: Patch regression for masked pattern involving latin characters --- ext/js/language/translator.js | 7 +++---- test/fuseji.test.js | 9 +++++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js index 048ee8b0b6..2f9d7a037b 100644 --- a/ext/js/language/translator.js +++ b/ext/js/language/translator.js @@ -18,7 +18,7 @@ import {safePerformance} from '../core/safe-performance.js'; import {applyTextReplacement} from '../general/regex-util.js'; -import {isCodePointJapanese, isCodePointKana, isCodePointKanji} from './ja/japanese.js'; +import {isCodePointJapanese} from './ja/japanese.js'; import {isCodePointKorean} from './ko/korean.js'; import {LanguageTransformer} from './language-transformer.js'; import {getAllLanguageReadingNormalizers, getAllLanguageTextProcessors} from './languages.js'; @@ -289,9 +289,8 @@ export class Translator { lastTriggerIndex = end; continue; } - const codePoint = character.codePointAt(0) ?? 0; - // stops at functuations etc - if (!isCodePointKana(codePoint) && !isCodePointKanji(codePoint)) { break; } + // stops at word delimiters (punctuation/space/symbol). 
+ if (!/[\p{L}\p{N}\p{M}]/u.test(character)) { break; } } const characters = allCharacters.slice(0, end); return {characters, firstTriggerIndex, lastTriggerIndex}; diff --git a/test/fuseji.test.js b/test/fuseji.test.js index 92fb49a9f0..aa40d0cee7 100644 --- a/test/fuseji.test.js +++ b/test/fuseji.test.js @@ -133,6 +133,15 @@ describe('Fuseji lookup', () => { expect(dictionaryEntries.some(({headwords}) => headwords.some(({term}) => term === '打ち込む'))).toBe(true); }); + translatorTest('matches masked latin terms', async ({translator}) => { + const options = createFindTermsOptions(dictionaryName, optionsPresets, 'default'); + options.enableFusejiLookup = true; + + const {dictionaryEntries, originalTextLength} = await translator.findTerms('split', 'Eng○ish', options); + expect(originalTextLength).toBe(7); + expect(dictionaryEntries.some(({headwords}) => headwords.some(({term}) => term === 'English'))).toBe(true); + }); + translatorTest('bounds the masked word to the first group when a later masked word is in range', async ({translator}) => { const options = createFindTermsOptions(dictionaryName, optionsPresets, 'default'); options.enableFusejiLookup = true;