Skip to content

Commit

Permalink
Add title argument to rehypeAddDataId and enhance source_texts tests (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
ttizze authored Dec 16, 2024
2 parents 0c94e23 + 55ed8d7 commit 6c1effe
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 20 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import { prisma } from "~/utils/prisma";
import { generateHashForText } from "../utils/generateHashForText";

export async function upsertPageWithHtml(
pageSlug: string,
Expand All @@ -24,26 +23,10 @@ export async function upsertPageWithHtml(
/**
 * Stores `title` on the page whose slug is `pageSlug`, then upserts the
 * source text row for that title (hash computed with occurrence 0, `number` 0).
 *
 * @param pageSlug - Slug used to look the page up.
 * @param title - New title text.
 * @returns The upserted source text record, or `undefined` when no page
 *   matches the slug (in which case nothing is written).
 */
export async function upsertTitle(pageSlug: string, title: string) {
  const existingPage = await prisma.page.findUnique({
    where: { slug: pageSlug },
  });
  if (!existingPage) {
    return;
  }

  const pageId = existingPage.id;
  const textAndOccurrenceHash = generateHashForText(title, 0);

  // Update the page row first, then the title's source text row.
  await prisma.page.update({
    where: { id: pageId },
    data: { title },
  });

  const titleSourceText = await prisma.sourceText.upsert({
    where: {
      pageId_textAndOccurrenceHash: { pageId, textAndOccurrenceHash },
    },
    update: { text: title },
    create: {
      pageId,
      textAndOccurrenceHash,
      text: title,
      number: 0,
    },
  });
  return titleSourceText;
}

export async function upsertTags(tags: string[], pageId: number) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -239,4 +239,68 @@ describe("processHtmlContent", () => {
// 各タイトル occurrence が異なる ID を持つことを確認
expect(titleOccurrences[0].id).not.toBe(titleOccurrences[1].id);
});

// Processing the exact same HTML twice must keep every source_texts row:
// each text keeps its original id and the total row count stays the same.
test("同一HTMLを再度処理した場合に、編集していない箇所のsource_textsが維持されるか確認", async () => {
  const pageSlug = "html-no-edit-test-page";
  const title = "No Edit Title";
  const htmlInput = `
<p>Line A</p>
<p>Line B</p>
<p>Line C</p>
`;

  // Builds a text -> id lookup from a page's source texts.
  const toTextIdMap = (
    sourceTexts: ReadonlyArray<{ text: string; id: number }>,
  ): Map<string, number> => {
    const textIdMap = new Map<string, number>();
    for (const st of sourceTexts) {
      textIdMap.set(st.text, st.id);
    }
    return textIdMap;
  };

  const user = await prisma.user.upsert({
    where: { id: 13 },
    create: {
      id: 13,
      userName: "noedit",
      email: "[email protected]",
      displayName: "noedit",
      icon: "noedit",
    },
    update: {},
  });

  // First pass.
  await processHtmlContent(title, htmlInput, pageSlug, user.id, "en", true);

  const dbPage1 = await prisma.page.findUnique({
    where: { slug: pageSlug },
    include: { sourceTexts: true },
  });
  expect(dbPage1).not.toBeNull();
  if (!dbPage1) return;

  // Remember the id each text received on the first pass.
  const originalTextIdMap = toTextIdMap(dbPage1.sourceTexts);
  expect(originalTextIdMap.size).toBeGreaterThanOrEqual(3);

  // Second pass with the same HTML, unchanged.
  await processHtmlContent(title, htmlInput, pageSlug, user.id, "en", true);

  const dbPage2 = await prisma.page.findUnique({
    where: { slug: pageSlug },
    include: { sourceTexts: true },
  });
  expect(dbPage2).not.toBeNull();
  if (!dbPage2) return;

  // Text -> id lookup after re-processing.
  const afterTextIdMap = toTextIdMap(dbPage2.sourceTexts);

  // Every text must still map to the id it had originally.
  for (const [text, originalId] of originalTextIdMap.entries()) {
    expect(afterTextIdMap.get(text)).toBe(originalId);
  }

  // The number of source_texts must not grow or shrink (no needless deletion).
  expect(dbPage2.sourceTexts.length).toBe(dbPage1.sourceTexts.length);
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,10 @@ function extractTextFromHAST(node: Parent): string {
});
return result;
}
export function rehypeAddDataId(pageId: number): Plugin<[], Root> {
export function rehypeAddDataId(
pageId: number,
title: string,
): Plugin<[], Root> {
return function attacher() {
return async (tree: Root, file: VFile) => {
const textOccurrenceMap = new Map<string, number>();
Expand Down Expand Up @@ -80,6 +83,12 @@ export function rehypeAddDataId(pageId: number): Plugin<[], Root> {
number: index + 1,
}));

allTextsForDb.push({
text: title,
textAndOccurrenceHash: generateHashForText(title, 0),
number: 0,
});

const hashToId = await synchronizePageSourceTexts(pageId, allTextsForDb);

// 各ブロック要素を<span data-id="...">で子要素全体を包む
Expand Down Expand Up @@ -127,7 +136,7 @@ export async function processHtmlContent(
.use(rehypeRemark) // HAST→MDAST
.use(remarkGfm) // GFM拡張
.use(remarkRehype, { allowDangerousHtml: true }) // MDAST→HAST
.use(rehypeAddDataId(page.id))
.use(rehypeAddDataId(page.id, title))
.use(rehypeRaw) // 生HTMLを処理
.use(rehypeStringify, { allowDangerousHtml: true }) // HAST→HTML
.process(htmlInput);
Expand Down
2 changes: 1 addition & 1 deletion web/scripts/processMarkdownContent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ export async function processMarkdownContent(
const file = await remark()
.use(remarkGfm)
.use(remarkRehype)
.use(rehypeAddDataId(page.id))
.use(rehypeAddDataId(page.id, title))
.use(rehypeRaw)
.use(rehypeStringify, { allowDangerousHtml: true })
.process(body);
Expand Down

0 comments on commit 6c1effe

Please sign in to comment.