Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 38 additions & 9 deletions convert/convertBooks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import { queries, postQueries, freeze } from '../sab-proskomma-tools';
import { convertMarkdownsToMilestones } from './convertMarkdown';
import { verifyGlossaryEntries } from './verifyGlossaryEntries';
import { hasAudioExtension, hasImageExtension } from './stringUtils';
import { convertStorybookElements } from './storybook';

/**
* Loops through bookCollections property of configData.
Expand All @@ -22,10 +23,16 @@ function replaceVideoTags(text: string, _bcId: string, _bookId: string): string
return text.replace(/\\video (.*)/g, '\\zvideo-s |id="$1"\\*\\zvideo-e\\*');
}

// This is the start of supporting story books, but it still fails if there is no chapter.
function replacePageTags(text: string, _bcId: string, _bookId: string): string {
return text.replace(/\\page (.*)/g, '\\zpage-s |id="$1"\\*\\zpage-e\\*');
/**
 * Replace the USFM book ID with the given book ID.
 *
 * While uncommon, it is possible to use the same USFM for multiple books.
 * In this case, we must use the unique ID specified in config.
 *
 * Only the first `\id <code>` marker found in the text is rewritten; text
 * without such a marker is returned unchanged.
 *
 * @param text - USFM content of the book.
 * @param _bcId - Book collection ID (unused; kept so the function matches the filter signature).
 * @param bookId - ID to write into the `\id` marker.
 * @returns The text with its first `\id` marker set to `bookId`.
 */
function replaceId(text: string, _bcId: string, bookId: string): string {
    // A replacer function is used instead of a replacement string so that any
    // `$` sequences in bookId (e.g. `$&`, `$1`) are inserted literally rather
    // than being interpreted as replacement patterns.
    return text.replace(/\\id \w+/, () => `\\id ${bookId}`);
}

function loadGlossary(collection: any, dataDir: string): string[] {
const glossary: string[] = [];
for (const book of collection.books) {
Expand Down Expand Up @@ -101,16 +108,23 @@ function isImageMissing(imageSource: string): boolean {
const filterFunctions: ((text: string, bcId: string, bookId: string) => string)[] = [
removeStrongNumberReferences,
replaceVideoTags,
replacePageTags,
convertMarkdownsToMilestones,
removeMissingFigures
removeMissingFigures,
replaceId
];

function applyFilters(text: string, bcId: string, bookId: string): string {
/**
 * Passes the book text through every entry of `filterFunctions`, in array
 * order, and then through `convertStorybookElements` when the book type is
 * 'story'.
 *
 * @param text - Raw book content.
 * @param bcId - Book collection ID handed to each filter.
 * @param bookId - Book ID handed to each filter.
 * @param bookType - Optional book type; only 'story' triggers the extra conversion.
 * @returns The fully filtered text.
 */
function applyFilters(text: string, bcId: string, bookId: string, bookType?: string): string {
    const afterFilters = filterFunctions.reduce(
        (current, filterFn) => filterFn(current, bcId, bookId),
        text
    );
    // Debugging aid: to inspect the output for one collection, log the start
    // of the returned text here, e.g. when bcId == 'C01'.
    return bookType === 'story' ? convertStorybookElements(afterFilters) : afterFilters;
}

Expand Down Expand Up @@ -204,7 +218,6 @@ export async function convertBooks(
for (const book of collection.books) {
let bookConverted = false;
switch (book.type) {
case 'story':
case 'songs':
case 'audio-only':
case 'bloom-player':
Expand Down Expand Up @@ -490,7 +503,7 @@ function convertScriptureBook(
function processBookContent(resolve: () => void, err: any, content: string) {
//process.stdout.write(`processBookContent: bookId:${book.id}, error:${err}\n`);
if (err) throw err;
content = applyFilters(content, context.bcId, book.id);
content = applyFilters(content, context.bcId, book.id, book.type);
if (context.configData.traits['has-glossary']) {
content = verifyGlossaryEntries(content, bcGlossary);
}
Expand Down Expand Up @@ -567,7 +580,23 @@ function convertScriptureBook(
fileContents.push(fs.readFileSync(filePath, 'utf-8'));
});

processBookContent(resolve, null, fileContents.join(''));
// Collect the file contents into a single document
let usfm: string;

if (book.type == 'story') {
// The first file contains meta-content (id, title, etc)
usfm = fileContents[0];

// Subsequent files represent storybook pages.
// SAB deletes the \page tags. Replace them with chapter tags.
for (let i = 1; i < fileContents.length; i++) {
usfm += `\\c ${i} ${fileContents[i]}`;
}
} else {
usfm = fileContents.join('');
}

processBookContent(resolve, null, usfm);
}
})
);
Expand Down
136 changes: 113 additions & 23 deletions convert/convertConfig.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ type BookCollectionAudio = {
timingFile: string;
};

/** An illustration matched to one page of a storybook. */
type StorybookImage = {
/** Value of the page element's `num` attribute. */
page: string;
/** Name of the matching file found in the `illustrations` directory. */
filename: string;
// TODO: Add motion parameters
};

type Style = {
font: string;
textSize: number;
Expand Down Expand Up @@ -49,6 +55,7 @@ export type Book = {
audio: BookCollectionAudio[];
features: any;
quizFeatures?: any;
storybookImages?: StorybookImage[];
footer?: HTML;
style?: Style;
styles?: {
Expand Down Expand Up @@ -419,6 +426,109 @@ function convertCollectionFooter(collectionTag: Element, document: Document) {
return footer;
}

function shortenBookCode(id: string, allIds: string[]): string | null {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not needed. Proskomma was changed to handle longer book codes.

const short = id.replace(/^(\w)0(\d\d)$/, '$1$2');
return id === short || allIds.includes(short) ? null : short;
}

/**
 * Left-pads a one- or two-character book code with zeros so it becomes three
 * characters long; codes of any other length are left as they are.
 *
 * @param id - Book code to pad.
 * @param allIds - Codes currently in use.
 * @returns The (possibly padded) code, or null when that code already
 * appears in `allIds`.
 */
function lengthenBookCode(id: string, allIds: string[]): string | null {
    const zeroPrefix = id.length === 1 ? '00' : id.length === 2 ? '0' : '';
    const candidate = zeroPrefix + id;
    if (allIds.includes(candidate)) {
        return null;
    }
    return candidate;
}

/**
 * Normalises the IDs of the given book elements in place.
 *
 * Each element first gets its current ID stored in a `fullId` attribute.
 * The ID is then replaced by a shortened form if one is available, otherwise
 * by a lengthened form if one is available. Once all books are processed the
 * resulting codes are checked and a warning is printed for odd-length ones.
 */
function convertBookCodes(books: Element[]) {
    books.forEach((bookTag) => {
        const originalId = bookTag.id;
        bookTag.setAttribute('fullId', originalId);

        // Taken fresh for every book: IDs renamed earlier in this loop must
        // count as "in use" for the books that follow.
        const idsInUse = books.map((b) => b.id);
        const shortCode = shortenBookCode(originalId, idsInUse);
        const longCode = lengthenBookCode(originalId, idsInUse);

        if (shortCode) {
            console.log(` shortening book code: ${originalId} => ${shortCode}`);
            bookTag.id = shortCode;
            return;
        }
        if (longCode) {
            console.log(` lengthening book code: ${originalId} => ${longCode}`);
            bookTag.id = longCode;
        }
    });
    checkBookCodes(books);
}

/**
 * Prints a warning listing every book whose ID is not exactly three
 * characters long. Prints nothing when all IDs have three characters.
 */
function checkBookCodes(books: Element[]) {
    const oddLengthCodes: string[] = [];
    for (const { id } of books) {
        if (id.length !== 3) {
            oddLengthCodes.push(id);
        }
    }
    if (oddLengthCodes.length === 0) {
        return;
    }
    console.log(
        '\n WARNING: The following book codes are not 3 characters. Some may not load properly:'
    );
    console.log(` ${oddLengthCodes.join(' ')}`);
}

/**
 * Collects the audio entries declared on the `<page>` elements of a book.
 *
 * A page contributes an entry only when it has an `<audio>` element that
 * contains an `<f>` (file) element. Pages are skipped when:
 *  - they have no `<audio>` element,
 *  - the `<audio>` element has `background="continue"` (not yet supported),
 *  - the `<audio>` element has no `<f>` child (nothing to reference).
 *
 * @param book - The `<book>` element to scan.
 * @param verbose - Verbosity level; 2 and above logs pages/tags, 3 and above
 * also logs each entry as it is added.
 * @returns One entry per page that has usable audio, in document order.
 */
function getBookAudio(book: Element, verbose: number) {
    const audio: BookCollectionAudio[] = [];
    for (const page of book.getElementsByTagName('page')) {
        // Optional chaining: a page without any attributes must not crash the log line.
        if (verbose >= 2) console.log(`.. page: ${page.attributes[0]?.value}`);
        const audioTag = page.getElementsByTagName('audio')[0];
        if (!audioTag) continue;
        if (audioTag.attributes.getNamedItem('background')?.value === 'continue') {
            // Happens when a storybook uses a single audio file for multiple pages.
            // TODO: Implement this feature
            continue;
        }
        const fTag = audioTag.getElementsByTagName('f')[0];
        if (!fTag) {
            // Without a file element there is no filename/src to record.
            if (verbose >= 2)
                console.log(`... audioTag: ${audioTag.outerHTML} has no <f> element, skipping`);
            continue;
        }
        if (verbose >= 2)
            console.log(`... audioTag: ${audioTag.outerHTML}, fTag:${fTag.outerHTML}`);
        const entry: BookCollectionAudio = {
            // Explicit radix so values are always read as decimal.
            num: parseInt(page.attributes.getNamedItem('num')!.value, 10),
            filename: fTag.innerHTML,
            len: fTag.hasAttribute('len')
                ? parseInt(fTag.attributes.getNamedItem('len')!.value, 10)
                : undefined,
            size: fTag.hasAttribute('size')
                ? parseInt(fTag.attributes.getNamedItem('size')!.value, 10)
                : undefined,
            src: fTag.attributes.getNamedItem('src')!.value,
            timingFile: audioTag.getElementsByTagName('y')[0]?.innerHTML
        };
        audio.push(entry);
        // Log the entry that was just added (not always the first one).
        if (verbose >= 3) console.log(`.... audio: `, JSON.stringify(entry));
    }
    return audio;
}

/**
 * Finds the illustration file that belongs to a storybook page.
 *
 * The page's `<image-filename>` text is looked up among `imageFiles` both as
 * written and with a `<collection>-<book>-` prefix. The first entry of
 * `imageFiles` that equals either form is used.
 *
 * @param page - The `<page>` element.
 * @param collection - Collection ID used for the prefixed filename form.
 * @param book - Book ID used for the prefixed filename form.
 * @param imageFiles - Names of the available image files.
 * @returns The matched file together with the page's `num` attribute, or
 * null when the page has no `num`, no image filename, or no matching file.
 */
function imageFromPage(
    page: Element,
    collection: string,
    book: string,
    imageFiles: string[]
): StorybookImage | null {
    const num = page.getAttribute('num');
    const baseName = page.getElementsByTagName('image-filename')[0]?.textContent;

    // Without a page number or a filename there is nothing to match. Bailing
    // out here also avoids building a bogus "<collection>-<book>-undefined"
    // candidate that could accidentally match a file.
    if (!num || !baseName) {
        return null;
    }

    // In testing, the image filename took one of the following two forms
    const candidates = [baseName, `${collection}-${book}-${baseName}`];
    const file = imageFiles.find((f) => candidates.includes(f));

    return file ? { filename: file, page: num } : null;
}

/**
 * Builds the list of page illustrations for a book.
 *
 * Each `<page>` element of the book is matched against the files in
 * `<dataDir>/illustrations`; pages without a match are left out.
 *
 * Books without pages return an empty list without touching the file system,
 * and a missing `illustrations` directory is treated as "no images" rather
 * than an error, so projects that ship no illustrations still convert.
 *
 * @param book - The `<book>` element.
 * @param collection - ID of the collection the book belongs to.
 * @param dataDir - Project data directory containing `illustrations`.
 * @returns The matched images in page order.
 */
function getStorybookImages(book: Element, collection: string, dataDir: string): StorybookImage[] {
    const pages = Array.from(book.getElementsByTagName('page'));
    if (pages.length === 0) {
        return [];
    }

    let imageFiles: string[];
    try {
        imageFiles = readdirSync(path.join(dataDir, 'illustrations'));
    } catch (e) {
        // Only a missing directory is tolerated; any other failure is re-thrown.
        if ((e as { code?: string } | null)?.code === 'ENOENT') {
            return [];
        }
        throw e;
    }

    // Prefer the original ID saved in `fullId`; fall back to the current ID.
    const id = book.getAttribute('fullId') ?? book.id;
    return pages
        .map((page) => imageFromPage(page, collection, id, imageFiles))
        .filter((image): image is StorybookImage => image !== null);
}

function convertConfig(dataDir: string, verbose: number) {
const dom = new jsdom.JSDOM(readFileSync(path.join(dataDir, 'appdef.xml')).toString(), {
contentType: 'text/xml'
Expand Down Expand Up @@ -578,30 +688,9 @@ function convertConfig(dataDir: string, verbose: number) {
}
const books: BookCollection['books'] = [];
const bookTags = tag.getElementsByTagName('book');
convertBookCodes(Array.from(bookTags));
for (const book of bookTags) {
if (verbose >= 2) console.log(`. book: ${book.id}`);
const audio: BookCollectionAudio[] = [];
for (const page of book.getElementsByTagName('page')) {
if (verbose >= 2) console.log(`.. page: ${page.attributes[0].value}`);
const audioTag = page.getElementsByTagName('audio')[0];
if (!audioTag) continue;
const fTag = audioTag.getElementsByTagName('f')[0];
if (verbose >= 2)
console.log(`... audioTag: ${audioTag.outerHTML}, fTag:${fTag.outerHTML}`);
audio.push({
num: parseInt(page.attributes.getNamedItem('num')!.value),
filename: fTag.innerHTML,
len: fTag.hasAttribute('len')
? parseInt(fTag.attributes.getNamedItem('len')!.value)
: undefined,
size: fTag.hasAttribute('size')
? parseInt(fTag.attributes.getNamedItem('size')!.value)
: undefined,
src: fTag.attributes.getNamedItem('src')!.value,
timingFile: audioTag.getElementsByTagName('y')[0]?.innerHTML
});
if (verbose >= 3) console.log(`.... audio: `, JSON.stringify(audio[0]));
}
const bookFeaturesTag = book
.querySelector('features[type=book]')
?.getElementsByTagName('e');
Expand Down Expand Up @@ -658,7 +747,8 @@ function convertConfig(dataDir: string, verbose: number) {
section: book.getElementsByTagName('sg')[0]?.innerHTML,
testament: book.getElementsByTagName('g')[0]?.innerHTML,
abbreviation: book.getElementsByTagName('v')[0]?.innerHTML,
audio,
audio: getBookAudio(book, verbose),
storybookImages: getStorybookImages(book, tag.id, dataDir),
file: book.getElementsByTagName('f')[0]?.innerHTML.replace(/\.\w*$/, '.usfm'),
features: bookFeatures,
quizFeatures,
Expand Down
Loading