Skip to content

Commit 4924853

Browse files
authored
Merge pull request #175 from Dyalog/174-chm-search-listing-names
Fix CHM search listing names showing as "unknown". Closes #174.
2 parents 78e8db8 + 4affbf3 commit 4924853

File tree

1 file changed

+56
-13
lines changed

1 file changed

+56
-13
lines changed

Diff for: chm/mkdocs2chm.py

+56 -13
Original file line number | Diff line number | Diff line change
@@ -381,12 +381,14 @@ def convert_to_html(filenames: List[str], css: str, macros: dict, transforms: Li
381381
"""
382382
converted: List[str] = []
383383

384-
head = """
384+
head_template = """
385385
<!DOCTYPE html>
386386
<html lang="en">
387387
<head>
388388
<meta charset="UTF-8">
389+
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
389390
<meta name="viewport" content="width=device-width, initial-scale=1.0">
391+
<title>{title}</title>
390392
"""
391393

392394
converted: List[str] = []
@@ -436,6 +438,22 @@ def convert_to_html(filenames: List[str], css: str, macros: dict, transforms: Li
436438

437439
body = fix_links_html(body)
438440

441+
# Extract the H1 content for use in the title tag, setting for_title=True
442+
# to only extract the name part (excluding command span)
443+
title = extract_h1(body, for_title=True)
444+
445+
# Use a default title if no H1 is found
446+
if not title:
447+
# Use the filename without extension as a fallback title
448+
title = os.path.splitext(os.path.basename(file))[0].replace('_', ' ').title()
449+
450+
# For special files like welcome.md, use a more appropriate title
451+
if file.endswith('welcome.md'):
452+
title = "Welcome to Dyalog APL"
453+
454+
# Format the head with the title
455+
head = head_template.format(title=title)
456+
439457
# Optimise CSS specifically for this page: only use selectors referring to
440458
# ids, classes and tags on the actual page.
441459
optimised_css = purge_css(css, body)
@@ -508,21 +526,25 @@ def static_assets(src_dir='assets', project='project') -> Tuple[List[str], str,
508526
return assets, css, css_files
509527

510528

511-
def extract_h1(data: str) -> str:
529+
def extract_h1(data: str, for_title: bool = False) -> str:
512530
"""
513-
Some files will have a raw HTML <h1> for styling reasons.
531+
Extract text from the first h1 tag, handling special styling with spans.
514532
"""
515533
soup = BeautifulSoup(data, 'html.parser')
516534
if h1 := soup.find('h1'):
517535
if name_span := h1.find('span', class_='name'):
536+
# If for_title is True, return only the name
537+
if for_title:
538+
return name_span.get_text().strip().replace('"', '').replace('`', '')
539+
540+
# Otherwise, return name + command if command exists
518541
command_span = h1.find('span', class_='command')
519-
name = name_span.get_text() if name_span else None
520-
command = command_span.get_text() if command_span else None
521-
if name and command:
522-
return f"{name} {command}".strip()
523-
elif name:
524-
return name.strip().replace('"', '').replace('`', '')
542+
if command_span:
543+
return f"{name_span.get_text()} {command_span.get_text()}".strip()
544+
else:
545+
return name_span.get_text().strip().replace('"', '').replace('`', '')
525546
else:
547+
# No special spans, just return the full h1 text
526548
return h1.get_text().strip().replace('"', '').replace('`', '')
527549
return ''
528550

@@ -612,7 +634,7 @@ def find_nav_files_and_dirs(filename: str, remove: List[str]) -> Tuple[List[str]
612634
return included_dirs, standalone_files
613635

614636

615-
def generate_hfp(project: str, chmfile: str, files: List[str], images: List[str], assets: List[str], title: str) -> None:
637+
def generate_hfp(project: str, chmfile: str, files: List[str], images: List[str], assets: List[str], title: str, codepage: int = 65001) -> None:
616638
"""
617639
Generates a project .hfp file for the chmcmd chm compiler, listing all .htm-files
618640
in the directory tree, plus the _index.hhk and _table_of_contents.hhc files.
@@ -634,7 +656,7 @@ def generate_hfp(project: str, chmfile: str, files: List[str], images: List[str]
634656
cfg.appendChild(filegroup)
635657

636658
count = doc.createElement("Count")
637-
count.setAttribute("Value", str(len(files) + len(images)))
659+
count.setAttribute("Value", str(len(files) + len(images) + len(assets)))
638660
filegroup.appendChild(count)
639661

640662
fcount = 0
@@ -665,7 +687,7 @@ def generate_hfp(project: str, chmfile: str, files: List[str], images: List[str]
665687
settings.appendChild(searchable)
666688

667689
if start_page is None:
668-
start_page = 'index.htm'
690+
start_page = 'welcome.htm'
669691
dflt = doc.createElement("DefaultPage")
670692
dflt.setAttribute("Value", start_page)
671693
settings.appendChild(dflt)
@@ -681,6 +703,25 @@ def generate_hfp(project: str, chmfile: str, files: List[str], images: List[str]
681703
fnt = doc.createElement("DefaultFont")
682704
fnt.setAttribute("Value", "")
683705
settings.appendChild(fnt)
706+
707+
# Add CodePage setting
708+
cp = doc.createElement("CodePage")
709+
cp.setAttribute("Value", str(codepage))
710+
settings.appendChild(cp)
711+
712+
# Hard-coded Language ID for UK English (0x0809)
713+
lang = doc.createElement("Language")
714+
lang.setAttribute("Value", "0x0809")
715+
settings.appendChild(lang)
716+
717+
# Add additional CHM-specific settings that might help with search
718+
binary_index = doc.createElement("BinaryIndex")
719+
binary_index.setAttribute("Value", "True")
720+
settings.appendChild(binary_index)
721+
722+
full_text_search = doc.createElement("FullTextSearch")
723+
full_text_search.setAttribute("Value", "True")
724+
settings.appendChild(full_text_search)
684725

685726
with open(outfile, 'w', encoding="utf-8") as f:
686727
f.write(doc.toprettyxml(indent=" "))
@@ -877,6 +918,8 @@ def filter_unused_images(md_files: List[str], css_files: List[str], image_files:
877918
parser.add_argument('--config', type=str, help='JSON config file for additional options')
878919
parser.add_argument('--git-info', type=str, help='Git branch and commit info')
879920
parser.add_argument('--build-date', type=str, help='Build date')
921+
parser.add_argument('--codepage', type=int, default=65001, help='CodePage to use (default: 65001 for UTF-8)')
922+
880923

881924
args = parser.parse_args()
882925

@@ -961,7 +1004,7 @@ def filter_unused_images(md_files: List[str], css_files: List[str], image_files:
9611004

9621005
# Generate the CHM project config file
9631006
chm_name = "dyalog.chm"
964-
generate_hfp(args.project_dir, chm_name, html_files, copied_images, assets, title=f'Dyalog version {version}')
1007+
generate_hfp(args.project_dir, chm_name, html_files, copied_images, assets, title=f'Dyalog version {version}', codepage=args.codepage)
9651008

9661009
# Run the compiler
9671010
output = Popen(['chmcmd', 'dyalog.hfp'], cwd=args.project_dir)

0 commit comments

Comments
 (0)