Skip to content

Commit b284d61

Browse files
committed
add a script to copy the book sources across
1 parent cb28f68 commit b284d61

File tree

3 files changed

+188
-0
lines changed

3 files changed

+188
-0
lines changed

Diff for: Makefile

+7
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,10 @@ build:
66

77
watch-build:
88
ls **/*.md **/*.html *.py | entr ./generate-html.py
9+
10+
# Rebuild the book's HTML, post-process it into _site/book, then sync images.
# The trailing slash on the rsync source copies the *contents* of book/images
# into _site/book/images/; without it rsync would create
# _site/book/images/images/.
update-book:
	cd book && make html
	./prep-book-html.py
	rsync -a -v book/images/ _site/book/images/
14+
15+

Diff for: prep-book-html.py

+179
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
#!/usr/bin/env python
2+
from dataclasses import dataclass
3+
from pathlib import Path
4+
import json
5+
from lxml import html
6+
import subprocess
7+
8+
# Directory that receives the processed book pages.
DEST = Path('_site/book')

# Entries of book/atlas.json whose name (up to the first dot) is listed here
# are not treated as chapters.
_NON_CHAPTER_NAMES = (
    'cover',
    'titlepage',
    'copyright',
    'toc',
    'ix',
    'author_bio',
    'colo',
)

# The "files" list of book/atlas.json, in the order given there.
_ATLAS_FILES = json.loads(Path('book/atlas.json').read_text())['files']

# File names of the chapter pages: each remaining atlas entry with its
# '.asciidoc' suffix swapped for '.html'.
CHAPTERS = [
    source_name.replace('.asciidoc', '.html')
    for source_name in _ATLAS_FILES
    if source_name.partition('.')[0] not in _NON_CHAPTER_NAMES
]
23+
24+
25+
26+
27+
def parse_chapters():
    """Yield ``(file_name, parsed_html)`` for every entry of `CHAPTERS`.

    Each page is read from the ``book`` directory and parsed with
    `lxml.html.fromstring`; pages are produced lazily, in `CHAPTERS` order.
    """
    book_dir = Path('book')
    for file_name in CHAPTERS:
        page_source = (book_dir / file_name).read_text()
        yield file_name, html.fromstring(page_source)
31+
32+
33+
def get_anchor_targets(parsed_html):
    """Return the ids in *parsed_html* that are kept as link targets.

    Every element carrying an ``id`` attribute is considered, in the order
    `cssselect` returns them.  Ids that start with an underscore (presumably
    auto-generated section ids -- confirm against the HTML generator) and the
    ids in the fixed skip set below are left out.
    """
    skipped_ids = {'header', 'content', 'footnotes', 'footer', 'footer-text'}
    targets = []
    for element in parsed_html.cssselect('*[id]'):
        element_id = element.get('id')
        if element_id.startswith('_') or element_id in skipped_ids:
            continue
        targets.append(element_id)
    return targets
39+
40+
@dataclass
class ChapterInfo:
    """What `get_chapter_info` records about one chapter page."""

    # id used to link to the chapter as a whole: the heading's id, else the
    # <body> id (None when neither element has one)
    href_id: str
    # heading text, with a "Chapter N: " / "Appendix X: " / "Part N: " /
    # "Epilogue: " prefix where one applies
    chapter_title: str
    # ids of the page's h3 headings
    subheaders: list
    # ids on the page as returned by get_anchor_targets
    xrefs: list
46+
47+
48+
def get_chapter_info():
    """Collect a `ChapterInfo` for every chapter page.

    For each page yielded by `parse_chapters` this records the id used to
    link to the chapter, a display title, the ids of its h3 headings and the
    ids returned by `get_anchor_targets`.

    Returns:
        dict mapping each chapter file name to its `ChapterInfo`.

    Raises:
        IndexError: if a page has neither an h2 nor an h1, or if more
            chapters/appendices/parts are met than there are labels below.
    """
    chapter_info = {}
    # Labels are handed out in book order as each kind of page is met.
    appendix_numbers = list('ABCDEFGHIJKL')
    chapter_numbers = list(range(1, 100))
    part_numbers = list(range(1, 10))

    for chapter, parsed_html in parse_chapters():
        print('getting info from', chapter)

        # The first h2 is the page heading; pages without any h2 fall back
        # to their first h1.
        headings = parsed_html.cssselect('h2') or parsed_html.cssselect('h1')
        header = headings[0]

        href_id = header.get('id')
        if href_id is None:
            # No id on the heading: use the one on <body> (may also be None).
            href_id = parsed_html.cssselect('body')[0].get('id')

        # Headings without an id cannot be linked to; dropping them here
        # keeps None out of the '#' + id concatenations done by fix_toc.
        subheaders = [
            h3_id
            for h3_id in (h.get('id') for h in parsed_html.cssselect('h3'))
            if h3_id is not None
        ]

        # Strip any 'Appendix A: ' label from the heading text; appendices
        # get their own letter from appendix_numbers below.
        chapter_title = header.text_content().replace('Appendix A: ', '')

        # The file-name prefixes are mutually exclusive, so at most one
        # branch applies.
        if chapter.startswith('chapter_'):
            chapter_no = chapter_numbers.pop(0)
            chapter_title = f'Chapter {chapter_no}: {chapter_title}'
        elif chapter.startswith('appendix_'):
            appendix_no = appendix_numbers.pop(0)
            chapter_title = f'Appendix {appendix_no}: {chapter_title}'
        elif chapter.startswith('part'):
            part_no = part_numbers.pop(0)
            chapter_title = f'Part {part_no}: {chapter_title}'
        elif chapter.startswith('epilogue'):
            chapter_title = f'Epilogue: {chapter_title}'

        xrefs = get_anchor_targets(parsed_html)
        chapter_info[chapter] = ChapterInfo(
            href_id, chapter_title, subheaders, xrefs
        )

    return chapter_info
88+
89+
90+
def fix_xrefs(contents, chapter, chapter_info):
    """Point ``#anchor`` links at the chapter page that owns the anchor.

    Links whose target belongs to another chapter are rewritten to
    ``/book/<chapter file>`` (for that chapter's own id) or
    ``/book/<chapter file>#<anchor>``.  Links that match no other chapter
    are left unchanged.

    Args:
        contents: HTML of the page being processed.
        chapter: file name of that page; it is skipped when looking for the
            owner of an anchor.
        chapter_info: mapping of chapter file name to `ChapterInfo`.

    Returns:
        The rewritten page as serialised by `html.tostring`.
    """
    # Build the lookup once instead of re-scanning every chapter for every
    # link.  Chapters are visited in CHAPTERS order and setdefault keeps the
    # first claim, so the first chapter to own an anchor wins and a
    # chapter's own id takes precedence over its other anchors.
    replacements = {}
    for other_chap in CHAPTERS:
        if other_chap == chapter:
            continue
        info = chapter_info[other_chap]
        # href_id is None when neither the heading nor <body> had an id;
        # there is nothing to match in that case.
        if info.href_id is not None:
            replacements.setdefault('#' + info.href_id, f'/book/{other_chap}')
        for xref in info.xrefs:
            replacements.setdefault('#' + xref, f'/book/{other_chap}#{xref}')

    parsed = html.fromstring(contents)
    # Raw string: the backslash belongs to the CSS selector (it escapes the
    # '#'), it is not a Python escape sequence.
    for link in parsed.cssselect(r'a[href^=\#]'):
        new_href = replacements.get(link.get('href'))
        if new_href is not None:
            link.set('href', new_href)

    return html.tostring(parsed)
106+
107+
108+
def fix_title(contents, chapter, chapter_info):
    """Replace an 'Appendix A...' page heading with the recorded chapter title.

    Only the first h2 is considered, and only when its leading text starts
    with 'Appendix A'; every other page is re-serialised unchanged.

    Args:
        contents: HTML of the page being processed.
        chapter: file name of that page, used as the key into chapter_info.
        chapter_info: mapping of chapter file name to `ChapterInfo`.

    Returns:
        The page as serialised by `html.tostring`.
    """
    parsed = html.fromstring(contents)
    titles = parsed.cssselect('h2')
    if titles:
        title = titles[0]
        # .text is None when the heading begins with a child element rather
        # than text; treat that as "no match" instead of raising.
        if (title.text or '').startswith('Appendix A'):
            title.text = chapter_info[chapter].chapter_title
    return html.tostring(parsed)
115+
116+
def copy_chapters_across_with_fixes(chapter_info, fixed_toc):
    """Write a fixed-up copy of every chapter page into `DEST`.

    Each page from ``book/`` has its cross-references and title fixed (see
    `fix_xrefs` and `fix_title`).  Pages that contain an element with id
    ``header`` also get the table of contents appended to it and their
    <body> class set to ``article toc2 toc-left``.

    Args:
        chapter_info: mapping of chapter file name to `ChapterInfo`.
        fixed_toc: table-of-contents element to insert into each page.
    """
    # Extras that are currently switched off:
    # comments_html = open('disqus_comments.html').read()
    # buy_book_div = html.fromstring(open('buy_the_book_banner.html').read())
    # analytics_div = html.fromstring(open('analytics.html').read())
    # load_toc_script = open('load_toc.js').read()

    # Make sure the output directory exists so a first run on a clean
    # checkout does not fail on the first write.
    DEST.mkdir(parents=True, exist_ok=True)

    for chapter in CHAPTERS:
        old_contents = Path(f'book/{chapter}').read_text()
        new_contents = fix_xrefs(old_contents, chapter, chapter_info)
        new_contents = fix_title(new_contents, chapter, chapter_info)
        parsed = html.fromstring(new_contents)
        body = parsed.cssselect('body')[0]
        if header := parsed.cssselect('#header'):
            # head = parsed.cssselect('head')[0]
            # head.append(html.fragment_fromstring('<script>' + load_toc_script + '</script>'))
            body.set('class', 'article toc2 toc-left')
            # An lxml element has a single parent, so this moves fixed_toc
            # out of the previously processed page; that page has already
            # been serialised and written by then.
            header[0].append(fixed_toc)
            # body.insert(0, buy_book_div)
            # body.append(html.fromstring(
            #     comments_html.replace('CHAPTER_NAME', chapter.split('.')[0])
            # ))
            # body.append(analytics_div)
        fixed_contents = html.tostring(parsed)
        target = DEST / chapter
        print('writing', target)
        target.write_bytes(fixed_contents)
142+
143+
144+
145+
def extract_toc_from_book():
    """Return the first element with id ``toc`` from ``book/book.html``.

    Raises:
        IndexError: if the page has no such element.
    """
    book_source = Path('book/book.html').read_text()
    toc_matches = html.fromstring(book_source).cssselect('#toc')
    return toc_matches[0]
148+
149+
150+
151+
def fix_toc(toc, chapter_info):
    """Rewrite the table of contents so its links lead to chapter pages.

    A link to a chapter's own id becomes ``/book/<chapter file>``; a link to
    one of its h3 ids becomes ``/book/<chapter file>#<id>``.  Any other link
    is kept as it is.  The element's class is set to ``toc2``.

    Args:
        toc: table-of-contents element; modified in place.
        chapter_info: mapping of chapter file name to `ChapterInfo`.

    Returns:
        The same *toc* element.
    """
    href_mappings = {}
    for chapter in CHAPTERS:
        info = chapter_info[chapter]
        # Chapters without a usable id get no chapter-level entry.
        if info.href_id:
            href_mappings['#' + info.href_id] = f'/book/{chapter}'
        href_mappings.update(
            ('#' + subheader, f'/book/{chapter}#{subheader}')
            for subheader in info.subheaders
        )

    # Unknown hrefs map to themselves, i.e. stay untouched.
    toc.rewrite_links(lambda href: href_mappings.get(href, href))
    toc.set('class', 'toc2')
    return toc
168+
169+
170+
def rsync_images():
    """Copy the book's images into the ``images`` directory under `DEST`.

    Raises:
        subprocess.CalledProcessError: if rsync exits with a non-zero status.
    """
    # The trailing slash on the source makes rsync copy the directory's
    # contents into the destination instead of nesting a second "images"
    # directory inside it.
    subprocess.run(
        ['rsync', '-a', '-v', 'book/images/', f'{DEST}/images/'],
        check=True,
    )


def main():
    """Prepare the book's HTML for the site.

    Extracts the table of contents, gathers per-chapter information, fixes
    the table of contents' links, writes the fixed chapter pages and finally
    copies the images across.
    """
    toc = extract_toc_from_book()
    chapter_info = get_chapter_info()
    fixed_toc = fix_toc(toc, chapter_info)
    copy_chapters_across_with_fixes(chapter_info, fixed_toc)
    rsync_images()


if __name__ == '__main__':
    main()

Diff for: requirements.txt

+2
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
11
markdown
22
jinja2
3+
pygments
4+
lxml[cssselect]

0 commit comments

Comments
 (0)