-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate_docs.py
166 lines (137 loc) · 5.55 KB
/
generate_docs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
##############################################################################################################################
# author: @mattische #
# Hej! Masse här! #
# detta script är en del av workflow/GitHub action som genererar filer när jag pushar/uppdaterar innehållet i index.md #
##############################################################################################################################
import os
import re
from markdown2 import markdown
from docx import Document
from fpdf import FPDF
from bs4 import BeautifulSoup
from weasyprint import HTML
# Markdown source file that all generated documents are built from.
INDEX_FILE = "index.md"
# File that receives a verbatim copy of the Markdown source (see update_readme).
README_FILE = "README.md"
# Directory the generated HTML/DOCX/PDF files are written into.
OUTPUT_DIR = "files"
# Base file name (without extension) shared by the generated documents.
OUTPUT_PREFIX = "MattiasSchertell"
def create_output_dir():
    """Create the ``OUTPUT_DIR`` directory if it does not exist yet.

    ``exist_ok=True`` makes the call idempotent and removes the window
    between a separate existence check and the creation in which another
    process could create the directory first. If the path exists but is a
    regular file, ``os.makedirs`` raises ``FileExistsError`` instead of the
    problem surfacing later when the first output file is written.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)
def read_markdown_file(filepath):
    """Return the complete contents of *filepath*, decoded as UTF-8."""
    with open(filepath, mode="r", encoding="utf-8") as source:
        text = source.read()
    return text
def write_file(filepath, content):
    """Write *content* to *filepath* as UTF-8 text, replacing any existing file."""
    with open(filepath, mode="w", encoding="utf-8") as sink:
        sink.write(content)
# Markdown to HTML
def markdown_to_html(md_content):
    """Return *md_content* rendered by markdown2's ``markdown`` with default options."""
    rendered = markdown(md_content)
    return rendered
# Markdown to HTML, including Bootstrap
def generate_html(md_content):
    """Render *md_content* into the page template and save it as ``index.html``.

    The file is written to ``OUTPUT_DIR``. The directory is created here
    when missing, because this function does not otherwise depend on
    ``create_output_dir`` having been called first; without that the write
    would fail with ``FileNotFoundError``.
    """
    html_content = markdown_to_html_with_template(md_content)
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    # Reuse the shared helper so every text file is written the same way.
    write_file(os.path.join(OUTPUT_DIR, "index.html"), html_content)
# Wrap the rendered Markdown in a page template that loads Bootstrap from a CDN
def markdown_to_html_with_template(md_content):
    """Convert Markdown to a complete HTML document styled with Bootstrap.

    Args:
        md_content: Markdown source text.

    Returns:
        A string holding a full HTML page: the rendered Markdown placed
        inside a ``<div class="container">``, with every ``<table>`` given
        Bootstrap table classes and an inline spacing/width style.
    """
    # markdown2 only turns pipe-style Markdown tables into <table> elements
    # when the "tables" extra is enabled; without it the table handling
    # below (and in the DOCX export) never sees a Markdown table.
    body_content = markdown(md_content, extras=["tables"])
    soup = BeautifulSoup(body_content, "html.parser")

    # Tables: apply the Bootstrap classes plus a little spacing.
    for table in soup.find_all("table"):
        table["class"] = "table table-striped table-bordered"
        table["style"] = "margin-top: 1rem; width: 100%;"

    # NOTE(review): both CDN URLs below contain the literal text
    # "[email protected]", which is not a valid package@version specifier and
    # looks like a mangled "bootstrap@<version>". Confirm the intended
    # version and that the integrity hashes match it; a browser refuses to
    # load a resource whose hash does not match.
    html_template = f"""
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Mattias Schertell CV</title>
<!-- Bootstrap -->
<link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet"
integrity="sha384-rbsA2VBKQ9AR+I9AYhkfxQjCfSKV9VV2i59lFWEL8BnhE9r5qD65VohKp35uEF5e"
crossorigin="anonymous">
<style>
body {{
padding: 2rem;
font-family: Arial, sans-serif;
}}
h1, h2, h3 {{
margin-top: 1.5rem;
margin-bottom: 1rem;
}}
p {{
margin-bottom: 1rem;
}}
</style>
</head>
<body>
<div class="container">
{soup}
</div>
<!-- Bootstrap -->
<script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js"
integrity="sha384-OERcA2GHZg6UAAETMcFp2xn5p6b9BjNx16bX4R7aB9QdEWw5mCXK5kFnfNIKqM+A"
crossorigin="anonymous"></script>
</body>
</html>
"""
    return html_template
# HTML to DOCX
def html_to_docx(html_content, output_path):
    """Convert an HTML document to a Word file saved at *output_path*.

    Headings (h1-h3), paragraphs, bullet/numbered lists, tables and
    free-standing ``<strong>`` elements are rendered; other elements are
    ignored unless they sit inside one of those.

    Each rendered element is emitted exactly once. Anything nested inside
    an element that has already been rendered as a unit (a ``<strong>``
    inside a ``<p>``, a ``<p>`` inside an ``<li>``, a nested list, content
    inside a table cell) is skipped when the walk reaches it, so its text
    is not repeated in the document.

    Args:
        html_content: HTML markup as a string.
        output_path: Destination path of the ``.docx`` file.
    """
    doc = Document()
    soup = BeautifulSoup(html_content, "html.parser")

    heading_levels = {"h1": 1, "h2": 2, "h3": 3}
    list_styles = {"ul": "List Bullet", "ol": "List Number"}
    rendered_tags = [*heading_levels, *list_styles, "strong", "table", "p"]

    for element in soup.descendants:
        tag = element.name  # None for text nodes
        if tag not in rendered_tags:
            continue
        # Already covered by the rendered ancestor; emitting it again
        # would duplicate its text.
        if element.find_parent(rendered_tags) is not None:
            continue

        if tag in heading_levels:
            doc.add_heading(element.text, level=heading_levels[tag])
        elif tag in list_styles:
            _add_list_items(doc, element, list_styles)
        elif tag == "table":
            _add_table(doc, element)
        else:  # "p", or a "strong" that is not inside another rendered element
            _add_paragraph_with_bold(doc, element)

    doc.save(output_path)


def _add_paragraph_with_bold(doc, element):
    """Add one paragraph for *element*, making text inside <strong> bold."""
    paragraph = doc.add_paragraph()
    for text in element.strings:
        run = paragraph.add_run(str(text))
        if text.find_parent("strong") is not None:
            run.bold = True


def _add_list_items(doc, list_element, list_styles):
    """Add one list paragraph per <li>, styled after its nearest list parent."""
    for item in list_element.find_all("li"):
        # Only the text that belongs to this item itself; the text of
        # nested items is emitted when those items are visited.
        own_text = "".join(
            str(text) for text in item.strings if text.find_parent("li") is item
        ).strip()
        owner = item.find_parent(list(list_styles))
        doc.add_paragraph(own_text, style=list_styles[owner.name])


def _add_table(doc, table_element):
    """Add a table sized to the widest row and copy the cell texts into it."""
    html_rows = [row.find_all(["td", "th"]) for row in table_element.find_all("tr")]
    # Size by the widest row so a row longer than the first one cannot
    # index past the last column.
    width = max((len(cells) for cells in html_rows), default=0)
    if not width:
        return
    table = doc.add_table(rows=len(html_rows), cols=width)
    for html_cells, docx_row in zip(html_rows, table.rows):
        for html_cell, docx_cell in zip(html_cells, docx_row.cells):
            docx_cell.text = html_cell.get_text(strip=True)
# HTML to PDF
def html_to_pdf(html_content, output_path):
    """Render the HTML string *html_content* with WeasyPrint and write the PDF to *output_path*."""
    document = HTML(string=html_content)
    document.write_pdf(output_path)
# Remove characters that "latin-1" cannot represent (Unicode problem)
def sanitize_content(content):
    """Return *content* with every character above U+00FF removed."""
    outside_latin1 = re.compile(r'[^\x00-\xFF]')
    return outside_latin1.sub('', content)
# Copy to README.md
def update_readme(md_content):
    """Overwrite ``README_FILE`` with *md_content*."""
    write_file(filepath=README_FILE, content=md_content)
def main():
    """Build the HTML, DOCX and PDF files from ``INDEX_FILE`` and refresh the README."""
    # Make sure the output folder exists before anything is written.
    create_output_dir()
    md_content = read_markdown_file(INDEX_FILE)

    # Markdown -> HTML page
    html_content = markdown_to_html_with_template(md_content)

    # One output path per generated format, all sharing the same base name.
    targets = {
        ext: os.path.join(OUTPUT_DIR, f"{OUTPUT_PREFIX}.{ext}")
        for ext in ("html", "docx", "pdf")
    }
    write_file(targets["html"], html_content)

    # DOCX and PDF are both derived from the HTML page.
    html_to_docx(html_content, targets["docx"])
    html_to_pdf(html_content, targets["pdf"])

    # Update README.md
    update_readme(md_content)
# Run the generation only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()