17
17
// Script logic
////////////////

// Check we have required options
if (empty($bookSlug) || empty($outFolder)) {
    errorOut("Both a book slug and output folder must be provided");
}

// Create the output folder if it does not exist.
// Stop early when creation fails; otherwise realpath() below returns false
// and every later write would target paths such as "/index.html".
if (!is_dir($outFolder) && !mkdir($outFolder, 0777, true)) {
    errorOut("Could not create the output folder: {$outFolder}");
}

// Get full output directory and book details
$outDir = realpath($outFolder);
if ($outDir === false) {
    errorOut("Could not resolve the output folder: {$outFolder}");
}
$book = getBookBySlug($bookSlug);

// Error out if we don't have a book
if (is_null($book)) {
    errorOut("Could not find book with the URL slug: {$bookSlug}");
}

// Get all chapters and pages within the book
$chapters = getAllOfAtListEndpoint("api/chapters", ['filter[book_id]' => $book['id']]);
$pages = getAllOfAtListEndpoint("api/pages", ['filter[book_id]' => $book['id']]);

// Get the full content for each page.
// Each list entry is replaced by the result of the single-page endpoint.
foreach ($pages as $index => $page) {
    $pages[$index] = apiGetJson("api/pages/{$page['id']}");
}

// Create the image output directory
$imageDir = $outDir . "/images";
if (!is_dir($imageDir) && !mkdir($imageDir, 0777, true)) {
    errorOut("Could not create the image output folder: {$imageDir}");
}

// Find the pages that are not within a chapter
$directBookPages = array_filter($pages, function ($page) {
    return empty($page['chapter_id']);
});

// Create book index file
$bookIndex = getBookHtmlOutput($book, $chapters, $directBookPages);
file_put_contents($outDir . "/index.html", $bookIndex);

// Create a HTML file for each chapter
// in addition to each page within those chapters
foreach ($chapters as $chapter) {
    // Compare ids as integers so "5" and 5 match without relying on loose ==.
    $childPages = array_filter($pages, function ($page) use ($chapter) {
        return (int) ($page['chapter_id'] ?? 0) === (int) $chapter['id'];
    });
    $chapterPage = getChapterHtmlOutput($chapter, $childPages);
    file_put_contents($outDir . "/chapter-{$chapter['slug']}.html", $chapterPage);

    foreach ($childPages as $childPage) {
        $childPageContent = getPageHtmlOutput($childPage, $chapter);
        // Download referenced images and point the HTML at the local copies
        $childPageContent = extractImagesFromHtml($childPageContent);
        file_put_contents($outDir . "/page-{$childPage['slug']}.html", $childPageContent);
    }
}

// Create a file for each direct child book page
foreach ($directBookPages as $directPage) {
    $directPageContent = getPageHtmlOutput($directPage, null);
    $directPageContent = extractImagesFromHtml($directPageContent);
    file_put_contents($outDir . "/page-{$directPage['slug']}.html", $directPageContent);
}
73
84
85
/**
 * Scan the given HTML for image URLs and extract those images
 * to save them locally and update the HTML references to point
 * to the local files.
 *
 * Images are written to "{$outDir}/images/". State is kept across calls
 * so that a URL seen on an earlier page reuses the file already saved,
 * and so that two different URLs sharing a base name get distinct files
 * ("name", "1-name", "2-name", ...).
 *
 * Images that cannot be fetched or written are skipped and their
 * references are left pointing at the original URL.
 *
 * @param string $html HTML to scan for <img> tags.
 * @return string The HTML with downloaded image URLs swapped for local paths.
 */
function extractImagesFromHtml(string $html): string {
    global $outDir;
    static $savedImages = []; // local file name => true
    static $savedUrls = [];   // source URL => local file name

    $matches = [];
    preg_match_all('/<img.*?src=["\'](.*?)[\'"].*?>/i', $html, $matches);
    foreach (array_unique($matches[1] ?? []) as $url) {
        // Reuse the local copy when this exact URL was already downloaded
        if (isset($savedUrls[$url])) {
            $html = str_replace($url, "./images/" . $savedUrls[$url], $html);
            continue;
        }

        $image = getImageFile($url);
        // Treat an empty body like a failure: there is nothing useful to save
        if ($image === false || $image === '') {
            continue;
        }

        // Build the file name from the URL path only, so that a query string
        // or fragment ("pic.png?v=2") does not end up in the file name.
        $path = parse_url($url, PHP_URL_PATH);
        $name = basename(is_string($path) && $path !== '' ? $path : $url);
        if ($name === '') {
            $name = 'image';
        }

        // Prefix with an increasing counter until the name is unused
        $fileName = $name;
        $count = 1;
        while (isset($savedImages[$fileName])) {
            $fileName = $count . '-' . $name;
            $count++;
        }

        // Only rewrite the HTML once the image is actually on disk
        if (file_put_contents($outDir . "/images/" . $fileName, $image) === false) {
            continue;
        }

        $savedImages[$fileName] = true;
        $savedUrls[$url] = $fileName;
        $html = str_replace($url, "./images/" . $fileName, $html);
    }
    return $html;
}
91
115
92
/**
 * Get an image file from the given URL.
 * Checks if it's hosted on the same instance as the API we're
 * using so that auth details can be provided for BookStack images
 * in case local_secure images are in use.
 *
 * No native return type is declared on purpose: with a ": string"
 * declaration a false result from file_get_contents() would be coerced
 * to an empty string (or raise a TypeError under strict types), so
 * callers could never detect a failed download by checking for false.
 *
 * @param string $url Absolute URL (or path) of the image to fetch.
 * @return string|false The raw image data, or false when it could not
 *                      be fetched or the response was empty.
 */
function getImageFile(string $url) {
    global $apiUrl;

    // Case-insensitive prefix check against the configured instance URL.
    // The empty() guard avoids treating every URL as an API URL when
    // no instance URL has been configured.
    if (!empty($apiUrl) && stripos($url, $apiUrl) === 0) {
        // Request the path relative to the instance through the
        // authenticated API helper.
        $data = apiGet(substr($url, strlen($apiUrl)));
    } else {
        // Warnings are suppressed; failure is reported via the return value
        $data = @file_get_contents($url);
    }

    return ($data === false || $data === '') ? false : $data;
}
100
130
101
- function getBookContent (array $ book , array $ chapters , array $ pages ): string {
131
+ /**
132
+ * Get the HTML representation of a book.
133
+ */
134
+ function getBookHtmlOutput (array $ book , array $ chapters , array $ pages ): string {
102
135
$ content = "<h1> {$ book ['name ' ]}</h1> " ;
103
136
$ content .= "<p> {$ book ['description ' ]}</p> " ;
104
137
$ content .= "<hr> " ;
@@ -119,7 +152,10 @@ function getBookContent(array $book, array $chapters, array $pages): string {
119
152
return $ content ;
120
153
}
121
154
122
- function getChapterContent (array $ chapter , array $ pages ): string {
155
+ /**
156
+ * Get the HTML representation of a chapter.
157
+ */
158
+ function getChapterHtmlOutput (array $ chapter , array $ pages ): string {
123
159
$ content = "<p><a href='./index.html'>Back to book</a></p> " ;
124
160
$ content .= "<h1> {$ chapter ['name ' ]}</h1> " ;
125
161
$ content .= "<p> {$ chapter ['description ' ]}</p> " ;
@@ -134,7 +170,10 @@ function getChapterContent(array $chapter, array $pages): string {
134
170
return $ content ;
135
171
}
136
172
137
- function getPageContent (array $ page , ?array $ parentChapter ): string {
173
+ /**
174
+ * Get the HTML representation of a page.
175
+ */
176
+ function getPageHtmlOutput (array $ page , ?array $ parentChapter ): string {
138
177
if (is_null ($ parentChapter )) {
139
178
$ content = "<p><a href='./index.html'>Back to book</a></p> " ;
140
179
} else {
@@ -189,7 +228,7 @@ function apiGet(string $endpoint): string {
189
228
$ url = rtrim ($ apiUrl , '/ ' ) . '/ ' . ltrim ($ endpoint , '/ ' );
190
229
$ opts = ['http ' => ['header ' => "Authorization: Token {$ clientId }: {$ clientSecret }" ]];
191
230
$ context = stream_context_create ($ opts );
192
- return file_get_contents ($ url , false , $ context );
231
+ return @ file_get_contents ($ url , false , $ context );
193
232
}
194
233
195
234
/**
@@ -211,6 +250,9 @@ function dd(...$args) {
211
250
exit (1 );
212
251
}
213
252
253
+ /**
254
+ * Alert of an error then exit the script.
255
+ */
214
256
function errorOut (string $ text ) {
215
257
echo "ERROR: " . $ text ;
216
258
exit (1 );
0 commit comments