1
+ #!/usr/bin/env php
2
+ <?php
3
+
4
// API Credentials
// Provide these either as environment variables
// or hard-code them in the empty fallback strings below.
$apiUrl = getenv('BS_URL') ?: '';
$clientId = getenv('BS_TOKEN_ID') ?: '';
$clientSecret = getenv('BS_TOKEN_SECRET') ?: '';

// Book Slug & Output Folder
// Can be provided as arguments when calling the script
// (book slug first, output folder second)
// or be hard-coded as the fallback strings below.
$bookSlug = $argv[1] ?? '';
$outFolder = $argv[2] ?? './out';
16
+
17
// Script logic
////////////////

if (empty($bookSlug) || empty($outFolder)) {
    errorOut("Both a book slug and output folder must be provided");
}

// Every API request is built on top of this URL, so fail early without it.
if (empty($apiUrl)) {
    errorOut("An API URL must be provided");
}

// Create the output folder when missing. The trailing is_dir() check covers
// the folder having been created by something else in the meantime.
if (!is_dir($outFolder) && !mkdir($outFolder, 0777, true) && !is_dir($outFolder)) {
    errorOut("Could not create the output folder: {$outFolder}");
}

// realpath() returns false on failure, which would otherwise silently turn
// every output path below into a path relative to the filesystem root.
$outDir = realpath($outFolder);
if ($outDir === false) {
    errorOut("Could not resolve the output folder: {$outFolder}");
}

$book = getBookBySlug($bookSlug);
if (is_null($book)) {
    errorOut("Could not find book with the URL slug: {$bookSlug}");
}

$bookFilter = ['filter[book_id]' => $book['id']];
$chapters = getAllOfAtListEndpoint("api/chapters", $bookFilter);
$pages = getAllOfAtListEndpoint("api/pages", $bookFilter);

// Swap each listed page for its single-page API response, since
// getPageContent() reads the 'html' key from the page data.
foreach ($pages as $index => $page) {
    $pages[$index] = apiGetJson("api/pages/{$page['id']}");
}

$imageDir = $outDir . "/images";
if (!is_dir($imageDir) && !mkdir($imageDir, 0777, true) && !is_dir($imageDir)) {
    errorOut("Could not create the images folder: {$imageDir}");
}

// Pages with an empty chapter_id are treated as direct children of the book.
$directBookPages = array_filter($pages, function ($page) {
    return empty($page['chapter_id']);
});

// Create book index file
$bookIndex = getBookContent($book, $chapters, $directBookPages);
file_put_contents($outDir . "/index.html", $bookIndex);

// Create a file per chapter, plus a file for each page within that chapter
foreach ($chapters as $chapter) {
    $childPages = array_filter($pages, function ($page) use ($chapter) {
        return $page['chapter_id'] == $chapter['id'];
    });
    $chapterPage = getChapterContent($chapter, $childPages);
    file_put_contents($outDir . "/chapter-{$chapter['slug']}.html", $chapterPage);

    foreach ($childPages as $childPage) {
        $childPageContent = getPageContent($childPage, $chapter);
        $childPageContent = extractImagesFromHtml($childPageContent);
        file_put_contents($outDir . "/page-{$childPage['slug']}.html", $childPageContent);
    }
}

// Create a file for each page that sits directly in the book
foreach ($directBookPages as $directPage) {
    $directPageContent = getPageContent($directPage, null);
    $directPageContent = extractImagesFromHtml($directPageContent);
    file_put_contents($outDir . "/page-{$directPage['slug']}.html", $directPageContent);
}
73
+
74
/**
 * Download the images referenced by <img> tags in the given HTML into the
 * "images" folder of the output directory, then return the HTML with those
 * image URLs replaced by relative paths to the downloaded copies.
 * Images that cannot be fetched keep their original URL.
 */
function extractImagesFromHtml(string $html): string {
    global $outDir;

    $matches = [];
    preg_match_all('/<img.*?src=["\'](.*?)[\'"].*?>/i', $html, $matches);

    foreach (array_unique($matches[1] ?? []) as $url) {
        // getImageFile() routes URLs under the API base URL through apiGet().
        $image = getImageFile($url);
        if ($image === '') {
            // Nothing was fetched; do not write an empty file or
            // rewrite the reference to point at it.
            continue;
        }

        // Prefix the name with an increasing counter until it is unused,
        // so that an existing file of the same name is never overwritten.
        $name = basename($url);
        $fileName = $name;
        $count = 1;
        while (file_exists($outDir . "/images/" . $fileName)) {
            $fileName = $count . '-' . $name;
            $count++;
        }

        file_put_contents($outDir . "/images/" . $fileName, $image);
        $html = str_replace($url, "./images/" . $fileName, $html);
    }

    return $html;
}
91
+
92
/**
 * Get the raw contents of the image at the given URL.
 * URLs that start with the configured API URL (compared case-insensitively)
 * are requested via apiGet() using the remainder of the URL as the endpoint;
 * any other URL is fetched directly.
 * An empty string is returned when a direct fetch fails.
 */
function getImageFile($url): string {
    global $apiUrl;

    $url = (string) $url;

    // Skip the prefix check for an empty API URL: an empty needle emits a
    // warning before PHP 8 and matches every URL from PHP 8 onwards.
    if ($apiUrl !== '' && stripos($url, $apiUrl) === 0) {
        return apiGet(substr($url, strlen($apiUrl)));
    }

    $contents = file_get_contents($url);
    return $contents === false ? '' : $contents;
}
100
+
101
/**
 * Build the HTML for the book index file: the book name and description
 * followed by lists of links to the given chapters and pages.
 * Names, descriptions and slugs are HTML-escaped before being output.
 */
function getBookContent(array $book, array $chapters, array $pages): string {
    // Values are plain text being placed into HTML, including within
    // single-quoted attributes, hence ENT_QUOTES.
    $escape = function ($value): string {
        return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
    };

    $content = "<h1>" . $escape($book['name']) . "</h1>";
    $content .= "<p>" . $escape($book['description'] ?? '') . "</p>";
    $content .= "<hr>";

    if (count($chapters) > 0) {
        $content .= "<h3>Chapters</h3><ul>";
        foreach ($chapters as $chapter) {
            $content .= "<li><a href='./chapter-" . $escape($chapter['slug']) . ".html'>"
                . $escape($chapter['name']) . "</a></li>";
        }
        $content .= "</ul>";
    }

    if (count($pages) > 0) {
        $content .= "<h3>Pages</h3><ul>";
        foreach ($pages as $page) {
            $content .= "<li><a href='./page-" . $escape($page['slug']) . ".html'>"
                . $escape($page['name']) . "</a></li>";
        }
        $content .= "</ul>";
    }

    return $content;
}
121
+
122
/**
 * Build the HTML for a chapter file: a link back to the book index, the
 * chapter name and description, then a list of links to the given pages.
 * Names, descriptions and slugs are HTML-escaped before being output.
 */
function getChapterContent(array $chapter, array $pages): string {
    // Values are plain text being placed into HTML, including within
    // single-quoted attributes, hence ENT_QUOTES.
    $escape = function ($value): string {
        return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
    };

    $content = "<p><a href='./index.html'>Back to book</a></p>";
    $content .= "<h1>" . $escape($chapter['name']) . "</h1>";
    $content .= "<p>" . $escape($chapter['description'] ?? '') . "</p>";
    $content .= "<hr>";

    if (count($pages) > 0) {
        $content .= "<h3>Pages</h3><ul>";
        foreach ($pages as $page) {
            $content .= "<li><a href='./page-" . $escape($page['slug']) . ".html'>"
                . $escape($page['name']) . "</a></li>";
        }
        $content .= "</ul>";
    }

    return $content;
}
136
+
137
/**
 * Build the HTML for a page file: a link back to the parent chapter (or to
 * the book index when no parent chapter is given), the page name as a
 * heading, then the page's own HTML.
 * The name and chapter slug are HTML-escaped; the page's 'html' value is
 * output as-is since it is already markup.
 */
function getPageContent(array $page, ?array $parentChapter): string {
    // Values are plain text being placed into HTML, including within
    // single-quoted attributes, hence ENT_QUOTES.
    $escape = function ($value): string {
        return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
    };

    if (is_null($parentChapter)) {
        $content = "<p><a href='./index.html'>Back to book</a></p>";
    } else {
        $content = "<p><a href='./chapter-" . $escape($parentChapter['slug']) . ".html'>Back to chapter</a></p>";
    }

    $content .= "<h1>" . $escape($page['name']) . "</h1>";
    $content .= "<div>{$page['html']}</div>";

    return $content;
}
147
+
148
/**
 * Get a single book by its slug, or return null if it does not exist.
 * The book is first looked up via the books list endpoint filtered by slug,
 * then fetched again by ID from the single-book endpoint.
 */
function getBookBySlug(string $slug): ?array {
    $endpoint = 'api/books?' . http_build_query(['filter[slug]' => $slug]);
    $resp = apiGetJson($endpoint);

    $listedBook = $resp['data'][0] ?? null;
    if (is_null($listedBook)) {
        return null;
    }

    return apiGetJson("api/books/{$listedBook['id']}");
}
161
+
162
/**
 * Get all items from the given API list endpoint.
 * Items are requested 100 at a time, with the given params sent as extra
 * query parameters on every request, until the offset reaches the total
 * reported by the API.
 */
function getAllOfAtListEndpoint(string $endpoint, array $params): array {
    $count = 100;
    $offset = 0;
    $all = [];

    do {
        // Build each request URL from the unmodified endpoint. Appending to
        // $endpoint itself would stack up another "?query" on every page.
        $query = http_build_query(array_merge($params, ['count' => $count, 'offset' => $offset]));
        $resp = apiGetJson($endpoint . '?' . $query);

        $total = $resp['total'] ?? 0;
        $all = array_merge($all, array_values($resp['data'] ?? []));
        $offset += $count;
    } while ($offset < $total);

    return $all;
}
183
+
184
/**
 * Make a simple GET HTTP request to the API.
 * The endpoint is appended to the configured API URL and the request is
 * sent with a token "Authorization" header built from the configured
 * client ID and secret.
 * Returns the response body, or an empty string if the request failed.
 */
function apiGet(string $endpoint): string {
    global $apiUrl, $clientId, $clientSecret;

    // Normalise slashes so exactly one separates the base URL and endpoint.
    $url = rtrim($apiUrl, '/') . '/' . ltrim($endpoint, '/');
    $context = stream_context_create([
        'http' => ['header' => "Authorization: Token {$clientId}:{$clientSecret}"],
    ]);

    // file_get_contents() returns false on failure; make the conversion to
    // an empty string explicit instead of relying on return type coercion.
    $response = file_get_contents($url, false, $context);
    return $response === false ? '' : $response;
}
194
+
195
/**
 * Make a simple GET HTTP request to the API &
 * decode the JSON response to an array.
 * Errors out if the response does not decode to an array.
 */
function apiGetJson(string $endpoint): array {
    $decoded = json_decode(apiGet($endpoint), true);

    // json_decode() gives null for an empty or invalid body, which would
    // otherwise surface as a TypeError from the array return type.
    if (!is_array($decoded)) {
        errorOut("Could not read a JSON response from the API endpoint: {$endpoint}");
    }

    return $decoded;
}
203
+
204
/**
 * DEBUG: Dump out the given variables and exit
 * with an exit status of 1.
 */
function dd(...$args): void {
    foreach ($args as $arg) {
        var_dump($arg);
    }

    exit(1);
}
213
+
214
/**
 * Output the given text as an error message, then stop the script
 * with an exit status of 1.
 */
function errorOut(string $text): void {
    // End with a newline so the shell prompt does not run onto the message.
    echo "ERROR: " . $text . PHP_EOL;
    exit(1);
}
0 commit comments