#!/usr/bin/env php
<?php

// API Credentials
// You can either provide them as environment variables
// or hard-code them in the empty strings below.
$baseUrl = getenv('BS_URL') ?: '';
$clientId = getenv('BS_TOKEN_ID') ?: '';
$clientSecret = getenv('BS_TOKEN_SECRET') ?: '';

// Output File
// Can be provided as an argument when calling the script
// or be hard-coded as a string below.
$outputFile = $argv[1] ?? './sitemap.xml';

// Script logic
////////////////

// Check we have required options
if (empty($outputFile)) {
    errorOut("An output file needs to be provided");
}
if ($baseUrl === '' || $clientId === '' || $clientSecret === '') {
    errorOut("API credentials (BS_URL, BS_TOKEN_ID & BS_TOKEN_SECRET) need to be provided");
}

// Create the output folder if it does not exist
$outDir = dirname($outputFile);
if (!is_dir($outDir) && !mkdir($outDir, 0777, true) && !is_dir($outDir)) {
    errorOut("Could not create the output folder: {$outDir}");
}

// Clean up the base path
$baseUrl = rtrim($baseUrl, '/');

// Additional endpoints not fetched via API entities
$nowDate = (new DateTime())->format('Y-m-d');
$additionalEndpoints = [
    ['endpoint' => '/', 'updated' => $nowDate],
    ['endpoint' => '/books', 'updated' => $nowDate],
    ['endpoint' => '/search', 'updated' => $nowDate],
    ['endpoint' => '/login', 'updated' => $nowDate],
];

// Get all shelf URLs
$shelves = getAllOfAtListEndpoint("api/shelves", []);
$shelfEndpoints = array_map(function ($shelf) {
    return ['endpoint' => '/shelves/' . $shelf['slug'], 'updated' => $shelf['updated_at']];
}, $shelves);

// Get all book URLs and build an id => slug map for chapters & pages
$books = getAllOfAtListEndpoint("api/books", []);
$bookSlugsById = array_column($books, 'slug', 'id');
$bookEndpoints = array_map(function ($book) {
    return ['endpoint' => '/books/' . $book['slug'], 'updated' => $book['updated_at']];
}, $books);

// Get all chapter URLs
$chapters = getAllOfAtListEndpoint("api/chapters", []);
$chapterEndpoints = array_map(function ($chapter) use ($bookSlugsById) {
    $bookSlug = $bookSlugsById[$chapter['book_id']];
    return ['endpoint' => '/books/' . $bookSlug . '/chapter/' . $chapter['slug'], 'updated' => $chapter['updated_at']];
}, $chapters);

// Get all page URLs
$pages = getAllOfAtListEndpoint("api/pages", []);
$pageEndpoints = array_map(function ($page) use ($bookSlugsById) {
    $bookSlug = $bookSlugsById[$page['book_id']];
    return ['endpoint' => '/books/' . $bookSlug . '/page/' . $page['slug'], 'updated' => $page['updated_at']];
}, $pages);

// Gather all our endpoints.
// array_merge() is required here: these are integer-keyed lists, and the
// "+" union operator would discard every entry whose index already exists
// in an earlier list.
$allEndpoints = array_merge(
    $additionalEndpoints,
    $pageEndpoints,
    $chapterEndpoints,
    $bookEndpoints,
    $shelfEndpoints
);

// Fetch our sitemap XML
$xmlSitemap = generateSitemapXml($allEndpoints);
// Write to the output file
if (file_put_contents($outputFile, $xmlSitemap) === false) {
    errorOut("Could not write the sitemap to: {$outputFile}");
}

/**
 * Build the sitemap XML document for the given endpoints.
 *
 * Each entry of $endpoints is read for two keys: 'endpoint', a path that is
 * appended to the global $baseUrl, and 'updated', a date string handed to
 * the DateTime constructor. Every URL entry is written with a fixed
 * "monthly" change frequency and a priority of "0.8".
 */
function generateSitemapXml(array $endpoints): string
{
    global $baseUrl;

    $document = new DOMDocument("1.0", "UTF-8");
    $root = $document->createElement('urlset');
    $root->setAttribute('xmlns', 'http://www.sitemaps.org/schemas/sitemap/0.9');
    $document->appendChild($root);

    foreach ($endpoints as $info) {
        $lastModified = new DateTime($info['updated']);

        // <loc> receives the full URL as a text node
        $location = $document->createElement('loc');
        $location->appendChild($document->createTextNode($baseUrl . $info['endpoint']));

        $entry = $document->createElement('url');
        $entry->appendChild($location);

        // Remaining children, in the order they appear in the output
        $details = [
            'lastmod' => $lastModified->format('Y-m-d'),
            'changefreq' => 'monthly',
            'priority' => '0.8',
        ];
        foreach ($details as $tag => $value) {
            $entry->appendChild($document->createElement($tag, $value));
        }

        $root->appendChild($entry);
    }

    return $document->saveXML();
}

/**
 * Consume all items from the given API listing endpoint.
 *
 * Requests pages of 100 items through apiGetJson(), advancing the 'offset'
 * query parameter until the 'total' reported by the response is reached.
 *
 * @param string $endpoint Endpoint path; a query string is appended to it.
 * @param array  $params   Extra query parameters merged into every request.
 * @return array All entries gathered from the 'data' key of each response.
 */
function getAllOfAtListEndpoint(string $endpoint, array $params): array
{
    $count = 100;
    $offset = 0;
    $all = [];

    do {
        // Build the request URL in its own variable. Reassigning $endpoint
        // here would stack another "?count=..&offset=.." onto the previous
        // request's URL on every iteration.
        $query = http_build_query(array_merge($params, ['count' => $count, 'offset' => $offset]));
        $resp = apiGetJson($endpoint . '?' . $query);

        $total = $resp['total'] ?? 0;
        $new = $resp['data'] ?? [];
        array_push($all, ...$new);
        $offset += $count;
    } while ($offset < $total);

    return $all;
}

/**
 * Make a simple GET HTTP request to the API.
 *
 * The URL is formed from the global $baseUrl and the given endpoint, and the
 * request carries an "Authorization: Token <id>:<secret>" header built from
 * the global $clientId and $clientSecret.
 *
 * @param string $endpoint Endpoint path, with or without a leading slash.
 * @return string The raw response body.
 */
function apiGet(string $endpoint): string
{
    global $baseUrl, $clientId, $clientSecret;
    $url = rtrim($baseUrl, '/') . '/' . ltrim($endpoint, '/');
    $opts = ['http' => ['header' => "Authorization: Token {$clientId}:{$clientSecret}"]];
    $context = stream_context_create($opts);

    // Warnings are suppressed so the failure can be reported in one place.
    // file_get_contents() returns false on failure, which must not leak out
    // through the string return type as an empty body.
    $body = @file_get_contents($url, false, $context);
    if ($body === false) {
        $reason = error_get_last()['message'] ?? 'unknown error';
        errorOut("Request to {$url} failed: {$reason}");
    }

    return $body;
}

/**
 * Make a simple GET HTTP request to the API &
 * decode the JSON response to an array.
 *
 * Exits via errorOut() when the response body does not decode to an array,
 * since json_decode() yields null for an empty or malformed body.
 *
 * @param string $endpoint Endpoint path passed through to apiGet().
 * @return array The decoded response.
 */
function apiGetJson(string $endpoint): array
{
    $data = apiGet($endpoint);
    $decoded = json_decode($data, true);

    if (!is_array($decoded)) {
        errorOut("Invalid JSON response received from API endpoint: {$endpoint}");
    }

    return $decoded;
}

/**
 * DEBUG: var_dump() each provided value in order, then
 * stop the script with exit status 1.
 */
function dd(...$args)
{
    array_walk($args, static function ($value) {
        var_dump($value);
    });

    exit(1);
}

/**
 * Alert of an error then exit the script.
 *
 * The message is written to stderr, prefixed with "ERROR: " and terminated
 * by a newline, so it does not mix into anything printed on stdout. The
 * script then exits with status 1.
 *
 * @param string $text Description of what went wrong.
 */
function errorOut(string $text)
{
    file_put_contents('php://stderr', "ERROR: " . $text . PHP_EOL);
    exit(1);
}