#!/usr/bin/env php
<?php

// API Credentials
// You can either provide them as environment variables
// or hard-code them in the empty strings below.
$baseUrl = getenv('BS_URL') ?: '';
$clientId = getenv('BS_TOKEN_ID') ?: '';
$clientSecret = getenv('BS_TOKEN_SECRET') ?: '';

// Output File
// Can be provided as an argument when calling the script
// or be hard-coded as a string below.
$outputFile = $argv[1] ?? './sitemap.xml';

// Script logic
////////////////

// Check we have required options.
// Strict comparisons are used so that a value such as "0" is not
// mistaken for a missing option.
if ($outputFile === '') {
    errorOut("An output file needs to be provided");
}
if ($baseUrl === '') {
    errorOut("A base URL needs to be provided via BS_URL or hard-coded in the script");
}

// Create the output folder if it does not exist
$outDir = dirname($outputFile);
if (!is_dir($outDir) && !mkdir($outDir, 0777, true) && !is_dir($outDir)) {
    errorOut("Could not create the output directory: {$outDir}");
}

// Clean up the base path
$baseUrl = rtrim($baseUrl, '/');

// Additional endpoints not fetched via API entities
$additionalEndpoints = [
    '/',
    '/books',
    '/search',
    '/login',
];

// Get all shelf URLs
$shelves = getAllOfAtListEndpoint("api/shelves", []);
$shelfEndpoints = array_map(function ($shelf) {
    return '/shelves/' . $shelf['slug'];
}, $shelves);

// Get all book URLs and build an id => slug map for chapters & pages
$books = getAllOfAtListEndpoint("api/books", []);
$bookSlugsById = array_column($books, 'slug', 'id');
$bookEndpoints = array_map(function ($book) {
    return '/books/' . $book['slug'];
}, $books);

// Get all chapter URLs, nested under the slug of their parent book
$chapters = getAllOfAtListEndpoint("api/chapters", []);
$chapterEndpoints = array_map(function ($chapter) use ($bookSlugsById) {
    $bookSlug = $bookSlugsById[$chapter['book_id']];
    return '/books/' . $bookSlug . '/chapter/' . $chapter['slug'];
}, $chapters);

// Get all page URLs, nested under the slug of their parent book
$pages = getAllOfAtListEndpoint("api/pages", []);
$pageEndpoints = array_map(function ($page) use ($bookSlugsById) {
    $bookSlug = $bookSlugsById[$page['book_id']];
    return '/books/' . $bookSlug . '/page/' . $page['slug'];
}, $pages);

// Gather all our endpoints.
// array_merge() is required here: these are all integer-indexed lists, and
// the array union operator (+) would keep only the first value seen for each
// index, discarding most of the gathered URLs.
$allEndpoints = array_merge(
    $additionalEndpoints,
    $pageEndpoints,
    $chapterEndpoints,
    $bookEndpoints,
    $shelfEndpoints
);

// Fetch our sitemap XML
$xmlSitemap = generateSitemapXml($allEndpoints);

// Write to the output file
if (file_put_contents($outputFile, $xmlSitemap) === false) {
    errorOut("Could not write the sitemap to: {$outputFile}");
}
80+
/**
 * Generate the XML content for a sitemap covering the given endpoints.
 *
 * Every endpoint is appended to the global $baseUrl to form its <loc> value.
 * Each entry is given today's date as <lastmod>, a "monthly" <changefreq>
 * and a <priority> of 0.8.
 *
 * @param string[] $endpoints Paths to list, each appended as-is to $baseUrl.
 * @return string The serialized XML document.
 */
function generateSitemapXml(array $endpoints): string
{
    global $baseUrl;
    $nowDate = date('Y-m-d');
    $doc = new DOMDocument("1.0", "UTF-8");
    $urlset = $doc->createElement('urlset');
    $urlset->setAttribute('xmlns', 'http://www.sitemaps.org/schemas/sitemap/0.9');

    $doc->appendChild($urlset);
    foreach ($endpoints as $endpoint) {
        $url = $doc->createElement('url');
        $loc = $url->appendChild($doc->createElement('loc'));
        // A text node is used for the location so that characters such as
        // "&" within a URL are escaped when the document is serialized.
        $urlText = $doc->createTextNode($baseUrl . $endpoint);
        $loc->appendChild($urlText);
        $url->appendChild($doc->createElement('lastmod', $nowDate));
        $url->appendChild($doc->createElement('changefreq', 'monthly'));
        $url->appendChild($doc->createElement('priority', '0.8'));
        $urlset->appendChild($url);
    }

    return $doc->saveXML();
}
107+
/**
 * Consume all items from the given API listing endpoint.
 *
 * Pages of 100 items are requested using "count" and "offset" query
 * parameters until the offset reaches the "total" reported by the API.
 *
 * @param string $endpoint API path without a query string, e.g. "api/books".
 * @param array $params Extra query parameters sent with every page request.
 * @return array The items gathered from the "data" key of every page.
 */
function getAllOfAtListEndpoint(string $endpoint, array $params): array
{
    $count = 100;
    $offset = 0;
    $all = [];

    do {
        // Build the page URL into its own variable. Reassigning $endpoint
        // would stack a fresh query string onto the previous page's URL on
        // every iteration after the first.
        $query = http_build_query(array_merge($params, ['count' => $count, 'offset' => $offset]));
        $resp = apiGetJson($endpoint . '?' . $query);

        $total = $resp['total'] ?? 0;
        $new = $resp['data'] ?? [];
        foreach ($new as $item) {
            $all[] = $item;
        }
        $offset += $count;
    } while ($offset < $total);

    return $all;
}
129+
/**
 * Make a simple GET HTTP request to the API.
 *
 * The request is sent to the global $baseUrl joined with the given endpoint,
 * with an "Authorization: Token <id>:<secret>" header built from the global
 * $clientId and $clientSecret. If the request fails, the script exits with
 * an error instead of passing a non-string value to the caller.
 *
 * @param string $endpoint API path, with or without a leading slash.
 * @return string The raw response body.
 */
function apiGet(string $endpoint): string
{
    global $baseUrl, $clientId, $clientSecret;
    $url = rtrim($baseUrl, '/') . '/' . ltrim($endpoint, '/');
    $opts = ['http' => ['header' => "Authorization: Token {$clientId}:{$clientSecret}"]];
    $context = stream_context_create($opts);

    // Warnings are suppressed so the failure can be reported once, in the
    // script's own error format, using the details from error_get_last().
    $body = @file_get_contents($url, false, $context);
    if ($body === false) {
        $reason = error_get_last()['message'] ?? 'unknown error';
        errorOut("Request to {$url} failed: {$reason}");
    }

    return $body;
}
141+
/**
 * Make a simple GET HTTP request to the API &
 * decode the JSON response to an array.
 *
 * If the response body does not decode to an array (invalid JSON, or a bare
 * scalar value), the script exits with an error rather than failing on the
 * declared return type.
 *
 * @param string $endpoint API path, including any query string.
 * @return array The decoded response, with JSON objects as associative arrays.
 */
function apiGetJson(string $endpoint): array
{
    $data = apiGet($endpoint);
    $decoded = json_decode($data, true);

    if (!is_array($decoded)) {
        errorOut("Could not decode a JSON response from the API endpoint: {$endpoint}");
    }

    return $decoded;
}
151+
/**
 * DEBUG: Print a var_dump() of every given value, in the order
 * they were passed, then stop the script with exit status 1.
 */
function dd(...$args)
{
    // Drain the argument list from the front so values are dumped in order.
    while (count($args) > 0) {
        var_dump(array_shift($args));
    }

    exit(1);
}
162+
/**
 * Alert of an error then exit the script.
 *
 * The message is printed with an "ERROR: " prefix and a trailing newline,
 * then the script stops with exit status 1.
 *
 * @param string $text Description of what went wrong.
 */
function errorOut(string $text)
{
    // The newline keeps the shell prompt from running onto the error line.
    echo "ERROR: " . $text . PHP_EOL;
    exit(1);
}
0 commit comments