From 0256bc93d4d7fe0c6905a0513f68ff9060ecb5ae Mon Sep 17 00:00:00 2001 From: Matthew Bond Date: Fri, 26 Apr 2024 13:06:49 +0100 Subject: [PATCH 1/3] feat: add a basic function we can use to sort an index by file size and then export it to a file --- javascript/find_largest_records_on_index.js | 83 +++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 javascript/find_largest_records_on_index.js diff --git a/javascript/find_largest_records_on_index.js b/javascript/find_largest_records_on_index.js new file mode 100644 index 0000000..3265f4f --- /dev/null +++ b/javascript/find_largest_records_on_index.js @@ -0,0 +1,83 @@ +/* +Backup Index +This script will export an index, including records, settings, rules and synonyms to the current directory. +It can be used in conjunction with restore.js to backup and restore an index to an application. +*/ + +// Install the API client: https://www.algolia.com/doc/api-client/getting-started/install/javascript/?client=javascript +const algoliasearch = require("algoliasearch"); +const dotenv = require("dotenv"); + +dotenv.config(); + +// Get your Algolia Application ID and (admin) API key from the dashboard: https://www.algolia.com/account/api-keys +// and choose a name for your index. Add these environment variables to a `.env` file: +const ALGOLIA_APP_ID = process.env.ALGOLIA_APP_ID; +const ALGOLIA_API_KEY = process.env.ALGOLIA_API_KEY; +const ALGOLIA_INDEX_NAME = process.env.ALGOLIA_INDEX_NAME; + +// Start the API client +// https://www.algolia.com/doc/api-client/getting-started/instantiate-client-index/ +const client = algoliasearch(ALGOLIA_APP_ID, ALGOLIA_API_KEY); + +// Create an index (or connect to it, if an index with the name `ALGOLIA_INDEX_NAME` already exists) +// https://www.algolia.com/doc/api-client/getting-started/instantiate-client-index/#initialize-an-index +const index = client.initIndex(ALGOLIA_INDEX_NAME); + +// Requiring fs module in which writeFile function is defined. 
+const fs = require("fs"); + +let records = [], + settings = [], + rules = [], + synonyms = []; + +(async () => { + // retrieve all records from index + console.log(`Retrieving records...`); + try { + await index.browseObjects({ + batch: (batch) => { + + // This method gets an approximation of the size of the record (total string length) we can use for sorting purposes + for (let i = 0; i < batch.length; i++) { + batch[i].string_length = JSON.stringify(batch[i]).length; + } + records = records.concat(batch); + } + }); + + console.log(`${records.length} records retrieved`); + + console.log(`Sorting Records By Size...`); + + // Sort the result so the largest string length is at the beginning + records.sort((a, b) => b.string_length - a.string_length); + + + } catch (error) { + console.log(`Error retrieving data ${error}`); + } + + // write json files to current directory + function createJson(data, name) { + if (data) { + fs.writeFile( + `${ALGOLIA_INDEX_NAME}_${name}.json`, + JSON.stringify(data), + (err) => { + if (err) throw err; + } + ); + } else + (error) => { + console.log(`Error writing files: ${error}`); + }; + } + try { + let name = "records"; + createJson(records, name); + } catch (error) { + console.log(`Error exporting data ${error}`); + } +})(); From a9e885102fd37e48d117978c58a9fc0b3ad67b13 Mon Sep 17 00:00:00 2001 From: Matthew Bond Date: Fri, 26 Apr 2024 13:14:01 +0100 Subject: [PATCH 2/3] chore: rename the function and add a description --- ...gest_records_on_index.js => sort_index_by_record_size.js} | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) rename javascript/{find_largest_records_on_index.js => sort_index_by_record_size.js} (88%) diff --git a/javascript/find_largest_records_on_index.js b/javascript/sort_index_by_record_size.js similarity index 88% rename from javascript/find_largest_records_on_index.js rename to javascript/sort_index_by_record_size.js index 3265f4f..d12c6f4 100644 --- a/javascript/find_largest_records_on_index.js +++ 
b/javascript/sort_index_by_record_size.js @@ -1,7 +1,6 @@ /* -Backup Index -This script will export an index, including records, settings, rules and synonyms to the current directory. -It can be used in conjunction with restore.js to backup and restore an index to an application. +Sort Index By Record Size +Sometimes we want to easily find the largest record in an index (in file size) so we can investigate situations where some small number of records are over the fileSizeLimit. This script is designed to fetch the entire index and then sort it by the total string size, and then export it to a file for analysis. */ // Install the API client: https://www.algolia.com/doc/api-client/getting-started/install/javascript/?client=javascript From 9926fa0c02a82f106b93c4110c5824ad45fedb60 Mon Sep 17 00:00:00 2001 From: Matthew Bond Date: Fri, 26 Apr 2024 14:33:07 +0100 Subject: [PATCH 3/3] fix: code review suggestions --- javascript/backup_index.js | 6 +++--- javascript/sort_index_by_record_size.js | 11 ++++------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/javascript/backup_index.js b/javascript/backup_index.js index 9781d08..4b5e7e9 100644 --- a/javascript/backup_index.js +++ b/javascript/backup_index.js @@ -75,7 +75,7 @@ let records = [], console.log(`${synonyms.length} synonyms retrieved`); } catch (error) { - console.log(`Error retrieving data ${error}`); + console.log(`Error retrieving data ${error.message}`); } // write json files to current directory @@ -90,7 +90,7 @@ let records = [], ); } else (error) => { - console.log(`Error writing files: ${error}`); + console.log(`Error writing files: ${error.message}`); }; } try { @@ -103,6 +103,6 @@ let records = [], name = "synonyms"; createJson(synonyms, name); } catch (error) { - console.log(`Error exporting data ${error}`); + console.log(`Error exporting data ${error.message}`); } })(); diff --git a/javascript/sort_index_by_record_size.js b/javascript/sort_index_by_record_size.js index d12c6f4..6325c48 
100644 --- a/javascript/sort_index_by_record_size.js +++ b/javascript/sort_index_by_record_size.js @@ -26,10 +26,7 @@ const index = client.initIndex(ALGOLIA_INDEX_NAME); // Requiring fs module in which writeFile function is defined. const fs = require("fs"); -let records = [], - settings = [], - rules = [], - synonyms = []; +let records = []; (async () => { // retrieve all records from index @@ -55,7 +52,7 @@ let records = [], } catch (error) { - console.log(`Error retrieving data ${error}`); + console.log(`Error retrieving data ${error.message}`); } // write json files to current directory @@ -70,13 +67,13 @@ let records = [], ); } else (error) => { - console.log(`Error writing files: ${error}`); + console.log(`Error writing files: ${error.message}`); }; } try { let name = "records"; createJson(records, name); } catch (error) { - console.log(`Error exporting data ${error}`); + console.log(`Error exporting data ${error.message}`); } })();