Commit 5ec1ef9

add support for expiring objects on GET
Signed-off-by: Utkarsh Srivastava <[email protected]>

add support for migration queue length based migration kickoffs

Signed-off-by: Utkarsh Srivastava <[email protected]>
1 parent 48ffb40 commit 5ec1ef9

File tree

config.js
src/manage_nsfs/manage_nsfs_glacier.js
src/sdk/namespace_fs.js
src/util/persistent_logger.js

4 files changed: +116 -1 lines changed

config.js

Lines changed: 10 additions & 0 deletions

@@ -901,6 +901,16 @@ config.NSFS_LOW_FREE_SPACE_MB_UNLEASH = 10 * 1024;
 // operations safely.
 config.NSFS_LOW_FREE_SPACE_PERCENT_UNLEASH = 0.10;
 
+// NSFS_GLACIER_FORCE_EXPIRE_ON_GET: if set to true, then any restored item in the GLACIER
+// storage class will expire as soon as the first GET request is received for it, or
+// when its previous restore time has been exceeded, whichever comes first.
+config.NSFS_GLACIER_FORCE_EXPIRE_ON_GET = false;
+
+// NSFS_GLACIER_DESIRED_MIGRATE_QUEUE_SIZE controls how large the migration queue/batch
+// may grow. Once exceeded, migration calls are supposed to kick in regardless of the
+// configured interval.
+config.NSFS_GLACIER_DESIRED_MIGRATE_QUEUE_SIZE = 50;
+
 // anonymous account name
 config.ANONYMOUS_ACCOUNT_NAME = 'anonymous';
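Both new flags are plain properties on the exported config object, so they can be flipped from a local override or a test setup. A minimal sketch (not part of this commit; the require path is an assumption):

```js
// Minimal sketch, not part of this commit: enabling the new behaviour by
// overriding the exported config properties (require path is an assumption).
const config = require('../config');

// Expire restored GLACIER objects as soon as they are read once.
config.NSFS_GLACIER_FORCE_EXPIRE_ON_GET = true;

// Kick off migration once roughly 100 entries are queued, even if the
// NSFS_GLACIER_MIGRATE_INTERVAL has not elapsed yet.
config.NSFS_GLACIER_DESIRED_MIGRATE_QUEUE_SIZE = 100;
```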

src/manage_nsfs/manage_nsfs_glacier.js

Lines changed: 17 additions & 1 deletion

@@ -20,7 +20,8 @@ async function process_migrations() {
 
     if (
         await backend.low_free_space() ||
-        await time_exceeded(fs_context, config.NSFS_GLACIER_MIGRATE_INTERVAL, GlacierBackend.MIGRATE_TIMESTAMP_FILE)
+        await time_exceeded(fs_context, config.NSFS_GLACIER_MIGRATE_INTERVAL, GlacierBackend.MIGRATE_TIMESTAMP_FILE) ||
+        await migrate_queue_too_long()
     ) {
         await run_glacier_migrations(fs_context, backend);
         await record_current_time(fs_context, GlacierBackend.MIGRATE_TIMESTAMP_FILE);

@@ -154,6 +155,21 @@ async function time_exceeded(fs_context, interval, timestamp_file) {
     return false;
 }
 
+/**
+ * migrate_queue_too_long returns true if the underlying backend
+ * decides that the migrate batch size has exceeded the configured
+ * (NSFS_GLACIER_DESIRED_MIGRATE_QUEUE_SIZE) approximate number of
+ * entries pending migration.
+ *
+ * @returns {Promise<boolean>}
+ */
+async function migrate_queue_too_long() {
+    const log = new PersistentLogger(config.NSFS_GLACIER_LOGS_DIR, GlacierBackend.MIGRATE_WAL_NAME, { locking: null });
+    const approx_entries = await log.approx_entries({ samples: 10 });
+
+    return approx_entries > config.NSFS_GLACIER_DESIRED_MIGRATE_QUEUE_SIZE;
+}
+
 /**
  * record_current_time stores the current timestamp in ISO format into
  * the given timestamp file
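With this change the kickoff condition is a three-way OR: migration runs when the backend reports low free space, when the migrate interval has elapsed, or when the migrate WAL backlog exceeds the desired queue size. A condensed, illustrative restatement of that decision (the wrapper function name is hypothetical; the called functions are the ones in the diff above):

```js
// Illustrative only: condensed restatement of the migration kickoff decision.
async function should_kick_off_migration(fs_context, backend) {
    return (
        await backend.low_free_space() ||                     // disk pressure
        await time_exceeded(fs_context,
            config.NSFS_GLACIER_MIGRATE_INTERVAL,
            GlacierBackend.MIGRATE_TIMESTAMP_FILE) ||         // interval elapsed
        await migrate_queue_too_long()                        // WAL backlog too large
    );
}
```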

src/sdk/namespace_fs.js

Lines changed: 26 additions & 0 deletions

@@ -1142,6 +1142,8 @@ class NamespaceFS {
             }
         }
 
+        await this._expire_object_if_desired(fs_context, file_path, file, stat);
+
         await file.close(fs_context);
         file = null;
         object_sdk.throw_if_aborted();

@@ -3415,6 +3417,30 @@ class NamespaceFS {
         return Math.max(in_bytes, free_from_percentage);
     }
 
+    /**
+     * _expire_object_if_desired expires an object if the object's storage
+     * class is set to GLACIER and NooBaa is configured for forced GET-based
+     * eviction.
+     * @param {nb.NativeFSContext} fs_context
+     * @param {string} file_path
+     * @param {nb.NativeFile} file
+     * @param {nb.NativeFSStats} stat
+     */
+    async _expire_object_if_desired(fs_context, file_path, file, stat) {
+        if (!config.NSFS_GLACIER_FORCE_EXPIRE_ON_GET) return;
+        if (
+            s3_utils.parse_storage_class(stat.xattr[GlacierBackend.STORAGE_CLASS_XATTR]) !== s3_utils.STORAGE_CLASS_GLACIER
+        ) return;
+
+        // Remove all the restore-related xattrs
+        await file.replacexattr(fs_context, {
+            // Set the date to 1970-01-01 to force expiry
+            [GlacierBackend.XATTR_RESTORE_EXPIRY]: new Date(0).toISOString()
+        }, GlacierBackend.XATTR_RESTORE_REQUEST);
+
+        await this.append_to_migrate_wal(file_path);
+    }
+
     async append_to_migrate_wal(entry) {
         if (!config.NSFS_GLACIER_LOGS_ENABLED) return;

src/util/persistent_logger.js

Lines changed: 63 additions & 0 deletions

@@ -208,6 +208,69 @@ class PersistentLogger {
         }
     }
 
+    /**
+     * approx_entries returns the approximate number of entries in the current
+     * active file, based on the chosen strategy and sample size.
+     * @param {{
+     *  strategy?: "TOP_K",
+     *  samples?: number
+     * }} cfg
+     * @returns {Promise<number>}
+     */
+    async approx_entries(cfg) {
+        const { strategy = "TOP_K", samples = 10 } = cfg;
+
+        // Open the reader with NO lock so that we don't interfere
+        // with the current writer.
+        //
+        // We don't need any consistency guarantees here either, so
+        // it's okay to even read partial writes.
+        const reader = new NewlineReader(
+            this.fs_context,
+            this.active_path,
+            { lock: null, skip_overflow_lines: true },
+        );
+
+        try {
+            let avg_length;
+            if (strategy === "TOP_K") {
+                avg_length = await this._get_top_k_entries_avg_length(reader, samples);
+            } else {
+                throw new Error("unsupported strategy:" + strategy);
+            }
+
+            const stat = await reader.fh.stat(this.fs_context);
+            return Math.round(stat.size / avg_length);
+        } finally {
+            await reader.close();
+        }
+    }
+
+    /**
+     * _get_top_k_entries_avg_length takes a newline reader and a sample count
+     * and returns the average length of the entries in the sample.
+     * @param {NewlineReader} reader
+     * @param {number} samples
+     */
+    async _get_top_k_entries_avg_length(reader, samples) {
+        let count = 0;
+        let total_length = 0;
+        let entry = await reader.nextline();
+
+        while (entry !== null && count < samples) {
+            count += 1;
+            total_length += entry.length;
+
+            entry = await reader.nextline();
+        }
+
+        if (count < samples) {
+            dbg.log1("not enough samples in the active log file:", this.active_path, count);
+        }
+
+        return Math.round(total_length / count);
+    }
+
     async _replace_active(log_noent) {
         const inactive_file = `${this.namespace}.${Date.now()}.log`;
         const inactive_file_path = path.join(this.dir, inactive_file);
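The estimate is simply the active file's size divided by the average length of the first `samples` entries: for example, ten sampled entries averaging 120 bytes in a 12,000-byte log yield an estimate of about 100 entries. The same arithmetic as a standalone sketch (illustrative only; not the PersistentLogger implementation):

```js
// Illustrative only: the size / average-sample-length estimate used by approx_entries.
function estimate_entry_count(file_size_bytes, sampled_lengths) {
    if (sampled_lengths.length === 0) return 0; // nothing sampled, nothing to estimate
    const avg = sampled_lengths.reduce((sum, len) => sum + len, 0) / sampled_lengths.length;
    return Math.round(file_size_bytes / avg);
}

// e.g. ten ~120-byte entries sampled from a 12,000-byte log => ~100 entries
console.log(estimate_entry_count(12000, new Array(10).fill(120))); // 100
```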
