From f63341d29364334093575165d591ea525ab36f72 Mon Sep 17 00:00:00 2001 From: Ian Dees Date: Tue, 18 Feb 2025 21:50:16 -0600 Subject: [PATCH] Set row group size so that the buffer doesn't grow indefinitely --- task/collect.js | 1 + 1 file changed, 1 insertion(+) diff --git a/task/collect.js b/task/collect.js index 320b2a92..4276264a 100755 --- a/task/collect.js +++ b/task/collect.js @@ -334,6 +334,7 @@ async function parquet_datas(tmp, datas, name) { notes: { type: 'UTF8', optional: true } }); const writer = await parquet.ParquetWriter.openFile(schema, path.resolve(tmp, `${name}.parquet`)); + writer.setRowGroupSize(16384); for (const data of datas) { const resolved_data_filename = path.resolve(tmp, 'sources', data);