diff --git a/.eslintrc.js b/.eslintrc.js index f730a261..02e6047c 100644 --- a/.eslintrc.js +++ b/.eslintrc.js @@ -16,6 +16,7 @@ module.exports = { ignorePatterns: [ "/dist/", "/examples/", + "/documentation/", "/node_modules/", "/out/", "/src/shell-post.js", diff --git a/README.md b/README.md index bfc130b4..4e98ee5e 100644 --- a/README.md +++ b/README.md @@ -75,6 +75,33 @@ db.create_function("add_js", add); // Run a query in which the function is used db.run("INSERT INTO hello VALUES (add_js(7, 3), add_js('Hello ', 'world'));"); // Inserts 10 and 'Hello world' +// You can create custom aggregation functions, by passing a name +// and a set of functions to `db.create_aggregate`: +// +// - an `init` function. This function receives no argument and returns +// the initial value for the state of the aggregate function. +// - a `step` function. This function takes two arguments +// - the current state of the aggregation +// - a new value to aggregate to the state +// It should return a new value for the state. +// - a `finalize` function. This function receives a state object, and +// returns the final value of the aggregate. It can be omitted, in which case +// the final value of the state will be returned directly by the aggregate function. +// +// Here is an example aggregation function, `json_agg`, which will collect all +// input values and return them as a JSON array: +db.create_aggregate( + "json_agg", + { + init: () => [], + step: (state, val) => [...state, val], + finalize: (state) => JSON.stringify(state), + } +); + +db.exec("SELECT json_agg(column1) FROM (VALUES ('hello'), ('world'))"); +// -> The result of the query is the string '["hello","world"]' + // Export the database to an Uint8Array containing the SQLite database file const binaryArray = db.export(); ``` diff --git a/src/api.js b/src/api.js index 854bf32e..ec8c2fe7 100644 --- a/src/api.js +++ b/src/api.js @@ -225,6 +225,14 @@ Module["onRuntimeInitialized"] = function onRuntimeInitialized() { "", ["number", "string", "number"] ); + + // https://www.sqlite.org/c3ref/aggregate_context.html + // void *sqlite3_aggregate_context(sqlite3_context*, int nBytes) + var sqlite3_aggregate_context = cwrap( + "sqlite3_aggregate_context", + "number", + ["number", "number"] + ); var registerExtensionFunctions = cwrap( "RegisterExtensionFunctions", "number", @@ -1131,81 +1139,90 @@ Module["onRuntimeInitialized"] = function onRuntimeInitialized() { return sqlite3_changes(this.db); }; - /** Register a custom function with SQLite - @example Register a simple function - db.create_function("addOne", function (x) {return x+1;}) - db.exec("SELECT addOne(1)") // = 2 + var extract_blob = function extract_blob(ptr) { + var size = sqlite3_value_bytes(ptr); + var blob_ptr = sqlite3_value_blob(ptr); + var blob_arg = new Uint8Array(size); + for (var j = 0; j < size; j += 1) { + blob_arg[j] = HEAP8[blob_ptr + j]; + } + return blob_arg; + }; - @param {string} name the name of the function as referenced in - SQL statements. - @param {function} func the actual function to be executed. - @return {Database} The database object. Useful for method chaining - */ + var parseFunctionArguments = function parseFunctionArguments(argc, argv) { + var args = []; + for (var i = 0; i < argc; i += 1) { + var value_ptr = getValue(argv + (4 * i), "i32"); + var value_type = sqlite3_value_type(value_ptr); + var arg; + if ( + value_type === SQLITE_INTEGER + || value_type === SQLITE_FLOAT + ) { + arg = sqlite3_value_double(value_ptr); + } else if (value_type === SQLITE_TEXT) { + arg = sqlite3_value_text(value_ptr); + } else if (value_type === SQLITE_BLOB) { + arg = extract_blob(value_ptr); + } else arg = null; + args.push(arg); + } + return args; + }; + var setFunctionResult = function setFunctionResult(cx, result) { + switch (typeof result) { + case "boolean": + sqlite3_result_int(cx, result ? 1 : 0); + break; + case "number": + sqlite3_result_double(cx, result); + break; + case "string": + sqlite3_result_text(cx, result, -1, -1); + break; + case "object": + if (result === null) { + sqlite3_result_null(cx); + } else if (result.length != null) { + var blobptr = allocate(result, ALLOC_NORMAL); + sqlite3_result_blob(cx, blobptr, result.length, -1); + _free(blobptr); + } else { + sqlite3_result_error(cx, ( + "Wrong API use : tried to return a value " + + "of an unknown type (" + result + ")." + ), -1); + } + break; + default: + sqlite3_result_null(cx); + } + }; + + /** Register a custom function with SQLite + @example Register a simple function + db.create_function("addOne", function (x) {return x+1;}) + db.exec("SELECT addOne(1)") // = 2 + + @param {string} name the name of the function as referenced in + SQL statements. + @param {function} func the actual function to be executed. + @return {Database} The database object. Useful for method chaining + */ Database.prototype["create_function"] = function create_function( name, func ) { function wrapped_func(cx, argc, argv) { + var args = parseFunctionArguments(argc, argv); var result; - function extract_blob(ptr) { - var size = sqlite3_value_bytes(ptr); - var blob_ptr = sqlite3_value_blob(ptr); - var blob_arg = new Uint8Array(size); - for (var j = 0; j < size; j += 1) { - blob_arg[j] = HEAP8[blob_ptr + j]; - } - return blob_arg; - } - var args = []; - for (var i = 0; i < argc; i += 1) { - var value_ptr = getValue(argv + (4 * i), "i32"); - var value_type = sqlite3_value_type(value_ptr); - var arg; - if ( - value_type === SQLITE_INTEGER - || value_type === SQLITE_FLOAT - ) { - arg = sqlite3_value_double(value_ptr); - } else if (value_type === SQLITE_TEXT) { - arg = sqlite3_value_text(value_ptr); - } else if (value_type === SQLITE_BLOB) { - arg = extract_blob(value_ptr); - } else arg = null; - args.push(arg); - } try { result = func.apply(null, args); } catch (error) { sqlite3_result_error(cx, error, -1); return; } - switch (typeof result) { - case "boolean": - sqlite3_result_int(cx, result ? 1 : 0); - break; - case "number": - sqlite3_result_double(cx, result); - break; - case "string": - sqlite3_result_text(cx, result, -1, -1); - break; - case "object": - if (result === null) { - sqlite3_result_null(cx); - } else if (result.length != null) { - var blobptr = allocate(result, ALLOC_NORMAL); - sqlite3_result_blob(cx, blobptr, result.length, -1); - _free(blobptr); - } else { - sqlite3_result_error(cx, ( - "Wrong API use : tried to return a value " - + "of an unknown type (" + result + ")." - ), -1); - } - break; - default: - sqlite3_result_null(cx); - } + setFunctionResult(cx, result); } if (Object.prototype.hasOwnProperty.call(this.functions, name)) { removeFunction(this.functions[name]); @@ -1229,6 +1246,137 @@ Module["onRuntimeInitialized"] = function onRuntimeInitialized() { return this; }; + /** Register a custom aggregate with SQLite + @example Register a custom sum function + db.create_aggregate("js_sum", { + init: () => 0, + step: (state, value) => state + value, + finalize: state => state + }); + db.exec("SELECT js_sum(column1) FROM (VALUES (1), (2))"); // = 3 + + @param {string} name the name of the aggregate as referenced in + SQL statements. + @param {object} aggregateFunctions + object containing at least a step function. + @param {function(): T} [aggregateFunctions.init = ()=>null] + a function receiving no arguments and returning an initial + value for the aggregate function. The initial value will be + null if this key is omitted. + @param {function(T, any) : T} aggregateFunctions.step + a function receiving the current state and a value to aggregate + and returning a new state. + Will receive the value from init for the first step. + @param {function(T): any} [aggregateFunctions.finalize = (state)=>state] + a function returning the result of the aggregate function + given its final state. + If omitted, the value returned by the last step + will be used as the final value. + @return {Database} The database object. Useful for method chaining + @template T + */ + Database.prototype["create_aggregate"] = function create_aggregate( + name, + aggregateFunctions + ) { + // Default initializer and finalizer + var init = aggregateFunctions["init"] + || function init() { return null; }; + var finalize = aggregateFunctions["finalize"] + || function finalize(state) { return state; }; + var step = aggregateFunctions["step"]; + + if (!step) { + throw "An aggregate function must have a step function in " + name; + } + + // state is a state object; we'll use the pointer p to serve as the + // key for where we hold our state so that multiple invocations of + // this function never step on each other + var state = {}; + + function wrapped_step(cx, argc, argv) { + // > The first time the sqlite3_aggregate_context(C,N) routine is + // > called for a particular aggregate function, SQLite allocates N + // > bytes of memory, zeroes out that memory, and returns a pointer + // > to the new memory. + // + // We're going to use that pointer as a key to our state array, + // since using sqlite3_aggregate_context as it's meant to be used + // through webassembly seems to be very difficult. Just allocate + // one byte. + var p = sqlite3_aggregate_context(cx, 1); + + // If this is the first invocation of wrapped_step, call `init` + // + // Make sure that every path through the step and finalize + // functions deletes the value state[p] when it's done so we don't + // leak memory and possibly stomp the init value of future calls + if (!Object.hasOwnProperty.call(state, p)) state[p] = init(); + + var args = parseFunctionArguments(argc, argv); + var mergedArgs = [state[p]].concat(args); + try { + state[p] = step.apply(null, mergedArgs); + } catch (error) { + delete state[p]; + sqlite3_result_error(cx, error, -1); + } + } + + function wrapped_finalize(cx) { + var result; + var p = sqlite3_aggregate_context(cx, 1); + try { + result = finalize(state[p]); + } catch (error) { + delete state[p]; + sqlite3_result_error(cx, error, -1); + return; + } + setFunctionResult(cx, result); + delete state[p]; + } + + if (Object.hasOwnProperty.call(this.functions, name)) { + removeFunction(this.functions[name]); + delete this.functions[name]; + } + var finalize_name = name + "__finalize"; + if (Object.hasOwnProperty.call(this.functions, finalize_name)) { + removeFunction(this.functions[finalize_name]); + delete this.functions[finalize_name]; + } + // The signature of the wrapped function is : + // void wrapped(sqlite3_context *db, int argc, sqlite3_value **argv) + var step_ptr = addFunction(wrapped_step, "viii"); + + // The signature of the wrapped function is : + // void wrapped(sqlite3_context *db) + var finalize_ptr = addFunction(wrapped_finalize, "vi"); + this.functions[name] = step_ptr; + this.functions[finalize_name] = finalize_ptr; + + // passing null to the sixth parameter defines this as an aggregate + // function + // + // > An aggregate SQL function requires an implementation of xStep and + // > xFinal and NULL pointer must be passed for xFunc. + // - http://www.sqlite.org/c3ref/create_function.html + this.handleError(sqlite3_create_function_v2( + this.db, + name, + step.length - 1, + SQLITE_UTF8, + 0, + 0, + step_ptr, + finalize_ptr, + 0 + )); + return this; + }; + // export Database to Module Module.Database = Database; }; diff --git a/src/exported_functions.json b/src/exported_functions.json index b93b07d2..324017ae 100644 --- a/src/exported_functions.json +++ b/src/exported_functions.json @@ -41,5 +41,6 @@ "_sqlite3_result_int", "_sqlite3_result_int64", "_sqlite3_result_error", +"_sqlite3_aggregate_context", "_RegisterExtensionFunctions" ] diff --git a/test/test_aggregate_functions.js b/test/test_aggregate_functions.js new file mode 100644 index 00000000..d28c775f --- /dev/null +++ b/test/test_aggregate_functions.js @@ -0,0 +1,132 @@ +exports.test = function (SQL, assert) { + function assertFloat(got, expected, message="", sigma=0.001) { + assert.ok(got > expected - sigma && got < expected + sigma, message); + } + + var db = new SQL.Database(); + + db.create_aggregate("sum", { + step: function (state, value) { return (state || 0) + value; }, + }); + + db.exec("CREATE TABLE test (col);"); + db.exec("INSERT INTO test VALUES (1), (2), (3), (null);"); + var result = db.exec("SELECT sum(col) FROM test;"); + assert.equal(result[0].values[0][0], 6, "Simple aggregate function."); + + db.create_aggregate("percentile", { + init: function() { return { vals: [], pctile: null }}, + step: function (state, value, pctile) { + var typ = typeof value; + if (typ == "number" || typ == "bigint") { // ignore nulls + state.pctile = pctile; + state.vals.push(value); + } + return state; + }, + finalize: function (state) { + return percentile(state.vals, state.pctile); + } + }); + result = db.exec("SELECT percentile(col, 80) FROM test;"); + assertFloat(result[0].values[0][0], 2.6, "Aggregate function with two args"); + + db.create_aggregate("json_agg", { + init: () => [], + step: (state, val) => [...state, val], + finalize: (state) => JSON.stringify(state), + }); + + db.exec("CREATE TABLE test2 (col, col2);"); + db.exec("INSERT INTO test2 values ('four score', 12), ('and seven', 7), ('years ago', 1);"); + result = db.exec("SELECT json_agg(col) FROM test2;"); + assert.deepEqual( + JSON.parse(result[0].values[0]), + ["four score", "and seven", "years ago"], + "Aggregate function that returns JSON" + ); + + result = db.exec("SELECT json_agg(col), json_agg(col2) FROM test2;"); + assert.deepEqual( + result[0].values[0].map(JSON.parse), + [["four score", "and seven", "years ago"], [12, 7, 1]], + "Multiple aggregations at once" + ); + + db.create_aggregate("is_even", { + init: () => true, + step: state => !state + }); + result = db.exec("SELECT is_even() FROM (VALUES (1),(2),(0));"); + assert.deepEqual( + result[0].values[0][0], + 0, // this gets convert from "false" to an int by sqlite + "Aggregate functions respect falsy values" + ); + + db.create_aggregate("sum_non_zero", { + init: () => 0, + step: (state, value) => { + if (!value) throw "bananas"; + return state + value + } + }); + assert.throws( + () => db.exec("SELECT sum_non_zero(column1) FROM (VALUES (1),(2),(0));"), + "Error: bananas", + "Handles exception in a step function" + ); + assert.deepEqual( + db.exec("SELECT sum_non_zero(column1) FROM (VALUES (1),(2));")[0].values[0][0], + 3, + "Aggregate functions work after an exception has been thrown in step" + ); + + db.create_aggregate("throws_finalize", { + step: (state, value) => (state || 0) + value, + finalize: (state) => { + if (!state) throw "shoes" + return state; + } + }); + assert.throws( + () => db.exec("SELECT throws_finalize(column1) FROM (VALUES (0));"), + "Error: shoes", + "Handles exception in a finalize function" + ); + assert.deepEqual( + db.exec("SELECT throws_finalize(column1) FROM (VALUES (1),(2));")[0].values[0][0], + 3, + "Aggregate functions work after an exception has been thrown in finalize" + ); +} + +// helper function to calculate a percentile from an array. Will modify the +// array in-place. +function percentile(arr, p) { + arr.sort(); + const pos = (arr.length - 1) * (p / 100); + const base = Math.floor(pos); + const rest = pos - base; + if (arr[base + 1] !== undefined) { + return arr[base] + rest * (arr[base + 1] - arr[base]); + } else { + return arr[base]; + } +}; + +if (module == require.main) { + const target_file = process.argv[2]; + const sql_loader = require('./load_sql_lib'); + sql_loader(target_file).then((sql)=>{ + require('test').run({ + 'test functions': function(assert, done){ + exports.test(sql, assert, done); + } + }); + }) + .catch((e)=>{ + console.error(e); + assert.fail(e); + }); +} diff --git a/test/test_aggregate_redefinition.js b/test/test_aggregate_redefinition.js new file mode 100644 index 00000000..05779720 --- /dev/null +++ b/test/test_aggregate_redefinition.js @@ -0,0 +1,80 @@ +exports.test = function(sql, assert) { + // Test 1: Create a database, Register single function, close database, repeat 1000 times + for (var i = 1; i <= 1000; i++) + { + let lastStep = i == 1000; + let db = new sql.Database(); + try { + db.create_aggregate("TestFunction"+i, {step: (state, value) => i}) + } catch(e) { + assert.ok( + false, + "Test 1: Recreate database "+i+"th times and register aggregate" + +" function failed with exception:"+e + ); + db.close(); + break; + } + var result = db.exec("SELECT TestFunction"+i+"(1)"); + var result_str = result[0]["values"][0][0]; + if(result_str != i || lastStep) + { + assert.equal( + result_str, + i, + "Test 1: Recreate database "+i+"th times and register aggregate function" + ); + db.close(); + break; + } + db.close(); + } + + // Test 2: Create a database, Register same function 1000 times, close database + { + let db = new sql.Database(); + for (var i = 1; i <= 1000; i++) + { + let lastStep = i == 1000; + try { + db.create_aggregate("TestFunction", {step: (state, value) => i}) + } catch(e) { + assert.ok( + false, + "Test 2: Reregister aggregate function "+i+"th times failed with" + +" exception:"+e + ); + break; + } + var result = db.exec("SELECT TestFunction(1)"); + var result_str = result[0]["values"][0][0]; + if(result_str != i || lastStep) + { + assert.equal( + result_str, + i, + "Test 2: Reregister function "+i+"th times" + ); + break; + } + } + db.close(); + } +}; + + +if (module == require.main) { + const target_file = process.argv[2]; + const sql_loader = require('./load_sql_lib'); + sql_loader(target_file).then((sql)=>{ + require('test').run({ + 'test creating multiple functions': function(assert){ + exports.test(sql, assert); + } + }); + }) + .catch((e)=>{ + console.error(e); + assert.fail(e); + }); +}