Skip to content

Enable custom aggregate functions (take 2) #529

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 36 commits into from
Sep 8, 2022
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
24a6a12
initial commit.
Jul 3, 2020
fad9ba6
documentation
llimllib Sep 5, 2022
d191caa
remove no-longer-valid type
llimllib Sep 5, 2022
0d937a7
close over state initialization for performance
llimllib Sep 5, 2022
8fd3f8a
link documentation in comment
llimllib Sep 5, 2022
ba733ba
more testing
llimllib Sep 5, 2022
9e6b462
run tests if they're main
llimllib Sep 6, 2022
573afa7
accept a single arg
llimllib Sep 6, 2022
a3abdcb
this kind of works but I'm abandoning this branch
llimllib Sep 6, 2022
9daf01f
a middle road sqlite3_agg_context solution
llimllib Sep 6, 2022
ec5c72b
try out auto-updating state
llimllib Sep 6, 2022
a927950
improve quantile test, add multiple agg test
llimllib Sep 6, 2022
e643bd9
add a null to the test
llimllib Sep 6, 2022
2cbdb0e
acorn fails to parse ||=, whatever
llimllib Sep 6, 2022
b9ccd48
make eslint happy
llimllib Sep 6, 2022
ac548d4
make initial_value an argument
llimllib Sep 7, 2022
bf22aa1
test step and finalize exceptions
llimllib Sep 7, 2022
55858e9
add memory leak test
llimllib Sep 7, 2022
9a0c185
update docs to current interface
llimllib Sep 7, 2022
2445107
delete state in exception handlers
llimllib Sep 7, 2022
5b62cf6
remove null state
llimllib Sep 7, 2022
062f147
return init function and document object
llimllib Sep 7, 2022
7aff1ae
more tests and update back to init function
llimllib Sep 7, 2022
67f85e5
update redefinition test for new interface
llimllib Sep 7, 2022
b8692d4
update README to match fixed signature
llimllib Sep 7, 2022
b41e5cf
more consistent test formatting
llimllib Sep 7, 2022
d257bba
Update README.md
llimllib Sep 7, 2022
e82c286
clarify what exactly the result will contain
llimllib Sep 7, 2022
b65457c
Update README.md
lovasoa Sep 7, 2022
8d2c2e0
Update README.md
lovasoa Sep 7, 2022
f8f4a7c
Update README.md
lovasoa Sep 7, 2022
bdaa1b6
Update README.md
lovasoa Sep 7, 2022
e86d7ff
Update README.md
lovasoa Sep 7, 2022
423fc36
Improve documentation and type annotations
lovasoa Sep 8, 2022
f8e7bd3
ignore documentation in eslintrc
lovasoa Sep 8, 2022
799ebcd
reduce code size
lovasoa Sep 8, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,45 @@ db.create_function("add_js", add);
// Run a query in which the function is used
db.run("INSERT INTO hello VALUES (add_js(7, 3), add_js('Hello ', 'world'));"); // Inserts 10 and 'Hello world'

// You can create aggregate functions by passing a name and three functions
// to `db.create_aggregate`:
//
// - an init function. This function receives no arguments and will be called
// when the aggregate begins. Returns a state object that will be passed to the
// other two functions if you need to track state.
// - a step function. This function receives as a first argument the state
// object created in init, as well as the values received in the step. It
// will be called on every value to be aggregated. Does not return anything.
// - a finalizer. This function receives one argument, the state object, and
// returns the final value of the aggregate
//
// Here is an example aggregation function, `json_agg`, which will collect all
// input values and return them as a JSON array:
db.create_aggregate(
"json_agg",
function() {
// This is the init function, which returns a state object:
return {
values: []
};
},
function(state, val) {
// This is the step function, which will store each value it receives in
// the values array of the state object
state.values.push(val);
},
function(state) {
// This is the finalize function, which converts the received values from
// the state object into a JSON array and returns that
return JSON.stringify(state.values);
}
);

// Now if you run this query:
var result = db.exec("SELECT json_agg(somecol) FROM atable;");

// result[0].values[0][0] will be a JSON-encoded string containing every value of `somecol` in `atable`.

// Export the database to an Uint8Array containing the SQLite database file
const binaryArray = db.export();
```
Expand Down
220 changes: 158 additions & 62 deletions src/api.js
Original file line number Diff line number Diff line change
Expand Up @@ -1131,81 +1131,90 @@ Module["onRuntimeInitialized"] = function onRuntimeInitialized() {
return sqlite3_changes(this.db);
};

/** Register a custom function with SQLite
@example Register a simple function
db.create_function("addOne", function (x) {return x+1;})
db.exec("SELECT addOne(1)") // = 2
// Copy the blob stored in the SQLite value at `ptr` into a new Uint8Array.
// The byte count comes from sqlite3_value_bytes(ptr) and the bytes are read
// from HEAP8 starting at the address returned by sqlite3_value_blob(ptr).
// The returned array is a copy, so it does not alias HEAP8.
var extract_blob = function extract_blob(ptr) {
    var byteCount = sqlite3_value_bytes(ptr);
    var start = sqlite3_value_blob(ptr);
    var bytes = new Uint8Array(byteCount);
    var offset = 0;
    while (offset < byteCount) {
        // Storing into a Uint8Array keeps each value in the 0..255 range.
        bytes[offset] = HEAP8[start + offset];
        offset += 1;
    }
    return bytes;
};

@param {string} name the name of the function as referenced in
SQL statements.
@param {function} func the actual function to be executed.
@return {Database} The database object. Useful for method chaining
*/
// Convert the `argc` SQLite values referenced by the pointer array at `argv`
// into an array of JavaScript values:
//  - INTEGER and FLOAT values are read with sqlite3_value_double
//  - TEXT values are read with sqlite3_value_text
//  - BLOB values are copied with extract_blob
//  - any other type becomes null
var parseFunctionArguments = function parseFunctionArguments(argc, argv) {
    function readValue(valuePtr) {
        switch (sqlite3_value_type(valuePtr)) {
            case SQLITE_INTEGER:
            case SQLITE_FLOAT:
                return sqlite3_value_double(valuePtr);
            case SQLITE_TEXT:
                return sqlite3_value_text(valuePtr);
            case SQLITE_BLOB:
                return extract_blob(valuePtr);
            default:
                return null;
        }
    }

    var parsed = [];
    for (var index = 0; index < argc; index += 1) {
        // Each entry of argv is read as an "i32" located 4 bytes after the
        // previous one.
        parsed.push(readValue(getValue(argv + (4 * index), "i32")));
    }
    return parsed;
};
// Report the JavaScript value `result` as the result of the SQLite function
// call identified by the context `cx`:
//  - booleans are reported as the integers 1 / 0
//  - numbers are reported as doubles
//  - strings are reported as text
//  - null, and any value that is neither a boolean, number, string nor
//    object, is reported as NULL
//  - objects with a non-null `length` are reported as blobs
//  - any other object is reported as an error
var setFunctionResult = function setFunctionResult(cx, result) {
    var kind = typeof result;
    if (kind === "boolean") {
        sqlite3_result_int(cx, result ? 1 : 0);
        return;
    }
    if (kind === "number") {
        sqlite3_result_double(cx, result);
        return;
    }
    if (kind === "string") {
        sqlite3_result_text(cx, result, -1, -1);
        return;
    }
    if (kind !== "object" || result === null) {
        sqlite3_result_null(cx);
        return;
    }
    if (result.length != null) {
        // The temporary buffer is freed right after it is handed to
        // sqlite3_result_blob.
        var buffer = allocate(result, ALLOC_NORMAL);
        sqlite3_result_blob(cx, buffer, result.length, -1);
        _free(buffer);
        return;
    }
    var message = "Wrong API use : tried to return a value of an unknown type ("
        + result + ").";
    sqlite3_result_error(cx, message, -1);
};

/** Register a custom function with SQLite
@example Register a simple function
db.create_function("addOne", function (x) {return x+1;})
db.exec("SELECT addOne(1)") // = 2

@param {string} name the name of the function as referenced in
SQL statements.
@param {function} func the actual function to be executed.
@return {Database} The database object. Useful for method chaining
*/
Database.prototype["create_function"] = function create_function(
name,
func
) {
function wrapped_func(cx, argc, argv) {
var args = parseFunctionArguments(argc, argv);
var result;
function extract_blob(ptr) {
var size = sqlite3_value_bytes(ptr);
var blob_ptr = sqlite3_value_blob(ptr);
var blob_arg = new Uint8Array(size);
for (var j = 0; j < size; j += 1) {
blob_arg[j] = HEAP8[blob_ptr + j];
}
return blob_arg;
}
var args = [];
for (var i = 0; i < argc; i += 1) {
var value_ptr = getValue(argv + (4 * i), "i32");
var value_type = sqlite3_value_type(value_ptr);
var arg;
if (
value_type === SQLITE_INTEGER
|| value_type === SQLITE_FLOAT
) {
arg = sqlite3_value_double(value_ptr);
} else if (value_type === SQLITE_TEXT) {
arg = sqlite3_value_text(value_ptr);
} else if (value_type === SQLITE_BLOB) {
arg = extract_blob(value_ptr);
} else arg = null;
args.push(arg);
}
try {
result = func.apply(null, args);
} catch (error) {
sqlite3_result_error(cx, error, -1);
return;
}
switch (typeof result) {
case "boolean":
sqlite3_result_int(cx, result ? 1 : 0);
break;
case "number":
sqlite3_result_double(cx, result);
break;
case "string":
sqlite3_result_text(cx, result, -1, -1);
break;
case "object":
if (result === null) {
sqlite3_result_null(cx);
} else if (result.length != null) {
var blobptr = allocate(result, ALLOC_NORMAL);
sqlite3_result_blob(cx, blobptr, result.length, -1);
_free(blobptr);
} else {
sqlite3_result_error(cx, (
"Wrong API use : tried to return a value "
+ "of an unknown type (" + result + ")."
), -1);
}
break;
default:
sqlite3_result_null(cx);
}
setFunctionResult(cx, result);
}
if (Object.prototype.hasOwnProperty.call(this.functions, name)) {
removeFunction(this.functions[name]);
Expand All @@ -1229,6 +1238,93 @@ Module["onRuntimeInitialized"] = function onRuntimeInitialized() {
return this;
};

/** Register a custom aggregate function with SQLite
@example Register an aggregate function
db.create_aggregate(
"js_sum",
function () { return { sum: 0 }; },
function (state, value) { state.sum+=value; },
function (state) { return state.sum; }
);
db.exec("CREATE TABLE test (col); INSERT INTO test VALUES (1), (2)");
db.exec("SELECT js_sum(col) FROM test"); // = 3

@param {string} name the name of the aggregate as referenced in
SQL statements.
@param {function} init called with no arguments when an aggregation
starts; returns the state object handed to step and finalize.
@param {function} step called for every aggregated row with the state
as first argument, followed by the SQL arguments. The aggregate is
registered with step.length - 1 SQL arguments.
@param {function} finalize called with the state once all rows have
been stepped; its return value is the result of the aggregate.
@return {Database} The database object. Useful for method chaining
*/
Database.prototype["create_aggregate"] = function create_aggregate(
    name,
    init,
    step,
    finalize
) {
    // State of the aggregation currently in progress. It is created lazily
    // (on the first step, or in finalize when no row was stepped) and dropped
    // once the aggregation ends, so every use of the aggregate starts from a
    // fresh init() result instead of reusing or nulling a single shared one.
    // NOTE(review): one state is still shared by all simultaneous uses of
    // this aggregate (e.g. `SELECT agg(a), agg(b)`); separating those would
    // presumably need sqlite3_aggregate_context, which is not visible here.
    var state;
    var hasState = false;

    function discardState() {
        state = undefined;
        hasState = false;
    }

    function wrapped_step(cx, argc, argv) {
        var args = parseFunctionArguments(argc, argv);
        try {
            if (!hasState) {
                state = init();
                hasState = true;
            }
            step.apply(null, [state].concat(args));
        } catch (error) {
            // Do not let a half-updated state leak into a later query.
            discardState();
            sqlite3_result_error(cx, error, -1);
        }
    }
    function wrapped_finalize(cx) {
        var result;
        try {
            // No row was stepped: finalize still receives an initial state.
            if (!hasState) {
                state = init();
            }
            result = finalize.apply(null, [state]);
        } catch (error) {
            discardState();
            sqlite3_result_error(cx, error, -1);
            return;
        }
        discardState();
        setFunctionResult(cx, result);
    }

    if (Object.prototype.hasOwnProperty.call(this.functions, name)) {
        removeFunction(this.functions[name]);
        delete this.functions[name];
    }
    if (Object.prototype.hasOwnProperty.call(
        this.functions,
        name + "__finalize"
    )) {
        removeFunction(this.functions[name + "__finalize"]);
        delete this.functions[name + "__finalize"];
    }
    // The signature of the wrapped function is :
    // void wrapped(sqlite3_context *db, int argc, sqlite3_value **argv)
    var step_ptr = addFunction(wrapped_step, "viii");
    // The signature of the wrapped function is :
    // void wrapped(sqlite3_context *db)
    var finalize_ptr = addFunction(wrapped_finalize, "vi");
    this.functions[name] = step_ptr;
    this.functions[name + "__finalize"] = finalize_ptr;

    // passing null to the sixth parameter defines this as an aggregate
    // function
    //
    // > An aggregate SQL function requires an implementation of xStep and
    // > xFinal and NULL pointer must be passed for xFunc.
    // - http://www.sqlite.org/c3ref/create_function.html
    this.handleError(sqlite3_create_function_v2(
        this.db,
        name,
        step.length - 1,
        SQLITE_UTF8,
        0,
        0,
        step_ptr,
        finalize_ptr,
        0
    ));
    return this;
};

// export Database to Module
Module.Database = Database;
};
55 changes: 55 additions & 0 deletions test/test_aggregate_functions.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
exports.test = function (SQL, assert) {
var db = new SQL.Database();

db.create_aggregate(
"sum",
function () { return { sum: 0 }; },
function (state, value) { state.sum += value; },
function (state) { return state.sum; }
);

db.exec("CREATE TABLE test (col);");
db.exec("INSERT INTO test VALUES (1), (2), (3);");
var result = db.exec("SELECT sum(col) FROM test;");
assert.equal(result[0].values[0][0], 6, "Simple aggregate function.");

db.create_aggregate(
"percentile",
function () { return { vals: [], pctile: null }; }, // init
function (state, value, pctile) {
state.vals.push(value);
},
function (state) {
return percentile(state.vals, state.pctile);
}
);
var result = db.exec("SELECT percentile(col, 20) FROM test;");
assert.equal(result[0].values[0][0], 1, "Aggregate function with two args");

db.create_aggregate(
"json_agg",
function() { return { vals: [] }; },
function(state, val) { state.vals.push(val); },
function(state) { return JSON.stringify(state.vals); }
);

db.exec("CREATE TABLE test2 (col, col2);");
db.exec("INSERT INTO test2 values ('four score', 12), ('and seven', 7), ('years ago', 1);");
var result = db.exec("SELECT json_agg(col) FROM test2;");
assert.deepEqual(JSON.parse(result[0].values[0]), ["four score", "and seven", "years ago"], "Aggregate function that returns JSON");
}

// Helper function to calculate the p-th percentile (p between 0 and 100) of
// an array of numbers, interpolating linearly between the two nearest
// elements. Will modify the array in-place (it gets sorted ascending).
function percentile(arr, p) {
    // A comparator is required: the default sort compares elements as
    // strings, which would order [1, 9, 10] as [1, 10, 9].
    arr.sort(function (a, b) { return a - b; });
    const pos = (arr.length - 1) * (p / 100);
    const base = Math.floor(pos);
    const rest = pos - base;
    if (arr[base + 1] !== undefined) {
        return arr[base] + rest * (arr[base + 1] - arr[base]);
    }
    // `base` is the last element: nothing to interpolate with.
    return arr[base];
}