diff --git a/README.adoc b/README.adoc index 9887cfa..9b51c06 100644 --- a/README.adoc +++ b/README.adoc @@ -39,7 +39,11 @@ Verify that you can connect to the database by running $ psql -d trends -You can exit the `psql` shell using the `\q` command. +You can exit the `psql` shell using the `\q` command. Finally, before importing any data, run + + $ sudo -u postgres psql -d trends -f trends/bloom.sql + +to enable the bloom index extension (which must be done as a superuser). === Importing data diff --git a/test/conftest.py b/test/conftest.py index cfafecb..24f6112 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -3,10 +3,12 @@ from contextlib import contextmanager import logging +import os import pytest from testing.postgresql import Postgresql +import trends from trends.sql import db_connect from .create import create_test_db @@ -22,6 +24,10 @@ def database(request): postgres_args = Postgresql.DEFAULT_SETTINGS['postgres_args'] postgres_args += " -c full_page_writes=off" with Postgresql(postgres_args=postgres_args) as database: + with db_connect(database.url()) as c: + with open(f"{os.path.dirname(trends.__file__)}/bloom.sql") as bloom: + c.cursor().execute(bloom.read()) + with caplog_session(request) as caplog: with caplog.at_level(logging.ERROR): create_test_db(database.url()) diff --git a/test/create.py b/test/create.py index 8357d57..3ceacda 100755 --- a/test/create.py +++ b/test/create.py @@ -88,16 +88,15 @@ def create_test_db(url): with db_connect(url) as c: cur = c.cursor() cur.execute("ANALYZE;") - # A second time to test partitioning log_json - db_init(c) - cur.execute("REFRESH MATERIALIZED VIEW leaderboard_cube;") - cur.execute("REFRESH MATERIALIZED VIEW map_popularity;") - - with db_connect(url) as c: class args: since = datetime.fromtimestamp(0) trends.importer.link_demos.link_logs(args, c) trends.importer.link_matches.link_matches(args, c) + cur.execute("ANALYZE;") + # A second time to test partitioning log_json + db_init(c) + cur.execute("REFRESH MATERIALIZED VIEW leaderboard_cube;") + cur.execute("REFRESH MATERIALIZED VIEW map_popularity;") if __name__ == '__main__': if len(sys.argv) != 2: diff --git a/trends/bloom.sql b/trends/bloom.sql new file mode 100644 index 0000000..1fce69a --- /dev/null +++ b/trends/bloom.sql @@ -0,0 +1,9 @@ +CREATE EXTENSION IF NOT EXISTS bloom; + +DO $$ BEGIN + CREATE OPERATOR CLASS enum_ops DEFAULT FOR TYPE anyenum USING bloom AS + OPERATOR 1 =(anyenum, anyenum), + FUNCTION 1 hashenum(anyenum); +EXCEPTION WHEN duplicate_object THEN + NULL; +END $$; diff --git a/trends/migrations/leaderboard_bloom.sql b/trends/migrations/leaderboard_bloom.sql new file mode 100644 index 0000000..34186e0 --- /dev/null +++ b/trends/migrations/leaderboard_bloom.sql @@ -0,0 +1,73 @@ +BEGIN; + +DROP MATERIALIZED VIEW leaderboard_cube; +CREATE MATERIALIZED VIEW leaderboard_cube AS SELECT + playerid, + league, + formatid, + primary_classid AS classid, + mapid, + grouping(playerid, league, formatid, primary_classid, mapid) AS grouping, + sum(log.duration) AS duration, + sum((wins > losses)::INT) AS wins, + sum((wins = losses)::INT) AS ties, + sum((wins < losses)::INT) AS losses, + sum(kills) AS kills, + sum(deaths) AS deaths, + sum(assists) AS assists, + sum(dmg) AS dmg, + sum(dt) AS dt, + sum(shots) AS shots, + sum(hits) AS hits +FROM log_nodups AS log +JOIN player_stats USING (logid) +GROUP BY CUBE (playerid, league, formatid, classid, mapid) +ORDER BY mapid, classid, formatid, playerid, league; + +-- To help out the query planner +CREATE STATISTICS IF NOT EXISTS leaderboard_stats (dependencies, ndistinct, mcv) + ON league, formatid, classid, mapid, grouping + FROM leaderboard_cube; + +-- When we have no filters (or nothing better) +CREATE INDEX IF NOT EXISTS leaderboard_grouping ON leaderboard_cube (grouping); + +-- When we have a single filter +CREATE INDEX IF NOT EXISTS leaderboard_league ON leaderboard_cube (league) + WHERE playerid NOTNULL + AND league NOTNULL + AND formatid ISNULL + AND classid ISNULL + AND mapid ISNULL + AND grouping = b'01110'::INT; +CREATE INDEX IF NOT EXISTS leaderboard_format ON leaderboard_cube (formatid) + WHERE playerid NOTNULL + AND league ISNULL + AND formatid NOTNULL + AND classid ISNULL + AND mapid ISNULL + AND grouping = b'01110'::INT; +CREATE INDEX IF NOT EXISTS leaderboard_class ON leaderboard_cube (classid) + WHERE playerid NOTNULL + AND league ISNULL + AND formatid ISNULL + AND classid NOTNULL + AND mapid ISNULL + AND grouping = b'01110'::INT; +CREATE INDEX IF NOT EXISTS leaderboard_map ON leaderboard_cube (mapid) + WHERE playerid NOTNULL + AND league ISNULL + AND formatid ISNULL + AND classid ISNULL + AND mapid NOTNULL + AND grouping = b'01110'::INT; + +-- When we have multiple filters +CREATE INDEX IF NOT EXISTS leaderboard_bloom ON leaderboard_cube + USING bloom (grouping, mapid, classid, formatid, league) + WITH (col1=1, col2=1, col3=1, col4=1, col5=1) + WHERE playerid NOTNULL; + +COMMIT; + +ANALYZE VERBOSE leaderboard_cube; diff --git a/trends/schema.sql b/trends/schema.sql index edfcbc2..fd1a17b 100644 --- a/trends/schema.sql +++ b/trends/schema.sql @@ -695,6 +695,7 @@ CREATE MATERIALIZED VIEW IF NOT EXISTS leaderboard_cube AS SELECT formatid, primary_classid AS classid, mapid, + grouping(playerid, league, formatid, primary_classid, mapid) AS grouping, sum(log.duration) AS duration, sum((wins > losses)::INT) AS wins, sum((wins = losses)::INT) AS ties, @@ -712,10 +713,49 @@ GROUP BY CUBE (playerid, league, formatid, classid, mapid) ORDER BY mapid, classid, formatid, playerid, league WITH NO DATA; -CREATE UNIQUE INDEX IF NOT EXISTS leaderboard_pkey - ON leaderboard_cube (mapid, classid, formatid, playerid, league); - -CREATE INDEX IF NOT EXISTS leaderboard_classid ON leaderboard_cube (classid, formatid); +-- To help out the query planner +CREATE STATISTICS IF NOT EXISTS leaderboard_stats (dependencies, ndistinct, mcv) + ON league, formatid, classid, mapid, grouping + FROM leaderboard_cube; + +-- When we have no filters (or nothing better) +CREATE INDEX IF NOT EXISTS leaderboard_grouping ON leaderboard_cube (grouping); + +-- When we have a single filter +CREATE INDEX IF NOT EXISTS leaderboard_league ON leaderboard_cube (league) + WHERE playerid NOTNULL + AND league NOTNULL + AND formatid ISNULL + AND classid ISNULL + AND mapid ISNULL + AND grouping = b'01110'::INT; +CREATE INDEX IF NOT EXISTS leaderboard_format ON leaderboard_cube (formatid) + WHERE playerid NOTNULL + AND league ISNULL + AND formatid NOTNULL + AND classid ISNULL + AND mapid ISNULL + AND grouping = b'01110'::INT; +CREATE INDEX IF NOT EXISTS leaderboard_class ON leaderboard_cube (classid) + WHERE playerid NOTNULL + AND league ISNULL + AND formatid ISNULL + AND classid NOTNULL + AND mapid ISNULL + AND grouping = b'01110'::INT; +CREATE INDEX IF NOT EXISTS leaderboard_map ON leaderboard_cube (mapid) + WHERE playerid NOTNULL + AND league ISNULL + AND formatid ISNULL + AND classid ISNULL + AND mapid NOTNULL + AND grouping = b'01110'::INT; + +-- When we have multiple filters +CREATE INDEX IF NOT EXISTS leaderboard_bloom ON leaderboard_cube + USING bloom (grouping, mapid, classid, formatid, league) + WITH (col1=1, col2=1, col3=1, col4=1, col5=1) + WHERE playerid NOTNULL; CREATE TABLE IF NOT EXISTS weapon ( weaponid SERIAL PRIMARY KEY, diff --git a/trends/site/root.py b/trends/site/root.py index 85299dc..b131e96 100644 --- a/trends/site/root.py +++ b/trends/site/root.py @@ -74,11 +74,16 @@ def leaderboard(): # Since we are using a cube, we need to explicitly select the NULL rows cube_clauses = [] - for (name, column) in (('class', 'classid'), ('format', 'formatid'), ('map', 'mapid')): + grouping = 0b00000 + for (name, column, group) in ( + ('map', 'mapid', 0b00001), + ('class', 'classid', 0b00010), + ('format', 'formatid', 0b00100), + ('league', 'league', 0b01000), + ): if not filters[name]: - cube_clauses.append("AND {} ISNULL".format(column)) - if not filters['league']: - cube_clauses.append("AND league ISNULL") + cube_clauses.append(f"AND {column} ISNULL") + grouping |= group cube_clauses = '\n'.join(cube_clauses) order, order_clause = get_order({ @@ -137,7 +142,7 @@ def leaderboard(): sum(dmg) * 1.0 / nullif(sum(dt), 0) AS dr, sum(hits) * 1.0 / nullif(sum(shots), 0) AS acc FROM leaderboard_cube - WHERE playerid NOTNULL + WHERE playerid NOTNULL AND grouping = %(grouping)s {} {} GROUP BY playerid @@ -147,7 +152,7 @@ def leaderboard(): LEFT JOIN player USING (playerid) LEFT JOIN name USING (nameid);""" .format(filter_clauses, cube_clauses, order_clause), - { **filters, 'limit': limit, 'offset': offset }) + { **filters, 'grouping': grouping, 'limit': limit, 'offset': offset }) resp = flask.make_response(flask.render_template("leaderboard.html", leaderboard=leaderboard.fetchall())) resp.cache_control.max_age = 3600