Skip to content

Commit 6bec33b

Browse files
authored
feat(anonymizer): base on pg_anonymizer, use detect to check columns. (#18)
1 parent 7374c20 commit 6bec33b

File tree

4 files changed

+113
-0
lines changed

4 files changed

+113
-0
lines changed

dblinter/default_config.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,14 @@ table:
179179
message: "Uppercase used on table {0}.{1}.{2}."
180180
fixes:
181181
- Do not use uppercase for any database objects
182+
- name: TableWithSensibleColumn
183+
ruleid: T012
184+
enabled: True
185+
context:
186+
desc: Based on the anon extension (https://postgresql-anonymizer.readthedocs.io/en/stable/detection), reports columns detected as sensitive.
187+
message: "{0} has column {1} (category {2}) that can be considered sensitive. It should be masked for non data-operator users."
188+
fixes:
189+
- Install the anon extension and create masking rules on the sensitive columns.
182190
schema:
183191
- name: SchemaWithDefaultRoleNotGranted
184192
ruleid: S001
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import logging
2+
3+
from dblinter.database_connection import DatabaseConnection
4+
5+
LOGGER = logging.getLogger("dblinter")
6+
7+
8+
def table_with_sensible_column(
    self, db: DatabaseConnection, _, context, table, sarif_document
):
    """Report the columns of one table that the anon extension flags as sensitive.

    The check relies on ``anon.detect`` from the PostgreSQL Anonymizer
    extension, queried with the ``en_US`` and ``fr_FR`` dictionaries. When
    the extension is not installed in the database, the check logs a hint
    and returns without reporting anything.

    Args:
        self: Object providing ``get_ruleid_from_function_name()``.
        db: Connection used to run the queries; ``db.database`` is used to
            build the reported URI.
        _: Unused.
        context: Rule context forwarded unchanged to
            ``sarif_document.add_check``.
        table: Sequence whose first item is the schema name and second item
            is the table name.
        sarif_document: Collector receiving one ``add_check`` call per
            detected (column, category) pair.
    """
    LOGGER.debug(
        "table_with_sensible_column for %s.%s in db %s", table[0], table[1], db.database
    )
    check_extension = "select count(*) as nb from pg_extension where extname='anon'"
    anon = db.query(check_extension)[0][0]
    if anon == 0:
        LOGGER.info(
            "TableWithSensibleColumn is enabled, but anon extension not found. in db %s. see https://postgresql-anonymizer.readthedocs.io to install",
            db.database,
        )
        return

    def as_sql_literal(value):
        # Double embedded single quotes so an identifier containing a quote
        # cannot terminate the literal early (assumes the PostgreSQL default
        # standard_conforming_strings=on).
        return "'" + str(value).replace("'", "''") + "'"

    schema_literal = as_sql_literal(table[0])
    table_literal = as_sql_literal(table[1])

    # Filter on schema as well as table name: relname alone also matches
    # same-named tables living in other schemas. The ORDER BY makes the
    # reported order deterministic.
    sensitive_cols_query = f"""with coltable as (
        select d.column_name, d.identifiers_category
        from anon.detect('en_US') d
        join pg_class c on c.oid = d.table_name
        join pg_namespace n on n.oid = c.relnamespace
        where c.relname = {table_literal}
        and n.nspname = {schema_literal}
        union
        select d.column_name, d.identifiers_category
        from anon.detect('fr_FR') d
        join pg_class c on c.oid = d.table_name
        join pg_namespace n on n.oid = c.relnamespace
        where c.relname = {table_literal}
        and n.nspname = {schema_literal}
    )
    select distinct column_name, identifiers_category
    from coltable
    order by column_name, identifiers_category
    """

    uri = f"{db.database}.{table[0]}.{table[1]}"
    # An empty or missing result simply means nothing to report.
    for column_name, category in db.query(sensitive_cols_query) or []:
        message_args = (uri, column_name, category)
        sarif_document.add_check(
            self.get_ruleid_from_function_name(), message_args, uri, context
        )

tests/conftest.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from testcontainers.core.waiting_utils import wait_container_is_ready
55
from testcontainers.postgres import PostgresContainer
66

7+
# PG_IMAGE = "registry.gitlab.com/dalibo/postgresql_anonymizer:latest"
78
PG_IMAGE = "postgres:14"
89
PG_PORT = 5432
910
PG_USER = "postgres"
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
from dblinter.configuration_model import Context
2+
from dblinter.database_connection import DatabaseConnection
3+
from dblinter.function_library import FunctionLibrary
4+
from dblinter.sarif_document import SarifDocument
5+
6+
7+
def test_table_with_sensitive_column(postgres_instance_args) -> None:
    """Check that columns flagged by anon end up as SARIF results.

    The test returns early, and therefore passes, when the anon extension is
    not installed in the test database.
    """
    connection = DatabaseConnection(postgres_instance_args)
    extension_count = connection.query(
        "select count(*) as nb from pg_extension where extname='anon'"
    )[0][0]
    if extension_count == 0:
        # Nothing can be verified without the extension.
        return

    rule_context = Context(
        desc="Base on the extension anon (https://postgresql-anonymizer.readthedocs.io/en/stable/detection), show sensitive column.",
        fixes=[
            "Install extension anon, and create some masking rules on.",
        ],
        message="{0} have column {1} (category {2}) that can be consider has sensitive. It should be masked for non data-operator users.",
    )
    library = FunctionLibrary()
    connection.query("select anon.init()")
    connection.query("CREATE TABLE test (id integer, creditcard text)")
    report = SarifDocument()

    rule = library.get_function_by_function_name("table_with_sensible_column")
    rule(library, connection, [], rule_context, ("public", "test"), report)

    # One result per detected column is expected, in this order.
    first_message = report.sarif_doc.runs[0].results[0].message.text
    assert (
        first_message
        == "postgres.public.test have column creditcard (category creditcard) that can be consider has sensitive. It should be masked for non data-operator users."
    )
    second_message = report.sarif_doc.runs[0].results[1].message.text
    assert (
        second_message
        == "postgres.public.test have column id (category account_id) that can be consider has sensitive. It should be masked for non data-operator users."
    )
37+
38+
39+
def test_table_without_sensitive_column(postgres_instance_args) -> None:
    """Check that a table with no flagged column produces no SARIF result.

    The test returns early, and therefore passes, when the anon extension is
    not installed in the test database.
    """
    connection = DatabaseConnection(postgres_instance_args)
    extension_count = connection.query(
        "select count(*) as nb from pg_extension where extname='anon'"
    )[0][0]
    if extension_count == 0:
        # Nothing can be verified without the extension.
        return

    rule_context = Context(
        desc="Base on the extension anon (https://postgresql-anonymizer.readthedocs.io/en/stable/detection), show sensitive column.",
        fixes=[
            "Install extension anon, and create some masking rules on.",
        ],
        message="{0} have column {1} (category {2}) that can be consider has sensitive. It should be masked for non data-operator users.",
    )
    library = FunctionLibrary()
    connection.query("select anon.init()")
    connection.query("CREATE TABLE test (test_id integer, description text)")
    report = SarifDocument()

    rule = library.get_function_by_function_name("table_with_sensible_column")
    rule(library, connection, [], rule_context, ("public", "test"), report)

    # The rule must stay silent for this table.
    assert report.sarif_doc.runs[0].results == []

0 commit comments

Comments
 (0)