From 5e47dcb885289937b54ab13928fb7db41b7de95c Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Tue, 27 Aug 2024 09:15:54 +1000 Subject: [PATCH 1/6] Pull out new _extract_tokens for easier testing --- casbin/persist/adapter.py | 17 ++++++++--- tests/persist/test_adapter.py | 53 +++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 4 deletions(-) create mode 100644 tests/persist/test_adapter.py diff --git a/casbin/persist/adapter.py b/casbin/persist/adapter.py index 2c9b9a38..f4c69bdc 100644 --- a/casbin/persist/adapter.py +++ b/casbin/persist/adapter.py @@ -13,14 +13,14 @@ # limitations under the License. -def load_policy_line(line, model): - """loads a text line as a policy rule to model.""" +def _extract_tokens(line): + """Return the list of 'tokens' from the line, or None if this line has none""" if line == "": - return + return None if line[:1] == "#": - return + return None stack = [] tokens = [] @@ -40,6 +40,15 @@ def load_policy_line(line, model): tokens[-1] += c tokens = [x.strip() for x in tokens] + return tokens + + +def load_policy_line(line, model): + """loads a text line as a policy rule to model.""" + + tokens = _extract_tokens(line) + if tokens is None: + return key = tokens[0] sec = key[0] diff --git a/tests/persist/test_adapter.py b/tests/persist/test_adapter.py new file mode 100644 index 00000000..c6028544 --- /dev/null +++ b/tests/persist/test_adapter.py @@ -0,0 +1,53 @@ +from casbin.persist.adapter import _extract_tokens +from tests import TestCaseBase + + +class TestExtractTokens(TestCaseBase): + def test_ignore_lines(self): + self.assertIsNone(_extract_tokens("")) # empty + self.assertIsNone(_extract_tokens("# comment")) + + def test_simple_lines(self): + # split on top-level commas, strip whitespace from start and end + self.assertEqual(_extract_tokens("one"), ["one"]) + self.assertEqual(_extract_tokens("one,two"), ["one", "two"]) + self.assertEqual(_extract_tokens(" ignore \t,\t external, spaces "), ["ignore", "external", "spaces"]) + + self.assertEqual(_extract_tokens("internal spaces preserved"), ["internal spaces preserved"]) + + def test_nested_lines(self): + # basic nesting within a single token + self.assertEqual( + _extract_tokens("outside1()"), + ["outside1()"], + ) + self.assertEqual( + _extract_tokens("outside1(inside1())"), + ["outside1(inside1())"], + ) + + # split on top-level commas, but not on internal ones + self.assertEqual( + _extract_tokens("outside1(inside1(), inside2())"), + ["outside1(inside1(), inside2())"], + ) + self.assertEqual( + _extract_tokens("outside1(inside1(), inside2(inside3(), inside4()))"), + ["outside1(inside1(), inside2(inside3(), inside4()))"], + ) + self.assertEqual( + _extract_tokens("outside1(inside1(), inside2()), outside2(inside3(), inside4())"), + ["outside1(inside1(), inside2())", "outside2(inside3(), inside4())"], + ) + + # different delimiters + self.assertEqual( + _extract_tokens( + "all_square[inside1[], inside2[]],square_and_parens[inside1(), inside2()],parens_and_square(inside1[], inside2[])" + ), + [ + "all_square[inside1[], inside2[]]", + "square_and_parens[inside1(), inside2()]", + "parens_and_square(inside1[], inside2[])", + ], + ) From f063e784b69b7909896ccb78f27cb5e2ffd9b027 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Tue, 27 Aug 2024 09:26:29 +1000 Subject: [PATCH 2/6] [0002_original] Add a benchmark too: -------------------------------------------------------------------------------------------------- benchmark: 4 tests -------------------------------------------------------------------------------------------------- Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS (Kops/s) Rounds Iterations ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ test_benchmark_extract_tokens_short_simple 1.4580 (1.0) 58.3340 (1.0) 1.5731 (1.0) 0.5280 (1.0) 1.5420 (1.0) 0.0411 (1.0) 53;455 635.6819 (1.0) 22305 1 test_benchmark_extract_tokens_short_nested 3.0829 (2.11) 13,417.2500 (230.01) 3.8216 (2.43) 49.5247 (93.80) 3.2500 (2.11) 0.2091 (5.09) 60;3604 261.6736 (0.41) 116509 1 test_benchmark_extract_tokens_long_simple 13.5830 (9.32) 95.4170 (1.64) 14.0610 (8.94) 1.0361 (1.96) 13.7920 (8.94) 0.2080 (5.06) 3089;10664 71.1189 (0.11) 57555 1 test_benchmark_extract_tokens_long_nested 26.5830 (18.23) 71.4579 (1.22) 27.2121 (17.30) 0.8780 (1.66) 27.1250 (17.59) 0.0841 (2.05) 566;2007 36.7484 (0.06) 30457 1 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ --- .github/workflows/build.yml | 1 + tests/benchmarks/benchmark_adapter.py | 30 +++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 tests/benchmarks/benchmark_adapter.py diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c3e63bad..517a6e00 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -39,6 +39,7 @@ jobs: tests/benchmarks/benchmark_model.py tests/benchmarks/benchmark_management_api.py tests/benchmarks/benchmark_role_manager.py + tests/benchmarks/benchmark_adapter.py - name: Upload coverage data to coveralls.io run: coveralls --service=github diff --git a/tests/benchmarks/benchmark_adapter.py b/tests/benchmarks/benchmark_adapter.py new file mode 100644 index 00000000..3a094662 --- /dev/null +++ b/tests/benchmarks/benchmark_adapter.py @@ -0,0 +1,30 @@ +from casbin.persist.adapter import _extract_tokens + + +def _benchmark_extract_tokens(benchmark, line): + @benchmark + def run_benchmark(): + _extract_tokens(line) + + +def test_benchmark_extract_tokens_short_simple(benchmark): + _benchmark_extract_tokens(benchmark, "abc,def,ghi") + + +def test_benchmark_extract_tokens_long_simple(benchmark): + # fixed UUIDs for length and to be similar to "real world" usage of UUIDs + _benchmark_extract_tokens( + benchmark, + "00000000-0000-0000-0000-000000000000,00000000-0000-0000-0000-000000000001,00000000-0000-0000-0000-000000000002", + ) + + +def test_benchmark_extract_tokens_short_nested(benchmark): + _benchmark_extract_tokens(benchmark, "abc(def,ghi),jkl(mno,pqr)") + + +def test_benchmark_extract_tokens_long_nested(benchmark): + _benchmark_extract_tokens( + benchmark, + "00000000-0000-0000-0000-000000000000(00000000-0000-0000-0000-000000000001,00000000-0000-0000-0000-000000000002),00000000-0000-0000-0000-000000000003(00000000-0000-0000-0000-000000000004,00000000-0000-0000-0000-000000000005)", + ) From 6283d4fdcc6c001e5a6c493fce2c1e9df5165d41 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Tue, 27 Aug 2024 09:44:11 +1000 Subject: [PATCH 3/6] [0003_finditer] re.finditer based solution -------------------------------------------------------------------------------------------------------- benchmark: 4 tests ------------------------------------------------------------------------------------------------------- Name (time in ns) Min Max Mean StdDev Median IQR Outliers OPS (Kops/s) Rounds Iterations ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- test_benchmark_extract_tokens_short_simple 999.8912 (1.0) 18,417.0203 (1.0) 1,123.3050 (1.0) 207.9197 (1.0) 1,124.9213 (1.0) 42.0259 (1.0) 97;670 890.2302 (1.0) 26667 1 test_benchmark_extract_tokens_long_simple 1,415.9596 (1.42) 45,624.9109 (2.48) 1,562.9986 (1.39) 223.0803 (1.07) 1,542.0374 (1.37) 42.0259 (1.0) 620;6094 639.7958 (0.72) 196733 1 test_benchmark_extract_tokens_short_nested 1,749.9551 (1.75) 31,417.0029 (1.71) 1,940.4944 (1.73) 295.1896 (1.42) 1,917.0111 (1.70) 42.0259 (1.0) 348;4660 515.3326 (0.58) 115943 1 test_benchmark_extract_tokens_long_nested 2,583.9545 (2.58) 33,583.0264 (1.82) 2,777.7344 (2.47) 224.8759 (1.08) 2,750.0791 (2.44) 42.0259 (1.0) 374;5402 360.0056 (0.40) 117082 1 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- --- casbin/persist/adapter.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/casbin/persist/adapter.py b/casbin/persist/adapter.py index f4c69bdc..fd3309f6 100644 --- a/casbin/persist/adapter.py +++ b/casbin/persist/adapter.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import re + +_INTERESTING_TOKENS_RE = re.compile(r"[,\[\]\(\)]") + def _extract_tokens(line): """Return the list of 'tokens' from the line, or None if this line has none""" @@ -24,20 +28,29 @@ def _extract_tokens(line): stack = [] tokens = [] - for c in line: + + # The tokens are separated by commas, but we support nesting so a naive `line.split(",")` is + # wrong. E.g. `abc(def, ghi), jkl` is two tokens: `abc(def, ghi)` and `jkl`. We do this by + # iterating over the locations of any tokens of interest, and either: + # + # - [](): adjust the nesting depth + # - ,: slice the line to save the token, if the , is at the top-level, outside all []() + # + # `start_idx` represents the start of the current token, that we haven't seen a `,` for yet. + start_idx = 0 + for match in _INTERESTING_TOKENS_RE.finditer(line): + c = match.group() if c == "[" or c == "(": stack.append(c) - tokens[-1] += c elif c == "]" or c == ")": stack.pop() - tokens[-1] += c elif c == "," and len(stack) == 0: - tokens.append("") - else: - if len(tokens) == 0: - tokens.append(c) - else: - tokens[-1] += c + # we've found the end of a top level token so save that and start a new one + tokens.append(line[start_idx : match.start()]) + start_idx = match.end() + + # trailing token after the last , + tokens.append(line[start_idx:]) tokens = [x.strip() for x in tokens] return tokens From 18b884b0e40fdd786a2c421b214e6b83d1a0b2d4 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Tue, 27 Aug 2024 09:53:48 +1000 Subject: [PATCH 4/6] [0006_implicit_comma] `c == ","` is implied, omit it: ---------------------------------------------------------------------------------------------------------- benchmark: 4 tests --------------------------------------------------------------------------------------------------------- Name (time in ns) Min Max Mean StdDev Median IQR Outliers OPS (Kops/s) Rounds Iterations --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- test_benchmark_extract_tokens_short_simple 957.9817 (1.0) 20,832.9875 (1.0) 1,106.7959 (1.0) 167.6173 (1.0) 1,084.0595 (1.0) 42.0259 (1.0) 100;508 903.5089 (1.0) 29412 1 test_benchmark_extract_tokens_long_simple 1,374.9814 (1.44) 54,500.0657 (2.62) 1,526.7156 (1.38) 224.1555 (1.34) 1,500.0114 (1.38) 42.0259 (1.0) 511;7264 655.0008 (0.72) 187513 1 test_benchmark_extract_tokens_short_nested 1,707.9292 (1.78) 119,416.9745 (5.73) 1,897.0402 (1.71) 592.5052 (3.53) 1,874.9852 (1.73) 83.9354 (2.00) 458;6029 527.1370 (0.58) 166666 1 test_benchmark_extract_tokens_long_nested 2,500.0190 (2.61) 197,916.0588 (9.50) 2,773.7646 (2.51) 1,057.4522 (6.31) 2,707.9368 (2.50) 166.9396 (3.97) 509;2386 360.5209 (0.40) 116511 1 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- --- casbin/persist/adapter.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/casbin/persist/adapter.py b/casbin/persist/adapter.py index fd3309f6..c56041d4 100644 --- a/casbin/persist/adapter.py +++ b/casbin/persist/adapter.py @@ -44,8 +44,9 @@ def _extract_tokens(line): stack.append(c) elif c == "]" or c == ")": stack.pop() - elif c == "," and len(stack) == 0: - # we've found the end of a top level token so save that and start a new one + elif len(stack) == 0: + # must be a comma outside of any nesting: we've found the end of a top level token so + # save that and start a new one tokens.append(line[start_idx : match.start()]) start_idx = match.end() From aa5d8386f977512ef21a37d072153e1f8f329d91 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Tue, 27 Aug 2024 09:56:30 +1000 Subject: [PATCH 5/6] [0007_no_len] use `not stack` instead of `len(stack) == 0` ------------------------------------------------------------------------------------------------------------ benchmark: 4 tests ------------------------------------------------------------------------------------------------------------ Name (time in ns) Min Max Mean StdDev Median IQR Outliers OPS (Kops/s) Rounds Iterations -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- test_benchmark_extract_tokens_short_simple 917.0035 (1.0) 30,542.0253 (1.0) 1,116.1783 (1.0) 703.1331 (3.24) 1,083.0117 (1.0) 41.9095 (1.0) 228;817 895.9142 (1.0) 35346 1 test_benchmark_extract_tokens_long_simple 1,332.9554 (1.45) 46,208.0352 (1.51) 1,483.3709 (1.33) 216.9831 (1.0) 1,459.0332 (1.35) 42.0259 (1.00) 820;5565 674.1402 (0.75) 183217 1 test_benchmark_extract_tokens_short_nested 1,624.9251 (1.77) 185,417.0114 (6.07) 1,924.2939 (1.72) 1,860.9511 (8.58) 1,792.0975 (1.65) 167.0560 (3.99) 722;1138 519.6711 (0.58) 89214 1 test_benchmark_extract_tokens_long_nested 2,417.0149 (2.64) 16,447,208.9382 (538.51) 3,852.3560 (3.45) 100,246.2894 (462.00) 2,707.9368 (2.50) 208.0342 (4.96) 20;836 259.5814 (0.29) 51283 1 -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- --- casbin/persist/adapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/casbin/persist/adapter.py b/casbin/persist/adapter.py index c56041d4..733889b4 100644 --- a/casbin/persist/adapter.py +++ b/casbin/persist/adapter.py @@ -44,7 +44,7 @@ def _extract_tokens(line): stack.append(c) elif c == "]" or c == ")": stack.pop() - elif len(stack) == 0: + elif not stack: # must be a comma outside of any nesting: we've found the end of a top level token so # save that and start a new one tokens.append(line[start_idx : match.start()]) From 6d67769a599dd26e2677d9496a6e9e39d7083a44 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Tue, 27 Aug 2024 09:57:27 +1000 Subject: [PATCH 6/6] [0008_inline_strip] Strip inline, don't do a second iteration --------------------------------------------------------------------------------------------------------- benchmark: 4 tests --------------------------------------------------------------------------------------------------------- Name (time in ns) Min Max Mean StdDev Median IQR Outliers OPS (Kops/s) Rounds Iterations -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- test_benchmark_extract_tokens_short_simple 790.9257 (1.0) 25,041.9835 (1.00) 965.2633 (1.0) 690.2278 (5.36) 917.0035 (1.0) 42.0259 (1.02) 261;825 1,035.9868 (1.0) 55814 1 test_benchmark_extract_tokens_long_simple 1,207.9254 (1.53) 568,790.9434 (22.75) 1,424.0680 (1.48) 1,607.7592 (12.49) 1,415.9596 (1.54) 83.0041 (2.02) 544;6540 702.2137 (0.68) 179118 1 test_benchmark_extract_tokens_short_nested 1,499.8950 (1.90) 24,999.9575 (1.0) 1,632.3808 (1.69) 136.4377 (1.06) 1,625.0415 (1.77) 41.0946 (1.0) 1290;4671 612.6022 (0.59) 146349 1 test_benchmark_extract_tokens_long_nested 2,291.0535 (2.90) 28,708.0184 (1.15) 2,454.3364 (2.54) 128.6998 (1.0) 2,457.9931 (2.68) 42.0259 (1.02) 1624;8542 407.4421 (0.39) 146349 1 -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- --- casbin/persist/adapter.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/casbin/persist/adapter.py b/casbin/persist/adapter.py index 733889b4..b5c457d5 100644 --- a/casbin/persist/adapter.py +++ b/casbin/persist/adapter.py @@ -47,13 +47,12 @@ def _extract_tokens(line): elif not stack: # must be a comma outside of any nesting: we've found the end of a top level token so # save that and start a new one - tokens.append(line[start_idx : match.start()]) + tokens.append(line[start_idx : match.start()].strip()) start_idx = match.end() # trailing token after the last , - tokens.append(line[start_idx:]) + tokens.append(line[start_idx:].strip()) - tokens = [x.strip() for x in tokens] return tokens