Skip to content

Commit 9daff10

Browse files
authored
Support minimal combine pattern count setting (#17)
1 parent 9e92bc8 commit 9daff10

11 files changed

+47
-60
lines changed

demo/uri_drain.ini

+1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ depth = 4
3333
max_children = 100
3434
max_clusters = 1024
3535
extra_delimiters = ["/"]
36+
combine_min_url_count = ${DRAIN_COMBINE_MIN_URL_COUNT:8}
3637

3738
[PROFILING]
3839
enabled = True

models/Configuration.md

+1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ Drain is the core algorithm of URI Drain.
3636
| max_clusters | int | DRAIN_MAX_CLUSTERS | 1024 | Max number of tracked clusters (unlimited by default). When this number is reached, the model starts replacing old clusters with new ones according to the LRU policy. |
3737
| extra_delimiters | string | DRAIN_EXTRA_DELIMITERS | \["/"\] | The extra delimiters to split the sequence. |
3838
| analysis_min_url_count | int | DRAIN_ANALYSIS_MIN_URL_COUNT | 20 | The minimum number of unique URLs (per service) to trigger the analysis. |
39+
| combine_min_url_count | int | DRAIN_COMBINE_MIN_URL_COUNT | 8 | The minimum number of unique URLs (candidates within each service) required before they are masked as a variable URL (in case some similar URLs are not RESTful, such as `/test/one` and `/test/two`). |
3940

4041
### Profiling
4142

models/uri_drain/template_miner.py

+1
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ def __init__(self,
8484
max_children=self.config.drain_max_children,
8585
max_clusters=self.config.drain_max_clusters,
8686
extra_delimiters=self.config.drain_extra_delimiters,
87+
combine_min_url_count=self.config.drain_combine_min_url_count,
8788
profiler=self.profiler,
8889
param_str=param_str,
8990
# param_extra=param_extra, # MODIFIED:: for URI Drain < It is now a dict since contains multiple types

models/uri_drain/template_miner_config.py

+3
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ def __init__(self):
2828
self.drain_max_children = 100
2929
self.drain_max_clusters = None
3030
self.drain_analysis_min_url_count = 20
31+
self.drain_combine_min_url_count = 8
3132
self.masking_instructions = []
3233
self.mask_prefix = "<"
3334
self.mask_suffix = ">"
@@ -82,6 +83,8 @@ def load(self, config_filename: str):
8283
self.parameter_extraction_cache_capacity)
8384
self.drain_analysis_min_url_count = self.read_config_value(parser, section_drain, 'analysis_min_url_count', int,
8485
self.drain_analysis_min_url_count)
86+
self.drain_combine_min_url_count = self.read_config_value(parser, section_drain, 'combine_min_url_count', int,
87+
self.drain_combine_min_url_count)
8588

8689
masking_instructions = []
8790
masking_list = json.loads(masking_instructions_str)

models/uri_drain/uri_drain.py

+39-6
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,13 @@
1515

1616

1717
class LogCluster: # TODO Modified:: Changed to URICluster
18-
__slots__ = ["log_template_tokens", "cluster_id", "size"]
18+
__slots__ = ["log_template_tokens", "cluster_id", "size", "latest_urls"]
1919

20-
def __init__(self, log_template_tokens: list, cluster_id: int):
20+
def __init__(self, log_template_tokens: list, cluster_id: int, combine_min_url_count: int):
2121
self.log_template_tokens = tuple(log_template_tokens)
2222
self.cluster_id = cluster_id
2323
self.size = 1
24+
self.latest_urls = LRUCache(combine_min_url_count+1)
2425

2526
def get_template(self):
2627
# Modified:: Changed to join by slash instead of space for
@@ -47,6 +48,27 @@ def get_template(self):
4748
template = '/'.join(self.log_template_tokens)
4849
return f'/{template}'
4950

51+
def adding_url(self, url: str):
52+
if self.latest_urls.__contains__(url):
53+
return
54+
self.latest_urls[url] = True
55+
56+
def __str__(self):
57+
# return f"ID={str(self.cluster_id).ljust(5)} : size={str(self.size).ljust(10)}: {self.get_template()}"
58+
return f"size={str(self.size).ljust(10)}: {self.get_template()}"
59+
60+
61+
class SingleURILogCluster:
62+
__slots__ = ["uri", "cluster_id", "size"]
63+
64+
def __init__(self, uri: str):
65+
self.uri = uri
66+
self.cluster_id = -1
67+
self.size = 1
68+
69+
def get_template(self):
70+
return self.uri
71+
5072
def __str__(self):
5173
# return f"ID={str(self.cluster_id).ljust(5)} : size={str(self.size).ljust(10)}: {self.get_template()}"
5274
return f"size={str(self.size).ljust(10)}: {self.get_template()}"
@@ -83,6 +105,7 @@ def __init__(self,
83105
sim_th=0.4,
84106
max_children=100,
85107
max_clusters=None,
108+
combine_min_url_count=8,
86109
extra_delimiters=(),
87110
profiler: Profiler = NullProfiler(),
88111
param_str="{var}", # Modified:: required param_str
@@ -116,6 +139,7 @@ def __init__(self,
116139
self.max_node_depth = depth - 2 # max depth of a prefix tree node, starting from zero
117140
self.sim_th = sim_th
118141
self.max_children = max_children
142+
self.combine_min_url_count = combine_min_url_count
119143
self.root_node = Node()
120144
self.profiler = profiler
121145
self.extra_delimiters = extra_delimiters
@@ -133,7 +157,14 @@ def __init__(self,
133157

134158
@property
135159
def clusters(self):
136-
return self.id_to_cluster.values()
160+
result = []
161+
for cluster in self.id_to_cluster.values():
162+
if cluster.latest_urls and cluster.latest_urls.__len__() >= self.combine_min_url_count:
163+
result.append(cluster)
164+
continue
165+
for url, _ in cluster.latest_urls.items():
166+
result.append(SingleURILogCluster(url))
167+
return result
137168

138169
@property
139170
def cluster_patterns(self):
@@ -245,7 +276,7 @@ def add_log_message(self, content: str):
245276
self.profiler.start_section("create_cluster")
246277
self.clusters_counter += 1
247278
cluster_id = self.clusters_counter
248-
match_cluster = LogCluster(content_tokens, cluster_id)
279+
match_cluster = LogCluster(content_tokens, cluster_id, self.combine_min_url_count)
249280
self.id_to_cluster[cluster_id] = match_cluster
250281
self.add_seq_to_prefix_tree(self.root_node, match_cluster)
251282
update_type = "cluster_created"
@@ -261,7 +292,7 @@ def add_log_message(self, content: str):
261292
update_type = "rejected (create new)"
262293
self.clusters_counter += 1
263294
cluster_id = self.clusters_counter
264-
match_cluster = LogCluster(content_tokens, cluster_id)
295+
match_cluster = LogCluster(content_tokens, cluster_id, self.combine_min_url_count)
265296
self.id_to_cluster[cluster_id] = match_cluster
266297
self.add_seq_to_prefix_tree(self.root_node, match_cluster)
267298
match_cluster.size -= 1
@@ -278,6 +309,7 @@ def add_log_message(self, content: str):
278309
if self.profiler:
279310
self.profiler.end_section()
280311

312+
match_cluster.adding_url(content)
281313
return match_cluster, update_type
282314

283315
def get_total_cluster_size(self):
@@ -315,12 +347,13 @@ def __init__(self,
315347
sim_th=0.4,
316348
max_children=100,
317349
max_clusters=None,
350+
combine_min_url_count=8,
318351
extra_delimiters=(),
319352
profiler: Profiler = NullProfiler(),
320353
param_str="<*>",
321354
# param_extra=None, # Modified:: Added param_extra
322355
parametrize_numeric_tokens=True):
323-
super().__init__(depth, sim_th, max_children, max_clusters, extra_delimiters, profiler, param_str,
356+
super().__init__(depth, sim_th, max_children, max_clusters, combine_min_url_count, extra_delimiters, profiler, param_str,
324357
# param_extra,
325358
parametrize_numeric_tokens)
326359

servers/simple/uri_drain.ini

+1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ max_children = ${DRAIN_MAX_CHILDREN:100}
3535
max_clusters = ${DRAIN_MAX_CLUSTERS:1024}
3636
extra_delimiters = ${DRAIN_EXTRA_DELIMITERS:["/"]}
3737
analysis_min_url_count = ${DRAIN_ANALYSIS_MIN_URL_COUNT:20}
38+
combine_min_url_count = ${DRAIN_COMBINE_MIN_URL_COUNT:8}
3839

3940
[PROFILING]
4041
enabled = ${PROFILING_ENABLED:False}

test/e2e/expected/endpoint_counterexamples.yaml

+1-3
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,5 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
patterns:
16-
- "/api/v1/usernames/{var}"
17-
- "/api/v1/users/{var}"
15+
patterns: []
1816
version: "1"

test/e2e/expected/endpoint_hard.yaml

-18
Original file line numberDiff line numberDiff line change
@@ -13,32 +13,14 @@
1313
# limitations under the License.
1414

1515
patterns:
16-
- /api-this-is-a-special-case/v99999999999999999/orders/delete/{var}
17-
- /api-this-is-a-special-case/v99999999999999999/orders/reorder/{var}
18-
- /api-this-is-a-special-case/v99999999999999999/orders/update/{var}
1916
- /api/v1/bills/{var}
2017
- /api/v1/companies/{var}
21-
- /api/v1/companies/{var}/employees/{var}/reviews/{var}
22-
- /api/v1/companies/{var}/tasks/{var}/assignees/{var}
2318
- /api/v1/projects/{var}
2419
- /api/v1/services/{var}
2520
- /api/v1/users/{var}/posts/{var}/comments
26-
- /api/v1/users/{var}/posts/{var}/comments/{var}
2721
- /api/v1/wallets/{var}
28-
- /api/v2/admin/users/{var}
2922
- /api/v2/courses/{var}/modules/{var}/lessons
3023
- /api/v2/customers/{var}
3124
- /api/v3/products/{var}/reviews/{var}/comments
32-
- /api/v3/providers/{var}
3325
- /api/v4/orders/{var}/items/{var}/tracking
34-
- /customer/{var}
35-
- /customer/{var}/order/{var}
36-
- /customer/{var}/profile/{var}/compare/{var}/profile/{var}
37-
- ABC/{var}
38-
- HikariCP/Connection/{var}
39-
- google.com/api/v1/users/{var}
40-
- http://www.google.com/api/v1/users/{var}
41-
- https://www.google.com/api/v1/users/{var}
42-
- top1.abc.example.com.net.cn/api/v1/users/{var}
43-
- www.google.com/api/v1/users/{var}
4426
version: '1'

test/e2e/expected/endpoint_hard_3k.yaml

-16
Original file line numberDiff line numberDiff line change
@@ -13,30 +13,14 @@
1313
# limitations under the License.
1414

1515
patterns:
16-
- /api-this-is-a-special-case/v99999999999999999/orders/delete/{var}
17-
- /api-this-is-a-special-case/v99999999999999999/orders/reorder/{var}
18-
- /api-this-is-a-special-case/v99999999999999999/orders/update/{var}
1916
- /api/v1/bills/{var}
2017
- /api/v1/companies/{var}
21-
- /api/v1/companies/{var}/employees/{var}/reviews/{var}
22-
- /api/v1/companies/{var}/tasks/{var}/assignees/{var}
2318
- /api/v1/projects/{var}
2419
- /api/v1/services/{var}
2520
- /api/v1/users/{var}/posts/{var}/comments
26-
- /api/v1/users/{var}/posts/{var}/comments/{var}
2721
- /api/v1/wallets/{var}
28-
- /api/v2/admin/users/{var}
2922
- /api/v2/courses/{var}/modules/{var}/lessons
3023
- /api/v2/customers/{var}
3124
- /api/v3/products/{var}/reviews/{var}/comments
32-
- /api/v3/providers/{var}
3325
- /api/v4/orders/{var}/items/{var}/tracking
34-
- /customer/{var}
35-
- /customer/{var}/order/{var}
36-
- /customer/{var}/profile/{var}/compare/{var}/profile/{var}
37-
- google.com/api/v1/users/{var}
38-
- http://www.google.com/api/v1/users/{var}
39-
- https://www.google.com/api/v1/users/{var}
40-
- top1.abc.example.com.net.cn/api/v1/users/{var}
41-
- www.google.com/api/v1/users/{var}
4226
version: '1'

test/e2e/expected/endpoint_trivial.yaml

-10
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,8 @@
1414

1515
patterns:
1616
- /api/v1/accounts/{var}
17-
- /api/v1/invoices/{var}
1817
- /api/v1/orders/{var}
1918
- /api/v1/posts/{var}
2019
- /api/v1/products/{var}
2120
- /api/v1/users/{var}
22-
- /api/v2/data/users/{var}
23-
- /api/v999/orders/{var}
24-
- /product/{var}
25-
- /user/{var}
26-
- /user/{var}/post/{var}
27-
- /user/{var}/profile/{var}/compare/{var}/profile/{var}
28-
- GET:/api/v1/users/{var}
29-
- http://www.google.com/api/v1/users/{var}
30-
- https://www.google.com/api/v1/users/{var}
3121
version: '1'

test/e2e/expected/endpoint_trivial_3k.yaml

-7
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,4 @@ patterns:
1919
- /api/v1/posts/{var}
2020
- /api/v1/products/{var}
2121
- /api/v1/users/{var}
22-
- /api/v2/data/users/{var}
23-
- /api/v999/orders/{var}
24-
- /product/{var}
25-
- /user/{var}
26-
- /user/{var}/post/{var}
27-
- /user/{var}/profile/{var}/compare/{var}/profile/{var}
28-
- GET:/api/v1/users/{var}
2922
version: '1'

0 commit comments

Comments
 (0)