forked from optimizely/python-sdk
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbucketer.py
153 lines (120 loc) · 5.71 KB
/
bucketer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# Copyright 2016-2017, 2019-2021 Optimizely
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
try:
import mmh3
except ImportError:
from .lib import pymmh3 as mmh3
MAX_TRAFFIC_VALUE = 10000
UNSIGNED_MAX_32_BIT_VALUE = 0xFFFFFFFF
MAX_HASH_VALUE = math.pow(2, 32)
HASH_SEED = 1
BUCKETING_ID_TEMPLATE = '{bucketing_id}{parent_id}'
GROUP_POLICIES = ['random']
class Bucketer(object):
""" Optimizely bucketing algorithm that evenly distributes visitors. """
def __init__(self):
""" Bucketer init method to set bucketing seed and logger instance. """
self.bucket_seed = HASH_SEED
def _generate_unsigned_hash_code_32_bit(self, bucketing_id):
""" Helper method to retrieve hash code.
Args:
bucketing_id: ID for bucketing.
Returns:
Hash code which is a 32 bit unsigned integer.
"""
# Adjusting MurmurHash code to be unsigned
return mmh3.hash(bucketing_id, self.bucket_seed) & UNSIGNED_MAX_32_BIT_VALUE
def _generate_bucket_value(self, bucketing_id):
""" Helper function to generate bucket value in half-closed interval [0, MAX_TRAFFIC_VALUE).
Args:
bucketing_id: ID for bucketing.
Returns:
Bucket value corresponding to the provided bucketing ID.
"""
ratio = float(self._generate_unsigned_hash_code_32_bit(bucketing_id)) / MAX_HASH_VALUE
return math.floor(ratio * MAX_TRAFFIC_VALUE)
def find_bucket(self, project_config, bucketing_id, parent_id, traffic_allocations):
""" Determine entity based on bucket value and traffic allocations.
Args:
project_config: Instance of ProjectConfig.
bucketing_id: ID to be used for bucketing the user.
parent_id: ID representing group or experiment.
traffic_allocations: Traffic allocations representing traffic allotted to experiments or variations.
Returns:
Entity ID which may represent experiment or variation and
"""
bucketing_key = BUCKETING_ID_TEMPLATE.format(bucketing_id=bucketing_id, parent_id=parent_id)
bucketing_number = self._generate_bucket_value(bucketing_key)
message = 'Assigned bucket %s to user with bucketing ID "%s".' % (bucketing_number, bucketing_id)
project_config.logger.debug(
message
)
for traffic_allocation in traffic_allocations:
current_end_of_range = traffic_allocation.get('endOfRange')
if bucketing_number < current_end_of_range:
return traffic_allocation.get('entityId')
return None
def bucket(self, project_config, experiment, user_id, bucketing_id):
""" For a given experiment and bucketing ID determines variation to be shown to user.
Args:
project_config: Instance of ProjectConfig.
experiment: Object representing the experiment or rollout rule in which user is to be bucketed.
user_id: ID for user.
bucketing_id: ID to be used for bucketing the user.
Returns:
Variation in which user with ID user_id will be put in. None if no variation
and array of log messages representing decision making.
*/.
"""
decide_reasons = []
if not experiment:
return None, decide_reasons
# Determine if experiment is in a mutually exclusive group.
# This will not affect evaluation of rollout rules.
if experiment.groupPolicy in GROUP_POLICIES:
group = project_config.get_group(experiment.groupId)
if not group:
return None, decide_reasons
user_experiment_id = self.find_bucket(
project_config, bucketing_id, experiment.groupId, group.trafficAllocation,
)
if not user_experiment_id:
message = 'User "%s" is in no experiment.' % user_id
project_config.logger.info(message)
decide_reasons.append(message)
return None, decide_reasons
if user_experiment_id != experiment.id:
message = 'User "%s" is not in experiment "%s" of group %s.' \
% (user_id, experiment.key, experiment.groupId)
project_config.logger.info(
message
)
decide_reasons.append(message)
return None, decide_reasons
message = 'User "%s" is in experiment %s of group %s.' % (user_id, experiment.key, experiment.groupId)
project_config.logger.info(
message
)
decide_reasons.append(message)
# Bucket user if not in white-list and in group (if any)
variation_id = self.find_bucket(project_config, bucketing_id,
experiment.id, experiment.trafficAllocation)
if variation_id:
variation = project_config.get_variation_from_id(experiment.id, variation_id)
return variation, decide_reasons
else:
message = 'Bucketed into an empty traffic range. Returning nil.'
project_config.logger.info(message)
decide_reasons.append(message)
return None, decide_reasons