5
5
from urllib .parse import urlparse
6
6
import datetime as dt
7
7
8
def get_flat_data_df(metrics, dimensions, **other_params):
    """
    Fetch data from the Analytics API as a flat DataFrame (plain columns, no multiindex).

    :param metrics: the metrics to get
    :param dimensions: the dimensions to get; the "id" of each entry is what gets passed to get_data_df
    :param other_params: any remaining keyword arguments, forwarded unchanged to get_data_df
    :return: a DataFrame with the index reset and the columns renamed using get_rename_dict(dimensions)
    """
    dimension_ids = [dimension["id"] for dimension in dimensions]
    indexed_df = get_data_df(metrics, dimension_ids, **other_params)
    # Move the index levels back into ordinary columns before renaming them.
    flat_df = indexed_df.reset_index()
    column_aliases = get_rename_dict(dimensions)
    return flat_df.rename(columns=column_aliases).copy()
34
23
35
24
def get_rename_dict (dimensions ):
@@ -41,26 +30,27 @@ def get_rename_dict(dimensions):
41
30
def get_outbound_links_df (analytics_params ):
42
31
"""
43
32
Get a DF with outbound links from the Analytics API. Merges the builtin and custom events for outbound links.
33
+ analytics_params cannot currently include a dimension_filter
44
34
45
35
:param analytics_params: the parameters for the Analytics API, including authentication and property ids
46
36
:return: a DataFrame with the outbound links from the Analytics API
47
37
"""
48
38
pd .set_option ('future.no_silent_downcasting' , True )
39
+ assert "dimension_filter" not in analytics_params
49
40
# Get the builtin "Click" event
50
41
df_builtin_links = get_flat_data_df (
51
42
[METRIC_EVENT_COUNT , METRIC_TOTAL_USERS ],
52
43
[DIMENSION_PAGE_PATH , DIMENSION_BUILTIN_URL , DIMENSION_EVENT_NAME ],
53
- remove_matches = [ None , r"\s*" , None ] ,
44
+ dimension_filter = f"eventName== { EVENT_BUILTIN_CLICK } " ,
54
45
** analytics_params ,
55
46
).groupby (
56
47
[DIMENSION_PAGE_PATH ["alias" ], DIMENSION_BUILTIN_URL ["alias" ]]
57
48
).sum ().reset_index ()
58
-
59
49
# Get the custom "outbound_link_click" event
60
50
df_custom_links = get_flat_data_df (
61
51
[METRIC_EVENT_COUNT , METRIC_TOTAL_USERS ],
62
52
[DIMENSION_EVENT_NAME , DIMENSION_CUSTOM_URL , DIMENSION_PAGE_PATH ],
63
- remove_matches = [ DIMENSION_EVENT_NAME [ "remove_matches" ], r"\(not set\)" , None ] ,
53
+ dimension_filter = f"eventName== { EVENT_CUSTOM_CLICK } " ,
64
54
** analytics_params ,
65
55
).groupby (
66
56
[DIMENSION_PAGE_PATH ["alias" ], DIMENSION_CUSTOM_URL ["alias" ]]
@@ -153,11 +143,12 @@ def get_page_views_df(analytics_params):
153
143
:param analytics_params: the parameters for the Analytics API, including authentication and property ids
154
144
:return: a DataFrame with the page views from the Analytics API
155
145
"""
146
+ assert "dimension_filter" not in analytics_params
156
147
df_response = get_flat_data_df (
157
148
[METRIC_EVENT_COUNT , METRIC_TOTAL_USERS , METRIC_PAGE_VIEW ],
158
149
[DIMENSION_PAGE_PATH , DIMENSION_EVENT_NAME ],
159
- dimension_filter = "eventName==page_view" ,
160
150
** analytics_params ,
151
+ dimension_filter = f"eventName=={ EVENT_PAGE_VIEW } " ,
161
152
).rename (
162
153
columns = {
163
154
DIMENSION_PAGE_PATH ["alias" ]: "Page Path" ,
0 commit comments