Skip to content

Commit 83337e6

Browse files
authored
Stage Payments V1 EDA
1 parent e66fdd9 commit 83337e6

File tree

1 file changed

+340
-0
lines changed

1 file changed

+340
-0
lines changed

payments/payments_eda.ipynb

+340
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,340 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"id": "d5abe861-679e-48b6-8b9c-8175a4e211e0",
7+
"metadata": {},
8+
"outputs": [],
9+
"source": [
10+
"import pandas as pd\n",
11+
"import plotly.express as px\n",
12+
"import seaborn as sns\n",
13+
"from calitp import query_sql"
14+
]
15+
},
16+
{
17+
"cell_type": "code",
18+
"execution_count": null,
19+
"id": "8597be54-d1b6-476b-a8d9-817d6b42bbf0",
20+
"metadata": {},
21+
"outputs": [],
22+
"source": [
23+
"df = query_sql(\"SELECT * FROM views.payments_rides LIMIT 1000000\", as_df=True)\n",
24+
"df"
25+
]
26+
},
27+
{
28+
"cell_type": "code",
29+
"execution_count": null,
30+
"id": "3df1926d-8288-455e-a4e6-60668fa0ec68",
31+
"metadata": {},
32+
"outputs": [],
33+
"source": [
34+
"df.columns"
35+
]
36+
},
37+
{
38+
"cell_type": "code",
39+
"execution_count": null,
40+
"id": "4595bea8-caed-47a9-9d75-6fba1408a56f",
41+
"metadata": {},
42+
"outputs": [],
43+
"source": [
44+
"data = df[\"participant_id\"].value_counts(normalize=True)\n",
45+
"sns.barplot(x=data.index, y=data.values)"
46+
]
47+
},
48+
{
49+
"cell_type": "code",
50+
"execution_count": null,
51+
"id": "1e869e02-44c1-4cfc-ab5c-e2b94f447f8c",
52+
"metadata": {},
53+
"outputs": [],
54+
"source": [
55+
"df[\"micropayment_id\"].value_counts()"
56+
]
57+
},
58+
{
59+
"cell_type": "code",
60+
"execution_count": null,
61+
"id": "f60366b6-dbc2-460a-a4d6-04b83fdeef32",
62+
"metadata": {},
63+
"outputs": [],
64+
"source": [
65+
"data = df[\"card_scheme\"].value_counts(normalize=True)\n",
66+
"sns.barplot(x=data.index, y=data.values)"
67+
]
68+
},
69+
{
70+
"cell_type": "code",
71+
"execution_count": null,
72+
"id": "5da57538-e570-4332-b00b-ab232d7a0633",
73+
"metadata": {},
74+
"outputs": [],
75+
"source": [
76+
"data = df[\"issuer\"].value_counts(normalize=True).head()\n",
77+
"ax = sns.barplot(x=data.index, y=data.values)\n",
78+
"ax.set_xticklabels(ax.get_xticklabels(),rotation = 90)\n",
79+
"ax"
80+
]
81+
},
82+
{
83+
"cell_type": "code",
84+
"execution_count": null,
85+
"id": "d27daf9f-8415-429e-ac1a-76316c4be217",
86+
"metadata": {},
87+
"outputs": [],
88+
"source": [
89+
"#Add counts\n",
90+
"data = df['issuer_country'].value_counts().head()\n",
91+
"sns.barplot(x=data.index, y=data.values)"
92+
]
93+
},
94+
{
95+
"cell_type": "code",
96+
"execution_count": null,
97+
"id": "f56c075e-e782-4785-9e6a-d28ae4477791",
98+
"metadata": {},
99+
"outputs": [],
100+
"source": [
101+
"data"
102+
]
103+
},
104+
{
105+
"cell_type": "code",
106+
"execution_count": null,
107+
"id": "57277378-2309-4c1a-8709-8fbbfe9d32d1",
108+
"metadata": {},
109+
"outputs": [],
110+
"source": [
111+
"# Month number (1-12) of each transaction, taken from the UTC timestamp.\n",
"df['transaction_month'] = pd.to_datetime(df['transaction_date_time_utc']).dt.month\n",
112+
"# Ranks 2-11 by share of rows: the most frequent issuer country is skipped by position.\n",
"# NOTE(review): the variable name implies the skipped country is the US - confirm against the data.\n",
"top_10_non_US = df['issuer_country'].value_counts(normalize=True).iloc[1:11]\n",
113+
"# Non-null count of every column for each (issuer_country, transaction_month) pair.\n",
"country_counts_df = df.groupby(['issuer_country', 'transaction_month']).count().reset_index()\n",
114+
"# Keep only the rows whose country is one of the ten selected above.\n",
"in_top_10 = country_counts_df['issuer_country'].isin(top_10_non_US.index)\n",
"top_10_country_counts_df = country_counts_df[in_top_10]\n",
115+
"top_10_country_counts_df"
116+
]
117+
},
118+
{
119+
"cell_type": "code",
120+
"execution_count": null,
121+
"id": "b5c9b03b-f89c-4e8d-8a08-ca664946f3bd",
122+
"metadata": {},
123+
"outputs": [],
124+
"source": [
125+
"sns.lineplot(x='transaction_month', y='participant_id', hue='issuer_country', data=top_10_country_counts_df)"
126+
]
127+
},
128+
{
129+
"cell_type": "code",
130+
"execution_count": null,
131+
"id": "ef0601f0-5b78-4646-b117-c4b5d5a83649",
132+
"metadata": {},
133+
"outputs": [],
134+
"source": [
135+
"data = df[\"form_factor\"].value_counts(normalize=True)\n",
136+
"sns.barplot(x=data.index, y=data.values)"
137+
]
138+
},
139+
{
140+
"cell_type": "code",
141+
"execution_count": null,
142+
"id": "325c4b51-5df4-473c-bfd7-2f84845dbadb",
143+
"metadata": {},
144+
"outputs": [],
145+
"source": [
146+
"data = df[\"charge_amount\"]\n",
147+
"sns.displot(x=data.values)"
148+
]
149+
},
150+
{
151+
"cell_type": "code",
152+
"execution_count": null,
153+
"id": "3ef9d046-6ba5-406c-be18-373c159c827d",
154+
"metadata": {},
155+
"outputs": [],
156+
"source": [
157+
"df[\"charge_amount\"].describe()"
158+
]
159+
},
160+
{
161+
"cell_type": "code",
162+
"execution_count": null,
163+
"id": "58978e83-7813-488a-b997-ac22b195ac55",
164+
"metadata": {},
165+
"outputs": [],
166+
"source": [
167+
"data = df[\"charge_type\"].value_counts(normalize=True)\n",
168+
"sns.barplot(x=data.index, y=data.values)"
169+
]
170+
},
171+
{
172+
"cell_type": "code",
173+
"execution_count": null,
174+
"id": "ebac22d8-99f8-4922-b0e6-5f1e70cdd885",
175+
"metadata": {},
176+
"outputs": [],
177+
"source": [
178+
"data = df[\"adjustment_type\"].value_counts(normalize=True)\n",
179+
"ax = sns.barplot(x=data.index, y=data.values)\n",
180+
"ax.set_xticklabels(ax.get_xticklabels(),rotation = 60)\n",
181+
"ax"
182+
]
183+
},
184+
{
185+
"cell_type": "code",
186+
"execution_count": null,
187+
"id": "8daa0f6a-d238-4c82-ad50-c423d2f07702",
188+
"metadata": {},
189+
"outputs": [],
190+
"source": [
191+
"df[\"adjustment_description\"].value_counts(normalize=True)"
192+
]
193+
},
194+
{
195+
"cell_type": "code",
196+
"execution_count": null,
197+
"id": "c1d7965a-299c-4327-a07b-b99b885a8468",
198+
"metadata": {},
199+
"outputs": [],
200+
"source": [
201+
"# Simulations for revenue/ridership if this was distributed differently?"
202+
]
203+
},
204+
{
205+
"cell_type": "code",
206+
"execution_count": null,
207+
"id": "a6038fde-9ebc-4705-9b5a-f3aca75eb5b9",
208+
"metadata": {
209+
"tags": []
210+
},
211+
"outputs": [],
212+
"source": [
213+
"# Hour-of-day is an integer 0-23, so draw one bar per hour (discrete=True) rather than\n",
"# letting the histogram pick bin edges that may not line up with whole hours.\n",
"# NOTE(review): hours are taken from the UTC timestamp - confirm whether the\n",
"# transaction_date_time_pacific column is the intended one for time-of-day analysis.\n",
"sns.displot(pd.to_datetime(df[\"transaction_date_time_utc\"]).dt.hour, discrete=True)"
214+
]
215+
},
216+
{
217+
"cell_type": "code",
218+
"execution_count": null,
219+
"id": "dd41da3c-da64-4506-a241-2f3469d18a32",
220+
"metadata": {
221+
"tags": []
222+
},
223+
"outputs": [],
224+
"source": [
225+
"df[\"route_short_name\"].value_counts(normalize=True)"
226+
]
227+
},
228+
{
229+
"cell_type": "code",
230+
"execution_count": null,
231+
"id": "88701603-1dd2-4dd9-8a83-890483b53486",
232+
"metadata": {},
233+
"outputs": [],
234+
"source": [
235+
"# Restrict to numeric/boolean columns before correlating: pandas >= 2.0 no longer\n",
"# drops non-numeric columns by default, so a bare df.corr() fails on text columns.\n",
"df.select_dtypes(include=['number', 'bool']).corr()"
236+
]
237+
},
238+
{
239+
"cell_type": "code",
240+
"execution_count": null,
241+
"id": "2d8ccc87-255d-48c8-ac42-308907b7cde7",
242+
"metadata": {},
243+
"outputs": [],
244+
"source": [
245+
"corr_cols = df.columns.drop(['participant_id', 'micropayment_id', 'funding_source_vault_id', 'customer_id', 'principal_customer_id', \n",
246+
" 'bin', 'masked_pan', 'vehicle_id', 'adjustment_id', 'littlepay_transaction_id', 'off_littlepay_transaction_id', \n",
247+
" 'device_id', 'charge_amount', 'transaction_date_time_utc', 'transaction_date_time_pacific', \n",
248+
" 'off_transaction_date_time_utc', 'off_transaction_date_time_pacific', 'refund_amount', 'location_id',\n",
249+
" 'nominal_amount', 'adjustment_amount', 'latitude', 'longitude', 'off_latitude', 'off_longitude'])\n",
250+
"one_hot_df = pd.get_dummies(df, columns=corr_cols)\n",
251+
"one_hot_df"
252+
]
253+
},
254+
{
255+
"cell_type": "code",
256+
"execution_count": null,
257+
"id": "d20c1eb2-336a-42a3-a7a8-d959e42ea663",
258+
"metadata": {},
259+
"outputs": [],
260+
"source": [
261+
"#corr_df = one_hot_df.corr()"
262+
]
263+
},
264+
{
265+
"cell_type": "code",
266+
"execution_count": null,
267+
"id": "4f6a0f49-baa9-4156-9c78-caf5dfba04b6",
268+
"metadata": {},
269+
"outputs": [],
270+
"source": [
271+
"# Compute the matrix in this cell: the only other assignment of `corr_df` in the\n",
"# notebook is commented out, so on a fresh kernel `corr_df` would be undefined here.\n",
"# Only numeric/boolean columns are correlated (pandas >= 2.0 raises on text columns).\n",
"# NOTE(review): this can be slow on the full one-hot frame - confirm it is affordable.\n",
"corr_df = one_hot_df.select_dtypes(include=['number', 'bool']).corr().dropna(how='all')\n",
272+
"corr_df.nlargest(n=10, columns=corr_df.columns)"
273+
]
274+
},
275+
{
276+
"cell_type": "code",
277+
"execution_count": null,
278+
"id": "896df0ec-462d-4ad6-814d-2c487ea2f26e",
279+
"metadata": {},
280+
"outputs": [],
281+
"source": [
282+
"# correlate across other values"
283+
]
284+
},
285+
{
286+
"cell_type": "code",
287+
"execution_count": null,
288+
"id": "3c7f9aa1-a6c1-4ad4-bb4e-f792b2417755",
289+
"metadata": {},
290+
"outputs": [],
291+
"source": [
292+
"df[\"direction\"].value_counts(normalize=True)"
293+
]
294+
},
295+
{
296+
"cell_type": "code",
297+
"execution_count": null,
298+
"id": "addf55da-706f-40a1-aa05-7819f50e5179",
299+
"metadata": {},
300+
"outputs": [],
301+
"source": [
302+
"df[\"adjustment_type\"].value_counts()"
303+
]
304+
},
305+
{
306+
"cell_type": "code",
307+
"execution_count": null,
308+
"id": "691d4113-a3f9-4b8d-a802-68fcc4231b14",
309+
"metadata": {
310+
"tags": []
311+
},
312+
"outputs": [],
313+
"source": [
314+
"# Map each distinct (latitude, longitude, route) once. The query can return up to\n",
"# 1,000,000 rows; passing them all to plotly makes the figure very heavy, and rows\n",
"# sharing a location and route draw on top of each other anyway.\n",
"geo_points = df[[\"latitude\", \"longitude\", \"route_short_name\"]].drop_duplicates()\n",
"fig = px.scatter_geo(geo_points, lat=\"latitude\", lon=\"longitude\", hover_name=\"route_short_name\")\n",
315+
"fig.show()"
316+
]
317+
}
318+
],
319+
"metadata": {
320+
"kernelspec": {
321+
"display_name": "Python 3 (ipykernel)",
322+
"language": "python",
323+
"name": "python3"
324+
},
325+
"language_info": {
326+
"codemirror_mode": {
327+
"name": "ipython",
328+
"version": 3
329+
},
330+
"file_extension": ".py",
331+
"mimetype": "text/x-python",
332+
"name": "python",
333+
"nbconvert_exporter": "python",
334+
"pygments_lexer": "ipython3",
335+
"version": "3.9.13"
336+
}
337+
},
338+
"nbformat": 4,
339+
"nbformat_minor": 5
340+
}

0 commit comments

Comments
 (0)