|
5 | 5 | import seaborn as sns
|
6 | 6 |
|
7 | 7 | def cleaning_data():
|
8 |
| -# Importing the datasets |
| 8 | + # Importing the datasets |
9 | 9 | portfolio = pd.read_json("portfolio.json",lines=True)
|
10 | 10 | profile = pd.read_json("profile.json",lines=True)
|
11 | 11 | transcript = pd.read_json("transcript.json",lines=True)
|
@@ -33,6 +33,7 @@ def cleaning_data():
|
33 | 33 | # renaming the id column to offer_id
|
34 | 34 | cleaned_portfolio = cleaned_portfolio.rename(columns={'id':'offer_id'})
|
35 | 35 |
|
| 36 | + |
36 | 37 | # Data Cleaning of profile dataset
|
37 | 38 |
|
38 | 39 | # To check the number of NULL values in each column
|
@@ -106,5 +107,18 @@ def cleaning_data():
|
106 | 107 | # cleaning offers
|
107 | 108 | offers = offers.drop(['transaction','record'],axis=1)
|
108 | 109 | offers = offers.rename(columns={'record_value':'offer_id'})
|
| 110 | + |
| 111 | + offers['offer_type'] = offers['offer_id'].copy() |
| 112 | + |
| 113 | + offers['offer_type'].replace(['ae264e3637204a6fb9bb56bc8210ddfd','4d5c57ea9a6940dd891ad53e9dbe8da0',\ |
| 114 | + '9b98b8c7a33c4b65b9aebfe6a799e6d9','f19421c1d4aa40978ebb69ca19b0e20d'],\ |
| 115 | + 'bogo', inplace=True) |
| 116 | + |
| 117 | + offers['offer_type'].replace(['0b1e1539f2cc45b7b9fa7c272da2e1d7','2298d6c36e964ae4a3e7e9706d1fb8c2',\ |
| 118 | + 'fafdcd668e3743c1bb461111dcafc2a4','2906b810c7d4411798c6938adc9daaa5'],\ |
| 119 | + 'discount', inplace=True) |
| 120 | + |
| 121 | + offers['offer_type'].replace(['3f207df678b143eea3cee63160fa8bed','5a8bc65990b245e5a138643cd4eb9837'],\ |
| 122 | + 'informational', inplace=True) |
109 | 123 |
|
110 | 124 | return cleaned_portfolio, cleaned_profile, offers, transactions
|
0 commit comments