Skip to content

Commit 3c7acc9

Browse files
authored
Update Notebook.py
1 parent 12b518e commit 3c7acc9

File tree

1 file changed

+3
-4
lines changed

1 file changed

+3
-4
lines changed

Diff for: Notebook.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ def cleaning_data():
7575

7676
# Then we drop the reference column because it is not useful to us for further analysis
7777
cleaned_profile = cleaned_profile.drop(['today_date'],axis=1)
78-
78+
cleaned_profile['age_by_decade'] = pd.cut(cleaned_profile['age'], bins=range(10,120,10),right=False, labels=['10s','20s', '30s', '40s', '50s','60s', '70s', '80s', '90s', '100s'])
79+
cleaned_profile['income_range'] = pd.cut(cleaned_profile['income'], bins=range(0,120001,10000),right=False, labels=['10k','20k', '30k', '40k', '50k','60k', '70k', '80k', '90k', '100k', '110k', '120k'])
7980

8081
# Data Cleaning of transcript.json
8182
cleaned_transcript = transcript
@@ -89,9 +90,7 @@ def cleaning_data():
8990
profile118 = profile[profile['age']==118]
9091
id118 = profile118['id']
9192

92-
for i in range(len(cleaned_transcript)):
93-
if cleaned_transcript['person'][i] in list(id118):
94-
cleaned_transcript = cleaned_transcript.drop(i)
93+
cleaned_transcript = cleaned_transcript[~cleaned_transcript['person'].isin(id118)]
9594

9695
cleaned_transcript['record'] = cleaned_transcript.value.apply(lambda x: list(x.keys())[0])
9796
cleaned_transcript['record_value'] = cleaned_transcript.value.apply(lambda x: list(x.values())[0])

0 commit comments

Comments
 (0)