Skip to content

Commit 096c414

Browse files
committed
Add pandas codes
1 parent 9316656 commit 096c414

12 files changed

+225
-0
lines changed
+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# https://leetcode.cn/problems/customers-who-bought-all-products
2+
3+
import pandas as pd
4+
5+
6+
def find_customers(customer: pd.DataFrame, product: pd.DataFrame) -> pd.DataFrame:
    '''
    Return the customers who bought every product listed in ``product``.

    A customer qualifies when the number of distinct ``product_key`` values
    recorded for them in ``customer`` equals the number of rows in ``product``.
    The result is a one-column frame containing ``customer_id``.

    Date: 2023.09.06
    Pass/Error/Bug: 1/1/0
    Runtime: 264 ms, faster than 82.14% of Python3 submissions
    Memory: 59.22 MB, less than 25.00% of Python3 submissions
    '''
    required = product.shape[0]

    # Collapse repeat purchases so each (customer, product) pair counts once;
    # rows with a missing key are ignored, as a groupby on both keys would do.
    distinct_pairs = (
        customer[['customer_id', 'product_key']]
        .dropna()
        .drop_duplicates()
    )

    # Number of distinct products per customer.
    bought = (
        distinct_pairs
        .groupby(['customer_id'])['product_key']
        .count()
        .reset_index()
    )

    complete = bought['product_key'] == required
    return bought.loc[complete, ['customer_id']]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# https://leetcode.cn/problems/actors-and-directors-who-cooperated-at-least-three-times
2+
3+
import pandas as pd
4+
5+
6+
def actors_and_directors(actor_director: pd.DataFrame) -> pd.DataFrame:
    '''
    Return the (actor_id, director_id) pairs that appear at least three times.

    Rows are counted per pair through the ``timestamp`` column; that helper
    column is removed from the result.

    Date: 2023.08.09
    Pass/Error/Bug: 1/0/0
    Runtime: 284 ms, faster than 46.23% of Python3 submissions
    Memory: 60.60 MB, less than 15.63% of Python3 submissions
    '''
    pair_keys = ['actor_id', 'director_id']
    collaborations = actor_director.groupby(pair_keys).count()
    frequent = collaborations[collaborations['timestamp'] >= 3]
    return frequent.reset_index().drop(columns='timestamp')

1068.product-sales-analysis-i.py

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# https://leetcode.cn/problems/product-sales-analysis-i
2+
3+
import pandas as pd
4+
5+
6+
def sales_analysis(sales: pd.DataFrame, product: pd.DataFrame) -> pd.DataFrame:
    '''
    Report ``product_name``, ``year`` and ``price`` for every sale.

    Each row of ``sales`` is joined to its product on ``product_id`` and one
    output row is produced per sale. The earlier version grouped by the three
    output columns, which silently merged distinct sales that shared the same
    name/year/price and dropped rows with a missing key.

    Date: 2023.09.03
    Pass/Error/Bug: 1/0/0
    Runtime: 332 ms, faster than 62.71% of Python3 submissions
    Memory: 61.35 MB, less than 6.78% of Python3 submissions
    (statistics recorded for the original submission)
    '''
    joined = pd.merge(sales, product, on='product_id')
    # Plain column projection: no grouping, so repeated sales are preserved.
    return joined[['product_name', 'year', 'price']]

1084.sales-analysis-iii.py

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# https://leetcode.cn/problems/sales-analysis-iii
2+
3+
import pandas as pd
4+
5+
6+
def sales_analysis(product: pd.DataFrame, sales: pd.DataFrame) -> pd.DataFrame:
    '''
    Return the products whose every sale falls within 2019-01-01..2019-03-31.

    A product is excluded as soon as one of its sales is dated before
    2019-01-01 or after 2019-03-31. Products without any sale never appear
    because the join with ``sales`` is an inner join.

    Date: 2023.09.03
    Pass/Error/Bug: 1/2/0
    Runtime: 344 ms, faster than 67.57% of Python3 submissions
    Memory: 62.78 MB, less than 5.41% of Python3 submissions
    '''
    joined = pd.merge(sales, product, on='product_id')

    # Sales outside the first quarter of 2019 disqualify their product.
    too_early = joined['sale_date'] < '2019-01-01'
    too_late = joined['sale_date'] > '2019-03-31'
    disqualified_ids = joined.loc[too_late | too_early, 'product_id']

    surviving = joined[~joined['product_id'].isin(disqualified_ids)]
    surviving = surviving.reset_index(drop=True)
    return surviving[['product_id', 'product_name']].drop_duplicates()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# https://leetcode.cn/problems/user-activity-for-the-past-30-days-i
2+
3+
import pandas as pd
4+
5+
6+
def user_activity(activity: pd.DataFrame) -> pd.DataFrame:
    '''
    Count the distinct active users per day for the 30 days ending 2019-07-27.

    The result has the columns ``day`` and ``active_users`` and only keeps
    days after 2019-06-27 and up to and including 2019-07-27.

    Date: 2023.09.09
    Pass/Error/Bug: 1/1/0
    Runtime: 408 ms, faster than 22.45% of Python3 submissions
    Memory: 60.27 MB, less than 6.12% of Python3 submissions
    '''
    # One row per (day, user): a user counts once however many actions they did.
    user_days = (
        activity[['activity_date', 'user_id']]
        .dropna()
        .drop_duplicates()
    )

    daily = user_days.groupby('activity_date')['user_id'].count().reset_index()
    daily = daily.rename(columns={'activity_date': 'day', 'user_id': 'active_users'})

    in_window = (daily['day'] <= '2019-07-27') & (daily['day'] > '2019-06-27')
    return daily[in_window]

1148.article-views-i.py

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# https://leetcode.cn/problems/article-views-i
2+
3+
import pandas as pd
4+
5+
6+
def article_views_i(views: pd.DataFrame) -> pd.DataFrame:
    '''
    Return the ids of authors who viewed at least one of their own articles.

    The result is a single ``id`` column, de-duplicated and sorted ascending.

    Date: 2023.08.01
    Pass/Error/Bug: 1/0/0
    Runtime: 276 ms, faster than 75.42% of Python3 submissions
    Memory: 60.80 MB, less than 30.18% of Python3 submissions
    '''
    self_viewed = views['viewer_id'] == views['author_id']
    authors = views.loc[self_viewed, ['author_id']]
    authors = authors.rename(columns={'author_id': 'id'})
    unique_authors = authors.drop_duplicates()
    return unique_authors.sort_values('id')

1280.students-and-examinations.py

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# https://leetcode.cn/problems/students-and-examinations
2+
3+
import pandas as pd
4+
5+
6+
def students_and_examinations(students: pd.DataFrame, subjects: pd.DataFrame, examinations: pd.DataFrame) -> pd.DataFrame:
    '''
    Count how many times each student attended each exam.

    Every (student, subject) combination is listed, including those with no
    attendance (reported as 0). The result is ordered by ``student_id`` and
    ``subject_name``.

    Date: 2023.08.08
    Pass/Error/Bug: 1/1/0
    Runtime: 384 ms, faster than 51.10% of Python3 submissions
    Memory: 61.20 MB, less than 19.38% of Python3 submissions
    '''
    join_keys = ['student_id', 'student_name', 'subject_name']

    # Full grid of students x subjects so combinations without exams survive.
    grid = students.sort_values('student_id').merge(subjects, how='cross')

    # The placeholder column exists only so that count() has something to tally.
    attended = students.assign(attended_exams=0).merge(
        examinations, on='student_id', how='inner'
    )
    tallies = attended.groupby(join_keys).count().reset_index()

    report = grid.merge(tallies, on=join_keys, how='left')
    report = report.fillna(0).astype({'attended_exams': int})
    return report.sort_values(['student_id', 'subject_name'])
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# https://leetcode.cn/problems/replace-employee-id-with-the-unique-identifier/
2+
3+
import pandas as pd
4+
5+
6+
def replace_employee_id(employees: pd.DataFrame, employee_uni: pd.DataFrame) -> pd.DataFrame:
    '''
    Attach each employee's ``unique_id`` (null when none exists).

    Every row of ``employees`` is kept exactly once per matching
    ``employee_uni`` row; employees without a match get a null ``unique_id``.
    The join is anchored on ``employees`` (``how='right'``) instead of the
    previous outer join, which also emitted ``employee_uni`` rows that do not
    belong to any employee (rows with a null ``name``).

    Date: 2023.08.09
    Pass/Error/Bug: 1/1/0
    Runtime: 380 ms, faster than 20.60% of Python3 submissions
    Memory: 60.80 MB, less than 23.77% of Python3 submissions
    (statistics recorded for the original submission)
    '''
    # employee_uni stays on the left so 'unique_id' still precedes 'name'.
    joined = pd.merge(employee_uni, employees, on='id', how='right')
    return joined.drop('id', axis=1)
+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# https://leetcode.cn/problems/group-sold-products-by-the-date
2+
3+
import pandas as pd
4+
5+
6+
def categorize_products(activities: pd.DataFrame) -> pd.DataFrame:
    '''
    Summarise, per ``sell_date``, how many distinct products were sold and which.

    ``num_sold`` is the number of distinct products for the date and
    ``products`` is their comma-separated, lexicographically sorted list.
    Rows are ordered by ``sell_date`` ascending.

    Date: 2023.08.08
    Pass/Error/Bug: 1/1/0
    Runtime: 328 ms, faster than 58.85% of Python3 submissions
    Memory: 61.20 MB, less than 63.68% of Python3 submissions
    '''
    def _joined_names(names):
        # Distinct product names, sorted, joined with commas.
        return ','.join(sorted(set(names)))

    per_date = activities.groupby('sell_date')
    summary = per_date.agg(
        num_sold=('product', 'nunique'),
        products=('product', _joined_names),
    )
    summary = summary.reset_index()
    return summary.sort_values('sell_date', ascending=True)

1517.find-users-with-valid-e-mails.py

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# https://leetcode.cn/problems/find-users-with-valid-e-mails
2+
3+
import pandas as pd
4+
5+
6+
def valid_emails(users: pd.DataFrame) -> pd.DataFrame:
    '''
    Keep only the users whose ``mail`` is a valid leetcode.com address.

    A valid address has a prefix that starts with a letter and continues with
    any mix of letters, digits, ``_``, ``.`` and ``-``, followed by exactly
    ``@leetcode.com``. Missing mails are treated as invalid.

    Fixes over the original pattern:
    * the match is anchored at the end, so ``x@leetcode.comm`` is rejected;
    * the prefix may interleave special characters freely (``a_b.c-d``);
    * null mails no longer break the boolean mask (``na=False``).

    Date: 2023.08.01
    Pass/Error/Bug: 2/3/0
    Runtime: 268 ms, faster than 80.83% of Python3 submissions
    Memory: 60.20 MB, less than 39.35% of Python3 submissions
    (statistics recorded for the original submission)
    '''
    # str.match anchors at the start; \Z pins the end (unlike $, it does not
    # tolerate a trailing newline).
    pattern = r'[A-Za-z][A-Za-z0-9_.\-]*@leetcode\.com\Z'
    is_valid = users['mail'].str.match(pattern, na=False)
    return users[is_valid]

1527.patients-with-a-condition.py

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# https://leetcode.cn/problems/patients-with-a-condition
2+
3+
import pandas as pd
4+
5+
6+
def find_patients(patients: pd.DataFrame) -> pd.DataFrame:
    '''
    Return the patients having a condition code that starts with ``DIAB1``.

    ``conditions`` holds space-separated codes; a patient matches when any
    single code begins with ``DIAB1``. Patients with a missing ``conditions``
    value are treated as non-matching.

    Unlike the original implementation, the input frame is not modified (no
    temporary ``npos`` column is written to the caller's DataFrame) and null
    ``conditions`` values do not raise.

    Date: 2023.08.01
    Pass/Error/Bug: 1/0/0
    Runtime: 332 ms, faster than 9.00% of Python3 submissions
    Memory: 59.70 MB, less than 5.32% of Python3 submissions
    (statistics recorded for the original submission)
    '''
    # A code starts either at the beginning of the string or right after a space.
    has_diab1 = patients['conditions'].str.contains(r'(?:^| )DIAB1', regex=True, na=False)
    return patients[has_diab1]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# https://leetcode.cn/problems/friend-requests-ii-who-has-the-most-friends
2+
3+
import pandas as pd
4+
5+
6+
def most_friends(request_accepted: pd.DataFrame) -> pd.DataFrame:
    '''
    Return the person with the most friends and that friend count.

    Every accepted request counts as one friendship for both the requester
    and the accepter. The result has one row with the columns ``id`` and
    ``num``.

    Date: 2023.09.04
    Pass/Error/Bug: 1/0/0
    Runtime: 256 ms, faster than 96.83% of Python3 submissions
    Memory: 58.94 MB, less than 65.08% of Python3 submissions
    '''
    # Stack both ends of each friendship into one column of person ids.
    person_ids = pd.concat(
        [request_accepted['requester_id'], request_accepted['accepter_id']],
        ignore_index=True,
    ).rename('id')

    friend_counts = (
        person_ids.to_frame()
        .groupby('id')
        .size()
        .reset_index(name='num')
    )
    # Ascending sort, so the last row holds the largest count.
    return friend_counts.sort_values('num').tail(1)

0 commit comments

Comments
 (0)