Add pandas codes

chAwater · chAwater · commit 096c414cbbb7 · 2023-09-09T23:05:51.000+08:00
diff --git a/1045.customers-who-bought-all-products.py b/1045.customers-who-bought-all-products.py
@@ -0,0 +1,22 @@
+# https://leetcode.cn/problems/customers-who-bought-all-products
+
+import pandas as pd
+
+
+def find_customers(customer: pd.DataFrame, product: pd.DataFrame) -> pd.DataFrame:
+    '''
+    Return the customers who bought every product listed in `product`.
+
+    A customer qualifies when the number of distinct `product_key` values
+    among their rows in `customer` equals the number of rows in `product`.
+    The result has a single `customer_id` column.
+
+    Submission record:
+    Date: 2023.09.06
+    Pass/Error/Bug: 1/1/0
+    Runtime: 264 ms, beats 82.14% of Python3 submissions
+    Memory: 59.22 MB, beats 25.00% of Python3 submissions
+    '''
+    catalogue_size = len(product)
+    # One row per distinct (customer, product) purchase; rows with a missing
+    # key are discarded, exactly as a groupby on both columns would do.
+    purchases = customer[['customer_id', 'product_key']].dropna().drop_duplicates()
+    bought = purchases.groupby('customer_id')['product_key'].count().reset_index()
+    bought_everything = bought['product_key'] == catalogue_size
+    return bought.loc[bought_everything, ['customer_id']]
diff --git a/1050.actors-and-directors-who-cooperated-at-least-three-times.py b/1050.actors-and-directors-who-cooperated-at-least-three-times.py
@@ -0,0 +1,19 @@
+# https://leetcode.cn/problems/actors-and-directors-who-cooperated-at-least-three-times
+
+import pandas as pd
+
+
+def actors_and_directors(actor_director: pd.DataFrame) -> pd.DataFrame:
+    '''
+    Return the (actor_id, director_id) pairs that cooperated at least 3 times.
+
+    Each row of `actor_director` is one cooperation, so pairs are counted by
+    rows (`size`) rather than by non-null `timestamp` values. A missing
+    timestamp therefore still counts, and no other column can leak into
+    the result.
+
+    Returns a frame with exactly the columns `actor_id` and `director_id`
+    and a fresh 0..n-1 index.
+
+    Submission record (measured on the originally submitted version):
+    Date: 2023.08.09
+    Pass/Error/Bug: 1/0/0
+    Runtime: 284 ms, beats 46.23% of Python3 submissions
+    Memory: 60.60 MB, beats 15.63% of Python3 submissions
+    '''
+    pair_counts = (
+        actor_director
+        .groupby(['actor_id', 'director_id']).size()
+        .reset_index(name='cooperations')
+    )
+    frequent = pair_counts['cooperations'] >= 3
+    return pair_counts.loc[frequent, ['actor_id', 'director_id']].reset_index(drop=True)
diff --git a/1068.product-sales-analysis-i.py b/1068.product-sales-analysis-i.py
@@ -0,0 +1,13 @@
+# https://leetcode.cn/problems/product-sales-analysis-i
+
+import pandas as pd
+
+
+def sales_analysis(sales: pd.DataFrame, product: pd.DataFrame) -> pd.DataFrame:
+    '''
+    Report `product_name`, `year` and `price` for every sale in `sales`.
+
+    `sales` is inner-joined to `product` on `product_id` and the three
+    report columns are returned, one row per sale. No grouping is applied:
+    grouping on the report columns would merge distinct sales that share
+    the same product, year and price, and would drop rows with a null in
+    any of them.
+
+    Submission record (measured on the originally submitted version):
+    Date: 2023.09.03
+    Pass/Error/Bug: 1/0/0
+    Runtime: 332 ms, beats 62.71% of Python3 submissions
+    Memory: 61.35 MB, beats 6.78% of Python3 submissions
+    '''
+    joined = pd.merge(sales, product, on='product_id')
+    return joined[['product_name', 'year', 'price']]
diff --git a/1084.sales-analysis-iii.py b/1084.sales-analysis-iii.py
@@ -0,0 +1,20 @@
+# https://leetcode.cn/problems/sales-analysis-iii
+
+import pandas as pd
+
+
+def sales_analysis(product: pd.DataFrame, sales: pd.DataFrame) -> pd.DataFrame:
+    '''
+    Return the products whose sales all fall between 2019-01-01 and
+    2019-03-31 (both inclusive).
+
+    Any product with at least one sale outside that window is excluded.
+    Products without sales never appear because the join is an inner join.
+    The result has the columns `product_id` and `product_name`, one row per
+    product.
+
+    Submission record:
+    Date: 2023.09.03
+    Pass/Error/Bug: 1/2/0
+    Runtime: 344 ms, beats 67.57% of Python3 submissions
+    Memory: 62.78 MB, beats 5.41% of Python3 submissions
+    '''
+    joined = pd.merge(sales, product, on='product_id')
+    dates = joined['sale_date']
+    outside_window = (dates < '2019-01-01') | (dates > '2019-03-31')
+    # Ids of products that have at least one sale outside the window.
+    disqualified = joined.loc[outside_window, 'product_id']
+    kept = joined[~joined['product_id'].isin(disqualified)].reset_index(drop=True)
+    return kept[['product_id', 'product_name']].drop_duplicates()
diff --git a/1141.user-activity-for-the-past-30-days-i.py b/1141.user-activity-for-the-past-30-days-i.py
@@ -0,0 +1,22 @@
+# https://leetcode.cn/problems/user-activity-for-the-past-30-days-i
+
+import pandas as pd
+
+
+def user_activity(activity: pd.DataFrame) -> pd.DataFrame:
+    '''
+    Count the distinct active users per day for the 30 days ending on
+    2019-07-27 (i.e. 2019-06-28 through 2019-07-27, both inclusive).
+
+    Returns a frame with the columns `day` and `active_users`; days without
+    any activity do not appear.
+
+    Submission record:
+    Date: 2023.09.09
+    Pass/Error/Bug: 1/1/0
+    Runtime: 408 ms, beats 22.45% of Python3 submissions
+    Memory: 60.27 MB, beats 6.12% of Python3 submissions
+    '''
+    # Rows lacking a date or a user are never counted.
+    complete = activity.dropna(subset=['activity_date', 'user_id'])
+    daily = complete.groupby('activity_date')['user_id'].nunique().reset_index()
+    daily.columns = ['day', 'active_users']
+    # The window is applied last, so the returned index still refers to
+    # positions in the full per-day table.
+    in_window = (daily['day'] <= '2019-07-27') & (daily['day'] > '2019-06-27')
+    return daily[in_window]
diff --git a/1148.article-views-i.py b/1148.article-views-i.py
@@ -0,0 +1,18 @@
+# https://leetcode.cn/problems/article-views-i
+
+import pandas as pd
+
+
+def article_views_i(views: pd.DataFrame) -> pd.DataFrame:
+    '''
+    Return the authors who viewed at least one of their own articles.
+
+    The result has a single column `id`, holds each author once and is
+    sorted by `id` in ascending order.
+
+    Submission record:
+    Date: 2023.08.01
+    Pass/Error/Bug: 1/0/0
+    Runtime: 276 ms, beats 75.42% of Python3 submissions
+    Memory: 60.80 MB, beats 30.18% of Python3 submissions
+    '''
+    viewed_own_article = views['viewer_id'] == views['author_id']
+    authors = views.loc[viewed_own_article, ['author_id']].drop_duplicates()
+    authors.columns = ['id']
+    return authors.sort_values('id')
diff --git a/1280.students-and-examinations.py b/1280.students-and-examinations.py
@@ -0,0 +1,25 @@
+# https://leetcode.cn/problems/students-and-examinations
+
+import pandas as pd
+
+
+def students_and_examinations(students: pd.DataFrame, subjects: pd.DataFrame, examinations: pd.DataFrame) -> pd.DataFrame:
+    '''
+    Count how many times each student attended each exam.
+
+    Every (student, subject) combination is reported, with 0 for exams the
+    student never attended. Attendance is counted on the
+    (`student_id`, `subject_name`) key only, so a student whose
+    `student_name` is null is still counted correctly. Only the
+    `attended_exams` column is null-filled; a null name stays null.
+
+    Returns the columns `student_id`, `student_name`, `subject_name` and
+    `attended_exams` (int), sorted by `student_id` then `subject_name`.
+
+    Submission record (measured on the originally submitted version):
+    Date: 2023.08.08
+    Pass/Error/Bug: 1/1/0
+    Runtime: 384 ms, beats 51.10% of Python3 submissions
+    Memory: 61.20 MB, beats 19.38% of Python3 submissions
+    '''
+    # Every student paired with every subject.
+    grid = pd.merge(students, subjects, how='cross')
+    attendance = (
+        examinations
+        .groupby(['student_id', 'subject_name']).size()
+        .reset_index(name='attended_exams')
+    )
+    report = pd.merge(grid, attendance, on=['student_id', 'subject_name'], how='left')
+    # Pairs without any examination row come back as NaN from the left join.
+    report['attended_exams'] = report['attended_exams'].fillna(0).astype(int)
+    return report.sort_values(['student_id', 'subject_name'])
diff --git a/1378.replace-employee-id-with-the-unique-identifier.py b/1378.replace-employee-id-with-the-unique-identifier.py
@@ -0,0 +1,13 @@
+# https://leetcode.cn/problems/replace-employee-id-with-the-unique-identifier/
+
+import pandas as pd
+
+
+def replace_employee_id(employees: pd.DataFrame, employee_uni: pd.DataFrame) -> pd.DataFrame:
+    '''
+    Show the unique id of every employee, or null when they have none.
+
+    `employees` is left-joined to `employee_uni` on `id`, giving exactly
+    one row per employee/unique-id match and keeping employees without a
+    match. An outer join would also emit rows for `employee_uni` ids that
+    belong to no employee.
+
+    Returns the columns `unique_id` and `name`.
+
+    Submission record (measured on the originally submitted version):
+    Date: 2023.08.09
+    Pass/Error/Bug: 1/1/0
+    Runtime: 380 ms, beats 20.60% of Python3 submissions
+    Memory: 60.80 MB, beats 23.77% of Python3 submissions
+    '''
+    joined = pd.merge(employees, employee_uni, on='id', how='left')
+    return joined[['unique_id', 'name']]
diff --git a/1484.group-sold-products-by-the-date.py b/1484.group-sold-products-by-the-date.py
@@ -0,0 +1,21 @@
+# https://leetcode.cn/problems/group-sold-products-by-the-date
+
+import pandas as pd
+
+
+def categorize_products(activities: pd.DataFrame) -> pd.DataFrame:
+    '''
+    For each sell date, report how many different products were sold and
+    their names.
+
+    Returns the columns `sell_date`, `num_sold` (number of distinct
+    products) and `products` (the distinct names, sorted and joined with
+    ','), ordered by `sell_date` ascending.
+
+    Submission record:
+    Date: 2023.08.08
+    Pass/Error/Bug: 1/1/0
+    Runtime: 328 ms, beats 58.85% of Python3 submissions
+    Memory: 61.20 MB, beats 63.68% of Python3 submissions
+    '''
+    def _joined_names(names):
+        # Distinct product names in lexicographic order, comma separated.
+        return ','.join(sorted(set(names)))
+
+    per_date = activities.groupby('sell_date')['product'].agg(
+        num_sold='nunique',
+        products=_joined_names,
+    )
+    return per_date.reset_index().sort_values('sell_date', ascending=True)
diff --git a/1517.find-users-with-valid-e-mails.py b/1517.find-users-with-valid-e-mails.py
@@ -0,0 +1,13 @@
+# https://leetcode.cn/problems/find-users-with-valid-e-mails
+
+import pandas as pd
+
+
+def valid_emails(users: pd.DataFrame) -> pd.DataFrame:
+    '''
+    Keep the users whose `mail` is a valid leetcode.com address.
+
+    A valid address is a prefix followed by exactly `@leetcode.com`, where
+    the prefix starts with a letter and continues with any mix of letters,
+    digits, '_', '.' and '-'. The whole string must match, so trailing text
+    after the domain is rejected, and separators may appear any number of
+    times in the prefix. Rows with a null `mail` are treated as invalid.
+
+    Submission record (measured on the originally submitted version):
+    Date: 2023.08.01
+    Pass/Error/Bug: 2/3/0
+    Runtime: 268 ms, beats 80.83% of Python3 submissions
+    Memory: 60.20 MB, beats 39.35% of Python3 submissions
+    '''
+    pattern = r'[a-zA-Z][a-zA-Z0-9_.-]*@leetcode\.com'
+    # fullmatch anchors both ends; na=False keeps the mask strictly boolean.
+    is_valid = users['mail'].str.fullmatch(pattern, na=False)
+    return users[is_valid]
diff --git a/1527.patients-with-a-condition.py b/1527.patients-with-a-condition.py
@@ -0,0 +1,14 @@
+# https://leetcode.cn/problems/patients-with-a-condition
+
+import pandas as pd
+
+
+def find_patients(patients: pd.DataFrame) -> pd.DataFrame:
+    '''
+    Return the patients that have a condition code starting with `DIAB1`.
+
+    `conditions` holds space-separated codes; a row matches when `DIAB1`
+    appears at the very start of the string or right after a space. The
+    check is a vectorized mask, so the caller's frame is not modified (no
+    helper column is added to it) and rows with null `conditions` are
+    simply not selected.
+
+    Submission record (measured on the originally submitted version):
+    Date: 2023.08.01
+    Pass/Error/Bug: 1/0/0
+    Runtime: 332 ms, beats 9.00% of Python3 submissions
+    Memory: 59.70 MB, beats 5.32% of Python3 submissions
+    '''
+    # Non-capturing group: a code begins at the string start or after a space.
+    has_diab1 = patients['conditions'].str.contains(r'(?:^| )DIAB1', na=False)
+    return patients[has_diab1]
diff --git a/602.friend-requests-ii-who-has-the-most-friends.py b/602.friend-requests-ii-who-has-the-most-friends.py
@@ -0,0 +1,25 @@
+# https://leetcode.cn/problems/friend-requests-ii-who-has-the-most-friends
+
+import pandas as pd
+
+
+def most_friends(request_accepted: pd.DataFrame) -> pd.DataFrame:
+    '''
+    Find the person with the most friends and how many friends they have.
+
+    Each accepted request counts once for the requester and once for the
+    accepter. Returns a one-row frame with the columns `id` and `num`; when
+    several people tie, only the row that sorts last by `num` is returned.
+
+    Submission record:
+    Date: 2023.09.04
+    Pass/Error/Bug: 1/0/0
+    Runtime: 256 ms, beats 96.83% of Python3 submissions
+    Memory: 58.94 MB, beats 65.08% of Python3 submissions
+    '''
+    # Stack both ends of every friendship into a single column of person ids.
+    person_ids = pd.concat(
+        [request_accepted['requester_id'], request_accepted['accepter_id']],
+        ignore_index=True,
+    )
+    friend_counts = (
+        person_ids.groupby(person_ids).size()
+        .rename_axis('id')
+        .reset_index(name='num')
+    )
+    return friend_counts.sort_values('num').iloc[-1:]