diff --git a/modules/scrap.py b/modules/scrap.py
index a6c3860..ce0c6a1 100644
--- a/modules/scrap.py
+++ b/modules/scrap.py
@@ -7,59 +7,68 @@ def __init__(self, username):
         self.username = username

     def fetchResponse(self):
-        BASE_URL = 'https://auth.geeksforgeeks.org/user/{}/practice/'.format(self.username)
-
-        def extract_text_from_elements(elements, element_keys):
-            result = {}
-            index = 0
-            for element in elements:
-                try:
-                    inner_text = element.text
-                    if inner_text == '_ _':
-                        result[element_keys[index]] = ""
-                    else:
-                        result[element_keys[index]] = inner_text
-                except:
-                    result[element_keys[index]] = ""
-                index += 1
-            return result
+        BASE_URL = 'https://www.geeksforgeeks.org/user/{}/'.format(self.username)

         def extract_details(soup):
             basic_details_by_index = ["institution", "languagesUsed", "campusAmbassador"]
             coding_scores_by_index = ["codingScore", "totalProblemsSolved", "monthlyCodingScore", "articlesPublished"]
-            basic_details = soup.find_all("div", class_ = "basic_details_data")
-            coding_scores = soup.find_all("span", class_ = "score_card_value")
+
+            institution = soup.find("div", class_ = "educationDetails_head_left--text__tgi9I")
+            lang_used = soup.find("div", class_ = "educationDetails_head_right--text__lLOHI")
+            campusAmbas = soup.find("div", class_ = "basicUserDetails_head_CA--text__IoHEU")
+
+            score_card = soup.find_all("div", class_ = "scoreCard_head_left--score__oSi_x")
+
             response = {}
-            response["basic_details"] = extract_text_from_elements(basic_details, basic_details_by_index)
-            response["coding_scores"] = extract_text_from_elements(coding_scores, coding_scores_by_index)
+            response["basic_details"] = {
+                basic_details_by_index[0]: institution.text if institution else '',
+                basic_details_by_index[1]: lang_used.text if lang_used else '',
+                basic_details_by_index[2]: campusAmbas.text if campusAmbas else ''
+            }
+
+            response["coding_scores"] = {
+                coding_scores_by_index[0]: score_card[0].text if score_card[0] else '',
+                coding_scores_by_index[1]: score_card[1].text if score_card[1] else '',
+                coding_scores_by_index[2]: score_card[2].text if score_card[2] and score_card[2].text != "__" else ''
+            }
+
             return response

-        def extract_questions_by_difficulty(soup, difficulty):
-            try:
-                response = {}
-                questions = []
-                question_list_by_difficulty_tag = soup.find("div", id = difficulty.replace("#", "")).find_all("a")
-                response["count"] = len(question_list_by_difficulty_tag)
-
-                for question_tag in question_list_by_difficulty_tag:
-                    question = {}
-                    question["question"] = question_tag.text
-                    question["questionUrl"] = question_tag["href"]
-                    questions.append(question)
-
-                response["questions"] = questions
-                return response
-            except:
-                return { "count": 0, "questions": [] }

         def extract_questions_solved_count(soup):
-            difficulties = ["#school", "#basic", "#easy", "#medium", "#hard"]
+
+            difficulties = ["school", "basic", "easy", "medium", "hard"]
             result = {}
+
+            # Structure data
             for difficulty in difficulties:
-                result[difficulty] = extract_questions_by_difficulty(soup, difficulty)
-
+                result[difficulty] = { "count": 0, "questions": [] }
+
+            question_header = soup.find_all("div", class_ = "problemNavbar_head_nav--text__UaGCx")
+
+            for el in question_header:
+                match = re.search(r'([A-Za-z]+)\s*\(\s*(\d+)\s*\)', el.text)
+                if match:
+                    cat_name = match.group(1).lower()
+                    cat_count = int(match.group(2))
+                    result[cat_name]["count"] = cat_count
+
+            response = requests.post("https://practiceapi.geeksforgeeks.org/api/v1/user/problems/submissions/",
+                                     json={"handle": self.username, "requestType": "", "year": "", "month": ""})
+            submission_data = response.json()
+
+            for level in submission_data['result']:
+                for ques in submission_data['result'][level]:
+                    url = "https://www.geeksforgeeks.org/problems/{}/0".format(submission_data['result'][level][ques]['slug'])
+                    pname = submission_data['result'][level][ques]['pname']
+
+                    result[level.lower()]['questions'].append({"question": pname, "questionUrl": url})
+
             return result

         profilePage = requests.get(BASE_URL)

         if profilePage.status_code == 200:
@@ -70,17 +79,18 @@ def extract_questions_solved_count(soup):

             generalInfo["userName"] = self.username

-            profile_pic = soup.find("img", class_ = "profile_pic")
-            institute_rank = soup.find("span", class_ = "rankNum")
-            streak_count = soup.find("div", class_ = "streakCnt")
+            profile_pic = soup.findAll("img", alt = self.username)[-1]
+            institute_rank = soup.find("span", class_ = "educationDetails_head_left_userRankContainer--text__wt81s")
+            streak_count = soup.find("div", class_ = "circularProgressBar_head_mid_streakCnt__MFOF1 tooltipped")
+
             try:
-                generalInfo["profilePicture"] = profile_pic["src"]
+                generalInfo["profilePicture"] = "https://www.geeksforgeeks.org/" + profile_pic["src"]
             except:
                 generalInfo["profilePicture"] = ""

             try:
-                generalInfo["instituteRank"] = institute_rank.text
+                generalInfo["instituteRank"] = institute_rank.text.split(" ")[0]
             except:
                 generalInfo["instituteRank"] = ""
@@ -101,7 +111,7 @@ def extract_questions_solved_count(soup):
                 generalInfo[_key] = _value

             for key, value in question_count_details.items():
-                solvedStats[key.replace("#", "")] = value
+                solvedStats[key] = value

             response["info"] = generalInfo
             response["solvedStats"] = solvedStats