From b93ebddf97b0fe93e09547f058c9ac8cdb6288c5 Mon Sep 17 00:00:00 2001
From: Rupesh-Singh-Karki
Date: Sun, 13 Oct 2024 01:14:32 +0530
Subject: [PATCH 1/2] Added a new feature automated_resume_analyzer

---
 automated_resume_analyzer/README.md           | 25 +++++
 .../automated_resume_analyzer.py              | 98 +++++++++++++++++++
 2 files changed, 123 insertions(+)
 create mode 100644 automated_resume_analyzer/README.md
 create mode 100644 automated_resume_analyzer/automated_resume_analyzer.py

diff --git a/automated_resume_analyzer/README.md b/automated_resume_analyzer/README.md
new file mode 100644
index 000000000..b49ab709f
--- /dev/null
+++ b/automated_resume_analyzer/README.md
@@ -0,0 +1,25 @@
+# Resume Analyzer
+
+This Python script analyzes resumes (in PDF format) for common issues like structure, keyword optimization, and grammar problems. The tool is designed to help users enhance their resumes by providing insights into missing sections, keyword usage, and grammar issues.
+
+## Features
+
+- **Structure Check**: Ensures that key sections such as Education, Experience, Skills, Certifications, and Achievements are present.
+- **Keyword Optimization**: Analyzes the resume text for relevant keywords (customizable) to ensure alignment with job descriptions.
+- **Grammar Check**: Identifies potential grammar issues such as sentence fragments and missing punctuation.
+
+## Installation
+
+### Prerequisites
+
+Before running the script, you need to have the following installed:
+
+- Python 3.x
+- The required Python libraries:
+  - `PyPDF2` (for PDF text extraction)
+  - `nltk` (for text analysis and grammar checks)
+
+You can install the necessary libraries using pip:
+
+```bash
+pip install PyPDF2 nltk

diff --git a/automated_resume_analyzer/automated_resume_analyzer.py b/automated_resume_analyzer/automated_resume_analyzer.py
new file mode 100644
index 000000000..1001b8f18
--- /dev/null
+++ b/automated_resume_analyzer/automated_resume_analyzer.py
@@ -0,0 +1,106 @@
+import PyPDF2
+import nltk
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize, sent_tokenize
+
+# Tokenizer / stopword data: both downloads are no-ops once cached locally.
+nltk.download('punkt')
+nltk.download('stopwords')
+
+
+# Function to extract text from a PDF file
+def extract_text_from_pdf(pdf_path):
+    """Return the concatenated text of every page of the PDF at *pdf_path*."""
+    text = ""
+    with open(pdf_path, 'rb') as file:
+        reader = PyPDF2.PdfReader(file)
+        for page in reader.pages:
+            # extract_text() may return None (e.g. image-only pages);
+            # guard so we never do str += None.
+            text += page.extract_text() or ""
+    return text
+
+
+# Function to analyze the structure of the resume
+def check_structure(text):
+    """Return a message for each expected resume section missing from *text*."""
+    sections = [
+        'Education',
+        'Experience',
+        'Skills',
+        'Certifications',
+        'Achievements'
+    ]
+    lowered = text.lower()
+    return [f"Missing section: {s}" for s in sections if s.lower() not in lowered]
+
+
+# Function to check keyword optimization in the resume
+def keyword_optimization(text, keywords):
+    """Return the keywords found in *text* (case-insensitive, no duplicates).
+
+    Multi-word keywords such as 'machine learning' are matched as phrases;
+    matching individual word tokens alone could never find them.
+    """
+    lowered = text.lower()
+    tokens = set(word_tokenize(lowered))
+    found = []
+    for keyword in keywords:
+        kw = keyword.lower()
+        if (' ' in kw and kw in lowered) or kw in tokens:
+            found.append(keyword)
+    return found
+
+
+# Function to check for grammar issues
+def grammar_check(text):
+    """Return possible grammar problems: sentence fragments and missing punctuation."""
+    grammar_issues = []
+    stop_words = set(stopwords.words('english'))
+    for sentence in sent_tokenize(text):
+        words = word_tokenize(sentence)
+        filtered_sentence = [w for w in words if w.lower() not in stop_words]
+        # Heuristic: very little non-stopword content suggests a fragment.
+        if len(filtered_sentence) < 3:
+            grammar_issues.append(f"Possible fragment: {sentence}")
+        if not sentence.strip().endswith(('.', '!', '?')):
+            grammar_issues.append(f"Missing punctuation: {sentence}")
+    return grammar_issues
+
+
+# Main function to run the resume analyzer
+def analyze_resume(pdf_path, keywords):
+    """Run structure, keyword and grammar checks on *pdf_path* and print a report."""
+    text = extract_text_from_pdf(pdf_path)
+
+    print("Analyzing structure...")
+    structure_issues = check_structure(text)
+    if structure_issues:
+        print("Structure Issues Found:")
+        for issue in structure_issues:
+            print(f"- {issue}")
+    else:
+        print("Structure looks good.")
+
+    print("\nAnalyzing keyword optimization...")
+    found_keywords = keyword_optimization(text, keywords)
+    print(f"Keywords found: {', '.join(found_keywords)}")
+
+    print("\nAnalyzing grammar...")
+    grammar_issues = grammar_check(text)
+    if grammar_issues:
+        print("Grammar Issues Found:")
+        for issue in grammar_issues:
+            print(f"- {issue}")
+    else:
+        print("No major grammar issues found.")
+
+    print("\nAnalysis complete.")
+
+
+if __name__ == "__main__":
+    # Keywords to check for in the resume (can be customized)
+    resume_keywords = ['python', 'machine learning', 'data analysis', 'sql']
+
+    # Example usage
+    resume_path = 'your_resume.pdf'  # Replace with the actual file path
+    analyze_resume(resume_path, resume_keywords)

From f23fa56178667885f56585472de85e7a72269751 Mon Sep 17 00:00:00 2001
From: Rupesh-Singh-Karki
Date: Sun, 13 Oct 2024 12:41:04 +0530
Subject: [PATCH 2/2] Added requirements.txt to automated_resume_analyzer

---
 automated_resume_analyzer/requirements.txt | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 automated_resume_analyzer/requirements.txt

diff --git a/automated_resume_analyzer/requirements.txt b/automated_resume_analyzer/requirements.txt
new file mode 100644
index 000000000..180df8332
--- /dev/null
+++ b/automated_resume_analyzer/requirements.txt
@@ -0,0 +1,2 @@
+PyPDF2==3.0.1
+nltk==3.8.1