-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathjob_search.py
More file actions
225 lines (191 loc) · 7.7 KB
/
job_search.py
File metadata and controls
225 lines (191 loc) · 7.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
from jobspy import scrape_jobs
import pandas as pd
from datetime import datetime
import os
import hashlib
import json
from pathlib import Path
import requests
def get_base_directory():
    """Return the results directory inside the user's home folder, creating it if needed.

    Returns:
        str: absolute path to "<home>/Job Search Results".
    """
    # pathlib replaces the os.path string plumbing; Path is already imported
    # at the top of the file. Callers join onto this with os.path, so keep
    # the return type a plain string.
    base = Path.home() / "Job Search Results"
    base.mkdir(parents=True, exist_ok=True)
    return str(base)
def save_description(description, descriptions_dir):
    """Save a job description to its own text file.

    Args:
        description: the description text (may be NaN/None or non-string).
        descriptions_dir: existing directory to write the file into.

    Returns:
        str | None: a content-derived ID (first 10 hex chars of the MD5 of
        the text) that is also the filename stem, or None when the
        description is missing or blank.
    """
    # Guard NaN/None first. Bug fix: coerce to str *before* calling
    # .strip() — the original called description.strip() directly, which
    # raised AttributeError for non-string values (e.g. floats from pandas).
    if pd.isna(description):
        return None
    text = str(description)
    if not text.strip():
        return None
    desc_id = hashlib.md5(text.encode()).hexdigest()[:10]
    filepath = os.path.join(descriptions_dir, f"{desc_id}.txt")
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(text)
    return desc_id
def format_salary(row):
    """Format min/max salary fields into a human-readable string.

    Args:
        row: mapping or pandas Series with optional keys min_amount,
            max_amount, currency, interval.

    Returns:
        str: e.g. "50000 - 70000 CAD yearly", or 'Not specified' when no
        minimum is present or any error occurs.
    """
    try:
        # No minimum amount means there is no usable salary information.
        if pd.isna(row.get('min_amount')):
            return 'Not specified'
        # min_amount is guaranteed present past the guard above.
        min_amt = str(row['min_amount'])
        max_amt = str(row['max_amount']) if pd.notna(row.get('max_amount')) else ''
        # Default to CAD since the tool targets Toronto searches.
        currency = str(row['currency']) if pd.notna(row.get('currency')) else 'CAD'
        interval = str(row['interval']) if pd.notna(row.get('interval')) else 'yearly'
        if max_amt:
            return f"{min_amt} - {max_amt} {currency} {interval}"
        return f"{min_amt} {currency} {interval}"
    # Bug fix: narrow the bare `except:` (which also swallowed
    # KeyboardInterrupt/SystemExit) to Exception. A malformed row still
    # degrades gracefully instead of breaking the whole result set.
    except Exception:
        return 'Not specified'
def export_to_sheets(jobs_data, authtoken):
    """POST job records to the Google Sheets export endpoint.

    Args:
        jobs_data: list of dict records (one per job); values may be NaN
            or numpy scalars straight out of a DataFrame.
        authtoken: bearer token for the endpoint.

    Returns:
        dict: the endpoint's JSON response on success, otherwise
        {"success": False, "error": "..."} for any network/HTTP/parse error.
    """
    try:
        headers = {
            "Authorization": f"Bearer {authtoken}",
            "Content-Type": "application/json"
        }
        # Stringify every value: raw DataFrame cells may be NaN or numpy
        # types, which json can't serialize.
        safe_jobs_data = []
        for job in jobs_data:
            safe_job = {}
            for key, value in job.items():
                safe_job[key] = '' if pd.isna(value) else str(value)
            safe_jobs_data.append(safe_job)
        data = {
            "jobs": safe_jobs_data
        }
        # Bug fix: the original post had no timeout, so a stalled endpoint
        # would hang the whole search indefinitely.
        response = requests.post(
            "https://scripty.me/api/assistant/sheets/job-search",
            headers=headers,
            json=data,
            timeout=30,
        )
        response.raise_for_status()
        return response.json()
    except Exception as e:
        # Callers only inspect the dict; never let an export failure raise.
        return {"success": False, "error": str(e)}
def search_jobs(job_title, location, authtoken=None):
    """Scrape jobs from multiple boards, save them to CSV, and optionally
    export them to Google Sheets.

    Args:
        job_title: title to search for (quoted internally for exact match).
        location: e.g. "Toronto, ON"; the region code after the comma
            decides which Indeed country site is queried.
        authtoken: bearer token for the Sheets export. Defaults to None,
            which skips the export step (backward-compatible addition —
            see bug-fix note below).

    Returns:
        dict: {"success": True, "jobs_found": ..., "save_location": ...,
        "results": [...], "sheets_export": {...}} or
        {"success": False, "error": "..."} on any failure.
    """
    try:
        timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
        base_dir = get_base_directory()
        search_dir = os.path.join(base_dir, f"search_{timestamp}")
        os.makedirs(search_dir, exist_ok=True)
        city = location.split(',')[0].strip()
        # Bug fix: the old substring test ('"ON" in location.upper()')
        # wrongly classified e.g. "Washington, DC" or "London, UK" as
        # Canada. Compare the region code after the comma against the
        # Canadian province/territory codes instead.
        region = location.split(',')[-1].strip().upper() if ',' in location else ''
        canadian_regions = {"ON", "QC", "BC", "AB", "MB", "SK", "NS", "NB",
                            "NL", "PE", "YT", "NT", "NU", "CANADA"}
        country = "Canada" if region in canadian_regions else "USA"
        search_term = f'"{job_title}"'  # quoted for exact-title matching
        google_search_term = f"{job_title} jobs in {city}"
        print(f"Searching for {job_title} jobs in {location}...")
        jobs = scrape_jobs(
            site_name=["indeed", "linkedin", "zip_recruiter", "glassdoor", "google"],
            search_term=search_term,
            location=location,
            google_search_term=google_search_term,
            country_indeed=country,
            results_wanted=100,
            hours_old=72,
            description_format="markdown",
            fetch_full_description=True,
            return_as_df=True,
            delay=[2, 5],
            random_headers=True
        )
        # Round-trip through records so numpy arrays become plain Python values.
        jobs_records = jobs.to_dict('records')
        filtered_jobs = pd.DataFrame(jobs_records)
        # Normalize date_posted to YYYY-MM-DD strings (input may be datetime or str).
        if 'date_posted' in filtered_jobs.columns:
            filtered_jobs['date_posted'] = pd.to_datetime(filtered_jobs['date_posted']).dt.strftime('%Y-%m-%d')
        filtered_jobs['salary'] = filtered_jobs.apply(format_salary, axis=1)
        filtered_jobs['status'] = 'Not Applied'
        relevant_columns = [
            'title', 'company', 'location', 'date_posted', 'job_type',
            'is_remote', 'company_industry', 'job_url', 'salary', 'status'
        ]
        existing_columns = [col for col in relevant_columns if col in filtered_jobs.columns]
        filtered_jobs = filtered_jobs[existing_columns]
        filtered_jobs = filtered_jobs.drop_duplicates(
            subset=['title', 'company', 'job_url'],
            keep='first'
        )
        if 'date_posted' in filtered_jobs.columns:
            filtered_jobs = filtered_jobs.sort_values('date_posted', ascending=False)
        csv_path = os.path.join(search_dir, "jobs.csv")
        filtered_jobs.to_csv(csv_path, index=False)
        metadata = {
            'timestamp': timestamp,
            'total_jobs': len(filtered_jobs),
            'search_term': search_term,
            'location': location,
            'date_range': '3 days'  # mirrors hours_old=72 above
        }
        with open(os.path.join(search_dir, "metadata.json"), 'w') as f:
            json.dump(metadata, f, indent=2)
        results = filtered_jobs.to_dict('records')
        # Bug fix: the original referenced an undefined global `authtoken`
        # here, so every call raised NameError (silently swallowed by the
        # broad except below). Export only when a token was provided.
        if authtoken:
            sheets_result = export_to_sheets(results, authtoken)
        else:
            sheets_result = {"success": False, "error": "no auth token provided"}
        sheets_status = ("Successfully exported to Google Sheets"
                        if sheets_result.get("success", False)
                        else "Failed to export to Google Sheets")
        print(f"\nFound {len(filtered_jobs)} jobs")
        print(f"Results saved to: {csv_path}")
        print(sheets_status)
        result = {
            "success": True,
            "jobs_found": len(filtered_jobs),
            "save_location": search_dir,
            "results": results,
            "sheets_export": sheets_result
        }
        return result
    except Exception as e:
        # Surface any scraping/IO failure as a structured error payload.
        return {
            "success": False,
            "error": str(e)
        }
# API definition
# NOTE(review): the name `object` shadows the Python builtin — presumably the
# hosting framework discovers the tool schema by this exact name; confirm
# before renaming. The schema follows JSON-Schema-style function-calling
# conventions.
object = {
    "name": "job_search",
    "description": "Search for jobs across multiple platforms and save results to CSV and Google Sheets",
    "parameters": {
        "type": "object",
        "properties": {
            "job_title": {
                "type": "string",
                "description": "Job title to search for (e.g., 'software engineer', 'data scientist')"
            },
            "location": {
                "type": "string",
                "description": "Location to search in (e.g., 'Toronto, ON', 'San Francisco, CA')"
            },
            # NOTE(review): declared in the schema but never read by the
            # handler `func` below — verify whether callers expect it to work.
            "export_to_sheets": {
                "type": "boolean",
                "description": "Whether to export results to Google Sheets (requires Google authentication)",
                "default": True
            }
        },
        "required": ["job_title", "location"]
    }
}
async def func(args):
    """Async API handler: validate args, run the search, return a JSON string.

    Args:
        args: dict of request parameters; requires non-empty "job_title"
            and "location".

    Returns:
        str: JSON-encoded result dict ({"success": ..., ...}).
    """
    try:
        # Reject missing/empty required fields before doing any work.
        required = (("job_title", "Job title"), ("location", "Location"))
        for field, label in required:
            if not args.get(field):
                return json.dumps({
                    "success": False,
                    "error": f"{label} is required"
                })
        return json.dumps(search_jobs(args["job_title"], args["location"]))
    except Exception as e:
        # Handlers must always return JSON, never raise.
        return json.dumps({
            "success": False,
            "error": str(e)
        })