How do I fix the error `'WebDriver' object has no attribute 'find_element_by_css_selector'` in the code below so that this job scraper works again? What code do I need to add or overwrite so that the error goes away? (Note: the error comes from Selenium 4 removing the `find_element_by_*` methods; the failing calls are inside `IndeedScraper`/`LinkedInScraper`, not in the `main.py` shown here — they need to be replaced with `driver.find_element(By.CSS_SELECTOR, ...)`.)
-- main.py --
from selenium.webdriver.support.ui import WebDriverWait
import json
import os
import sys
import threading
import pandas as pd
from IndeedScraper import IndeedScraper
from LinkedInScraper import LinkedInScraper
from ScraperUtil import ScraperUtil
def run_search(json_file_name):
    """Collects job listings that match the parameters stored in the provided json file.

    json_file_name: name of a .json config file inside the 'Search Configs' directory.
    Results are appended to an Excel file named after the config file.
    """
    # Attempt to load the json file. If it isn't successful, report why and stop.
    try:
        search_keywords, location, ignore_keywords, experience = load_json("./Search Configs/" + json_file_name)
    except ValueError as e:
        # load_json wraps any failure in ValueError; surface it instead of failing silently.
        print(e)
        return
    # Store just the file name, not the file extension. splitext (unlike split('.'))
    # is safe for file names that contain extra dots.
    file_name = os.path.splitext(json_file_name)[0]
    all_dataFrames = [scrape_indeed(search_keywords, location, ignore_keywords, experience),
                      scrape_linkedin(search_keywords, location, ignore_keywords, experience)]
    # Stores the search results within an Excel file.
    store_in_excel_file(file_name, all_dataFrames)
def load_json(json_file_name):
    """Load search parameters from a json config file.

    Returns a tuple (search_keywords, location, ignore_keywords, experience),
    with experience lower-cased.
    Raises ValueError if the file cannot be read or a required key is missing.
    """
    try:
        with open(json_file_name, "r") as jsonfile:
            config = json.load(jsonfile)
        # Save all search parameters as variables.
        search_keywords = config["search_keywords"]
        location = config["location"]
        ignore_keywords = config['ignore_keywords']
        experience = str(config['experience'].lower())
        # Warn the user if they haven't provided a valid experience parameter.
        if experience not in ["junior", "mid", "senior"]:
            print("Warning: Experience value in", json_file_name,
                  "is invalid. Please choose either 'Junior', 'Mid', "
                  "or 'Senior'. Jobs of all experience levels will be included in this search.")
        # Print a summary of the search parameters.
        print("Read config successfully.")
        print("search_keywords=", search_keywords)
        print("location=", location)
        print("ignore_keywords=", ignore_keywords)
        print("experience=", experience)
        return search_keywords, location, ignore_keywords, experience
    except Exception as e:
        # Chain the original error so the root cause stays visible, and build
        # a single readable message instead of a multi-arg ValueError tuple.
        raise ValueError("Error, could not load " + json_file_name + ": " + str(e)) from e
def scrape_indeed(search_keywords, location, ignore_keywords, experience):
    """Instantiates and calls scrape() method on an IndeedScraper object.

    Returns the dataFrame stored in the object once the search is complete,
    or an empty dataFrame if the scrape fails for any reason.
    """
    indeed = IndeedScraper()
    try:
        indeed.scrape(search_keywords, location, ignore_keywords, experience)
        print(indeed.data.shape[0], "jobs loaded from Indeed.")
        return indeed.data
    except Exception as e:
        # Best-effort: a failed source should not abort the whole search.
        print("Error loading jobs from Indeed: " + str(e))
        return ScraperUtil.construct_dataframe([])  # Return an empty dataFrame.
def scrape_linkedin(search_keywords, location, ignore_keywords, experience):
    """Instantiates and calls scrape() method on a LinkedInScraper object.

    Returns the dataFrame stored in the object once the search is complete,
    or an empty dataFrame if the scrape fails for any reason.
    """
    linkedin = LinkedInScraper()
    try:
        linkedin.scrape(search_keywords, location, ignore_keywords, experience)
        print(linkedin.data.shape[0], "jobs loaded from LinkedIn.")
        return linkedin.data
    except Exception as e:
        # Best-effort: a failed source should not abort the whole search.
        print("Error loading jobs from LinkedIn: " + str(e))
        return ScraperUtil.construct_dataframe([])  # Return an empty dataFrame.
def store_in_excel_file(file_name, all_dataFrames):
    """Stores all job listings in an Excel file.

    If '<file_name>.xlsx' exists, new listings are merged into it; otherwise a
    new Excel file is created. Duplicate rows (same Title, Company, Source and
    Date Posted) are dropped, keeping the most recently added copy.
    """
    master_dataFrame = ScraperUtil.construct_dataframe([])
    try:
        master_dataFrame = pd.read_excel(file_name + '.xlsx')
    except FileNotFoundError:
        # Only a missing file is expected; any other read error should surface.
        print(file_name + ".xlsx doesn't exist yet. Creating new file.")
    all_dataFrames.append(master_dataFrame)
    new_dataFrame = pd.concat(all_dataFrames)
    length_before = new_dataFrame.shape[0]
    new_dataFrame.drop_duplicates(keep='last', subset=['Title', 'Company', 'Source', 'Date Posted'], inplace=True)
    length_after = new_dataFrame.shape[0]
    total_duplicates = length_before - length_after
    print("Total duplicates dropped:", total_duplicates)
    new_dataFrame.to_excel(file_name + '.xlsx', index=False)
if __name__ == "__main__":
    # Launch one search thread per json config file found in 'Search Configs'.
    all_threads = []
    for entry in os.scandir(path="./Search Configs"):
        # endswith is safe for extensionless names and names with extra dots,
        # unlike split('.')[1] which raises IndexError / matches '.json.bak'.
        if entry.name.endswith('.json'):
            all_threads.append(threading.Thread(target=run_search, args=(entry.name,)))
    if len(all_threads) == 0:
        print("No json files found in 'Search Configs' directory. No search will be made.")
    else:
        for thread in all_threads:
            thread.start()
        # Wait for every search to finish before exiting.
        for thread in all_threads:
            thread.join()