Scraping Vocabulary using Selenium and parsing to DataFrame (tags: selenium)

Scraping Vocabulary using Selenium and parsing to DataFrame


You can split the scraped text of each page on newlines and add every word as its own row in the DataFrame column:

"""Scrape the Kategoria:Shqip word listing from sq.m.wiktionary.org.

Walks the paginated category listing with Selenium, collects one word per
row into a single-column DataFrame, prints it, and writes it to
``word_list.csv``.
"""
import os

import pandas as pd
import selenium.common.exceptions
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Upper bound on the number of listing pages to visit.
MAX_PAGES = 200
# Seconds to wait for an element before giving up.
WAIT_SECONDS = 10

chrome_options = webdriver.ChromeOptions()
# Chrome expects "width,height"; the "1920x1080" form is not a valid size.
chrome_options.add_argument("--window-size=1920,1080")
# Uncomment to run without opening a browser window.
# chrome_options.add_argument("--headless")
chrome_driver = os.getcwd() + "\\chromedriver.exe"
# NOTE(review): `executable_path` was removed in Selenium 4.10+. On a newer
# Selenium, pass `service=Service(chrome_driver)` instead (with
# `from selenium.webdriver.chrome.service import Service`).
driver = webdriver.Chrome(options=chrome_options, executable_path=chrome_driver)

# Accumulate words in a plain list and build the DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the frame every call.
words = []

try:
    driver.get("https://sq.m.wiktionary.org/w/index.php?title=Kategoria:Shqip&pagefrom=agall%C3%ABk#mw-pages")

    for _ in range(MAX_PAGES):
        listing = WebDriverWait(driver, WAIT_SECONDS).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "#mw-pages > div > div > div > ul"))
        )
        # The element text holds one word per line; skip blank lines so an
        # empty listing does not produce an empty row.
        words.extend(line for line in listing.text.splitlines() if line)

        try:
            next_link = WebDriverWait(driver, WAIT_SECONDS).until(
                EC.presence_of_element_located((By.LINK_TEXT, "faqja pasardhëse"))
            )
        except selenium.common.exceptions.TimeoutException:
            # No "next page" link appeared in time: treat this as the last page.
            break
        next_link.click()
finally:
    # Always close the browser, even if scraping fails part-way through.
    driver.quit()

# Define the dataframe
df = pd.DataFrame({'rating': words})

print(df)
df.to_csv('word_list.csv', encoding='utf-8', index=False)

Outputs

      rating
0    agallëk
1       agar
2      agave
3       agde
4     ageshë
..       ...
595    ankim
596  ankimor
597  ankohem
598    ankoj
599   ankojë

[600 rows x 1 columns]

Edit

Added the option to write to a file.