Skip to content

Commit

Permalink
Merge pull request #2 from jasminhusadzic/main
Browse files: browse the repository at this point in the history
Update selenium steps
  • Loading branch information
sadovsd authored Apr 16, 2024
2 parents f1e44f6 + 4b8433d commit 9c87e7f
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 8 deletions.
1 change: 1 addition & 0 deletions .github/workflows/feature_pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ jobs:
- name: Execute Python workflows from bash script
env:
HOPSWORKS_API_KEY: ${{ secrets.HOPSWORKS_API_KEY }}
HOPSWORKS_PROJECT_NAME: ${{ secrets.HOPSWORKS_PROJECT_NAME }}
run: jupyter nbconvert --to notebook --execute notebooks/04_feature_pipeline.ipynb
44 changes: 36 additions & 8 deletions src/load_transform_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def get_new_ethereum_ohlc():
chrome_options.add_argument("--headless") # Run Chrome in headless mode
chrome_options.add_argument("--no-sandbox") # Bypass OS security model
chrome_options.add_argument("--disable-dev-shm-usage") # Overcome limited resource problems
chrome_options.add_argument("--window-size=1920,1080")
chrome_options.add_experimental_option("prefs", {"download.default_directory": str(NEW_OHLC)})

# Set the path to Chromedriver
Expand All @@ -36,10 +37,25 @@ def get_new_ethereum_ohlc():

try:
driver.get('https://www.coinlore.com/coin/ethereum/historical-data')
WebDriverWait(driver, 10).until(
# Wait for the download button to be present in the DOM
download_button = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.XPATH, '//button[@onclick="tableToCSV()"]'))
)

# Scroll the download button into view using JavaScript
driver.execute_script("arguments[0].scrollIntoView();", download_button)

# Wait for the download button to be clickable
download_button = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//button[@onclick="tableToCSV()"]'))
).click()
time.sleep(5) # Wait for the download to complete
)

# Click the download button
download_button.click()

# Make sure to wait for download
time.sleep(15)


# Locate the downloaded file
list_of_files = glob.glob(os.path.join(NEW_OHLC, '*.csv')) # * means all if need specific format then *.csv
Expand Down Expand Up @@ -81,19 +97,31 @@ def get_new_ethereum_ohlc1():
chrome_options.add_experimental_option("prefs", prefs)
chrome_options.add_argument("--headless") # Essential for GitHub Actions
chrome_options.add_argument("--no-sandbox") # Bypass OS security model
chrome_options.add_argument("--window-size=1920,1080")
chrome_options.add_argument("--disable-dev-shm-usage") # Overcome limited resource problems
driver = webdriver.Chrome(options=chrome_options)

try:
driver.get('https://www.coinlore.com/coin/ethereum/historical-data')

# Wait for the download button to be clickable and click it
WebDriverWait(driver, 10).until(
# Wait for the download button to be present in the DOM
download_button = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.XPATH, '//button[@onclick="tableToCSV()"]'))
)

# Scroll the download button into view using JavaScript
driver.execute_script("arguments[0].scrollIntoView();", download_button)

# Wait for the download button to be clickable
download_button = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//button[@onclick="tableToCSV()"]'))
).click()
)

# Click the download button
download_button.click()

# Allow time for the download to complete
time.sleep(5)
# Make sure to wait for download
time.sleep(15)

# Locate the downloaded file
list_of_files = glob.glob(os.path.join(NEW_OHLC, '*.csv')) # * means all if need specific format then *.csv
Expand Down

0 comments on commit 9c87e7f

Please sign in to comment.