# This paste expires on 2023-04-04 03:23:56.584971. Repaste, or download this paste. Pasted through web.

import hashlib
import logging
import os
import shutil
import time
import urllib.parse
import urllib.request
from datetime import datetime

from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Scraper settings. Kept at module level so they are defined even when the
# page yields no posts (the download directory used to be assigned only inside
# the download loop).
URL = "https://9gag.com/tag/funny/fresh"
DOWNLOAD_DIR = "z:\\gag"
NUM_OF_SCROLLS = 3

# <img> elements whose inline style declares a height above 320px and whose
# src does not mention ".webm".
IMAGE_XPATH = (
    "//img[not(contains(@src, '.webm')) and contains(@style, 'height:') "
    "and substring-before(substring-after(@style, 'height:'), 'px') > 320]"
)
# <video> elements with a height attribute above 320 and no ".webm" src.
# This is XPath syntax, so it must be queried with By.XPATH, not as a CSS
# selector.
VIDEO_XPATH = "//video[not(contains(@src, '.webm')) and @height > 320]"


def media_name(post_url: str) -> str:
    """Return the file name (last path segment) that a media URL points at.

    The query string and fragment are dropped so they cannot leak into the
    name or the extension. Returns "" when the URL path ends with a slash.
    """
    return urllib.parse.urlsplit(post_url).path.rsplit("/", 1)[-1]


def media_extension(name: str) -> str:
    """Return the lower-cased extension of *name* without the dot, or ""."""
    _, dot, extension = name.rpartition(".")
    return extension.lower() if dot else ""


def build_filename(name: str, now=None) -> str:
    """Return the local file name for *name*: ``<yymmddHHMMSS>_<name>``.

    *now* is the datetime used for the prefix; it defaults to the current
    local time.
    """
    if now is None:
        now = datetime.now()
    return f"{now.strftime('%y%m%d%H%M%S')}_{name}"


def downloaded_name(filename: str) -> str:
    """Return the media name stored in a file name made by build_filename.

    The timestamp prefix contains only digits, so everything after the first
    underscore is the original media name. Returns "" for names without an
    underscore.
    """
    return filename.partition("_")[2]


def existing_media_names(download_dir: str) -> set:
    """Return the media names of all regular files already in *download_dir*."""
    return {
        downloaded_name(entry)
        for entry in os.listdir(download_dir)
        if os.path.isfile(os.path.join(download_dir, entry))
    }


def download_posts(posts, download_dir: str) -> int:
    """Download every new, non-webm post into *download_dir*.

    *posts* are Selenium web elements (anything with ``get_attribute``).
    A post is skipped when it exposes no http(s) media URL, when it is a
    .webm file, or when a file with the same media name is already present.
    A failed download is logged and does not abort the remaining posts.

    Returns the number of files downloaded.
    """
    os.makedirs(download_dir, exist_ok=True)
    seen = existing_media_names(download_dir)
    downloaded = 0
    for post in posts:
        # <video> elements usually carry their URL on child <source> tags, so
        # fall back to the currentSrc property when src is empty.
        post_url = post.get_attribute("src") or post.get_attribute("currentSrc")
        if not post_url:
            logging.warning("Post has no media URL, skipping")
            continue
        if urllib.parse.urlsplit(post_url).scheme not in ("http", "https"):
            logging.warning("Skipping non-http media source")
            continue
        name = media_name(post_url)
        if not name:
            logging.warning(f"Cannot derive a file name from: {post_url}")
            continue
        if media_extension(name) == "webm":
            continue  # Skip video files in webm format
        if name in seen:
            logging.warning(f"Post already downloaded: {post_url}")
            continue
        print(f"Downloading {post_url}")
        filepath = os.path.join(download_dir, build_filename(name))
        try:
            # urlretrieve blocks until the file is fully written, so no
            # separate "wait for downloads" step is needed afterwards.
            urllib.request.urlretrieve(post_url, filepath)
        except OSError as error:  # URLError/HTTPError are OSError subclasses
            logging.error("Failed to download %s: %s", post_url, error)
            continue
        seen.add(name)  # also de-duplicates repeats within the same page
        downloaded += 1
        logging.info(f"Downloaded post: {filepath}")
    return downloaded


def main():
    """Scrape the 9gag page at URL and download its large images and videos.

    Any error is logged (with traceback) to download.log in the current
    working directory instead of being raised; the browser is always closed.
    """
    driver = None
    try:
        # Set up logging
        log_file = os.path.join(os.getcwd(), 'download.log')
        logging.basicConfig(filename=log_file, level=logging.DEBUG, format='%(asctime)s %(levelname)s %(message)s')
        # Set up the Chrome driver (add '--headless' to run without a window)
        options = webdriver.ChromeOptions()
        options.add_argument('--ignore-certificate-errors')
        options.add_argument('--ignore-ssl-errors')
        driver = webdriver.Chrome(options=options)
        driver.get(URL)
        # Give the page time to load
        time.sleep(5)
        # Scroll down several pages to load more posts
        for _ in range(NUM_OF_SCROLLS):
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)
        # Give the lazily loaded media time to appear
        time.sleep(5)
        images = driver.find_elements(By.XPATH, IMAGE_XPATH)
        videos = driver.find_elements(By.XPATH, VIDEO_XPATH)
        posts = images + videos
        if posts:
            download_posts(posts, DOWNLOAD_DIR)
        else:
            logging.warning("No suitable posts found")
        logging.info("Script finished")
    except Exception as e:
        # Top-level boundary: keep the traceback in the log file.
        logging.exception(f"An error occurred: {e}")
    finally:
        # Always release the browser and its driver process.
        if driver is not None:
            driver.quit()


if __name__ == "__main__":
    main()
# Filename: 9gag attempt. Size: 3kb. View raw, hex, or download this file.