In [1]:
import selenium
import time
import datetime
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from bs4 import BeautifulSoup

from IPython.display import display, Image, HTML

from jupyter_progressbar import ProgressBar
import json
In [3]:
def remove_kickstarter_url_prefix(url, prefix=''):
    """Return ``url`` with a leading ``prefix`` removed, if it has one.

    Args:
        url: The URL string to shorten.
        prefix: The leading text to strip. Defaults to the empty string,
            which is the literal used by the original code and leaves
            ``url`` unchanged.

    Returns:
        ``url`` without ``prefix`` when ``url`` starts with it, otherwise
        ``url`` as given.

    NOTE(review): with the empty default this call is a no-op; presumably
    the site origin was meant to be the prefix -- confirm and pass it (or
    change the default) so absolute hrefs become site-relative paths.
    """
    # An empty prefix matches every string and strips nothing, so skip it.
    if prefix and url.startswith(prefix):
        return url[len(prefix):]
    return url
In [7]:
# Start a Chrome browser session.
driver = webdriver.Chrome()

# Base URL that the relative discover paths are appended to later on.
# NOTE(review): this is an empty string in the source, and no page is loaded
# before the anchors are collected below -- confirm the intended site root
# and whether a `driver.get(root)` call is missing here.
root = ''

# Distinct "/discover/..." paths linked from the page currently loaded.
discover_links = {
    link
    for anchor in driver.find_elements_by_tag_name('a')
    for href in [anchor.get_attribute('href')]
    if href  # anchors without an href give None; skip them
    for link in [remove_kickstarter_url_prefix(href)]
    if link.startswith("/discover/")
}

In [26]:
# NOTE(review): this `except` clause has no matching `try:` and no handler
# body in this export, so the cell cannot run as written -- restore the
# missing lines before re-running.
except Exception as e:

# Open a new Chrome session and rebind `driver` to it.
driver = webdriver.Chrome()
In [27]:
# Scraped project records, keyed by project id.
projects = {}

class get_all_projects:
    """Iterable over the project records listed on the page loaded in ``driver``.

    Iterating clicks the page's ``.load_more > a`` link repeatedly and yields
    the JSON-decoded ``data-project`` attribute of every listed element, each
    project id at most once. ``len()`` returns the total parsed from the
    page's "N projects" counter.

    The injected scripts use ``$``, so the page is assumed to expose jQuery.
    """

    def __init__(self, driver):
        """Remember ``driver`` and read the advertised project total.

        Args:
            driver: A Selenium WebDriver already showing a listing page.

        Raises:
            StopIteration: If no element with class ``count`` has text
                ending in ``' projects'``.
        """
        self.driver = driver
        # Despite the name, this is the advertised number of *projects*; the
        # attribute name is kept so existing references keep working.
        self.total_comments = next(
            int(element.text.replace(' projects', '').replace(',', ''))
            for element in driver.find_elements_by_class_name('count')
            if element.text.endswith(' projects')
        )

    def __iter__(self):
        """Yield each project dict once while paging through the listing."""
        # Use this instance's driver rather than a module-level global.
        driver = self.driver
        done = set()
        driver.execute_script("$('.load_more > a').click()")
        n_wait = 0
        # NOTE(review): there is no delay between polls, so the `n_wait > 5`
        # fallback triggers after six back-to-back script calls -- confirm
        # whether a short sleep was intended here.
        while driver.execute_script("return $('.load_more > a').length") > 0:
            n_wait += 1
            n_projects = driver.execute_script("return $('*[data-project]').length")
            if n_projects > 0 or n_wait > 5:
                # Request the next batch first, then harvest what is present.
                driver.execute_script("$('.load_more > a').click()")
                yield from self._harvest(done)
                n_wait = 0
        # The link is gone: collect whatever the final batch left on the page
        # (also covers pages that never had a "load more" link).
        yield from self._harvest(done)

    def _harvest(self, done):
        """Yield every not-yet-seen project in the DOM, removing its markup."""
        driver = self.driver
        for item in driver.find_elements_by_css_selector('*[data-project]'):
            project = json.loads(item.get_attribute('data-project'))
            if project['id'] not in done:
                # Record the id so a node that survives removal is not re-yielded.
                done.add(project['id'])
                driver.execute_script('$("*[data-project_pid=%d]").parent().remove()' % project['id'])
                yield project

    def __len__(self):
        """Return the total parsed from the page counter in ``__init__``."""
        return self.total_comments
# Visit every discover listing and store each scraped project under its id.
# Records are stored as they arrive, so a failure part-way keeps what was
# already collected; a project seen on several listings is overwritten.
for discover_link in discover_links:
    driver.get(root + discover_link)
    # get_all_projects defines __len__, presumably so ProgressBar can show a total.
    for project in ProgressBar(get_all_projects(driver)):
        projects[project['id']] = project

In [121]:
