NoneType error during multi-page scrape

I'm working on a web scraper and am close to getting what I need, but I can't figure out why I'm getting a NoneType error all of a sudden after it finishes scraping the fourth page (of 204). Here's my code:


import gc
import os
import re
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.support.ui import Select

script_path = os.path.dirname(os.path.realpath(__file__))

driver = webdriver.PhantomJS(executable_path="/usr/local/bin/bin/phantomjs",
                             service_args=['--ignore-ssl-errors=true', '--ssl-protocol=any'])

case_list = []

# this function launches the headless browser and gets us to the first page of
# results, which we'll scrape using main
def search():
    driver.get('https://www.courts.mo.gov/casenet/cases/nameSearch.do')

    if 'Service Unavailable' in driver.page_source:
        log('Casenet website seems to be down. Receiving "service unavailable"')
        driver.quit()
        gc.collect()
        return False

    time.sleep(2)

    # fill in the search form and submit it
    court = Select(driver.find_element_by_id('courtId'))
    court.select_by_visible_text('All Participating Courts')
    case_enter = driver.find_element_by_id('inputVO.lastName')
    case_enter.send_keys('Wakefield & Associates')
    year_enter = driver.find_element_by_id('inputVO.yearFiled')
    year_enter.send_keys('2018')
    driver.find_element_by_id('findButton').click()
    time.sleep(3)

# scrapes the results table and stores what we need in a list of lists
def main():
    parties = []
    dates = []
    case_nums = []

    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    table = soup.findAll('table', {'class': 'outerTable'})

    for row in table:
        col = row.find_all('td', attrs={'class': 'td1'})
        col2 = row.find_all('td', attrs={'class': 'td2'})
        all_links = soup.findAll('a')

        # party names live in the cells containing a 'V'
        for cols in col:
            if 'V' in cols.text:
                cols = cols.string
                cols.encode('utf-8').strip()
                cols = re.sub("xa0''", '', cols).strip()
                parties.append(cols)

        for cols in col2:
            if 'V' in cols.text:
                cols = cols.string
                cols.encode('utf-8').strip()
                cols = re.sub("xa0''", '', cols).strip()
                parties.append(cols)

        # case numbers are buried in the goToThisCase javascript links
        for link in all_links:
            raw_html = str(link)
            if 'goToThisCase' in raw_html:
                start = raw_html.find("('") + 2
                end = raw_html.find("',")
                case = raw_html[start:end].strip()
                case_nums.append(case)

        # filing dates are the cells containing '/2018'
        for i in col:
            if '/2018' in i.text:
                i = i.string
                i.encode('utf-8').strip()
                i = re.sub("xa0", '', i).strip()
                dates.append(i)

        for j in col2:
            if '/2018' in j.text:
                j = j.string
                j.encode('utf-8').strip()
                j = re.sub("xa0", '', j).strip()
                dates.append(j)

    case_list.append(parties)
    case_list.append(case_nums)
    case_list.append(dates)
    return case_list

def page_looper():
    # scrape page 1, then click through pages 2-8 by their link text
    main()
    count = '1'
    print "page %s fully scraped" % count
    count = str(int(count) + 1)
    print len(case_list), " cases so far"
    print case_list

    for count in range(2, 9):
        link = driver.find_element_by_link_text(str(count))
        link.click()
        time.sleep(2)
        main()
        print "page %s fully scraped" % count
        count = str(int(count) + 1)
        print len(case_list), " cases so far"
        print case_list

    # move on to the next block of ten pages and recurse
    next_page_link = driver.find_element_by_partial_link_text('Next')
    print "Next 10 pages found"
    next_page_link.click()
    time.sleep(2)

    try:
        page_looper()
    except Exception:
        print "no more cases"
        # pprint.pprint(case_list)
        # data = zip(case_list[0], case_list[1], case_list[2])
        # pprint.pprint(data)
        # with open(script_path + "/cases.csv", "w") as f:
        #     writer = csv.writer(f)
        #     for d in data:
        #         writer.writerow(d)

search()
page_looper()



After it finishes scraping the fourth page, it throws:


Traceback (most recent call last):
  File "wakefield.py", line 175, in <module>
    page_looper()
  File "wakefield.py", line 140, in page_looper
    main()
  File "wakefield.py", line 84, in main
    cols.encode('utf-8').strip()
AttributeError: 'NoneType' object has no attribute 'encode'



Any idea what gives?



I'm also unclear on how to turn my list of lists into a CSV at the end, with each case on its own row and columns for parties, case_num, and dates. Thanks in advance.
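For reference, the commented-out block at the bottom of my script is roughly what I have in mind; a minimal standalone sketch of that idea (with made-up values standing in for the scraped data, and assuming I can end up with three parallel lists of equal length) would be:

import csv

# made-up parallel lists standing in for the scraped results
parties = ['SMITH V WAKEFIELD & ASSOCIATES']
case_nums = ['1816-CV12345']
dates = ['01/15/2018']

rows = zip(parties, case_nums, dates)  # one (party, case_num, date) tuple per case

with open('cases.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerow(['parties', 'case_num', 'date'])  # header row
    for row in rows:
        writer.writerow(row)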




1 Answer


AttributeError: 'NoneType' object has no attribute 'encode'



means you are trying to invoke the encode method on a None object. To prevent this, you have to check that the object is not None before calling it.





Replace:


for cols in col:
    if 'V' in cols.text:
        cols = cols.string
        cols.encode('utf-8').strip()
        cols = re.sub("xa0''", '', cols).strip()
        parties.append(cols)



with:


for cols in col:
    if 'V' in cols.text:
        if cols.string:  # check if 'cols.string' is not 'None'
            cols = cols.string
            cols.encode('utf-8').strip()
            cols = re.sub("xa0''", '', cols).strip()
            parties.append(cols)
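
The reason cols.string can come back as None: BeautifulSoup sets .string to None whenever a tag contains more than one child (for example a link plus stray whitespace), whereas .get_text() still returns the combined text. A quick sketch of the difference, using a made-up pair of <td> snippets rather than the actual Casenet markup:

from bs4 import BeautifulSoup

# two made-up cells: one with plain text, one with nested markup
html = "<td class='td1'>SMITH V JONES</td><td class='td1'><a href='#'>SMITH V JONES</a>&nbsp;</td>"
soup = BeautifulSoup(html, 'html.parser')
plain, nested = soup.find_all('td')

print repr(plain.string)       # u'SMITH V JONES'
print repr(nested.string)      # None -- more than one child, so .string gives up
print repr(nested.get_text())  # u'SMITH V JONES\xa0' -- the text is still there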





I did, and then it threw: AttributeError: 'NavigableString' object has no attribute 'text' -- plus I thought I solved the WebElement issue by using .text in the if statement?
– jayohday
Jun 29 at 19:28





Have edited my answer, please check
– Andrei Suvorkov
Jun 29 at 19:35





Made the change and I'm still getting the same error. I'm beginning to think it's an issue with the website.
– jayohday
Jun 29 at 19:55





I think I have found the problem, please check edited answer.
– Andrei Suvorkov
Jun 30 at 0:15






