NoneType error during multi-page scrape

I'm working on a web scraper and am close to getting what I need, but I can't figure out why I'm getting a NoneType error all of a sudden after it finishes scraping the fourth page (of 204). Here's my code:


import gc
import os
import re
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.support.ui import Select

script_path = os.path.dirname(os.path.realpath(__file__))

driver = webdriver.PhantomJS(executable_path="/usr/local/bin/bin/phantomjs",
                             service_args=['--ignore-ssl-errors=true', '--ssl-protocol=any'])

case_list = []

# this function launches the headless browser and gets us to the first page of
# results, which we'll scrape using main
def search():
    driver.get('https://www.courts.mo.gov/casenet/cases/nameSearch.do')

    if 'Service Unavailable' in driver.page_source:
        log('Casenet website seems to be down. Receiving "service unavailable"')
        driver.quit()
        gc.collect()
        return False

    time.sleep(2)

    # fill in the search form and submit it
    court = Select(driver.find_element_by_id('courtId'))
    court.select_by_visible_text('All Participating Courts')
    case_enter = driver.find_element_by_id('inputVO.lastName')
    case_enter.send_keys('Wakefield & Associates')
    year_enter = driver.find_element_by_id('inputVO.yearFiled')
    year_enter.send_keys('2018')
    driver.find_element_by_id('findButton').click()
    time.sleep(3)

# scrapes the results table and stores what we need in a list of lists
def main():
    parties = []
    dates = []
    case_nums = []

    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    table = soup.findAll('table', {'class': 'outerTable'})

    for row in table:
        col = row.find_all('td', attrs={'class': 'td1'})
        col2 = row.find_all('td', attrs={'class': 'td2'})
        all_links = soup.findAll('a')

        # party names live in the cells containing a 'V'
        for cols in col:
            if 'V' in cols.text:
                cols = cols.string
                cols.encode('utf-8').strip()
                cols = re.sub("xa0''", '', cols).strip()
                parties.append(cols)

        for cols in col2:
            if 'V' in cols.text:
                cols = cols.string
                cols.encode('utf-8').strip()
                cols = re.sub("xa0''", '', cols).strip()
                parties.append(cols)

        # case numbers are buried in the goToThisCase javascript links
        for link in all_links:
            raw_html = str(link)
            if 'goToThisCase' in raw_html:
                start = raw_html.find("('") + 2
                end = raw_html.find("',")
                case = raw_html[start:end].strip()
                case_nums.append(case)

        # filing dates are the cells containing '/2018'
        for i in col:
            if '/2018' in i.text:
                i = i.string
                i.encode('utf-8').strip()
                i = re.sub("xa0", '', i).strip()
                dates.append(i)

        for j in col2:
            if '/2018' in j.text:
                j = j.string
                j.encode('utf-8').strip()
                j = re.sub("xa0", '', j).strip()
                dates.append(j)

    case_list.append(parties)
    case_list.append(case_nums)
    case_list.append(dates)
    return case_list

def page_looper():
    # scrape page 1, then click through pages 2-8 by their link text
    main()
    count = '1'
    print "page %s fully scraped" % count
    count = str(int(count) + 1)
    print len(case_list), " cases so far"
    print case_list

    for count in range(2, 9):
        link = driver.find_element_by_link_text(str(count))
        link.click()
        time.sleep(2)
        main()
        print "page %s fully scraped" % count
        count = str(int(count) + 1)
        print len(case_list), " cases so far"
        print case_list

    # move on to the next block of ten pages and recurse
    next_page_link = driver.find_element_by_partial_link_text('Next')
    print "Next 10 pages found"
    next_page_link.click()
    time.sleep(2)

    try:
        page_looper()
    except Exception:
        print "no more cases"
        # pprint.pprint(case_list)
        # data = zip(case_list[0], case_list[1], case_list[2])
        # pprint.pprint(data)
        # with open(script_path + "/cases.csv", "w") as f:
        #     writer = csv.writer(f)
        #     for d in data:
        #         writer.writerow(d)

search()
page_looper()



After it finishes scraping the fourth page, it throws:


Traceback (most recent call last):
  File "wakefield.py", line 175, in <module>
    page_looper()
  File "wakefield.py", line 140, in page_looper
    main()
  File "wakefield.py", line 84, in main
    cols.encode('utf-8').strip()
AttributeError: 'NoneType' object has no attribute 'encode'



Any idea what gives?



I'm also unclear on how to turn my list of lists into a CSV at the end, with each case on its own row and columns for parties, case_num, and dates. Thanks in advance.
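For reference, the commented-out block at the bottom of my script is roughly what I have in mind; a minimal standalone sketch of that idea (with made-up values standing in for the scraped data, and assuming I can end up with three parallel lists of equal length) would be:

import csv

# made-up parallel lists standing in for the scraped results
parties = ['SMITH V WAKEFIELD & ASSOCIATES']
case_nums = ['1816-CV12345']
dates = ['01/15/2018']

rows = zip(parties, case_nums, dates)  # one (party, case_num, date) tuple per case

with open('cases.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerow(['parties', 'case_num', 'date'])  # header row
    for row in rows:
        writer.writerow(row)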




1 Answer


AttributeError: 'NoneType' object has no attribute 'encode'



means you are trying to invoke the encode method on a None object. To prevent this, you have to check that the object is not None before calling it.





Replace:


for cols in col:
    if 'V' in cols.text:
        cols = cols.string
        cols.encode('utf-8').strip()
        cols = re.sub("xa0''", '', cols).strip()
        parties.append(cols)



with:


for cols in col:
    if 'V' in cols.text:
        if cols.string:  # check if 'cols.string' is not 'None'
            cols = cols.string
            cols.encode('utf-8').strip()
            cols = re.sub("xa0''", '', cols).strip()
            parties.append(cols)
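
The reason cols.string can come back as None: BeautifulSoup sets .string to None whenever a tag contains more than one child (for example a link plus stray whitespace), whereas .get_text() still returns the combined text. A quick sketch of the difference, using a made-up pair of <td> snippets rather than the actual Casenet markup:

from bs4 import BeautifulSoup

# two made-up cells: one with plain text, one with nested markup
html = "<td class='td1'>SMITH V JONES</td><td class='td1'><a href='#'>SMITH V JONES</a>&nbsp;</td>"
soup = BeautifulSoup(html, 'html.parser')
plain, nested = soup.find_all('td')

print repr(plain.string)       # u'SMITH V JONES'
print repr(nested.string)      # None -- more than one child, so .string gives up
print repr(nested.get_text())  # u'SMITH V JONES\xa0' -- the text is still there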





I did, and then it threw: AttributeError: 'NavigableString' object has no attribute 'text' -- plus I thought I solved the WebElement issue by using .text in the if statement?
– jayohday
Jun 29 at 19:28





Have edited my answer, please check
– Andrei Suvorkov
Jun 29 at 19:35





Made the change and I'm still getting the same error. I'm beginning to think it's an issue with the website.
– jayohday
Jun 29 at 19:55





I think I have found the problem, please check edited answer.
– Andrei Suvorkov
Jun 30 at 0:15






