python 크롤링 삽질

in crawling •  7 years ago  (edited)

 목표 : 페이지 당 10개 항목 *  페이지 전체 (약 500) 추출 

결과 : 1페이지 출력 

 
from bs4 import BeautifulSoup
import time
from selenium import webdriver
driver = webdriver.Chrome('C:\\Users\\ms\\Desktop\\chrome\\chromedriver')
driver.get('http://www.nsdi.go.kr/lxportal/?menuno=2776')



city = driver.find_element_by_id('shSido')



for option in city.find_elements_by_tag_name('option'):



   if option.text == ' 서울특별시':



       option.click()







driver.find_element_by_xpath(' // *[ @ id = "searchVO"] / div[1] / div / select / option[5]').click()



driver.find_element_by_xpath('//*[@id="icon_btn_write"]').click()



html = driver.page_source



soup = BeautifulSoup(html, 'html.parser')







a_list = []



for i in range(1, 51):



   path = '//*[@id="searchVO"]/div[2]/table/tbody/tr'



   num = i



   common_path = path+'['+str(num)+']'



   reg_num = common_path+'/td[2]'



   agent_name = common_path+'/td[3]'



   ads = common_path+'/td[4]'



   name = common_path+'/td[5]'



   tel = common_path+'/td[6]'   #xpath = path+'['+num+']'



   reg_list = driver.find_element_by_xpath(reg_num).text



   agent_list = driver.find_element_by_xpath(agent_name).text



   ads_list = driver.find_element_by_xpath(ads).text



   name_list = driver.find_element_by_xpath(name).text



   tel_list = driver.find_element_by_xpath(tel).text



   all_list = [reg_list, agent_list, ads_list, name_list, tel_list]



   a_list.append(all_list)



   continue



print(a_list)



print(len(a_list))



#a_list = a_list.append(test)



driver.find_element_by_xpath('//*[@id="searchVO"]/div[2]/div[2]/ol/li[12]/a').click()
Authors get paid when people like you upvote their post.
If you enjoyed what you read here, create your account today and start earning FREE STEEM!