python 3.x - Passing Scrapy response URL to Selenium and then Selenium response back to Scrapy -


How do I pass a Scrapy response URL to Selenium, and then pass the Selenium response back to Scrapy?

I have a Scrapy spider, first.py:

# -*- coding: utf-8 -*-
import json
import re

import scrapy

# Captures the object literal assigned to `var utag_data` inside an inline
# <script>; DOTALL lets the literal span several lines.
_UTAG_DATA_RE = re.compile(r"var utag_data = ({.*?});", re.MULTILINE | re.DOTALL)


class firstspider(scrapy.Spider):
    """Crawl a search-results page, then scrape every car advert it links to.

    For each advert the spider yields one item (the page's ``utag_data``
    object plus a ``tel`` key) and then follows the advert's insurance
    quotation link.
    """

    name = "first"
    allowed_domains = ["someautosite.co.uk"]
    start_urls = (
        'http://www.someautosite.co.uk/some_specific_search_results',
    )

    def parse(self, response):
        """Yield one request per car advert linked from the results page."""
        car_links = response.xpath(
            '//article[contains(@class, "standard")]/div/div[2]/div[1]/h1/a/@href'
        ).extract()
        for car_url in car_links:
            absoluteurl = response.urljoin(car_url)
            yield scrapy.Request(absoluteurl, callback=self.parse_car)

    def parse_car(self, response):
        """Yield the car item, then a request for its quotation page.

        The item is the decoded ``utag_data`` JSON object with the dealer's
        telephone number added under ``"tel"`` (``None`` when not found).
        """
        matches = response.xpath(
            '//script[contains(.,"var utag")]/text()'
        ).re(_UTAG_DATA_RE)
        if not matches:
            # Previously an IndexError; a page without the script tag is
            # simply skipped and logged.
            self.logger.warning("no utag_data found on %s", response.url)
            return
        utag_data_obj = json.loads(matches[0])

        tel = response.xpath(
            '//article/div[3]/section/div/div[@itemprop="telephone"]/text()'
        ).extract_first()

        # Combine the two pieces of data into a single item without
        # mutating the parsed object.
        car_json = dict(utag_data_obj)
        car_json["tel"] = tel
        yield car_json

        quotations_url = response.xpath(
            '/html/body/article/section/ul/li[2]/a/@href'
        ).extract_first()
        if quotations_url:
            # scrapy.Request rejects None and scheme-less URLs, so only
            # follow a link that exists and make it absolute first.
            yield scrapy.Request(
                response.urljoin(quotations_url),
                callback=self.parse_quotations,
            )

    def parse_quotations(self, response):
        """Hand the insurance quotation page over to the Selenium module."""
        # NOTE(review): importing filldata2 executes its module-level script.
        # Python runs a module's top-level code only on the first import, so
        # this drives the browser at most once per process and never hands
        # `response.url` to it. Passing the URL in and getting the page
        # source back requires filldata2 to expose a callable -- TODO.
        import filldata2  # noqa: F401

And I have a Selenium module, filldata2.py, which tries to get a quotation for the car from the URL link extracted in the parse_car method of the Scrapy spider code above.

The Selenium module starts like this:

# Selenium script that fills in an insurance quotation form step by step and
# saves the HTML of the resulting quotes table to disk.
import time

import six
from six.moves.configparser import SafeConfigParser
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC  # available since 2.26.0
from selenium.webdriver.support.ui import WebDriverWait  # available since 2.4.0

regno = 'aa00aaa'
mile = '15000'
firstname = 'john'
lastname = 'smith'
# [...]
# NOTE(review): the original post elides code here. Everything below uses a
# module-level `driver` (and later `email`) that is not defined in the visible
# part -- presumably the elided section creates the webdriver and opens the
# quotation URL; confirm before running.


def yesno(idul):
    """Return the XPath that selects the element whose id is ``idul``."""
    idxpath = '//*[@id="{}"]'.format(idul)
    return idxpath


def findid(idul):
    """Return the element whose id is ``idul``."""
    found = driver.find_element_by_id(idul)
    return found


def clickyes(idul):
    """Click the span containing "yes" inside the element with id ``idul``."""
    idxpath = '//*[@id="{}"]'.format(idul)
    arg = '{}//span[contains(text(), "yes")]'.format(idxpath)
    return driver.find_element_by_xpath(arg).click()


def clickno(idul):
    """Click the span containing "no" inside the element with id ``idul``."""
    idxpath = '//*[@id="{}"]'.format(idul)
    arg = '{}//span[contains(text(), "no")]'.format(idxpath)
    return driver.find_element_by_xpath(arg).click()


def clickspan(idul):
    """Click the first span inside the element with id ``idul``."""
    idxpath = '//*[@id="{}"]'.format(idul)
    arg = '{}//span[1]'.format(idxpath)
    driver.find_element_by_xpath(arg).click()


class divselect(object):
    """Pick one ``div`` option inside the container with id ``idul``.

    ``divtext`` is either the visible text of the option (for example
    ``"right hand"``) or its 1-based position given as a digit string.
    """

    def __init__(self, idul, divtext):
        self.idul = idul
        self.divtext = divtext
        self.idxpath = '//*[@id="{}"]'.format(self.idul)

    def findid(self):
        """Return the lookup call for this container as source text.

        NOTE(review): this returns a string, not a WebElement -- kept as in
        the original; it is not called anywhere in the visible code.
        """
        el = 'driver.find_element_by_id({})'.format(self.idul)
        return el

    @property
    def clicky(self):
        """Click the selected option (works only for div-based choices).

        Accessing the property performs the click; it returns ``None``.
        """
        text = str(self.divtext)
        if len(text) >= 2 and not text.isdigit():
            # Option identified by its visible text.
            arg = '{}//div[contains(text(), "{}")]'.format(self.idxpath, self.divtext)
        else:
            # Option identified by its position among the sibling divs.
            arg = '{}//div[{}]/label/div'.format(self.idxpath, self.divtext)
            print('driver.find_element_by_xpath("{}").click()'.format(arg))
        driver.find_element_by_xpath(arg).click()


def printval(cee, cssid):
    """Read the field with id ``cssid``, re-enter its value, and report it.

    ``cee`` is the label used in the printed message; the label ``'pret'``
    (price) is printed with a pound sign. When the field is empty the user
    is prompted for a value on stdin.
    """

    def getval():
        xpath = '//*[@id="{}"]'.format(cssid)
        val = driver.find_element_by_xpath(xpath).get_attribute('value')
        if not val:
            val = input('care e valoarea masinii:\n')
        # NOTE(review): the click + fillin below also run when the field
        # already held a value, which types that value a second time.
        # Behaviour kept as in the original -- confirm it is intended.
        driver.find_element_by_xpath(xpath).click()
        fillin(cssid, val)
        time.sleep(2)
        return val

    valoare = getval()
    if not valoare:
        # The original built this message but never printed it.
        print('nu era nici un {}({}) estimat'.format(cee, cssid))
    elif cee.lower() == 'pret':
        # Compare the label, not the field value, to decide on the currency sign.
        print('{} estimat este : £ {} '.format(cee, valoare))
    else:
        print('{} estimat/a/e este : {} '.format(cee, valoare))


def clickbutton(cssid):
    """Click the element with id ``cssid``."""
    driver.find_element_by_xpath('//*[@id="{}"]'.format(cssid)).click()


def fillin(cssid, var):
    """Type ``var`` into the element with id ``cssid``."""
    return driver.find_element_by_id(str(cssid)).send_keys(var)


def fillinsugestionbox(cssid, var):
    """Type ``var`` into a suggestion box, then press Return to accept it."""
    driver.find_element_by_id(str(cssid)).send_keys(var)
    return driver.find_element_by_xpath('//*[@id="{0}"]'.format(cssid)).send_keys(Keys.RETURN)


# ============================================
# 1. vehicle lookup                          |
# ============================================
# yesno() only builds an XPath string, so the original
# `yesno(...).clickyes` raised AttributeError; call the click helpers directly.
clickyes('knows-registration-number')

# 1.2 enter the registration number
registrationnumber = driver.find_element_by_id('registration-number')
registrationnumber.send_keys(regno)

# 1.3 find vehicle: find-vehicle-by-reg
findvehiclebyreg = driver.find_element_by_id('find-vehicle-by-reg')
findvehiclebyreg.click()
time.sleep(1)

# TODO: handle the case where there are no other variants.
# 1.3.1 possible-vehicles: a select list shown when several vehicles match.
try:
    element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, "possible-vehicles")))
    possiblevehicles = driver.find_element_by_id('possible-vehicles')
    possiblevehicles.click()
    print('am asteptat destul')
    dropdown = possiblevehicles.find_elements_by_tag_name('option')
    # Index 0 is skipped, as in the original (presumably a placeholder entry);
    # check the length before touching dropdown[1].
    if len(dropdown) > 1:
        print('am selectat :\n      {} \n dintre urmatoarele:'.format(dropdown[1].text))
        for option in dropdown[1:]:
            print(option.text)
        dropdown[1].click()
except WebDriverException:
    # Covers the wait timing out and the element being absent or unusable.
    print('elementul possible-vehicles nu e prezent')

time.sleep(2)

# //*[@id="has-tracker"] yes/no
clickno('has-tracker')

# //*[@id="imported"] yes/no
clickno('imported')

# //*[@id="steering"] - two divs: "left hand" or "right hand"
steering = divselect('steering', 'right hand').clicky

# TODO: decide what to do when no price is preset; supply one.
# //*[@id="current-value"] - read the value
printval('pret', 'current-value')

printval('scaune', 'numberofseats-dropdown')

# //*[@id="has-modifications"]
clickno('has-modifications')

# click the next button: //*[@id="vehicle-lookup-next"]
clickbutton('vehicle-lookup-next')
time.sleep(1)

# ============================================
# 2. vehicle usage                           |
# ============================================
# 2.1 when did you buy the car?  -> "haven't bought the car yet"
# //*[@id="vehicle-usage"]//span[1]
clickspan('vehicle-usage')

# 2.2 what do you use the car for?
# //*[@id="use-of-vehicle"]/ol/li[2]/div[2]/label/div/div[2]
# option 2: social, domestic, pleasure and commuting (sdpc)
useofvehicle = divselect('use-of-vehicle', '2').clicky

# 2.3 annual personal mileage: //*[@id="annual-mileage"]
annualmileage = driver.find_element_by_id('annual-mileage')
annualmileage.send_keys(mile)

# [...much more...] ... ...
# (further form steps elided in the original post)

fillin('email', email)
# main telephone number
# how insurance providers may answer queries / keep you up to date
# //*[@id="communication-options"]/ol/li[2]/div[4]/label/div/div[2]
divselect('communication-options', 'post').clicky

# Tick the box confirming the website terms and conditions were read.
# //*[@id="contact-details"]/div/ol/li[6]/ol/li[2]/div[2]/label/span
# clickspan('contact-details') does not work here, so use the full XPath.
driver.find_element_by_xpath(
    '//*[@id="contact-details"]/div/ol/li[6]/ol/li[2]/div[2]/label/span').click()

# //*[@id="contact-details-next"]
clickbutton('contact-details-next')

# NOTE(review): Selenium's documentation advises against mixing implicit and
# explicit waits; kept because the original relied on it.
driver.implicitly_wait(10)
try:
    element = WebDriverWait(driver, 10).until(
        EC.visibility_of_element_located((By.ID, "quotes")))
    print('element = ', element)
    try:
        # Wait for the loading bar to go away.
        element2 = WebDriverWait(driver, 60).until(
            EC.invisibility_of_element_located(
                (By.XPATH, '//*[@id="quotes-loading-container"]/div/div[1]')))
        print('element2 = ', element2)
    except TimeoutException:
        print('bara de loading inca este activa. butonul more details cu cotatii nu e vizibil')
except TimeoutException:
    print('tabelul cu cotatii nu e vizibil')

# DOM property names are case-sensitive: 'innerhtml' yields None.
source_code = driver.find_element_by_id('quotes').get_attribute('innerHTML')
with open('c:\\users\\zzz\\pycharmprojects\\selenscrapy\\' + str(regno) + '.html', 'wb') as f:
    f.write(source_code.encode('utf-8'))

I know the code is messy. I'm a Python beginner, and I'm playing with this code to scrape cars from a car-selling website and then try to get insurance quotations for them from a different site. The link to the external insurance quotation site (which is full of JavaScript; that's why I need Selenium WebDriver) is a redirection link from the car-selling site, because the two sites collaborate. Now, as I said before, the quotation URL needs to be parsed with Selenium, which I want to keep in a separate module file, or maybe in two separate files: one for the config and one for the actions to be taken.

How can I pass the insurance quotation URL obtained in the Scrapy firstspider parse_car() method to the Selenium module, and then pass the response of the Selenium script (which is called source_code in the second module above) back to Scrapy, into the firstspider parse_quotations() method?

Thank you!


Comments

Popular posts from this blog

php - How to display all orders for a single product showing the most recent first? Woocommerce -

asp.net - How to correctly use QUERY_STRING in ISAPI rewrite? -

angularjs - How restrict admin panel using in backend laravel and admin panel on angular? -