使用Cookie模拟已登录状态
import scrapy


class LoginByCookie(scrapy.Spider):
    """Simulated login, approach one: send a logged-in session's cookies.

    Requests an imooc.com user home page with a fixed set of cookies
    attached, then prints the returned page and its title.
    """

    name = 'login_by_cookie'
    allowed_domains = ['www.imooc.com']
    # Left empty on purpose: start_requests() below builds the only request.
    start_urls = []

    def start_requests(self):
        """Override start_requests() to attach the login cookies.

        Yields:
            A single GET request for the user home page, carrying the
            cookies below, whose response is handled by parse_page().
        """
        home_url = 'https://www.imooc.com/u/2346025'
        # NOTE(review): these look like session credentials copied from a
        # browser; they will expire and should presumably be loaded from
        # settings or the environment rather than kept in source -- confirm.
        login_cookie = {
            'imooc_uuid': 'c13c8cb7-442a-430e-a2c1-78d91c347b67',
            'imooc_isnew_ct': '1515076153',
            'imooc_isnew': '2',
            'loginstate': '1',
            'apsid': 'NhMDY2ZDFmODhmYWQ5ZmQ2NDI3ZDg0OTU0NWM3NTQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAMjM0NjAyNQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA4MDkwMjE4MjNAcXEuY29tAAAAAAAAAAAAAAAAAAAAAGI3ZmJjOTUxMTU2YjBlOTVlOTIxYzM1ZDk0OTVmOGNhW3FQWltxUFo%3DYm',
            'PHPSESSID': 'vd48nsltdovbbifsn48pu15763',
            'IMCDNS': '0',
            'Hm_lvt_f0cfcccd7b1393990c78efdeebff3968':
                '1515076155,1515221269,1515746784,1516641134',
            'Hm_lpvt_f0cfcccd7b1393990c78efdeebff3968': '1516641134',
            'cvde': '5a661b6d0246d-3',
        }
        # No form data is submitted, so a plain Request (GET) is sufficient;
        # FormRequest is only needed when posting form fields.
        yield scrapy.Request(
            url=home_url,
            cookies=login_cookie,
            callback=self.parse_page,
        )

    def parse_page(self, response):
        """Print the fetched page and its <title> text.

        Args:
            response: The response for the user home page.
        """
        # response.text decodes with the encoding Scrapy detected for the
        # page, instead of assuming UTF-8 and failing on other encodings.
        print(response.text)
        print(response.xpath('//title/text()').extract_first())
使用Form表单数据实现登录
import scrapy


class LoginByFormData(scrapy.Spider):
    """Simulated login, approach two: submit the login form."""

    name = 'login_by_formdata'
    start_urls = ['http://www.example.com/users/login.php']

    def parse(self, response):
        """Fill in and submit the login form found in the login page.

        Args:
            response: The response for the login page in start_urls.

        Returns:
            A FormRequest built from the page's form, with the username and
            password fields set, whose response goes to after_login().
        """
        # Equivalent to building the request by hand:
        #     return [scrapy.FormRequest(
        #         url=login_url,
        #         formdata={'username': 'john', 'password': 'secret'},
        #         callback=self.after_login,
        #     )]
        return scrapy.FormRequest.from_response(
            response,
            formdata={'username': 'john', 'password': 'secret'},
            callback=self.after_login,
        )

    def after_login(self, response):
        """Check that the login succeeded before continuing.

        Args:
            response: The response returned after submitting the login form.

        Returns:
            None. Logs an error and stops when the page reports a failed
            authentication.
        """
        # response.body is bytes on Python 3, so testing a str against it
        # raises TypeError; compare against the decoded text instead.
        if "authentication failed" in response.text:
            self.logger.error("Login failed")
            return

        # continue scraping with authenticated session...