通过scrapy.Request实现翻页请求:
scrapy.Request(url, callback=None, method='GET', headers=None, body=None, cookies=None, meta=None, encoding='utf-8', priority=0, dont_filter=False, errback=None, flags=None)
这里以爬取腾讯招聘网站的岗位信息为例制作一个爬虫进行翻页请求的实现
# -*- coding: utf-8 -*-
import scrapy


class HrSpider(scrapy.Spider):
    """Spider that crawls Tencent HR job postings and follows pagination.

    Yields one dict per job row with keys: title, position, publish_date.
    """

    name = 'Hr'
    allowed_domains = ['tencent.com']
    start_urls = ['https://hr.tencent.com/position.php']

    def parse(self, response):
        """Parse one listing page, yield job items, then request the next page.

        :param response: scrapy Response for a position.php listing page
        """
        # Slice off the header row ([0]) and the trailing pagination row ([-1]).
        tr_list = response.xpath("//table[@class='tablelist']/tr")[1:-1]
        for tr in tr_list:
            item = {}
            item["title"] = tr.xpath("./td[1]/a/text()").extract_first()
            # Fixed key typo: original used "postion".
            item["position"] = tr.xpath("./td[2]/text()").extract_first()
            item["publish_date"] = tr.xpath("./td[5]/text()").extract_first()
            yield item
        # Find the next page's URL to implement the pagination request.
        next_url = response.xpath("//a[@id='next']/@href").extract_first()
        # On the last page the "next" link's href is the inert 'javascript:;'.
        # The original compared against " javascript:;" (leading space), which
        # never matches; it also crashed with a TypeError on concatenation when
        # the link is absent and extract_first() returns None.
        if next_url and next_url != "javascript:;":
            yield scrapy.Request(
                # urljoin is safer than manual string concatenation and
                # handles both relative and absolute hrefs.
                response.urljoin(next_url),
                callback=self.parse,
            )