File tree: 5 files changed, +27 -46 lines changed
v2/Chapter05_Scrapy/wikiSpider/wikiSpider Expand file tree Collapse file tree 5 files changed +27
-46
lines changed Original file line number Diff line number Diff line change 1+ import scrapy
2+
class ArticleSpider(scrapy.Spider):
    """Spider that requests a fixed set of Wikipedia article URLs and prints each title."""

    name = 'articles'

    def start_requests(self):
        """Build the initial requests, one per seed URL.

        Returns:
            A list of ``scrapy.Request`` objects, each using ``self.parse``
            as its callback.
        """
        seed_urls = (
            "http://en.wikipedia.org/wiki/Python_%28programming_language%29",
            "https://en.wikipedia.org/wiki/Functional_programming",
            "https://en.wikipedia.org/wiki/Monty_Python",
        )
        requests = []
        for seed_url in seed_urls:
            requests.append(scrapy.Request(url=seed_url, callback=self.parse))
        return requests

    def parse(self, response):
        """Print the first match of the ``h1::text`` selector in ``response``."""
        heading = response.css('h1::text').extract_first()
        message = 'Title is: {}'.format(heading)
        print(message)
Original file line number Diff line number Diff line change 1- from scrapy import Item , Field
1+ # -*- coding: utf-8 -*-
22
3+ # Define here the models for your scraped items
4+ #
5+ # See documentation in:
6+ # https://docs.scrapy.org/en/latest/topics/items.html
37
class Article(Item):
    """Item declaring three fields: ``title``, ``last_edited`` and ``url``."""

    # Each attribute is declared as a Field on the item.
    title = Field()
    last_edited = Field()
    url = Field()
8+ import scrapy
89
10+
class WikispiderItem(scrapy.Item):
    """Placeholder item that declares no fields yet.

    Define fields as class attributes, for example ``name = scrapy.Field()``.
    """
Original file line number Diff line number Diff line change 1313
1414SPIDER_MODULES = ['wikiSpider.spiders' ]
1515NEWSPIDER_MODULE = 'wikiSpider.spiders'
16- LOG_LEVEL = 'ERROR'
16+
1717
1818# Crawl responsibly by identifying yourself (and your website) on the user-agent
1919#USER_AGENT = 'wikiSpider (+http://www.yourdomain.com)'
Load Diff This file was deleted.
Load Diff This file was deleted.
You can’t perform that action at this time.
0 commit comments