Skip to content

Commit 1f4e5c1

Browse files
committed
Working on Scrapy
1 parent 69ebcd0 commit 1f4e5c1

File tree

5 files changed

+27
-46
lines changed

5 files changed

+27
-46
lines changed
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import scrapy
2+
3+
class ArticleSpider(scrapy.Spider):
    """Spider that requests three fixed Wikipedia articles and prints each title."""

    name = 'articles'

    def start_requests(self):
        """Return a list with one request per hard-coded article URL.

        Every request is routed to ``parse`` through its ``callback`` argument.
        """
        article_urls = (
            "http://en.wikipedia.org/wiki/Python_%28programming_language%29",
            "https://en.wikipedia.org/wiki/Functional_programming",
            "https://en.wikipedia.org/wiki/Monty_Python",
        )
        requests = []
        for article_url in article_urls:
            requests.append(scrapy.Request(url=article_url, callback=self.parse))
        return requests

    def parse(self, response):
        """Print the first text node matched by the ``h1::text`` CSS selector."""
        heading = response.css('h1::text').extract_first()
        message = 'Title is: {}'.format(heading)
        print(message)
Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,14 @@
1-
from scrapy import Item, Field
1+
# -*- coding: utf-8 -*-
22

3+
# Define here the models for your scraped items
4+
#
5+
# See documentation in:
6+
# https://docs.scrapy.org/en/latest/topics/items.html
37

4-
class Article(Item):
5-
title = Field()
6-
last_edited = Field()
7-
url = Field()
8+
import scrapy
89

10+
11+
class WikispiderItem(scrapy.Item):
    """Placeholder item container; no fields are declared yet."""

    # Declare scraped fields as class attributes, for example:
    #     name = scrapy.Field()

v2/Chapter05_Scrapy/wikiSpider/wikiSpider/settings.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
SPIDER_MODULES = ['wikiSpider.spiders']
1515
NEWSPIDER_MODULE = 'wikiSpider.spiders'
16-
LOG_LEVEL = 'ERROR'
16+
1717

1818
# Crawl responsibly by identifying yourself (and your website) on the user-agent
1919
#USER_AGENT = 'wikiSpider (+http://www.yourdomain.com)'

v2/Chapter05_Scrapy/wikiSpider/wikiSpider/spiders/articleSpider.py

Lines changed: 0 additions & 19 deletions
This file was deleted.

v2/Chapter05_Scrapy/wikiSpider/wikiSpider/spiders/testSpider.py

Lines changed: 0 additions & 21 deletions
This file was deleted.

0 commit comments

Comments
 (0)