-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexample.js
More file actions
51 lines (40 loc) · 1.5 KB
/
example.js
File metadata and controls
51 lines (40 loc) · 1.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
const { createRouter, createScraper } = require('tiny-scraper');
const cheerio = require('cheerio');
const { on } = require('flyd');
const { request } = require('axios');
const { get, compose } = require('./utils');
// Origin used both as the router's match prefix and as the base for scraped links.
const github = 'https://github.com';
const router = createRouter();
// Route registrar for the GitHub origin; invoked below as matchGithub(pattern, handler).
const matchGithub = router.match(github);
/**
 * Resolves an href scraped from a GitHub page into an absolute URL.
 *
 * Root-relative paths ("/owner/repo") resolve exactly as plain concatenation
 * with the origin would. Resolving through URL additionally keeps absolute
 * hrefs intact and inserts the missing "/" for relative ones, where
 * concatenation would yield a malformed address.
 *
 * @param {string} path - href value taken from an anchor element.
 * @returns {string} Absolute URL string.
 */
const toUrl = path => new URL(path, github).href;
/**
 * Collects the entry links of a GitHub directory-listing page and turns each
 * one into a follow-up request.
 *
 * @param {{data: *}} res - Downloader response; `res.data` is handed to
 *   cheerio.load (presumably the page's HTML string — confirm against the
 *   downloader).
 * @returns {Array} One `get(...)` result per matched anchor that carries an
 *   href attribute.
 */
const parseList = res => {
  const $ = cheerio.load(res.data);
  return $('.js-navigation-item .content>span>a')
    .toArray()
    .map(elem => $(elem).attr('href'))
    // attr() yields undefined for an anchor without an href; skipping those
    // avoids requesting the bogus address "https://github.comundefined".
    .filter(path => typeof path === 'string')
    // Explicit arrows keep map's index/array arguments away from the helpers.
    .map(path => get(toUrl(path)));
};
/**
 * Returns the text of the page's `.file-info` element(s) with every run of
 * whitespace collapsed to a single space.
 *
 * @param {{data: *}} res - Downloader response; `res.data` is handed to
 *   cheerio.load.
 * @returns {string} Whitespace-normalized text (not trimmed).
 */
const parseFileInfo = res => {
  const page = cheerio.load(res.data);
  const rawInfo = page('.file-info').text();
  // \s+ also matches newlines, so multi-line markup ends up on one line.
  return rawInfo.replace(/\s+/g, ' ');
};
// Repository root page: hand back one request per listed entry.
// NOTE(review): the returned requests are presumably scheduled by the scraper — confirm.
matchGithub('/zhangmq/tiny-scraper', function* (req, res, params, query) {
  const entryRequests = parseList(res);
  return entryRequests;
});
// Folder page under master: log the matched folder parameter, then hand back
// one request per listed entry.
matchGithub('/zhangmq/tiny-scraper/tree/master/:folder*', function* (req, res, params, query) {
  const { folder } = params;
  console.log('folder', folder);
  const entryRequests = parseList(res);
  return entryRequests;
});
// File page under master: log the matched file parameter together with the
// page's file-info text. Nothing is returned, so no follow-up requests come
// from this handler.
matchGithub('/zhangmq/tiny-scraper/blob/master/:file*', function* (req, res, params, query) {
  const summary = { file: params.file, info: parseFileInfo(res) };
  console.log('file', summary);
});
// Scraper configuration, kept separate from the createScraper call.
// NOTE(review): maxRequest presumably caps concurrent requests and
// requestDuration is presumably a delay in milliseconds — confirm against
// the tiny-scraper documentation.
const scraperOptions = {
  maxRequest: 1,
  requestDuration: 3000,
  router,
  downloader: request,
};
const scraper = createScraper(scraperOptions);
// Builds a stream listener that logs each emitted error under the given label.
const logWith = label => error => console.log(label, error);

const requestError$ = scraper.requestError$;
const routeError$ = scraper.routeError$;
on(logWith('download error'), requestError$);
on(logWith('route error'), routeError$);

// Start the crawl from the repository root page.
const seedRequests = [get('https://github.com/zhangmq/tiny-scraper')];
scraper.task$(seedRequests);