Skip to content

Commit 0b669fa

Browse files
committed
Feature: download avatar pictures.
1 parent d0896c9 commit 0b669fa

File tree

4 files changed

+46
-3
lines changed

4 files changed

+46
-3
lines changed
Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
from .origin_picture_downloader import OriginPictureDownloader
22
from .retweet_picture_downloader import RetweetPictureDownloader
3+
from .avatar_picture_downloader import AvatarPictureDownloader
34
from .video_downloader import VideoDownloader
45

5-
__all__ = [OriginPictureDownloader, RetweetPictureDownloader, VideoDownloader]
6+
# Public API of the downloader package. Entries must be strings: the import
# machinery raises TypeError on `from <package> import *` when an item of
# __all__ is not a str.
__all__ = [
    'OriginPictureDownloader',
    'RetweetPictureDownloader',
    'AvatarPictureDownloader',
    'VideoDownloader',
]
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import os
2+
3+
from .img_downloader import ImgDownloader
4+
5+
6+
class AvatarPictureDownloader(ImgDownloader):
    """Downloader that saves a list of avatar picture URLs to disk."""

    def __init__(self, file_dir, file_download_timeout):
        """Initialise the downloader.

        Both arguments are forwarded unchanged to ``ImgDownloader``.
        """
        super().__init__(file_dir, file_download_timeout)
        # Used below as the name of the sub-directory that receives the files.
        self.describe = u'头像图片'
        self.key = 'avatar_pictures'

    def handle_download(self, urls):
        """Download every URL in ``urls`` into the avatar picture directory.

        The target directory is ``<self.file_dir>/<self.describe>`` and is
        created when missing. Each file is named after the last path segment
        of its URL.

        Args:
            urls: iterable of picture URL strings.
        """
        # NOTE(review): self.file_dir is presumably set by ImgDownloader's
        # __init__ -- confirm against the base class.
        file_dir = self.file_dir + os.sep + self.describe
        # exist_ok avoids the check-then-create race of isdir() + makedirs().
        os.makedirs(file_dir, exist_ok=True)

        for url in urls:
            # Take the text after the last '/', excluding the slash itself, so
            # the joined path has no doubled separator. A URL without any '/'
            # (rfind() == -1) is used whole as the file name.
            file_name = url[url.rfind('/') + 1:]
            file_path = file_dir + os.sep + file_name
            # NOTE(review): 'xxx' is a placeholder third argument; its meaning
            # is defined by download_one_file, which is not visible here.
            self.download_one_file(url, file_path, 'xxx')

weibo_spider/parser/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
from .index_parser import IndexParser
22
from .page_parser import PageParser
3+
from .photo_parser import PhotoParser
4+
from .album_parser import AlbumParser
35

4-
__all__ = [IndexParser, PageParser]
6+
# Public API of the parser package. Entries must be strings: the import
# machinery raises TypeError on `from <package> import *` when an item of
# __all__ is not a str.
__all__ = ['IndexParser', 'PageParser', 'PhotoParser', 'AlbumParser']

weibo_spider/spider.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515
from tqdm import tqdm
1616

1717
from . import config_util, datetime_util
18-
from .parser import IndexParser, PageParser
18+
from .downloader import AvatarPictureDownloader
19+
from .parser import IndexParser, PageParser, PhotoParser, AlbumParser
1920
from .user import User
2021

2122
FLAGS = flags.FLAGS
@@ -140,6 +141,16 @@ def get_user_info(self, user_uri):
140141
self.user = IndexParser(self.cookie, user_uri).get_user()
141142
self.page_count += 1
142143

144+
def download_user_avatar(self, user_uri):
    """Download the pictures of the user's avatar album.

    Resolves the avatar album URL for ``user_uri``, extracts the picture
    URLs from that album, and hands them to ``AvatarPictureDownloader``.
    """
    # Step 1: locate the avatar album for this user.
    photo_parser = PhotoParser(self.cookie, user_uri)
    album_url = photo_parser.extract_avatar_album_url()

    # Step 2: collect the picture URLs listed in that album.
    album_parser = AlbumParser(self.cookie, album_url)
    picture_urls = album_parser.extract_pic_urls()

    # Step 3: download them under the path returned for 'img'.
    img_dir = self._get_filepath('img')
    downloader = AvatarPictureDownloader(img_dir, self.file_download_timeout)
    downloader.handle_download(picture_urls)
153+
143154
def get_weibo_info(self):
144155
"""获取微博信息"""
145156
try:
@@ -302,6 +313,10 @@ def get_one_user(self, user_config):
302313
self.write_user(self.user)
303314
logger.info('*' * 100)
304315

316+
# 下载用户头像相册中的图片。
317+
if self.pic_download:
318+
self.download_user_avatar(user_config['user_uri'])
319+
305320
for weibos in self.get_weibo_info():
306321
self.write_weibo(weibos)
307322
self.got_num += len(weibos)

0 commit comments

Comments
 (0)