master

分支 (1)

管理

管理

master

gitee-7th-event-3
/
有了这代码_斗图没输过.py

#!/usr/bin/env python
# -*- coding:utf-8 -*-

# 爬取无反爬虫技术网站 -->爱斗图网
import os
# Send and receive HTTP requests
import requests
from lxml import html, etree

# 爬取有意思的表情包,心有多大，硬盘有多大，表情包就有多大
class Spider(object):
    """Download the images listed on adoutu.com picture-list pages.

    Each list page is fetched, the ``<img>`` tags inside the picture-list
    container are extracted, and every image is saved under ``SAVE_DIR``
    using the image's ``title`` attribute as the file name.
    """

    # Listing URL prefix; the page number is appended to it.
    LIST_URL = "http://www.adoutu.com/picture/list/"
    # Directory (relative to the working directory) that receives downloads.
    SAVE_DIR = "表情包"
    # Seconds to wait before giving up on a stalled HTTP request.
    TIMEOUT = 10
    # Characters rejected in Windows file names (covers the POSIX "/" too).
    _INVALID_CHARS = '\\/:*?"<>|'

    @classmethod
    def _safe_name(cls, title):
        """Return *title* with characters unusable in file names replaced.

        Path separators, reserved punctuation and control characters become
        ``_``; surrounding spaces/dots are stripped. An empty result falls
        back to ``"untitled"`` so a file name is always produced.
        """
        cleaned = "".join(
            "_" if ch in cls._INVALID_CHARS or ord(ch) < 32 else ch
            for ch in title
        ).strip(" .")
        return cleaned or "untitled"

    def start_request(self, start_page=1, end_page=2675):
        """Crawl list pages ``start_page`` through ``end_page`` inclusive.

        The defaults reproduce the original hard-coded range (pages 1..2675).
        A page that cannot be fetched is reported and skipped so that one
        network error does not abort the whole crawl.
        """
        for page_no in range(start_page, end_page + 1):
            print("======正在抓取%s页======" % page_no)
            try:
                response = requests.get(
                    self.LIST_URL + str(page_no), timeout=self.TIMEOUT
                )
                response.raise_for_status()
            except requests.RequestException as exc:
                print("抓取失败：第%s页 (%s)" % (page_no, exc))
                continue
            # Decode leniently: a stray invalid byte should not kill the run.
            page = etree.HTML(response.content.decode("utf-8", errors="replace"))
            self.xpath_data(page)

    def xpath_data(self, html):
        """Extract every image on a parsed list page and save it to disk.

        Args:
            html: parsed document exposing ``.xpath()`` (as returned by
                ``etree.HTML``). ``None`` is tolerated and treated as a page
                without images.

        Files are written as ``<SAVE_DIR>/<sanitised title>.gif``. Images
        that fail to download or save are reported and skipped.
        """
        if html is None:
            return
        # Select the <img> elements themselves rather than two parallel
        # attribute lists, so src and title can never get out of step when
        # an image is missing one of them.
        images = html.xpath(
            '//div[@class="row  text-center picture-list"]/a/img'
        )
        if not images:
            return
        # The target directory must exist before open(); create it lazily.
        os.makedirs(self.SAVE_DIR, exist_ok=True)
        for img in images:
            src = img.get("src")
            tit = img.get("title")
            if not src or not tit:
                continue
            file_name = os.path.join(self.SAVE_DIR, self._safe_name(tit) + ".gif")
            try:
                reply = requests.get(src, timeout=self.TIMEOUT)
                reply.raise_for_status()
                with open(file_name, "wb") as f:
                    f.write(reply.content)
            except (requests.RequestException, OSError) as exc:
                print("抓取失败：" + file_name + " (%s)" % exc)
                continue
            # Report success only once the file is actually on disk.
            print("抓取成功：" + file_name)
if __name__ == "__main__":
    # Start the crawl only when this file is executed as a script, so that
    # importing the module (e.g. to reuse Spider) does not fire off HTTP
    # requests as a side effect.
    spider = Spider()
    spider.start_request()