1 Star 3 Fork 2

Jackin/Python3_t.qq.com_client

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
followerGather.py 5.00 KB
一键复制 编辑 原始数据 按行查看 历史
Jackin CNLove 提交于 2013-11-09 16:15 . Init
# -*- coding: UTF-8 -*-
import mysql.connector as db
import client.tWeibo
import time
import json
import re
import sys
def get_userName(html):
    """Extract the account header fields from one follower list item.

    Two layouts are tried in order: the one that carries a certification
    badge link, then the plain one without it.

    Args:
        html: HTML fragment of a single follower list item.

    Returns:
        A dict with the string values ``account``, ``nickName``,
        ``certification`` and ``level`` (``certification`` is ``''`` when the
        badge link is absent), or ``None`` when neither layout matches.
    """
    # Raw strings: ``\d`` must reach the regex engine as written instead of
    # depending on Python passing an unrecognised string escape through.
    certified_ptn = r'<div class="userName"><strong><a href="/(?P<account>[^"]+)" title="[^"]+">(?P<nickName>.+?)</a><a href="[^"]+" title="(?P<certification>[^"]+)" target="_blank" class="[^"]+"></a><a href="[^"]+" class="[^"]+" user="[^"]+"><em>(?P<level>\d+)</em></a></strong></div>'
    plain_ptn = r'<div class="userName"><strong><a href="/(?P<account>[^"]+)" title="[^"]+">(?P<nickName>.+?)</a><a href="[^"]+" class="[^"]+" user="[^"]+"><em>(?P<level>\d+)</em></a></strong></div>'
    flags = re.I | re.S

    found = re.search(certified_ptn, html, flags)
    if found is not None:
        return found.groupdict()

    found = re.search(plain_ptn, html, flags)
    if found is None:
        return None
    fields = found.groupdict()
    # Keep the result shape identical for both layouts.
    fields['certification'] = ''
    return fields
def get_userNums(html):
    """Extract the address and follower/following counters of a list item.

    Two layouts are tried in order: the one whose ``cNote`` span holds an
    address, then the one with an empty, styled ``cNote`` span.

    Args:
        html: HTML fragment of a single follower list item.

    Returns:
        A dict with the string values ``address``, ``follower`` and
        ``following`` (``address`` is ``''`` for the empty-span layout), or
        ``None`` when neither layout matches. The counters are digit strings;
        converting them to ``int`` is left to the caller.
    """
    # Raw strings so that ``\d`` is a regex escape, not an invalid string escape.
    with_address_ptn = r'<div class="userNums"><span><span class="cNote" >(?P<address>.+?)</span><a href="[^"]+">听众<strong id="[^"]+">(?P<follower>\d+)</strong>人</a></span><span><a href="[^"]+">收听<strong>(?P<following>\d+)</strong>人</a></span></div>'
    no_address_ptn = r'<div class="userNums"><span><span class="cNote" style="margin-right:0px" ></span><a href="[^"]+">听众<strong id="[^"]+">(?P<follower>\d+)</strong>人</a></span><span><a href="[^"]+">收听<strong>(?P<following>\d+)</strong>人</a></span></div>'
    flags = re.I | re.S

    found = re.search(with_address_ptn, html, flags)
    if found is not None:
        return found.groupdict()

    found = re.search(no_address_ptn, html, flags)
    if found is None:
        return None
    fields = found.groupdict()
    # Keep the result shape identical for both layouts.
    fields['address'] = ''
    return fields
def get_pubInfo(html):
    """Extract the latest-post details from one follower list item.

    Args:
        html: HTML fragment of a single follower list item.

    Returns:
        A dict with the string values ``weiboTime`` (text before the first
        tag inside the ``pubTime`` paragraph), ``weiboFrom`` (the remaining
        markup of that paragraph, starting at ``<``) and ``lastWeibo`` (the
        link text of the post, possibly empty), or ``None`` when the
        ``pubInfo`` block is not found.
    """
    # Raw string: the escaped ``<`` is a regex escape, not a string escape.
    ptn = r'<div class="pubInfo"><p class="pubTime">(?P<weiboTime>.+?)(?P<weiboFrom>\<.+?)</p><p><a href="[^"]+">(?P<lastWeibo>.*?)</a></p></div>'
    found = re.search(ptn, html, re.I | re.S)
    return None if found is None else found.groupdict()
def get_client(html):
    """Return the client name shown in the item's "source" note.

    Args:
        html: HTML fragment of a single follower list item.

    Returns:
        The link text inside the ``cNote source`` div, or ``None`` when the
        div is not present in ``html``.
    """
    source_ptn = '<div class="cNote source">通过<a boss="[^"]+" href="[^"]+">(?P<weiboClient>.+?)</a>收听</div>'
    found = re.search(source_ptn, html, re.I | re.S)
    if found is not None:
        return found.group('weiboClient')
    return None
def parse_info(infos):
    """Convert follower list-item HTML fragments into row dicts.

    Items from which no user name block can be extracted are skipped.
    Optional parts (counters, latest post, client) fall back to the
    defaults ``''`` / ``0`` when they are missing from an item.

    Args:
        infos: Iterable of HTML fragments, one per follower list item.

    Returns:
        A list of dicts with the keys ``account``, ``nick_name``,
        ``address``, ``follower``, ``following``, ``last_weibo``, ``level``,
        ``weibo_from``, ``weibo_client``, ``weibo_time`` and
        ``certification``. ``follower`` and ``following`` are ints; every
        other value is a string.
    """
    datas = []
    for info in infos:
        userName = get_userName(info)
        if userName is None:
            # No account to identify the row; skip before doing more parsing.
            continue

        data = {
            'account': userName['account'],
            'nick_name': userName['nickName'],
            'address': '',
            'follower': 0,
            'following': 0,
            'last_weibo': '',
            'level': userName['level'],
            'weibo_from': '',
            'weibo_client': '',
            'weibo_time': '',
            'certification': userName['certification'],
        }

        userNums = get_userNums(info)
        if userNums is not None:
            data['address'] = userNums['address']
            data['follower'] = int(userNums['follower'])
            data['following'] = int(userNums['following'])

        pubInfo = get_pubInfo(info)
        if pubInfo is not None:
            data['last_weibo'] = pubInfo['lastWeibo']
            # weiboFrom is captured as markup; keep only its text.
            data['weibo_from'] = re.sub('<[^>]+>', '', pubInfo['weiboFrom'])
            data['weibo_time'] = pubInfo['weiboTime']

        # get_client() returns None when the source note is missing; keep the
        # '' default so the value never turns into the text 'None' downstream.
        data['weibo_client'] = get_client(info) or ''
        datas.append(data)
    return datas
def followerGather(weiboClient, dbConnect, account, count=None):
    """Fetch the follower pages of ``account`` and store every follower.

    Each parsed follower is written to the ``follower`` table with
    ``INSERT IGNORE``; ``account`` is stored in the ``following_account``
    column of each row.

    Args:
        weiboClient: Object providing ``userCard(account)`` and
            ``follower(account, page)``, both returning text.
        dbConnect: Open database connection offering ``cursor()`` and
            ``commit()``.
        account: Account whose followers are collected.
        count: Follower count of ``account``. When ``None`` it is read from
            the ``num:[x,count]`` field of the text returned by
            ``weiboClient.userCard(account)``.

    Returns:
        Always ``None``. The function stops early (after printing the raw
        response) when a page reports a non-zero ``result`` or contains no
        ``<ul class="LC">`` list, and stops silently when ``count`` cannot
        be determined from the user card.
    """
    if count is None:
        found = re.search(r',num:\[\d+,(?P<count>\d+)\],',
                          weiboClient.userCard(account), re.I)
        if found is None:
            return None
        count = int(found.group('count'))

    # NOTE(review): presumably 15 followers per page with at most 40 pages
    # available -- confirm against the service.
    loopLimit = min(40, (count // 15) + 1)

    # Placeholders let the driver quote the scraped text; building the
    # statement with string formatting broke on quotes and allowed injection.
    sql = (
        "INSERT IGNORE INTO `follower` SET "
        "`account`=%s, `nick_name`=%s, `address`=%s, `follower`=%s, "
        "`following`=%s, `last_weibo`=%s, `level`=%s, `weibo_from`=%s, "
        "`weibo_client`=%s, `weibo_time`=%s, `certification`=%s, "
        "`following_account`=%s"
    )

    cursor = dbConnect.cursor()
    try:
        for page in range(1, loopLimit + 1):
            txt = weiboClient.follower(account, page)
            rs = json.loads(txt)
            if rs['result'] != 0:
                print(txt)
                return None

            found = re.search('<ul class="LC">(?P<LC>.+?)</ul>', rs['info'],
                              re.S | re.I)
            if found is None:
                print(txt)
                return None

            # Flatten the list markup, then cut it into one fragment per <li>.
            flat = found.group('LC').replace('\n', '').replace('\r', '')
            flat = re.sub(r'>\s+<', '><', flat).strip()
            infos = flat.replace('</li><li', '</li>\r\n<li').split('\r\n')

            for data in parse_info(infos):
                cursor.execute(sql, (
                    data['account'],
                    data['nick_name'],
                    data['address'],
                    data['follower'],
                    data['following'],
                    # Stored verbatim: stripping quotes/backslashes was only
                    # needed for the hand-built SQL string.
                    data['last_weibo'],
                    data['level'],
                    data['weibo_from'],
                    # May be None when the item has no source note.
                    data['weibo_client'] or '',
                    data['weibo_time'],
                    data['certification'],
                    account,
                ))
            # Persist each page so earlier pages survive a later failure.
            dbConnect.commit()
    finally:
        cursor.close()
    return None
if __name__ == '__main__':
    # Script entry point: log in, then collect the followers of every account
    # in `tempaccount` that has a positive follower count.
    connect = db.connect(user='root', db='collection', password='', host="127.0.0.1")
    try:
        cursor = connect.cursor()
        try:
            cursor.execute("SET SQL_MODE = 'TRADITIONAL'")

            # Placeholder credentials ("QQ number" / "password"); replace
            # them with real values before running.
            uin = 'QQ号'
            passwd = '密码'
            wb = client.tWeibo.tWeibo(uin, passwd)
            wb.login()

            cursor.execute("SELECT `id`, `account`, `follower` FROM `tempaccount` WHERE `follower`>0")
            # fetchall() drains the result set before followerGather opens
            # its own cursor on the same connection.
            for (_aid, account, follower) in cursor.fetchall():
                followerGather(wb, connect, account, follower)
                connect.commit()
        finally:
            # Release the cursor even when login or gathering raises.
            cursor.close()
    finally:
        connect.close()
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/Jackin/Python3_t.qq.com_client.git
[email protected]:Jackin/Python3_t.qq.com_client.git
Jackin
Python3_t.qq.com_client
Python3_t.qq.com_client
master

搜索帮助