1 Star 3 Fork 2

Jackin/Python3_t.qq.com_client

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
saveFollowerFromJSON2DB.py 4.56 KB
一键复制 编辑 原始数据 按行查看 历史
Jackin CNLove 提交于 2013-11-09 16:15 . Init
# -*- coding: UTF-8 -*-
import mysql.connector as db
import time
import json
import re
import sys
def get_userName(html):
    """Extract the identity fields from one follower's HTML fragment.

    Two layouts are tried in order: one that carries a certification link
    (an anchor with ``target="_blank"``) and one that does not.

    Args:
        html: HTML text expected to contain a ``<div class="userName">`` block.

    Returns:
        A dict with the string values ``account``, ``nickName``,
        ``certification`` and ``level`` (``level`` is the matched digits, not
        an int). ``certification`` is ``''`` when only the second layout
        matches. Returns ``None`` when neither layout matches.
    """
    # Raw strings: the patterns contain regex escapes such as \d, which are
    # invalid *string* escapes in a normal literal and trigger warnings.
    certified_ptn = r'<div class="userName"><strong><a href="/(?P<account>[^"]+)" title="[^"]+">(?P<nickName>.+?)</a><a href="[^"]+" title="(?P<certification>[^"]+)" target="_blank" class="[^"]+"></a><a href="[^"]+" class="[^"]+" user="[^"]+"><em>(?P<level>\d+)</em></a></strong></div>'
    found = re.search(certified_ptn, html, re.I | re.S)
    if found is not None:
        return found.groupdict()

    plain_ptn = r'<div class="userName"><strong><a href="/(?P<account>[^"]+)" title="[^"]+">(?P<nickName>.+?)</a><a href="[^"]+" class="[^"]+" user="[^"]+"><em>(?P<level>\d+)</em></a></strong></div>'
    found = re.search(plain_ptn, html, re.I | re.S)
    if found is None:
        return None

    # Keep the result shape identical to the certified layout.
    fields = found.groupdict()
    fields['certification'] = ''
    return fields
def get_userNums(html):
    """Extract the address and follower/following counts from a follower's HTML.

    Two layouts are tried in order: one whose ``cNote`` span holds address
    text, and one whose ``cNote`` span is empty (and carries an inline
    ``style`` attribute).

    Args:
        html: HTML text expected to contain a ``<div class="userNums">`` block.

    Returns:
        A dict with the string values ``address``, ``follower`` and
        ``following`` (the counts are the matched digits, not ints).
        ``address`` is ``''`` when only the second layout matches.
        Returns ``None`` when neither layout matches.
    """
    # Raw strings: \d is a regex escape, not a valid string escape.
    with_address_ptn = r'<div class="userNums"><span><span class="cNote" >(?P<address>.+?)</span><a href="[^"]+">听众<strong id="[^"]+">(?P<follower>\d+)</strong>人</a></span><span><a href="[^"]+">收听<strong>(?P<following>\d+)</strong>人</a></span></div>'
    found = re.search(with_address_ptn, html, re.I | re.S)
    if found is not None:
        return found.groupdict()

    no_address_ptn = r'<div class="userNums"><span><span class="cNote" style="margin-right:0px" ></span><a href="[^"]+">听众<strong id="[^"]+">(?P<follower>\d+)</strong>人</a></span><span><a href="[^"]+">收听<strong>(?P<following>\d+)</strong>人</a></span></div>'
    found = re.search(no_address_ptn, html, re.I | re.S)
    if found is None:
        return None

    # Keep the result shape identical to the layout that has an address.
    fields = found.groupdict()
    fields['address'] = ''
    return fields
def get_pubInfo(html):
    """Extract the latest-post details from a follower's HTML fragment.

    Args:
        html: HTML text expected to contain a ``<div class="pubInfo">`` block.

    Returns:
        A dict with the string values ``weiboTime`` (the text before the
        first tag inside the ``pubTime`` paragraph), ``weiboFrom`` (the rest
        of that paragraph, starting at a ``<`` and still containing markup)
        and ``lastWeibo`` (the link text, possibly empty). Returns ``None``
        when the block is not found.
    """
    # Raw string: \< is a regex escape for a literal '<', not a string escape.
    ptn = r'<div class="pubInfo"><p class="pubTime">(?P<weiboTime>.+?)(?P<weiboFrom>\<.+?)</p><p><a href="[^"]+">(?P<lastWeibo>.*?)</a></p></div>'
    found = re.search(ptn, html, re.I | re.S)
    if found is None:
        return None
    return found.groupdict()
def get_source(html):
    """Return the client name from the "cNote source" block of a follower's HTML.

    Args:
        html: HTML text that may contain a ``<div class="cNote source">`` block.

    Returns:
        The text of the link inside that block, or ``None`` when the block
        is not present.
    """
    found = re.search(
        '<div class="cNote source">通过<a boss="[^"]+" href="[^"]+">(?P<weiboClient>.+?)</a>收听</div>',
        html,
        re.S | re.I,
    )
    return None if found is None else found.group('weiboClient')
def parse_info(infos):
    """Convert follower HTML fragments into flat record dicts.

    Args:
        infos: Iterable of HTML strings, one per follower entry.

    Returns:
        A list of dicts with the keys ``account``, ``nick_name``, ``address``,
        ``follower``, ``following``, ``last_weibo``, ``level``, ``weibo_from``,
        ``weibo_client``, ``weibo_time`` and ``certification``. Fragments for
        which ``get_userName`` finds nothing are skipped. Fields whose source
        block is missing keep their defaults (``''``, ``0``, or ``1`` for
        ``level``).
    """
    datas = []
    for info in infos:
        # Without a user name block the entry is unusable, so bail out before
        # running the remaining (more expensive) regex searches.
        userName = get_userName(info)
        if userName is None:
            continue

        data = {
            'account': userName['account'],
            'nick_name': userName['nickName'],
            'address': '',
            'follower': 0,
            'following': 0,
            'last_weibo': '',
            'level': int(userName['level']),
            'weibo_from': '',
            'weibo_client': '',
            'weibo_time': '',
            'certification': userName['certification'],
        }

        userNums = get_userNums(info)
        if userNums is not None:
            data['address'] = userNums['address']
            data['follower'] = int(userNums['follower'])
            data['following'] = int(userNums['following'])

        pubInfo = get_pubInfo(info)
        if pubInfo is not None:
            data['last_weibo'] = pubInfo['lastWeibo']
            # weiboFrom still contains markup; keep only its text.
            data['weibo_from'] = re.sub('<[^>]+>', '', pubInfo['weiboFrom'])
            data['weibo_time'] = pubInfo['weiboTime']

        # get_source returns None when no source block exists; fall back to
        # '' so this field follows the same "missing means empty string"
        # convention as every other text field in the record.
        data['weibo_client'] = get_source(info) or ''

        datas.append(data)
    return datas
if __name__ == '__main__':
    # Script entry point: read the list of follower JSON files from
    # 'dir.log', parse the follower HTML embedded in each JSON line, and
    # insert one row per follower into the `follower` table.
    import os

    sqlInsertFollower = "INSERT IGNORE INTO `follower`(`account`, `nick_name`, `address`, `follower`, `following`, `last_weibo`, `level`, `weibo_from`, `weibo_client`, `weibo_time`, `certification`, `following_account`) VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s); "

    connect = db.connect(user='root', db='collection', password='', host="127.0.0.1")
    try:
        cursor = connect.cursor()
        try:
            cursor.execute("SET SQL_MODE = 'TRADITIONAL'")

            with open('dir.log', 'r') as fJson:
                for listed in fJson:
                    jsonFile = listed.strip()
                    # A blank line (like end of file) ends the file list.
                    if jsonFile == '':
                        break

                    # os.path.join keeps the path valid on every platform; a
                    # literal backslash only works as a separator on Windows.
                    with open(os.path.join('follower', jsonFile), 'r') as fp:
                        # File names look like 'follower_<account>.json'.
                        following_account = re.sub(r'^follower_(?P<account>.+?)\.json$', r'\g<account>', jsonFile)
                        print("开始保存 %s 的听众!" % following_account)

                        # The first line of each file is skipped.
                        fp.readline()

                        for line in fp:
                            if line.strip() == '':
                                continue
                            rs = json.loads(line)
                            group = re.search('<ul class="LC">(?P<LC>.+?)</ul>', rs['info'], re.S | re.I)
                            # No follower list in this record: stop reading
                            # this file.
                            if group is None:
                                break

                            # Flatten the list markup (drop newlines and the
                            # whitespace between tags), then split it into one
                            # string per <li> entry.
                            infos = re.sub(r'>\s+<', '><', group.groupdict()['LC'].replace('\n', '').replace('\r', '')).strip().replace('</li><li', '</li>\r\n<li').split('\r\n')

                            for data in parse_info(infos):
                                cursor.execute(sqlInsertFollower, (data['account'], data['nick_name'], data['address'], data['follower'], data['following'], data['last_weibo'], data['level'], data['weibo_from'], data['weibo_client'], data['weibo_time'], data['certification'], following_account))
                            # Commit once per JSON line so progress survives
                            # a failure on a later line.
                            connect.commit()

            connect.commit()
        finally:
            cursor.close()
    finally:
        connect.close()
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/Jackin/Python3_t.qq.com_client.git
[email protected]:Jackin/Python3_t.qq.com_client.git
Jackin
Python3_t.qq.com_client
Python3_t.qq.com_client
master

搜索帮助