1 Star 3 Fork 2

Jackin/Python3_t.qq.com_client

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
weiboContentGather.py 2.76 KB
一键复制 编辑 原始数据 按行查看 历史
Jackin CNLove 提交于 2013-11-09 16:15 . Init
# -*- coding: UTF-8 -*-
import mysql.connector as db
import client.tWeibo
import urllib.parse
import datetime
import random
import time
import math
import json
import sys
import re
# Weibo content collector: gathers the original posts published by the
# accounts listed in userList and saves them to the local weibo_content
# directory and to the database.
# NOTE(review): no code in this script writes to that directory; only
# database inserts are performed.

# Sign-in credentials. The literals are placeholders ("QQ number" and
# "password") and must be replaced with real values before running.
uin = 'QQ号'
passwd = '密码'

# Authenticated client used for every HTTP request made by this script.
wb = client.tWeibo.tWeibo(uin, passwd)
wb.login()

# Local MySQL connection; the collected posts go into the `collection` schema.
connect = db.connect(
    host="127.0.0.1",
    user='root',
    password='',
    db='collection',
)
cursor = connect.cursor()

# Session settings: utf8 for client/server traffic, TRADITIONAL SQL mode.
cursor.execute("SET NAMES 'utf8'")
cursor.execute("SET SQL_MODE = 'TRADITIONAL'")

# Accounts to collect from.
# Longer account list, currently disabled:
# userList = ['meilishuo6976','lanxinmiaoyu','Love520weimei','xianxuqinyuan','hyx0310_love','weigebo1103']
userList = [
    'xianxuqinyuan',
    'hyx0310_love',
    'weigebo1103',
]
# Collect the posts of every account in userList, page by page, and insert
# them into the `weibo_content` table. Work is committed after each page; the
# cursor and the connection are always closed, even when the run is aborted.

# Loop-invariant values, built once instead of once per account.
sqlInsert = (
    "INSERT INTO `weibo_content`(`weibo_id`,`content`,`name`,`nick`,`timestamp`, `pic`) "
    "VALUES(%s, %s, %s, %s, %s, %s)"
)
apiURL = 'http://api1.t.qq.com/asyn/index.php?&'
maxPages = 200  # hard upper bound on the number of pages read per account

# The endpoint's reply begins with an unquoted, single-quoted object header
# that json.loads() rejects; this pattern rewrites that header into JSON.
# Raw strings keep `\d` and `\g<...>` from being invalid string escapes.
# NOTE(review): only replies containing 'hasNext':1 are rewritten; a reply
# with another hasNext value is left untouched and will fail to parse below
# — confirm whether that is intended.
headPattern = re.compile(
    r"{result:0,msg:'成功','info':{'user':'(?P<user>[^']+)','hasNext':1,'time':(?P<time>\d+),'talk':"
)
headReplacement = r'{"result":0,"msg":"成功","info":{"user":"\g<user>","hasNext":1,"time":\g<time>,"talk":'

# NOTE: the original script carried disabled code that dumped each raw page
# to weibo_content\<account>.json; it was never executed and is omitted here.

try:
    for u in userList:
        profileURL = 'http://t.qq.com/%s' % u
        header = {
            'Referer': 'http://api1.t.qq.com/proxy.html',
            'rf': profileURL,
            'User-Agent': 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.52 Safari/537.17',
        }

        # The profile page is requested first and its response discarded
        # (presumably to establish session state — TODO confirm).
        wb.get(profileURL)

        furl = apiURL + urllib.parse.urlencode({
            'u': u,
            'apiType': '8',
            'apiHost': 'http://api.t.qq.com',
            '_r': math.floor(time.time() * 1000),
        })
        print(furl)
        raw = wb.get(furl, header).data.decode('utf-8')

        page = 1
        while True:
            print("%s -- %d" % (u, page))

            normalized = headPattern.sub(headReplacement, raw.strip(), 1)
            try:
                result = json.loads(normalized)
            except ValueError as e:
                # Show the reply as received and as rewritten, then abort.
                # (The original concatenated the not-yet-assigned `result`
                # here, which raised and hid these diagnostics.)
                print('源JSON:' + raw)
                print('现JSON:' + normalized)
                print(e)
                sys.exit()

            talks = result['info']['talk']

            # Paging cursor: id and timestamp of the last entry seen on this
            # page, whatever its type. Reset per page so a stale or unbound
            # value is never sent with the next request.
            lastId = None
            lastTimestamp = None
            for talk in talks:
                try:
                    lastId = talk['id']
                    lastTimestamp = talk['timestamp']
                    # Only entries of type 1 are stored (presumably original
                    # posts — TODO confirm).
                    if talk['type'] != 1:
                        continue
                    cursor.execute(sqlInsert, (
                        talk['id'],
                        talk['content'].strip(),
                        talk['name'],
                        talk['nick'],
                        talk['timestamp'],
                        ','.join(talk['image']),
                    ))
                except KeyError as e:
                    # An entry lacking an expected field is reported and skipped.
                    print(talk)
                    print(e)
            connect.commit()

            if not talks:
                break
            if lastId is None or lastTimestamp is None:
                # No entry on this page supplied a cursor for the next request.
                print("%s -- no usable id/timestamp on page %d, stopping" % (u, page))
                break

            page += 1
            if page > maxPages:
                break

            params = {
                'time': lastTimestamp,
                'page': page,
                'id': lastId,
                'u': u,
                'apiType': '8',
                'apiHost': 'http://api.t.qq.com',
                '_r': str(math.floor(time.time() * 1000)),
            }
            raw = wb.get(apiURL + urllib.parse.urlencode(params), header).data.decode('utf-8')

    connect.commit()
finally:
    # Runs on normal completion, on sys.exit() and on unexpected errors.
    cursor.close()
    connect.close()
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/Jackin/Python3_t.qq.com_client.git
[email protected]:Jackin/Python3_t.qq.com_client.git
Jackin
Python3_t.qq.com_client
Python3_t.qq.com_client
master

搜索帮助