1 Star 0 Fork 2

summer5/programRecommendation

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
dealWatchTable.py 2.16 KB
一键复制 编辑 原始数据 按行查看 历史
caijiahao 提交于 2018-04-10 20:33 +08:00 . 1.划分训练集和测试集
# encoding: utf-8
def filterWatchTimeLessThanFive(dataset):
    """Drop watch records of five minutes or less and save the remainder.

    Reads the Excel workbook at ``dataset``, computes each row's watch
    duration in whole minutes (via ``calculateTime``) from the timestamps
    found at column positions 4 and 5, stores it in the ``观看时长`` column,
    keeps only the rows whose duration is strictly greater than 5, and
    writes the result to ``./dataset/filterWatchTimeLessThanFiveNew.xlsx``
    without the index.

    Args:
        dataset: Path of the Excel workbook to read.
    """
    import pandas as pd

    duration_col = u'观看时长'
    data = pd.read_excel(dataset)

    # NOTE(review): the two timestamps are still located by position
    # (4 = start, 5 = end, presumably) -- confirm against the sheet layout.
    start_values = data.iloc[:, 4].values
    end_values = data.iloc[:, 5].values

    minutes = []
    for start_raw, end_raw in zip(start_values, end_values):
        start = str(start_raw)
        end = str(end_raw)
        # Fixed-offset slices: the date fields sit at [0:10] and the time
        # fields at [11:19], whichever single character separates the two.
        minutes.append(calculateTime(
            start[0:4], start[5:7], start[8:10],
            end[0:4], end[5:7], end[8:10],
            start[11:13], start[14:16], start[17:19],
            end[11:13], end[14:16], end[17:19]))

    # Assign by label rather than by a hard-coded position: the duration
    # column is appended last, so its position depends on how many columns
    # the input sheet has.
    data[duration_col] = pd.Series(minutes, index=data.index, dtype='int64')

    data = data[data[duration_col] > 5]
    data.to_excel('./dataset/filterWatchTimeLessThanFiveNew.xlsx', index=False)
# Tidy up the video titles so that duplicates are easier to remove
def dealTheOfVideo(dataset):
    """Shorten the video titles of a workbook and save the result.

    Reads the Excel workbook at ``dataset``. For every row, the value at
    column position 3 is shortened in two steps: everything from the first
    ``(`` onward is discarded, and then, if the remaining text contains a
    space, only the part after the first space is kept. The shortened text
    is assigned to column position 8 of the same row. The frame is finally
    written to ``./dataset/filterWatchTimeLessThanFiveNew.xlsx`` without
    the index.

    Args:
        dataset: Path of the Excel workbook to read.
    """
    import pandas as pd

    frame = pd.read_excel(dataset)
    for row in range(len(frame)):
        title = frame.iloc[row:row + 1, 3:4].values[0][0]

        # Cut the title at its first opening parenthesis, if it has one.
        title = title.partition('(')[0]

        # Keep only what follows the first space, when a space is present.
        _, space, remainder = title.partition(' ')
        if space:
            title = remainder

        # NOTE(review): the target is addressed by position (8) through a
        # slice -- confirm the sheet really has a column at that position.
        frame.iloc[row:row + 1, 8:9] = title

    frame.to_excel('./dataset/filterWatchTimeLessThanFiveNew.xlsx', index=False)
# Compute the difference between two datetimes, in whole minutes
def calculateTime(startyear, startmonth, startday, endyear, endmonth, endday, startH, startM, startS, endH, endM, endS):
    """Return the whole minutes elapsed between a start and an end timestamp.

    Each argument is one calendar or clock field (year, month, day, hour,
    minute, second) of the start or the end timestamp. Every field is passed
    through ``int()``, so numeric strings such as ``'04'`` are accepted.

    The elapsed seconds are divided by 60 and truncated toward zero, so the
    result is negative when the end precedes the start.
    """
    from datetime import datetime

    begin_fields = (startyear, startmonth, startday, startH, startM, startS)
    finish_fields = (endyear, endmonth, endday, endH, endM, endS)

    began = datetime(*[int(field) for field in begin_fields])
    finished = datetime(*[int(field) for field in finish_fields])

    elapsed_seconds = (finished - began).total_seconds()
    return int(elapsed_seconds / 60)
# Script driver: these statements run whenever the module is executed or
# imported. The processing stages are switched by hand through the
# commented-out lines below.
# Alternative input (presumably the raw watch log -- confirm):
#dataset = './dataset/watch.xlsx'
# Active input: the same path that filterWatchTimeLessThanFive writes to.
dataset = './dataset/filterWatchTimeLessThanFiveNew.xlsx'
dealTheOfVideo(dataset=dataset)
#filterWatchTimeLessThanFive(dataset=dataset)
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/summer5/programRecommendation.git
[email protected]:summer5/programRecommendation.git
summer5
programRecommendation
programRecommendation
master

搜索帮助