1 Star 0 Fork 0

华科开放原子开源俱乐部/linux-edu-rank

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
linux-statistic.py 7.70 KB
一键复制 编辑 原始数据 按行查看 历史
#!/usr/bin/env python3
import json
import os
import shutil
from argparse import ArgumentParser
from datetime import datetime
from functools import reduce
import git
import requests
from tqdm import tqdm
def is_university_domain(test_domain, uni_list):
    """Return True if ``test_domain`` belongs to a university in ``uni_list``.

    A domain matches when it is listed verbatim, or when it is a subdomain
    of a listed domain (``cs.sc.edu`` matches the listed ``sc.edu``).

    The subdomain test is anchored on a ``.`` label boundary.  A plain
    ``str.endswith`` check would wrongly accept ``osc.edu`` or ``notsc.edu``
    for the listed domain ``sc.edu``.

    Args:
        test_domain: The e-mail domain to check, e.g. ``"cs.sc.edu"``.
        uni_list: Iterable of dicts, each carrying a ``"domains"`` list of
            domain strings.

    Returns:
        True when the domain is (a subdomain of) any listed domain,
        otherwise False.
    """
    for university in uni_list:
        for raw_domain in university["domains"]:
            if test_domain == raw_domain or test_domain.endswith("." + raw_domain):
                return True
    return False
# Command-line options: the branch to analyse, the path of the local clone,
# and the display name stored in the result metadata.
parser = ArgumentParser()
parser.add_argument("--branch", type=str, default="master")
parser.add_argument("--path", type=str, default="/tmp/linux")
parser.add_argument("--repo", type=str, default="Linux Mainline")
args = parser.parse_args()
branch = args.branch
path = args.path
repo_name = args.repo
repo = git.Repo(path)

print("Getting university list...")
# The timeout prevents the script from hanging forever on a stalled
# connection; raise_for_status() turns an HTTP error page into an explicit
# failure instead of an attempt to parse it as the university list.
university_response = requests.get(
    "https://github.com/Hipo/university-domains-list/raw/master/world_universities_and_domains.json",
    timeout=60,
)
university_response.raise_for_status()
university_list: list = university_response.json()

print("Getting commits list...")
commits = list(repo.iter_commits(branch))
meta = {
    "update": datetime.now().isoformat(),
    "repo": repo_name,
    "branch": branch,
    # Record the tip of the branch that was actually analysed rather than a
    # hard-coded "master", which is wrong (or missing) for other branches.
    "commit": repo.commit(branch).hexsha,
}
# Accumulators filled while walking the commits, all keyed by the author's
# e-mail domain:
#   result_patches[domain] -> number of commits
#   result_lines[domain]   -> sum of commit.stats.total["lines"]
#   result_detail[domain]  -> list of `git show` outputs, one per commit
#   result_authors[domain] -> {email: [author name, commit count, commit infos]}
result_patches = {}
result_lines = {}
result_detail = {}
result_authors = {}
print("Total commits: ", len(commits))
for commit in tqdm(commits):
    email = commit.author.email
    if not email:
        continue
    # The e-mail domain is everything after the last "@".
    domain = email.split("@")[-1]
    if not is_university_domain(domain, university_list):
        continue
    # GitPython computes commit.stats on demand (it shells out to git on
    # every access), so read it once per commit and reuse the totals.
    totals = commit.stats.total
    result_patches[domain] = result_patches.get(domain, 0) + 1
    result_lines[domain] = result_lines.get(domain, 0) + totals["lines"]
    result_detail.setdefault(domain, []).append(repo.git.show(commit.hexsha))
    author = result_authors.setdefault(domain, {}).setdefault(
        email, [commit.author.name, 0, []]
    )
    author[1] += 1
    author[2].append(
        {
            "commit": commit.hexsha,
            "summary": commit.summary,
            "date": commit.authored_datetime.isoformat(),
            "files": totals["files"],
            "lines": "-{}/+{}".format(totals["deletions"], totals["insertions"]),
        }
    )
def get_university(domain, uni_list=None):
    """Return the university record that ``domain`` belongs to, or None.

    Lookup order:
      1. a record listing ``domain`` verbatim;
      2. the first record with a listed domain that is a parent or a child
         of ``domain``.

    Parent/child tests are anchored on a ``.`` label boundary, so
    ``mail.osc.edu`` is not attributed to the record listing ``sc.edu``.

    Args:
        domain: The e-mail domain to look up.
        uni_list: Optional iterable of university dicts (each with a
            ``"domains"`` list).  Defaults to the module-level
            ``university_list``.

    Returns:
        The matching university dict, or None when nothing matches.
    """
    if uni_list is None:
        uni_list = university_list
    for university in uni_list:
        if domain in university["domains"]:
            return university
    for university in uni_list:
        for raw_domain in university["domains"]:
            if domain.endswith("." + raw_domain) or raw_domain.endswith("." + domain):
                return university
    return None
def _authors_of(domain):
    """Return the author records collected for ``domain`` as a list of dicts."""
    return [
        {
            "email": email,
            "name": record[0],
            "count": record[1],
            "commits": record[2],
        }
        for email, record in result_authors.get(domain, {}).items()
    ]


# Merge the per-domain counters into one entry per university.  Domains that
# resolve to the same university are combined; a domain with no university
# record gets its own "Unknown (<domain>)" entry.
result_tmp = {}
for domain_name, patch_count in result_patches.items():
    university = get_university(domain_name)
    line_count = result_lines[domain_name]
    authors = _authors_of(domain_name)
    if university is None:
        authors.sort(key=lambda author: author["count"], reverse=True)
        result_tmp[domain_name] = {
            "name": f"Unknown ({domain_name})",
            "domains": [domain_name],
            "university": None,
            "count": patch_count,
            "lines": line_count,
            "authors": authors,
        }
        continue
    name = university["name"]
    entry = result_tmp.setdefault(
        name,
        {
            "name": name,
            "domains": [],
            "university": university,
            "count": 0,
            "lines": 0,
            "authors": [],
        },
    )
    if domain_name not in entry["domains"]:
        entry["domains"].append(domain_name)
    entry["authors"].extend(authors)
    # Keep the merged author list ordered by commit count, highest first.
    entry["authors"].sort(key=lambda author: author["count"], reverse=True)
    entry["count"] += patch_count
    entry["lines"] += line_count
# Order universities by commit count (highest first) and number them.
# "id" is the 1-based position in that order.  "rank" equals "id" unless the
# entry ties with the previous one on "count", in which case it shares the
# previous entry's rank.
ranked = sorted(result_tmp.values(), key=lambda entry: entry["count"], reverse=True)
result = []
for position, entry in enumerate(ranked, start=1):
    if result and result[-1]["count"] == entry["count"]:
        rank = result[-1]["rank"]
    else:
        rank = position
    # Appending in a loop avoids copying the whole list for every element.
    result.append(entry | {"id": position, "rank": rank})
# Write the metadata and the ranked list to result.json in the current
# working directory.
with open("result.json", "w") as f:
    json.dump({"meta": meta, "data": result}, f, indent=2)
print("Result saved to result.json")

print("Save patches to detail dir...")
# Recreate "detail" from scratch so pages from a previous run do not linger.
shutil.rmtree("detail", ignore_errors=True)
os.mkdir("detail")
# Page skeleton for the patch detail pages.  Literal CSS braces are doubled
# because the template is filled in with str.format().
_DETAIL_PAGE_TEMPLATE = """<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{title}</title>
<style>
.pagination {{
border-top: 1px solid #ddd;
border-bottom: 1px solid #ddd;
overflow-wrap: break-word;
}}
.pagination a, .pagination span {{
margin: 0 4px;
}}
</style>
</head>
<body>
<h1>{title}</h1>
<div class="pagination">
{pagination}
</div>
<hr>
{content}
<div class="pagination">
{pagination}
</div>
</body>
</html>
"""


def generate_html(id, title, patches, out_dir="detail", page_size=10):
    """Write paginated HTML pages that list ``patches``.

    The first page is ``<id>.html``; page N (N > 1) is ``<id>_<N>.html``.
    At least one page is always written, even when ``patches`` is empty, and
    no trailing empty page is produced when the number of patches is an
    exact multiple of ``page_size``.

    Args:
        id: Identifier used as the file-name stem.  (The name shadows the
            builtin; it is kept because it is part of the signature.)
        title: Page title; HTML-escaped before being inserted.
        patches: Sequence of patch texts; each is HTML-escaped and wrapped
            in ``<pre>``.
        out_dir: Existing directory that receives the pages.
        page_size: Maximum number of patches per page.
    """
    from html import escape  # local import keeps this block self-contained

    # Ceiling division, but never fewer than one page.
    page_num = max(1, (len(patches) + page_size - 1) // page_size)

    def get_href(page):
        return f"{id}.html" if page == 1 else f"{id}_{page}.html"

    def get_pagination(page):
        links = []
        if page > 1:
            links.append("<a href='{}'>&lt;&lt;Prev</a>".format(get_href(page - 1)))
        for i in range(1, page_num + 1):
            if i == page:
                links.append(f"<span>[{i}]</span>")
            else:
                links.append("<a href='{}'>{}</a>".format(get_href(i), i))
        if page < page_num:
            links.append("<a href='{}'>Next&gt;&gt;</a>".format(get_href(page + 1)))
        return "".join(links)

    safe_title = escape(title)
    for page in range(1, page_num + 1):
        chunk = patches[(page - 1) * page_size : page * page_size]
        content = "<hr>".join(
            "<pre>{}</pre>".format(escape(patch, quote=False)) for patch in chunk
        )
        page_html = _DETAIL_PAGE_TEMPLATE.format(
            title=safe_title,
            pagination=get_pagination(page),
            content=content,
        )
        # Round-trip through UTF-8 with "replace" so characters that cannot
        # be encoded (e.g. lone surrogates) become "?" instead of raising.
        page_html = page_html.encode("utf-8", "replace").decode("utf-8")
        # The page declares charset UTF-8, so write it as UTF-8 regardless
        # of the platform's default encoding.
        with open(os.path.join(out_dir, get_href(page)), "w", encoding="utf-8") as f:
            f.write(page_html)
# Emit the detail pages: one paginated set per ranked entry, containing the
# patches of every domain merged into that entry.
for item in result:
    patches = [patch for d in item["domains"] for patch in result_detail[d]]
    generate_html(item["id"], "Patches contributed by " + item["name"], patches)
print("Done!")
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
HTML
1
https://gitee.com/hust-open-atom-club/linux-edu-rank.git
[email protected]:hust-open-atom-club/linux-edu-rank.git
hust-open-atom-club
linux-edu-rank
linux-edu-rank
master

搜索帮助