1 Star 0 Fork 2

shi2732577284/smilesToCasTyper

forked from Buckwheat/smilesToCasTyper 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
smilesToCas.py 5.92 KB
一键复制 编辑 原始数据 按行查看 历史
Buckwheat 提交于 2021-11-25 15:59 . 更新
from concurrent.futures import ThreadPoolExecutor, as_completed
import pubchempy as pcp
import ssl
import requests
import random
from rich.markdown import Markdown
from rich.table import Table
from rich.console import Console
from openpyxl import load_workbook
# 忽略requests证书警告
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
def smilesToCas(smilesInfo):
    """Look up the CAS number for one SMILES string and store it in the sheet.

    The SMILES string is resolved to a PubChem CID through ``pubchempy``,
    the CAS entry for that CID is fetched from the PubChem PUG-View REST
    endpoint, written into the module-level ``worksheet`` and echoed to the
    module-level ``console`` as a small table.

    Args:
        smilesInfo: Sequence whose first three items are the SMILES string,
            the worksheet row to write to, and the column index of the CAS
            column.

    Raises:
        LookupError: PubChem returned no usable compound for the SMILES
            string (``IndexError`` when the result list is empty), or the
            JSON reply does not contain the expected CAS entry.
        requests.HTTPError: The PUG-View request answered with an HTTP
            error status.
    """
    smilesValue, smilesNowRow, casColumn = smilesInfo[:3]

    compounds = pcp.get_compounds(smilesValue, "smiles")
    if not compounds:
        # Same exception type a blind ``[0]`` would raise, but with a message
        # that names the offending input.
        raise IndexError("PubChem returned no compound for SMILES %r" % smilesValue)
    cid_for_smiles = compounds[0].cid
    if cid_for_smiles is None:
        # NOTE(review): pubchempy appears to return a placeholder compound
        # without a CID for structures it cannot match - confirm against the
        # pubchempy documentation.
        raise LookupError("PubChem has no CID for SMILES %r" % smilesValue)

    url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound/%s/JSON/?heading=CAS" % cid_for_smiles
    request_headers = {'User-Agent': random.choice(user_agent), "connection": "close"}
    # NOTE(security): verify=False disables TLS certificate validation for
    # this request. It is kept to preserve the existing behaviour; enable
    # verification when the environment allows it.
    response = requests.get(url, headers=request_headers, timeout=5, verify=False)
    # Surface HTTP failures directly instead of as an unrelated KeyError
    # further down.
    response.raise_for_status()

    try:
        cas_section = response.json()['Record']['Section'][0]['Section'][0]['Section'][0]
        cas_num = cas_section["Information"][0]['Value']['StringWithMarkup'][0]['String']
    except (KeyError, IndexError, TypeError) as err:
        raise LookupError(
            "No CAS entry in the PubChem record for CID %s (SMILES %r)" % (cid_for_smiles, smilesValue)
        ) from err

    worksheet.cell(smilesNowRow, casColumn).value = cas_num

    table = Table(title="%s检索成功" % smilesValue)
    table.add_column("SMILES", justify="center", style="green")
    table.add_column("CAS", justify="center", style="green")
    table.add_row(smilesValue, cas_num)
    console.print(table)
def generateParameters():
    """Find the SMILES column, add a CAS column and run all lookups.

    Works on the module-level ``worksheet`` / ``workBook`` / ``file_path``
    and submits one ``smilesToCas`` job per non-empty SMILES cell to the
    module-level ``thread_executor``. The function returns once every job
    has finished; jobs that raised are reported on stdout instead of being
    dropped silently.

    Returns:
        None. Results are written into ``worksheet`` by the worker jobs.
    """
    smilesColumn = 0
    for headerCell in worksheet[1]:
        header = headerCell.value
        # Only text headers can match; numbers, dates and empty cells are
        # skipped instead of crashing on ``.upper()``.
        if isinstance(header, str) and header.upper().strip() == 'SMILES':
            # No early exit: when several headers match, the last one wins.
            smilesColumn = headerCell.column

    if not smilesColumn:
        print('未找到任何符合SMILES字段的列')
        return

    print('找到符合的SMILES列,在第%d列.' % smilesColumn)
    # The CAS column index must be computed before the header cell is
    # written, because writing it grows ``max_column``.
    casColumn = worksheet.max_column + 1
    print('一共有%s个SMILES待查询...' % str(worksheet.max_row - 1))
    worksheet.cell(1, casColumn).value = 'CAS'
    workBook.save(file_path)
    print('CAS列名已建立')

    # Keep each SMILES paired with its own row. Looking the row up again via
    # ``list.index`` would send every duplicate SMILES to the row of its
    # first occurrence.
    pendingLookups = []
    for rowCells in worksheet.iter_rows(min_col=smilesColumn, max_col=smilesColumn,
                                        min_row=2, max_row=worksheet.max_row):
        smilesCell = rowCells[0]
        if smilesCell.value is None:
            continue
        smilesText = str(smilesCell.value).strip()
        if not smilesText:
            continue
        pendingLookups.append((smilesText, smilesCell.row))

    submittedJobs = {
        thread_executor.submit(smilesToCas, (smilesText, rowNumber, casColumn)): (smilesText, rowNumber)
        for smilesText, rowNumber in pendingLookups
    }
    for finishedJob in as_completed(submittedJobs):
        failure = finishedJob.exception()
        if failure is not None:
            failedSmiles, failedRow = submittedJobs[finishedJob]
            print('第%d行 %s 检索失败: %s' % (failedRow, failedSmiles, failure))
if __name__ == "__main__":
    # Script entry point: load the workbook named on the command line, look
    # up a CAS number for every SMILES cell and save the workbook in place.
    # file_path, workBook, worksheet, user_agent, console and thread_executor
    # are deliberately bound at module level because smilesToCas() and
    # generateParameters() read them as globals.
    import sys

    # Abort with a usage hint instead of an IndexError traceback when the
    # workbook path is missing.
    if len(sys.argv) < 2:
        sys.exit('用法: python %s <Excel文件路径>' % sys.argv[0])
    file_path = sys.argv[1]
    workBook = load_workbook(file_path)
    worksheet = workBook.active

    # Pool of User-Agent strings; smilesToCas() picks one at random per request.
    user_agent = [
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
        "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
        "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
        "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
        "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
        "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
        "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
        "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
    ]

    # Replace the default HTTPS context factory with the unverified one, which
    # turns certificate verification off process-wide for code relying on it.
    # NOTE(security): this removes protection against man-in-the-middle
    # attacks; kept only to preserve the existing behaviour.
    ssl._create_default_https_context = ssl._create_unverified_context

    console = Console()
    title = Markdown("# 欢迎使用SMILES-TO-CAS软件 Author:Buckwheat")
    console.print(title, style="white on blue")
    useMatters = '''
注意事项
- Excel文件列名必须有一列为SMILES且唯一,不区分大小写
- Excel文件暂时不支持加密,请解除密码后继续操作
'''
    console.print(Markdown(useMatters), style="red")

    try:
        print('创建线程池...')
        # The with-block shuts the pool down (waiting for running jobs) even
        # when generateParameters() raises.
        with ThreadPoolExecutor(max_workers=4) as thread_executor:
            generateParameters()
        workBook.save(file_path)
        input('任务执行完成,按回车键退出程序...')
    except Exception as error:
        # Top-level boundary: show the error and keep the window open so the
        # user can read it before the program exits.
        print(error)
        input('出现错误,错误如上,按回车键退出程序...')
    # The credit line is printed on every path, whatever the user typed.
    print('Author Buckwheat')
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/shi2732577284/smiles-to-cas-typer.git
[email protected]:shi2732577284/smiles-to-cas-typer.git
shi2732577284
smiles-to-cas-typer
smilesToCasTyper
master

搜索帮助