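# NOTE: the next three lines appear to be repository-hosting page messages captured with this file; they are not part of the program.
# Code pull complete; the page will refresh automatically.
# The sync operation will force-sync from Buckwheat/smilesToCasTyper. It overwrites every change made since the repository was forked and cannot be undone!
# After you confirm, the sync runs in the background and the page refreshes when it finishes; please be patient.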
from concurrent.futures import ThreadPoolExecutor, as_completed
import pubchempy as pcp
import ssl
import requests
import random
from rich.markdown import Markdown
from rich.table import Table
from rich.console import Console
from openpyxl import load_workbook
# 忽略requests证书警告
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
def smilesToCas(smilesInfo):
    """Look up the CAS number for one SMILES string and write it to the sheet.

    Relies on the module-level ``worksheet``, ``console`` and ``user_agent``
    objects created by the script entry point.

    Args:
        smilesInfo: Sequence of ``(smiles, row, cas_column)`` where ``smiles``
            is the SMILES string, ``row`` is the worksheet row it came from
            and ``cas_column`` is the column index the CAS value is written to.

    Returns:
        The CAS string that was written to the worksheet.

    Raises:
        LookupError: If PubChem returns no compound (or no CID) for the
            SMILES, or the CAS response does not have the expected layout.
        requests.HTTPError: If the CAS request returns an error status.
    """
    smiles_value = smilesInfo[0]
    row_index = smilesInfo[1]
    cas_column = smilesInfo[2]

    # Guard the "[0]" access: an empty result list or a result without a CID
    # would otherwise fail with a bare IndexError or build a bogus URL.
    compounds = pcp.get_compounds(smiles_value, "smiles")
    cid_for_smiles = compounds[0].cid if compounds else None
    if cid_for_smiles is None:
        raise LookupError("No PubChem compound found for SMILES %r" % smiles_value)

    url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound/%s/JSON/?heading=CAS" % cid_for_smiles
    # NOTE(review): verify=False disables TLS certificate verification for
    # this request; kept because the script deliberately opts out of it.
    response = requests.get(
        url,
        headers={'User-Agent': random.choice(user_agent), "connection": "close"},
        timeout=5,
        verify=False,
    )
    # Fail with the HTTP status instead of an opaque KeyError further down.
    response.raise_for_status()

    try:
        cas_num = response.json()['Record']['Section'][0]['Section'][0]['Section'][0]["Information"][0]['Value'][
            'StringWithMarkup'][0]['String']
    except (KeyError, IndexError, TypeError) as err:
        raise LookupError(
            "No CAS entry in the PubChem record for SMILES %r (CID %s)" % (smiles_value, cid_for_smiles)
        ) from err

    worksheet.cell(row_index, cas_column).value = cas_num

    # Show the successful lookup as a small two-column table.
    table = Table(title="%s检索成功" % smiles_value)
    table.add_column("SMILES", justify="center", style="green")
    table.add_column("CAS", justify="center", style="green")
    table.add_row(smiles_value, cas_num)
    console.print(table)
    return cas_num
def generateParameters():
    """Find the SMILES column, add a CAS column and look up every SMILES row.

    Relies on the module-level ``worksheet``, ``workBook``, ``file_path`` and
    ``thread_executor`` objects created by the script entry point. Each
    non-empty SMILES cell below the header is submitted to ``smilesToCas`` on
    the thread pool; lookups that raise are reported per row instead of being
    dropped silently.
    """
    # Only string headers can match; anything else (numbers, dates, empty
    # cells) is skipped so that ".upper()" is never called on a non-string.
    # If several headers match, the last one wins.
    smiles_column = 0
    for header_cell in worksheet[1]:
        header = header_cell.value
        if isinstance(header, str) and header.upper().strip() == 'SMILES':
            smiles_column = header_cell.column

    if not smiles_column:
        print('未找到任何符合SMILES字段的列')
        return

    print('找到符合的SMILES列,在第%d列.' % smiles_column)
    # The CAS column is appended after the last used column.
    cas_column = worksheet.max_column + 1

    # Keep each SMILES paired with its own row so that duplicate SMILES
    # values are written to their own rows rather than to the first match.
    pending = []
    for row_cells in worksheet.iter_rows(min_col=smiles_column, max_col=smiles_column,
                                         min_row=2, max_row=worksheet.max_row):
        cell = row_cells[0]
        if cell.value is None:
            continue
        smiles = str(cell.value).strip()
        if smiles:
            pending.append((smiles, cell.row))

    print('一共有%s个SMILES待查询...' % len(pending))
    worksheet.cell(1, cas_column).value = 'CAS'
    workBook.save(file_path)
    print('CAS列名已建立')

    tasks = {
        thread_executor.submit(smilesToCas, (smiles, row, cas_column)): (smiles, row)
        for smiles, row in pending
    }
    for task in as_completed(tasks):
        smiles, row = tasks[task]
        try:
            # result() re-raises whatever the worker raised.
            task.result()
        except Exception as error:
            # One failed lookup must not abort the rest of the batch.
            print('第%d行SMILES(%s)检索失败: %s' % (row, smiles, error))
# Script entry point: takes the Excel file path as the first command-line
# argument, prints the banner and usage notes, runs the lookup on a thread
# pool, saves the workbook and waits for Enter before exiting.
if __name__ == "__main__":
    import sys

    # Pool of User-Agent strings; one is picked at random for each request.
    user_agent = [
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
        "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
        "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
        "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
        "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
        "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
        "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
        "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
    ]

    # Accept unverified SSL certificates: this replaces the default HTTPS
    # context and therefore turns certificate verification off globally.
    ssl._create_default_https_context = ssl._create_unverified_context

    console = Console()
    title = Markdown("# 欢迎使用SMILES-TO-CAS软件 Author:Buckwheat")
    console.print(title, style="white on blue")
    # The text lines of this literal stay at column 0 so the Markdown source
    # is not indented.
    useMatters = '''
注意事项
- Excel文件列名必须有一列为SMILES且唯一,不区分大小写
- Excel文件暂时不支持加密,请解除密码后继续操作
'''
    console.print(Markdown(useMatters), style="red")

    try:
        # Argument and workbook errors are raised inside the try block so
        # they reach the error prompt below instead of ending the program
        # with a traceback.
        if len(sys.argv) < 2:
            raise ValueError('缺少Excel文件路径参数,用法: %s <Excel文件路径>' % sys.argv[0])
        file_path = sys.argv[1]
        workBook = load_workbook(file_path)
        worksheet = workBook.active

        print('创建线程池...')
        # The with-block shuts the pool down once all lookups are finished.
        with ThreadPoolExecutor(max_workers=4) as thread_executor:
            generateParameters()
        workBook.save(file_path)
    except Exception as error:
        print(error)
        input('出现错误,错误如上,按回车键退出程序...')
    else:
        input('任务执行完成,按回车键退出程序...')
    # The credit line is printed regardless of what was typed at the prompt.
    print('Author Buckwheat')
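# NOTE: the next two lines appear to be repository-hosting page messages captured with this file; they are not part of the program.
# This section may contain content unsuitable for display, so the page does not show it. You can review and revise it with the relevant editing tools.
# If you confirm the content involves no inappropriate language, pure advertising or traffic diversion, violence, vulgar or pornographic material, infringement, piracy, false information, worthless content, or content that violates national laws and regulations, you can click Submit to appeal and we will handle it as soon as possible.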