1 Star 1 Fork 3

xiaoxiaoming0011/新华字库

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
Main3.java 3.91 KB
一键复制 编辑 原始数据 按行查看 历史
xiaoxiaoming0011 提交于 2023-07-16 09:14 . 数据源代码
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.ListAutoNumber;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.jsoup.HttpStatusException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
public class Main3 {
public static void main(String[] args) throws IOException, InvalidFormatException {
List<String> list = new ArrayList<>();
// Load the Excel file
String excelFilePath = "E:\\desktop\\ziyi\\ci.xlsx";
Workbook workbook = new XSSFWorkbook(new File(excelFilePath));
Sheet sheet = workbook.getSheetAt(0); // assuming you want the first sheet
for (Row row : sheet) {
Cell cell = row.getCell(0); // get the first column
String keyword = cell.getStringCellValue();
// Replace the keyword in the URL
String url = "https://hanyu.baidu.com/s?wd="+keyword+"&ptype=zici&from=poem#detailmean";
// Document document = Jsoup.parse(new URL(url),300000);
Document document = null;
try {
document = Jsoup.connect(url).userAgent("Mozilla/5.0").timeout(30000).get();
// 在此处添加你的解析代码...
} catch (HttpStatusException e) {
System.out.println("HTTP error fetching URL: " + e.getUrl());
} catch (IOException e) {
System.out.println("Error in connecting to URL: " + e.getMessage());
}
Element element = null;
try{
element = document.getElementById("detailmean-wrapper");
}catch (Exception e){
System.out.println(keyword+"没有基本字义");
}
Element element2 = document.getElementById("basicmean-wrapper");
// System.out.println(element2);
//字义
Cell outputCell1 = row.createCell(1);
if(!keyword.equals("百度") && element2 != null){
String html = "";
try {
html = element.toString();
}catch (Exception e){
html = element2.toString();
}
Document parse = Jsoup.parse(html);
Element div = parse.getElementsByTag("div").first();
// System.out.println(div);
if (div != null) {
Element element1 = div.getElementsByClass("tab-content").first();
// System.out.println(element1);
if(element1 != null){
outputCell1.setCellValue(element1.text());
}else {
System.out.println("没有字义"+keyword);
list.add(keyword);
}
}else {
System.out.println("没有字义"+keyword);
list.add(keyword);
}
// Print a message to the console
System.out.println("Successfully parsed and wrote keyword: " + keyword);
}else {
System.out.println("百度一下,我没有");
outputCell1.setCellValue("百度");
System.out.println(keyword);
}
}
System.out.println(list);
// Save the changes to a new file
String outputExcelFilePath = "E:\\desktop\\ziyi\\ci001.xlsx";
try (FileOutputStream outputStream = new FileOutputStream(outputExcelFilePath)) {
workbook.write(outputStream);
}
workbook.close();
}
}
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
SQL
1
https://gitee.com/xiaoxiaoming0011/xinhua-word-library.git
[email protected]:xiaoxiaoming0011/xinhua-word-library.git
xiaoxiaoming0011
xinhua-word-library
新华字库
master

搜索帮助