// [Page notice, not code] 代码拉取完成,页面将自动刷新
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.ListAutoNumber;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.jsoup.HttpStatusException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.EOFException;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
/**
 * Command-line tool that reads keywords from the first column of the first sheet of an
 * Excel workbook, looks each one up on Baidu Hanyu, and writes the text of the page's
 * {@code tab-content} element into the second column of the same row. The modified
 * workbook is written to a separate output file.
 */
public class Main3 {
    /** Workbook whose first column holds the keywords to look up. */
    private static final String INPUT_PATH = "E:\\desktop\\ziyi\\ci.xlsx";

    /** Destination of the workbook with the looked-up definitions filled in. */
    private static final String OUTPUT_PATH = "E:\\desktop\\ziyi\\ci001.xlsx";

    /** Search endpoint; the keyword is passed as the {@code wd} query parameter. */
    private static final String SEARCH_URL = "https://hanyu.baidu.com/s";

    /** Value written to the result cell when no definition section is available. */
    private static final String NOT_FOUND_MARKER = "百度";

    /** Connect/read timeout for each page fetch, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 30000;

    /**
     * Processes every row of the first sheet and saves the result to {@link #OUTPUT_PATH}.
     *
     * <p>Rows whose first cell is missing or blank are skipped. Keywords whose page could
     * not be fetched, or whose page has no {@code tab-content} element, are collected and
     * printed at the end of the run.
     *
     * @param args ignored
     * @throws IOException if the input workbook cannot be read or the output cannot be written
     * @throws InvalidFormatException if the input file is not a valid OOXML workbook
     */
    public static void main(String[] args) throws IOException, InvalidFormatException {
        List<String> unresolvedKeywords = new ArrayList<>();
        // DataFormatter tolerates missing cells and non-string cell types.
        DataFormatter formatter = new DataFormatter();

        // NOTE(review): the workbook is backed by the input File; POI may save changes back
        // to that file when a read-write package is closed — confirm this is intended.
        Workbook workbook = new XSSFWorkbook(new File(INPUT_PATH));
        Sheet sheet = workbook.getSheetAt(0);

        for (Row row : sheet) {
            String keyword = formatter.formatCellValue(row.getCell(0)).trim();
            if (keyword.isEmpty()) {
                continue;
            }

            Document page = fetchPage(keyword);
            if (page == null) {
                // The fetch error was already reported; remember the keyword and move on
                // instead of aborting the whole run.
                unresolvedKeywords.add(keyword);
                continue;
            }

            Cell meaningCell = row.createCell(1);
            Element detailMean = page.getElementById("detailmean-wrapper");
            Element basicMean = page.getElementById("basicmean-wrapper");

            if (keyword.equals(NOT_FOUND_MARKER) || basicMean == null) {
                System.out.println("百度一下,我没有");
                meaningCell.setCellValue(NOT_FOUND_MARKER);
                System.out.println(keyword);
                continue;
            }

            // Prefer the detailed section and fall back to the basic one.
            Element wrapper = detailMean != null ? detailMean : basicMean;
            String meaning = extractMeaning(wrapper);
            if (meaning == null) {
                System.out.println("没有字义" + keyword);
                unresolvedKeywords.add(keyword);
            } else {
                meaningCell.setCellValue(meaning);
                System.out.println("Successfully parsed and wrote keyword: " + keyword);
            }
        }

        System.out.println(unresolvedKeywords);

        try (FileOutputStream outputStream = new FileOutputStream(OUTPUT_PATH)) {
            workbook.write(outputStream);
        }
        workbook.close();
    }

    /**
     * Fetches the search result page for a keyword.
     *
     * @param keyword the word to look up; sent as a request parameter so that Jsoup
     *     URL-encodes it
     * @return the parsed page, or {@code null} if the request failed (the failure is
     *     printed to standard output)
     */
    private static Document fetchPage(String keyword) {
        try {
            return Jsoup.connect(SEARCH_URL)
                    .data("wd", keyword)
                    .data("ptype", "zici")
                    .data("from", "poem")
                    .userAgent("Mozilla/5.0")
                    .timeout(TIMEOUT_MILLIS)
                    .get();
        } catch (HttpStatusException e) {
            System.out.println("HTTP error fetching URL: " + e.getUrl());
        } catch (IOException e) {
            System.out.println("Error in connecting to URL: " + e.getMessage());
        }
        return null;
    }

    /**
     * Extracts the text of the first {@code tab-content} element found under the first
     * {@code div} of the given section.
     *
     * @param wrapper the definition section element; must not be {@code null}
     * @return the element's text, or {@code null} if the section has no {@code div} or no
     *     {@code tab-content} element
     */
    private static String extractMeaning(Element wrapper) {
        // NOTE(review): the serialize/re-parse round trip looks redundant, but it is kept
        // because dropping it could change whitespace in the extracted text.
        Document fragment = Jsoup.parse(wrapper.toString());
        Element firstDiv = fragment.getElementsByTag("div").first();
        if (firstDiv == null) {
            return null;
        }
        Element tabContent = firstDiv.getElementsByClass("tab-content").first();
        return tabContent == null ? null : tabContent.text();
    }
}
// [Page notice, not code] 此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
// [Page notice, not code] 如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。