This commit is contained in:
Niko 2026-05-05 19:27:47 +08:00
parent 9174a4cc40
commit a393653497
3 changed files with 164 additions and 86 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

View File

@ -0,0 +1,87 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven build descriptor for com.ets:ets-playwright 1.0.0.

  Compiles for Java 25, runs com.ets.scraper.EtsScraper through exec-maven-plugin,
  and builds a single executable (shaded) jar during the package phase.

  NOTE(review): the shape of the dependency section (only junit-jupiter, with its
  own sub-artifacts excluded) suggests this may be a dependency-reduced POM
  generated by maven-shade-plugin rather than the hand-written pom.xml. Confirm
  before relying on it as the primary build file.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.ets</groupId>
  <artifactId>ets-playwright</artifactId>
  <version>1.0.0</version>
  <build>
    <plugins>
      <plugin>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.14.0</version>
        <configuration>
          <!-- Compile against the Java 25 platform API. -->
          <release>25</release>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>exec-maven-plugin</artifactId>
        <version>3.5.0</version>
        <configuration>
          <!-- Entry point used by the exec plugin's java goal. -->
          <mainClass>com.ets.scraper.EtsScraper</mainClass>
        </configuration>
      </plugin>
      <plugin>
        <artifactId>maven-surefire-plugin</artifactId>
        <version>3.5.2</version>
      </plugin>
      <plugin>
        <artifactId>maven-shade-plugin</artifactId>
        <version>3.6.0</version>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>shade</goal>
            </goals>
            <configuration>
              <transformers>
                <!--
                  The implementation class must be named explicitly: the plugin's
                  transformers parameter is typed as an interface, so an element
                  without it cannot be instantiated. ManifestResourceTransformer
                  is the transformer that accepts mainClass and writes it into
                  the shaded jar's manifest.
                -->
                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                  <mainClass>com.ets.scraper.EtsScraper</mainClass>
                </transformer>
              </transformers>
              <filters>
                <filter>
                  <!-- Applies to every artifact merged into the shaded jar. -->
                  <artifact>*:*</artifact>
                  <excludes>
                    <!-- Drop signature files from dependencies; they no longer match the repackaged jar contents. -->
                    <exclude>META-INF/*.SF</exclude>
                    <exclude>META-INF/*.DSA</exclude>
                    <exclude>META-INF/*.RSA</exclude>
                  </excludes>
                </filter>
              </filters>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
  <dependencies>
    <dependency>
      <groupId>org.junit.jupiter</groupId>
      <artifactId>junit-jupiter</artifactId>
      <version>5.12.1</version>
      <scope>test</scope>
      <!--
        NOTE(review): these exclusions remove the api, params and engine
        artifacts from the junit-jupiter aggregator. Verify that tests still
        have JUnit on their classpath when building from this POM.
      -->
      <exclusions>
        <exclusion>
          <artifactId>junit-jupiter-api</artifactId>
          <groupId>org.junit.jupiter</groupId>
        </exclusion>
        <exclusion>
          <artifactId>junit-jupiter-params</artifactId>
          <groupId>org.junit.jupiter</groupId>
        </exclusion>
        <exclusion>
          <artifactId>junit-jupiter-engine</artifactId>
          <groupId>org.junit.jupiter</groupId>
        </exclusion>
      </exclusions>
    </dependency>
  </dependencies>
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <!-- Source/target levels kept alongside the compiler plugin's release setting above. -->
    <maven.compiler.target>25</maven.compiler.target>
    <maven.compiler.source>25</maven.compiler.source>
    <!-- NOTE(review): no dependency in this POM references playwright.version; confirm where it is consumed. -->
    <playwright.version>1.55.0</playwright.version>
  </properties>
</project>

View File

@ -34,7 +34,7 @@ public class EtsScraper {
private static final String PROXY_HOST = "http://127.0.0.1:8081";
public static void main(String[] args) throws Exception {
// Parse CLI arguments
// Parse CLI arguments
String proxyHost = null;
String proxyUser = null;
String proxyPass = null;
@ -61,22 +61,22 @@ public class EtsScraper {
System.err.println("[-] Unknown option: " + args[i]);
printHelp();
return;
}
}
}
}
if (proxyHost == null || proxyUser == null || proxyPass == null || dateStr == null) {
System.err.println("[-] Missing required arguments");
printHelp();
return;
}
}
java.time.LocalDate targetDate;
try {
targetDate = java.time.LocalDate.parse(dateStr);
} catch (Exception e) {
} catch (Exception e) {
System.err.println("[-] Invalid date format: " + dateStr + ", expected yyyy-MM-dd");
return;
}
}
String dateStrFormatted = targetDate.format(java.time.format.DateTimeFormatter.ofPattern("yyyy-MM-dd"));
String dateStrFileName = targetDate.format(DateTimeFormatter.ofPattern("yyyyMMdd"));
@ -89,53 +89,53 @@ public class EtsScraper {
System.out.println("[+] File size: " + java.nio.file.Files.size(savedFile) + " bytes");
autoImportBill(savedFile, proxyHost, proxyUser, proxyPass);
return;
}
}
try {
createDirectories(SCREENSHOT_DIR);
} catch (Exception e) {
} catch (Exception e) {
System.err.println("Failed to create directories: " + e.getMessage());
}
}
try (Playwright playwright = Playwright.create()) {
Browser browser = playwright.chromium().launch(
new BrowserType.LaunchOptions().setHeadless(true)
);
new BrowserType.LaunchOptions().setHeadless(true)
);
BrowserContext context = browser.newContext(
new Browser.NewContextOptions()
.setIgnoreHTTPSErrors(true)
.setViewportSize(1920, 1080)
.setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36")
);
new Browser.NewContextOptions()
.setIgnoreHTTPSErrors(true)
.setViewportSize(1920, 1080)
.setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36")
);
Page page = context.newPage();
try {
// Navigate to frame.html first to establish session/cookies
// Navigate to frame.html first to establish session/cookies
System.out.println("[*] Establishing session via " + FRAME_URL);
page.navigate(FRAME_URL, new Page.NavigateOptions()
.setTimeout(30000)
.setWaitUntil(WaitUntilState.DOMCONTENTLOADED));
.setTimeout(30000)
.setWaitUntil(WaitUntilState.DOMCONTENTLOADED));
sleep(3000);
// Navigate directly to the login page
// Navigate directly to the login page
System.out.println("[*] Navigating to login page: " + LOGIN_URL);
page.navigate(LOGIN_URL, new Page.NavigateOptions()
.setTimeout(30000)
.setWaitUntil(WaitUntilState.NETWORKIDLE));
.setTimeout(30000)
.setWaitUntil(WaitUntilState.NETWORKIDLE));
sleep(2000);
// Close notification dialog FIRST (before filling credentials)
// Close notification dialog FIRST (before filling credentials)
closeNotificationDialog(page);
screenshot(page, "after_close_dialog");
// Download captcha image
// Download captcha image
downloadCaptcha(page);
// Close dialog again after page reload
// Close dialog again after page reload
closeNotificationDialog(page);
// Recognize captcha and perform login
// Recognize captcha and perform login
boolean loggedin = doLoginWithCaptcha(page);
if (loggedin) {
@ -147,7 +147,7 @@ public class EtsScraper {
System.out.println("[+] Page title: " + page.title());
System.out.println("[+] Page URL: " + page.url());
// 点击三联单菜单
// 点击三联单菜单
System.out.println("[*] Clicking 三联单 menu...");
page.locator("#module_2094F683-C542-4904-B33E-0D227C4DE199").first().click();
sleep(3000);
@ -155,113 +155,104 @@ public class EtsScraper {
screenshot(page, "after_sanliandan");
System.out.println("[+] 三联单 page title: " + page.title());
// 设置日期筛选
// 设置日期筛选
System.out.println("[*] Setting date filter to: " + dateStrFormatted);
// 检查元素是否存在
// 检查元素是否存在
boolean startDateExists = page.locator("#Search_ThreeBillList_startWdate").count() > 0;
boolean endDateExists = page.locator("#Search_ThreeBillList_endWdate").count() > 0;
boolean queryBtnExists = page.locator("#Search_ThreeBillList_Button").count() > 0;
System.out.println("[*] Elements found - startDate: " + startDateExists + ", endDate: " + endDateExists + ", queryBtn: " + queryBtnExists);
// 直接设置日期值WdatePicker 类型输入框
// 直接设置日期值WdatePicker 类型输入框
if (startDateExists) {
System.out.println("[*] Setting start date to: " + dateStrFormatted);
page.locator("#Search_ThreeBillList_startWdate").first().fill(dateStrFormatted);
sleep(500);
} else {
} else {
System.out.println("[!] Start date element not found");
}
}
// 设置结束日期
// 设置结束日期
if (endDateExists) {
System.out.println("[*] Setting end date to: " + dateStrFormatted);
page.locator("#Search_ThreeBillList_endWdate").first().fill(dateStrFormatted);
sleep(500);
} else {
} else {
System.out.println("[!] End date element not found");
}
}
// 点击查询按钮等待列表加载
// 点击查询按钮等待列表加载
if (queryBtnExists) {
System.out.println("[*] Clicking query button...");
page.locator("#Search_ThreeBillList_Button").first().click();
// 等待列表内容出现
// 等待列表内容出现
try {
page.waitForSelector("tbody tr", new Page.WaitForSelectorOptions()
.setTimeout(30000));
.setTimeout(30000));
System.out.println("[+] Query completed, list loaded");
} catch (Exception e) {
} catch (Exception e) {
System.out.println("[!] Wait for list timeout, but query was submitted");
}
} else {
}
} else {
System.out.println("[!] Query button not found");
}
}
screenshot(page, "after_query");
// 点击导出按钮
// 点击导出按钮
if (page.locator("#Export_ThreeBillList_Button").count() > 0) {
System.out.println("[*] Clicking export button...");
// 设置下载目录
// 点击主导出按钮打开对话框再用 JS click 触发对话框内导出按钮
Download dl = page.waitForDownload(
new Page.WaitForDownloadOptions().setTimeout(300000),
() -> {
page.locator("#Export_ThreeBillList_Button").first().click();
sleep(2000);
System.out.println("[*] Triggering dialog export via JS...");
page.evaluate("document.querySelectorAll('button').forEach(b => { if (b.textContent.trim() === '导出') b.click(); })");
});
// 设置下载目录
// 点击主导出按钮打开对话框再用 JS click 触发对话框内导出按钮
Download dl = page.waitForDownload(new Page.WaitForDownloadOptions().setTimeout(300000),
() -> {
page.locator("#Export_ThreeBillList_Button").first().click();
sleep(2000);
System.out.println("[*] Triggering dialog export via JS...");
page.evaluate("document.querySelectorAll('button').forEach(b => { if (b.textContent.trim() === '导出') b.click(); })");
});
System.out.println("[*] Waiting for download to complete...");
dl.saveAs(savedFile);
System.out.println("[+] Download saved to: " + savedFile);
if (java.nio.file.Files.size(savedFile) == 0) {
System.out.println("[-] Downloaded file is empty");
} else {
} else {
System.out.println("[+] Download size: " + java.nio.file.Files.size(savedFile) + " bytes");
// Auto-import to ets-proxy
autoImportBill(savedFile, proxyHost, proxyUser, proxyPass);
// Auto-import to ets-proxy
autoImportBill(savedFile, proxyHost, proxyUser, proxyPass);
}
}
screenshot(page, "after_export");
System.out.println("[+] Query and export completed!");
String content = page.textContent("body");
if (content != null) {
String preview = content.length() > 500
? content.substring(0, 500) + ".."
: content;
System.out.println("[+] Page content preview:\n" + preview);
}
} else {
} else {
System.out.println("[-] Login failed. Check screenshots/ for debugging.");
screenshot(page, "login_failed");
}
} finally {
}
} finally {
browser.close();
}
}
}
}
}
}
private static void printHelp() {
System.out.println("""
ETS 三联单爬虫 - 导出并导入三联单 Excel 数据
ETS 三联单爬虫 - 导出并导入三联单 Excel 数据
用法: java -jar ets-playwright.jar [选项]
用法: java -jar ets-playwright.jar [选项]
选项:
-s <url> ets-proxy 服务器地址
-u <user> ets-proxy 用户名
-p <pass> ets-proxy 密码
-d <date> 查询日期格式 yyyy-MM-dd
-h 显示此帮助信息
选项:
-s <url> ets-proxy 服务器地址
-u <user> ets-proxy 用户名
-p <pass> ets-proxy 密码
-d <date> 查询日期格式 yyyy-MM-dd
-h 显示此帮助信息
示例:
java -jar ets-playwright.jar -s https://api.ets.niko.red -u admin -p 123456 -d 2026-05-04
""");
}
示例:
java -jar ets-playwright.jar -s https://api.ets.niko.red -u admin -p 123456 -d 2026-05-04
""");
}
public static boolean doLoginWithCaptcha(Page page) throws Exception {
// Recognize captcha first
@ -486,7 +477,7 @@ public class EtsScraper {
.build();
java.net.http.HttpRequest request = java.net.http.HttpRequest.newBuilder()
.uri(uri)
.POST(java.net.http.HttpRequest.BodyPublishers.noBody())
.POST(java.net.http.HttpRequest.BodyPublishers.noBody())
.header("Content-Type", "application/json")
.build();
java.net.http.HttpResponse<String> response = client.send(request, java.net.http.HttpResponse.BodyHandlers.ofString());