format
This commit is contained in:
parent
9174a4cc40
commit
a393653497
87
dependency-reduced-pom.xml
Normal file
87
dependency-reduced-pom.xml
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
<groupId>com.ets</groupId>
|
||||||
|
<artifactId>ets-playwright</artifactId>
|
||||||
|
<version>1.0.0</version>
|
||||||
|
<build>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<artifactId>maven-compiler-plugin</artifactId>
|
||||||
|
<version>3.14.0</version>
|
||||||
|
<configuration>
|
||||||
|
<release>25</release>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.codehaus.mojo</groupId>
|
||||||
|
<artifactId>exec-maven-plugin</artifactId>
|
||||||
|
<version>3.5.0</version>
|
||||||
|
<configuration>
|
||||||
|
<mainClass>com.ets.scraper.EtsScraper</mainClass>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<artifactId>maven-surefire-plugin</artifactId>
|
||||||
|
<version>3.5.2</version>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<artifactId>maven-shade-plugin</artifactId>
|
||||||
|
<version>3.6.0</version>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<phase>package</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>shade</goal>
|
||||||
|
</goals>
|
||||||
|
<configuration>
|
||||||
|
<transformers>
|
||||||
|
<transformer>
|
||||||
|
<mainClass>com.ets.scraper.EtsScraper</mainClass>
|
||||||
|
</transformer>
|
||||||
|
</transformers>
|
||||||
|
<filters>
|
||||||
|
<filter>
|
||||||
|
<artifact>*:*</artifact>
|
||||||
|
<excludes>
|
||||||
|
<exclude>META-INF/*.SF</exclude>
|
||||||
|
<exclude>META-INF/*.DSA</exclude>
|
||||||
|
<exclude>META-INF/*.RSA</exclude>
|
||||||
|
</excludes>
|
||||||
|
</filter>
|
||||||
|
</filters>
|
||||||
|
</configuration>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.junit.jupiter</groupId>
|
||||||
|
<artifactId>junit-jupiter</artifactId>
|
||||||
|
<version>5.12.1</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<artifactId>junit-jupiter-api</artifactId>
|
||||||
|
<groupId>org.junit.jupiter</groupId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<artifactId>junit-jupiter-params</artifactId>
|
||||||
|
<groupId>org.junit.jupiter</groupId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<artifactId>junit-jupiter-engine</artifactId>
|
||||||
|
<groupId>org.junit.jupiter</groupId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
<properties>
|
||||||
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||||
|
<maven.compiler.target>25</maven.compiler.target>
|
||||||
|
<maven.compiler.source>25</maven.compiler.source>
|
||||||
|
<playwright.version>1.55.0</playwright.version>
|
||||||
|
</properties>
|
||||||
|
</project>
|
||||||
@ -34,7 +34,7 @@ public class EtsScraper {
|
|||||||
private static final String PROXY_HOST = "http://127.0.0.1:8081";
|
private static final String PROXY_HOST = "http://127.0.0.1:8081";
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
// Parse CLI arguments
|
// Parse CLI arguments
|
||||||
String proxyHost = null;
|
String proxyHost = null;
|
||||||
String proxyUser = null;
|
String proxyUser = null;
|
||||||
String proxyPass = null;
|
String proxyPass = null;
|
||||||
@ -61,22 +61,22 @@ public class EtsScraper {
|
|||||||
System.err.println("[-] Unknown option: " + args[i]);
|
System.err.println("[-] Unknown option: " + args[i]);
|
||||||
printHelp();
|
printHelp();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (proxyHost == null || proxyUser == null || proxyPass == null || dateStr == null) {
|
if (proxyHost == null || proxyUser == null || proxyPass == null || dateStr == null) {
|
||||||
System.err.println("[-] Missing required arguments");
|
System.err.println("[-] Missing required arguments");
|
||||||
printHelp();
|
printHelp();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
java.time.LocalDate targetDate;
|
java.time.LocalDate targetDate;
|
||||||
try {
|
try {
|
||||||
targetDate = java.time.LocalDate.parse(dateStr);
|
targetDate = java.time.LocalDate.parse(dateStr);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
System.err.println("[-] Invalid date format: " + dateStr + ", expected yyyy-MM-dd");
|
System.err.println("[-] Invalid date format: " + dateStr + ", expected yyyy-MM-dd");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
String dateStrFormatted = targetDate.format(java.time.format.DateTimeFormatter.ofPattern("yyyy-MM-dd"));
|
String dateStrFormatted = targetDate.format(java.time.format.DateTimeFormatter.ofPattern("yyyy-MM-dd"));
|
||||||
|
|
||||||
String dateStrFileName = targetDate.format(DateTimeFormatter.ofPattern("yyyyMMdd"));
|
String dateStrFileName = targetDate.format(DateTimeFormatter.ofPattern("yyyyMMdd"));
|
||||||
@ -89,53 +89,53 @@ public class EtsScraper {
|
|||||||
System.out.println("[+] File size: " + java.nio.file.Files.size(savedFile) + " bytes");
|
System.out.println("[+] File size: " + java.nio.file.Files.size(savedFile) + " bytes");
|
||||||
autoImportBill(savedFile, proxyHost, proxyUser, proxyPass);
|
autoImportBill(savedFile, proxyHost, proxyUser, proxyPass);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
createDirectories(SCREENSHOT_DIR);
|
createDirectories(SCREENSHOT_DIR);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
System.err.println("Failed to create directories: " + e.getMessage());
|
System.err.println("Failed to create directories: " + e.getMessage());
|
||||||
}
|
}
|
||||||
|
|
||||||
try (Playwright playwright = Playwright.create()) {
|
try (Playwright playwright = Playwright.create()) {
|
||||||
Browser browser = playwright.chromium().launch(
|
Browser browser = playwright.chromium().launch(
|
||||||
new BrowserType.LaunchOptions().setHeadless(true)
|
new BrowserType.LaunchOptions().setHeadless(true)
|
||||||
);
|
);
|
||||||
BrowserContext context = browser.newContext(
|
BrowserContext context = browser.newContext(
|
||||||
new Browser.NewContextOptions()
|
new Browser.NewContextOptions()
|
||||||
.setIgnoreHTTPSErrors(true)
|
.setIgnoreHTTPSErrors(true)
|
||||||
.setViewportSize(1920, 1080)
|
.setViewportSize(1920, 1080)
|
||||||
.setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36")
|
.setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36")
|
||||||
);
|
);
|
||||||
Page page = context.newPage();
|
Page page = context.newPage();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Navigate to frame.html first to establish session/cookies
|
// Navigate to frame.html first to establish session/cookies
|
||||||
System.out.println("[*] Establishing session via " + FRAME_URL);
|
System.out.println("[*] Establishing session via " + FRAME_URL);
|
||||||
page.navigate(FRAME_URL, new Page.NavigateOptions()
|
page.navigate(FRAME_URL, new Page.NavigateOptions()
|
||||||
.setTimeout(30000)
|
.setTimeout(30000)
|
||||||
.setWaitUntil(WaitUntilState.DOMCONTENTLOADED));
|
.setWaitUntil(WaitUntilState.DOMCONTENTLOADED));
|
||||||
sleep(3000);
|
sleep(3000);
|
||||||
|
|
||||||
// Navigate directly to the login page
|
// Navigate directly to the login page
|
||||||
System.out.println("[*] Navigating to login page: " + LOGIN_URL);
|
System.out.println("[*] Navigating to login page: " + LOGIN_URL);
|
||||||
page.navigate(LOGIN_URL, new Page.NavigateOptions()
|
page.navigate(LOGIN_URL, new Page.NavigateOptions()
|
||||||
.setTimeout(30000)
|
.setTimeout(30000)
|
||||||
.setWaitUntil(WaitUntilState.NETWORKIDLE));
|
.setWaitUntil(WaitUntilState.NETWORKIDLE));
|
||||||
sleep(2000);
|
sleep(2000);
|
||||||
|
|
||||||
// Close notification dialog FIRST (before filling credentials)
|
// Close notification dialog FIRST (before filling credentials)
|
||||||
closeNotificationDialog(page);
|
closeNotificationDialog(page);
|
||||||
|
|
||||||
screenshot(page, "after_close_dialog");
|
screenshot(page, "after_close_dialog");
|
||||||
|
|
||||||
// Download captcha image
|
// Download captcha image
|
||||||
downloadCaptcha(page);
|
downloadCaptcha(page);
|
||||||
|
|
||||||
// Close dialog again after page reload
|
// Close dialog again after page reload
|
||||||
closeNotificationDialog(page);
|
closeNotificationDialog(page);
|
||||||
|
|
||||||
// Recognize captcha and perform login
|
// Recognize captcha and perform login
|
||||||
boolean loggedin = doLoginWithCaptcha(page);
|
boolean loggedin = doLoginWithCaptcha(page);
|
||||||
|
|
||||||
if (loggedin) {
|
if (loggedin) {
|
||||||
@ -147,7 +147,7 @@ public class EtsScraper {
|
|||||||
System.out.println("[+] Page title: " + page.title());
|
System.out.println("[+] Page title: " + page.title());
|
||||||
System.out.println("[+] Page URL: " + page.url());
|
System.out.println("[+] Page URL: " + page.url());
|
||||||
|
|
||||||
// 点击三联单菜单
|
// 点击三联单菜单
|
||||||
System.out.println("[*] Clicking 三联单 menu...");
|
System.out.println("[*] Clicking 三联单 menu...");
|
||||||
page.locator("#module_2094F683-C542-4904-B33E-0D227C4DE199").first().click();
|
page.locator("#module_2094F683-C542-4904-B33E-0D227C4DE199").first().click();
|
||||||
sleep(3000);
|
sleep(3000);
|
||||||
@ -155,113 +155,104 @@ public class EtsScraper {
|
|||||||
screenshot(page, "after_sanliandan");
|
screenshot(page, "after_sanliandan");
|
||||||
System.out.println("[+] 三联单 page title: " + page.title());
|
System.out.println("[+] 三联单 page title: " + page.title());
|
||||||
|
|
||||||
// 设置日期筛选
|
// 设置日期筛选
|
||||||
System.out.println("[*] Setting date filter to: " + dateStrFormatted);
|
System.out.println("[*] Setting date filter to: " + dateStrFormatted);
|
||||||
|
|
||||||
// 检查元素是否存在
|
// 检查元素是否存在
|
||||||
boolean startDateExists = page.locator("#Search_ThreeBillList_startWdate").count() > 0;
|
boolean startDateExists = page.locator("#Search_ThreeBillList_startWdate").count() > 0;
|
||||||
boolean endDateExists = page.locator("#Search_ThreeBillList_endWdate").count() > 0;
|
boolean endDateExists = page.locator("#Search_ThreeBillList_endWdate").count() > 0;
|
||||||
boolean queryBtnExists = page.locator("#Search_ThreeBillList_Button").count() > 0;
|
boolean queryBtnExists = page.locator("#Search_ThreeBillList_Button").count() > 0;
|
||||||
System.out.println("[*] Elements found - startDate: " + startDateExists + ", endDate: " + endDateExists + ", queryBtn: " + queryBtnExists);
|
System.out.println("[*] Elements found - startDate: " + startDateExists + ", endDate: " + endDateExists + ", queryBtn: " + queryBtnExists);
|
||||||
|
|
||||||
// 直接设置日期值(WdatePicker 类型输入框)
|
// 直接设置日期值(WdatePicker 类型输入框)
|
||||||
if (startDateExists) {
|
if (startDateExists) {
|
||||||
System.out.println("[*] Setting start date to: " + dateStrFormatted);
|
System.out.println("[*] Setting start date to: " + dateStrFormatted);
|
||||||
page.locator("#Search_ThreeBillList_startWdate").first().fill(dateStrFormatted);
|
page.locator("#Search_ThreeBillList_startWdate").first().fill(dateStrFormatted);
|
||||||
sleep(500);
|
sleep(500);
|
||||||
} else {
|
} else {
|
||||||
System.out.println("[!] Start date element not found");
|
System.out.println("[!] Start date element not found");
|
||||||
}
|
}
|
||||||
|
|
||||||
// 设置结束日期
|
// 设置结束日期
|
||||||
if (endDateExists) {
|
if (endDateExists) {
|
||||||
System.out.println("[*] Setting end date to: " + dateStrFormatted);
|
System.out.println("[*] Setting end date to: " + dateStrFormatted);
|
||||||
page.locator("#Search_ThreeBillList_endWdate").first().fill(dateStrFormatted);
|
page.locator("#Search_ThreeBillList_endWdate").first().fill(dateStrFormatted);
|
||||||
sleep(500);
|
sleep(500);
|
||||||
} else {
|
} else {
|
||||||
System.out.println("[!] End date element not found");
|
System.out.println("[!] End date element not found");
|
||||||
}
|
}
|
||||||
|
|
||||||
// 点击查询按钮,等待列表加载
|
// 点击查询按钮,等待列表加载
|
||||||
if (queryBtnExists) {
|
if (queryBtnExists) {
|
||||||
System.out.println("[*] Clicking query button...");
|
System.out.println("[*] Clicking query button...");
|
||||||
page.locator("#Search_ThreeBillList_Button").first().click();
|
page.locator("#Search_ThreeBillList_Button").first().click();
|
||||||
|
|
||||||
// 等待列表内容出现
|
// 等待列表内容出现
|
||||||
try {
|
try {
|
||||||
page.waitForSelector("tbody tr", new Page.WaitForSelectorOptions()
|
page.waitForSelector("tbody tr", new Page.WaitForSelectorOptions()
|
||||||
.setTimeout(30000));
|
.setTimeout(30000));
|
||||||
System.out.println("[+] Query completed, list loaded");
|
System.out.println("[+] Query completed, list loaded");
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
System.out.println("[!] Wait for list timeout, but query was submitted");
|
System.out.println("[!] Wait for list timeout, but query was submitted");
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
System.out.println("[!] Query button not found");
|
System.out.println("[!] Query button not found");
|
||||||
}
|
}
|
||||||
|
|
||||||
screenshot(page, "after_query");
|
screenshot(page, "after_query");
|
||||||
|
|
||||||
// 点击导出按钮
|
// 点击导出按钮
|
||||||
if (page.locator("#Export_ThreeBillList_Button").count() > 0) {
|
if (page.locator("#Export_ThreeBillList_Button").count() > 0) {
|
||||||
System.out.println("[*] Clicking export button...");
|
System.out.println("[*] Clicking export button...");
|
||||||
// 设置下载目录
|
// 设置下载目录
|
||||||
// 点击主导出按钮打开对话框,再用 JS click 触发对话框内导出按钮
|
// 点击主导出按钮打开对话框,再用 JS click 触发对话框内导出按钮
|
||||||
Download dl = page.waitForDownload(
|
Download dl = page.waitForDownload(new Page.WaitForDownloadOptions().setTimeout(300000),
|
||||||
new Page.WaitForDownloadOptions().setTimeout(300000),
|
() -> {
|
||||||
() -> {
|
page.locator("#Export_ThreeBillList_Button").first().click();
|
||||||
page.locator("#Export_ThreeBillList_Button").first().click();
|
sleep(2000);
|
||||||
sleep(2000);
|
System.out.println("[*] Triggering dialog export via JS...");
|
||||||
System.out.println("[*] Triggering dialog export via JS...");
|
page.evaluate("document.querySelectorAll('button').forEach(b => { if (b.textContent.trim() === '导出') b.click(); })");
|
||||||
page.evaluate("document.querySelectorAll('button').forEach(b => { if (b.textContent.trim() === '导出') b.click(); })");
|
});
|
||||||
});
|
|
||||||
System.out.println("[*] Waiting for download to complete...");
|
System.out.println("[*] Waiting for download to complete...");
|
||||||
dl.saveAs(savedFile);
|
dl.saveAs(savedFile);
|
||||||
System.out.println("[+] Download saved to: " + savedFile);
|
System.out.println("[+] Download saved to: " + savedFile);
|
||||||
if (java.nio.file.Files.size(savedFile) == 0) {
|
if (java.nio.file.Files.size(savedFile) == 0) {
|
||||||
System.out.println("[-] Downloaded file is empty");
|
System.out.println("[-] Downloaded file is empty");
|
||||||
} else {
|
} else {
|
||||||
System.out.println("[+] Download size: " + java.nio.file.Files.size(savedFile) + " bytes");
|
System.out.println("[+] Download size: " + java.nio.file.Files.size(savedFile) + " bytes");
|
||||||
// Auto-import to ets-proxy
|
// Auto-import to ets-proxy
|
||||||
autoImportBill(savedFile, proxyHost, proxyUser, proxyPass);
|
autoImportBill(savedFile, proxyHost, proxyUser, proxyPass);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
screenshot(page, "after_export");
|
screenshot(page, "after_export");
|
||||||
System.out.println("[+] Query and export completed!");
|
System.out.println("[+] Query and export completed!");
|
||||||
|
} else {
|
||||||
String content = page.textContent("body");
|
|
||||||
if (content != null) {
|
|
||||||
String preview = content.length() > 500
|
|
||||||
? content.substring(0, 500) + ".."
|
|
||||||
: content;
|
|
||||||
System.out.println("[+] Page content preview:\n" + preview);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
System.out.println("[-] Login failed. Check screenshots/ for debugging.");
|
System.out.println("[-] Login failed. Check screenshots/ for debugging.");
|
||||||
screenshot(page, "login_failed");
|
screenshot(page, "login_failed");
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
browser.close();
|
browser.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void printHelp() {
|
private static void printHelp() {
|
||||||
System.out.println("""
|
System.out.println("""
|
||||||
ETS 三联单爬虫 - 导出并导入三联单 Excel 数据
|
ETS 三联单爬虫 - 导出并导入三联单 Excel 数据
|
||||||
|
|
||||||
用法: java -jar ets-playwright.jar [选项]
|
用法: java -jar ets-playwright.jar [选项]
|
||||||
|
|
||||||
选项:
|
选项:
|
||||||
-s <url> ets-proxy 服务器地址
|
-s <url> ets-proxy 服务器地址
|
||||||
-u <user> ets-proxy 用户名
|
-u <user> ets-proxy 用户名
|
||||||
-p <pass> ets-proxy 密码
|
-p <pass> ets-proxy 密码
|
||||||
-d <date> 查询日期,格式 yyyy-MM-dd
|
-d <date> 查询日期,格式 yyyy-MM-dd
|
||||||
-h 显示此帮助信息
|
-h 显示此帮助信息
|
||||||
|
|
||||||
示例:
|
示例:
|
||||||
java -jar ets-playwright.jar -s https://api.ets.niko.red -u admin -p 123456 -d 2026-05-04
|
java -jar ets-playwright.jar -s https://api.ets.niko.red -u admin -p 123456 -d 2026-05-04
|
||||||
""");
|
""");
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean doLoginWithCaptcha(Page page) throws Exception {
|
public static boolean doLoginWithCaptcha(Page page) throws Exception {
|
||||||
// Recognize captcha first
|
// Recognize captcha first
|
||||||
@ -486,7 +477,7 @@ public class EtsScraper {
|
|||||||
.build();
|
.build();
|
||||||
java.net.http.HttpRequest request = java.net.http.HttpRequest.newBuilder()
|
java.net.http.HttpRequest request = java.net.http.HttpRequest.newBuilder()
|
||||||
.uri(uri)
|
.uri(uri)
|
||||||
.POST(java.net.http.HttpRequest.BodyPublishers.noBody())
|
.POST(java.net.http.HttpRequest.BodyPublishers.noBody())
|
||||||
.header("Content-Type", "application/json")
|
.header("Content-Type", "application/json")
|
||||||
.build();
|
.build();
|
||||||
java.net.http.HttpResponse<String> response = client.send(request, java.net.http.HttpResponse.BodyHandlers.ofString());
|
java.net.http.HttpResponse<String> response = client.send(request, java.net.http.HttpResponse.BodyHandlers.ofString());
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user