This commit is contained in:
Niko 2026-05-22 09:53:07 +08:00
parent 2518b922af
commit b6f285bf68

View File

@ -29,9 +29,8 @@ public class EtsScraper {
private static final String USERNAME = "sccw"; private static final String USERNAME = "sccw";
private static final String PASSWORD = "slife@123"; private static final String PASSWORD = "slife@123";
private static final Path SCREENSHOT_DIR = Path.of("screenshots"); private static final Path SCREENSHOT_DIR = Path.of("screenshots");
private static final String OLLAMA_URL = "http://10.0.1.39:11434"; private static final String OLLAMA_URL = "http://127.0.0.1:11434";
private static final String OLLAMA_MODEL = "qwen3-vl:4b"; private static final String OLLAMA_MODEL = "qwen3-vl:4b";
private static final String PROXY_HOST = "http://127.0.0.1:8081";
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
// Parse CLI arguments // Parse CLI arguments
@ -39,6 +38,7 @@ public class EtsScraper {
String proxyUser = null; String proxyUser = null;
String proxyPass = null; String proxyPass = null;
String dateStr = null; String dateStr = null;
Integer repeatInterval = null;
for (int i = 0; i < args.length; i++) { for (int i = 0; i < args.length; i++) {
switch (args[i]) { switch (args[i]) {
@ -57,156 +57,179 @@ public class EtsScraper {
case "-d": case "-d":
dateStr = args[++i]; dateStr = args[++i];
break; break;
case "-r":
repeatInterval = Integer.parseInt(args[++i]);
break;
default: default:
System.err.println("[-] Unknown option: " + args[i]); System.err.println("[-] Unknown option: " + args[i]);
printHelp(); printHelp();
return; return;
} }
} }
if (proxyHost == null || proxyUser == null || proxyPass == null || dateStr == null) { if (proxyHost == null || proxyUser == null || proxyPass == null || dateStr == null) {
System.err.println("[-] Missing required arguments"); System.err.println("[-] Missing required arguments");
printHelp(); printHelp();
return; return;
} }
java.time.LocalDate targetDate; java.time.LocalDate targetDate;
try { try {
targetDate = java.time.LocalDate.parse(dateStr); targetDate = java.time.LocalDate.parse(dateStr);
} catch (Exception e) { } catch (Exception e) {
System.err.println("[-] Invalid date format: " + dateStr + ", expected yyyy-MM-dd"); System.err.println("[-] Invalid date format: " + dateStr + ", expected yyyy-MM-dd");
return; return;
} }
if (repeatInterval != null) {
// Polling mode: run every N seconds
System.out.println("[*] Polling mode: running every " + repeatInterval + " seconds (target date: " + dateStr + ")");
while (true) {
try {
runScraper(proxyHost, proxyUser, proxyPass, targetDate, true);
} catch (Exception e) {
System.err.println("[-] Scraper failed: " + e.getMessage());
}
System.out.println("[*] Sleeping " + repeatInterval + "s before next run...");
sleep(repeatInterval * 1000L);
}
} else {
// Single-shot mode: run once
runScraper(proxyHost, proxyUser, proxyPass, targetDate, false);
}
}
private static void runScraper(String proxyHost, String proxyUser, String proxyPass,
java.time.LocalDate targetDate, boolean isPolling) throws Exception {
String dateStrFormatted = targetDate.format(java.time.format.DateTimeFormatter.ofPattern("yyyy-MM-dd")); String dateStrFormatted = targetDate.format(java.time.format.DateTimeFormatter.ofPattern("yyyy-MM-dd"));
String dateStrFileName = targetDate.format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmm")); // In one-shot mode, use target date as filename (for "file exists" check)
// In polling mode, use current time as filename to always re-run
String dateStrFileName;
if (isPolling) {
dateStrFileName = java.time.LocalDateTime.now()
.format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmm"));
} else {
dateStrFileName = targetDate.format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmm"));
}
Path savedFile = Path.of("downloads").resolve("三联单列表_" + dateStrFileName + ".xls"); Path savedFile = Path.of("downloads").resolve("三联单列表_" + dateStrFileName + ".xls");
if (java.nio.file.Files.exists(savedFile) && java.nio.file.Files.size(savedFile) > 0) { if (!isPolling && java.nio.file.Files.exists(savedFile) && java.nio.file.Files.size(savedFile) > 0) {
System.out.println("[+] File already exists: " + savedFile); System.out.println("[+] File already exists: " + savedFile);
System.out.println("[+] File size: " + java.nio.file.Files.size(savedFile) + " bytes"); System.out.println("[+] File size: " + java.nio.file.Files.size(savedFile) + " bytes");
autoImportBill(savedFile, proxyHost, proxyUser, proxyPass); autoImportBill(savedFile, proxyHost, proxyUser, proxyPass);
return; return;
} }
try { try {
createDirectories(SCREENSHOT_DIR); createDirectories(SCREENSHOT_DIR);
} catch (Exception e) { } catch (Exception e) {
System.err.println("Failed to create directories: " + e.getMessage()); System.err.println("Failed to create directories: " + e.getMessage());
} }
System.out.println("[*] --- Scraper run at " + LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss")) + " ---");
try (Playwright playwright = Playwright.create()) { try (Playwright playwright = Playwright.create()) {
Browser browser = playwright.chromium().launch( Browser browser = playwright.chromium().launch(
new BrowserType.LaunchOptions() new BrowserType.LaunchOptions()
.setHeadless(true) .setHeadless(true)
); );
BrowserContext context = browser.newContext( BrowserContext context = browser.newContext(
new Browser.NewContextOptions() new Browser.NewContextOptions()
.setIgnoreHTTPSErrors(true) .setIgnoreHTTPSErrors(true)
.setViewportSize(1920, 1080) .setViewportSize(1920, 1080)
.setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36") .setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36")
); );
Page page = context.newPage(); Page page = context.newPage();
try { try {
// Navigate to frame.html first to establish session/cookies // Navigate to frame.html first to establish session/cookies
System.out.println("[*] Establishing session via " + FRAME_URL); System.out.println("[*] Establishing session via " + FRAME_URL);
page.navigate(FRAME_URL, new Page.NavigateOptions() page.navigate(FRAME_URL, new Page.NavigateOptions()
.setTimeout(30000) .setTimeout(30000)
.setWaitUntil(WaitUntilState.DOMCONTENTLOADED)); .setWaitUntil(WaitUntilState.DOMCONTENTLOADED));
sleep(3000); sleep(3000);
// Navigate directly to the login page // Navigate directly to the login page
System.out.println("[*] Navigating to login page: " + LOGIN_URL); System.out.println("[*] Navigating to login page: " + LOGIN_URL);
page.navigate(LOGIN_URL, new Page.NavigateOptions() page.navigate(LOGIN_URL, new Page.NavigateOptions()
.setTimeout(30000) .setTimeout(30000)
.setWaitUntil(WaitUntilState.NETWORKIDLE)); .setWaitUntil(WaitUntilState.NETWORKIDLE));
sleep(2000); sleep(2000);
// Close notification dialog FIRST (before filling credentials) // Close notification dialog FIRST (before filling credentials)
closeNotificationDialog(page); closeNotificationDialog(page);
// screenshot(page, "after_close_dialog"); // Download captcha image
// Download captcha image
downloadCaptcha(page); downloadCaptcha(page);
// Close dialog again after page reload // Close dialog again after page reload
closeNotificationDialog(page); closeNotificationDialog(page);
// Recognize captcha and perform login // Recognize captcha and perform login
boolean loggedin = doLoginWithCaptcha(page); boolean loggedin = doLoginWithCaptcha(page);
if (loggedin) { if (loggedin) {
System.out.println("[+] Login successful!"); System.out.println("[+] Login successful!");
sleep(2000); sleep(2000);
// screenshot(page, "after_login");
System.out.println("[+] Page title: " + page.title()); System.out.println("[+] Page title: " + page.title());
System.out.println("[+] Page URL: " + page.url()); System.out.println("[+] Page URL: " + page.url());
// 点击三联单菜单 // 点击三联单菜单
System.out.println("[*] Clicking 三联单 menu..."); System.out.println("[*] Clicking 三联单 menu...");
page.locator("#module_2094F683-C542-4904-B33E-0D227C4DE199").first().click(); page.locator("#module_2094F683-C542-4904-B33E-0D227C4DE199").first().click();
sleep(3000); sleep(3000);
// screenshot(page, "after_sanliandan");
System.out.println("[+] 三联单 page title: " + page.title()); System.out.println("[+] 三联单 page title: " + page.title());
// 设置日期筛选 // 设置日期筛选
System.out.println("[*] Setting date filter to: " + dateStrFormatted); System.out.println("[*] Setting date filter to: " + dateStrFormatted);
// 检查元素是否存在 // 检查元素是否存在
boolean startDateExists = page.locator("#Search_ThreeBillList_startWdate").count() > 0; boolean startDateExists = page.locator("#Search_ThreeBillList_startWdate").count() > 0;
boolean endDateExists = page.locator("#Search_ThreeBillList_endWdate").count() > 0; boolean endDateExists = page.locator("#Search_ThreeBillList_endWdate").count() > 0;
boolean queryBtnExists = page.locator("#Search_ThreeBillList_Button").count() > 0; boolean queryBtnExists = page.locator("#Search_ThreeBillList_Button").count() > 0;
System.out.println("[*] Elements found - startDate: " + startDateExists + ", endDate: " + endDateExists + ", queryBtn: " + queryBtnExists); System.out.println("[*] Elements found - startDate: " + startDateExists + ", endDate: " + endDateExists + ", queryBtn: " + queryBtnExists);
// 直接设置日期值WdatePicker 类型输入框 // 直接设置日期值WdatePicker 类型输入框
if (startDateExists) { if (startDateExists) {
System.out.println("[*] Setting start date to: " + dateStrFormatted); System.out.println("[*] Setting start date to: " + dateStrFormatted);
page.locator("#Search_ThreeBillList_startWdate").first().fill(dateStrFormatted); page.locator("#Search_ThreeBillList_startWdate").first().fill(dateStrFormatted);
sleep(500); sleep(500);
} else { } else {
System.out.println("[!] Start date element not found"); System.out.println("[!] Start date element not found");
} }
// 设置结束日期 // 设置结束日期
if (endDateExists) { if (endDateExists) {
System.out.println("[*] Setting end date to: " + dateStrFormatted); System.out.println("[*] Setting end date to: " + dateStrFormatted);
page.locator("#Search_ThreeBillList_endWdate").first().fill(dateStrFormatted); page.locator("#Search_ThreeBillList_endWdate").first().fill(dateStrFormatted);
sleep(500); sleep(500);
} else { } else {
System.out.println("[!] End date element not found"); System.out.println("[!] End date element not found");
} }
// 点击查询按钮等待列表加载 // 点击查询按钮等待列表加载
if (queryBtnExists) { if (queryBtnExists) {
System.out.println("[*] Clicking query button..."); System.out.println("[*] Clicking query button...");
// 等待列表内容出现
page.waitForResponse("https://101.227.180.215/SHCityEnvCW/Services/CWSServ.asmx/ThreeBillQueryBiTripList", () -> { page.waitForResponse("https://101.227.180.215/SHCityEnvCW/Services/CWSServ.asmx/ThreeBillQueryBiTripList", () -> {
page.locator("#Search_ThreeBillList_Button").first().click(); page.locator("#Search_ThreeBillList_Button").first().click();
}); });
page.waitForTimeout(3 * 1000); page.waitForTimeout(3 * 1000);
} else { } else {
System.out.println("[!] Query button not found"); System.out.println("[!] Query button not found");
} }
// screenshot(page, "after_query"); // 点击导出按钮
// 点击导出按钮
if (page.locator("#Export_ThreeBillList_Button").count() > 0) { if (page.locator("#Export_ThreeBillList_Button").count() > 0) {
System.out.println("[*] Clicking export button..."); System.out.println("[*] Clicking export button...");
// 设置下载目录
// 点击主导出按钮打开对话框再用 JS click 触发对话框内导出按钮
Download dl = page.waitForDownload(new Page.WaitForDownloadOptions().setTimeout(300000), Download dl = page.waitForDownload(new Page.WaitForDownloadOptions().setTimeout(300000),
() -> { () -> {
page.locator("#Export_ThreeBillList_Button").first().click(); page.locator("#Export_ThreeBillList_Button").first().click();
sleep(2000); sleep(2000);
}); });
System.out.println("[*] Waiting for download to complete..."); System.out.println("[*] Waiting for download to complete...");
dl.saveAs(savedFile); dl.saveAs(savedFile);
@ -215,22 +238,21 @@ public class EtsScraper {
System.out.println("[+] Download saved to: " + savedFile + " (" + totalBytes + " bytes)"); System.out.println("[+] Download saved to: " + savedFile + " (" + totalBytes + " bytes)");
if (totalBytes == 0) { if (totalBytes == 0) {
System.out.println("[-] Downloaded file is empty"); System.out.println("[-] Downloaded file is empty");
} else { } else {
System.out.println("[+] Download size: " + totalBytes + " bytes"); System.out.println("[+] Download size: " + totalBytes + " bytes");
autoImportBill(savedFile, proxyHost, proxyUser, proxyPass); autoImportBill(savedFile, proxyHost, proxyUser, proxyPass);
} }
} }
// screenshot(page, "after_export");
System.out.println("[+] Query and export completed!"); System.out.println("[+] Query and export completed!");
} else { } else {
System.out.println("[-] Login failed. Check screenshots/ for debugging."); System.out.println("[-] Login failed. Check screenshots/ for debugging.");
// screenshot(page, "login_failed"); }
} } finally {
} finally {
browser.close(); browser.close();
} }
} }
} }
private static void printHelp() { private static void printHelp() {
System.out.println(""" System.out.println("""
@ -239,16 +261,18 @@ public class EtsScraper {
用法: java -jar ets-playwright.jar [选项] 用法: java -jar ets-playwright.jar [选项]
选项: 选项:
-s <url> ets-proxy 服务器地址 -s <url> ets-proxy 服务器地址
-u <user> ets-proxy 用户名 -u <user> ets-proxy 用户名
-p <pass> ets-proxy 密码 -p <pass> ets-proxy 密码
-d <date> 查询日期格式 yyyy-MM-dd -d <date> 查询日期格式 yyyy-MM-dd
-h 显示此帮助信息 -r <seconds> 定时执行间隔不传则只执行一次
-h 显示此帮助信息
示例: 示例:
java -jar ets-playwright.jar -s https://api.ets.niko.red -u admin -p 123456 -d 2026-05-04 java -jar ets-playwright.jar -s http://127.0.0.1:8081 -u sccw -p slife@123 -d 2026-05-21
"""); java -jar ets-playwright.jar -s http://127.0.0.1:8081 -u sccw -p slife@123 -d 2026-05-21 -r 300
} """);
}
public static boolean doLoginWithCaptcha(Page page) throws Exception { public static boolean doLoginWithCaptcha(Page page) throws Exception {
// Recognize captcha first // Recognize captcha first