From b6f285bf68f5a3cc58464598a38eec34fe7ee202 Mon Sep 17 00:00:00 2001 From: Niko <1377382065@qq.com> Date: Fri, 22 May 2026 09:53:07 +0800 Subject: [PATCH] add -r --- src/main/java/com/ets/scraper/EtsScraper.java | 170 ++++++++++-------- 1 file changed, 97 insertions(+), 73 deletions(-) diff --git a/src/main/java/com/ets/scraper/EtsScraper.java b/src/main/java/com/ets/scraper/EtsScraper.java index 867cc3f..a8fb2d1 100644 --- a/src/main/java/com/ets/scraper/EtsScraper.java +++ b/src/main/java/com/ets/scraper/EtsScraper.java @@ -29,9 +29,8 @@ public class EtsScraper { private static final String USERNAME = "sccw"; private static final String PASSWORD = "slife@123"; private static final Path SCREENSHOT_DIR = Path.of("screenshots"); - private static final String OLLAMA_URL = "http://10.0.1.39:11434"; + private static final String OLLAMA_URL = "http://127.0.0.1:11434"; private static final String OLLAMA_MODEL = "qwen3-vl:4b"; - private static final String PROXY_HOST = "http://127.0.0.1:8081"; public static void main(String[] args) throws Exception { // Parse CLI arguments @@ -39,6 +38,7 @@ public class EtsScraper { String proxyUser = null; String proxyPass = null; String dateStr = null; + Integer repeatInterval = null; for (int i = 0; i < args.length; i++) { switch (args[i]) { @@ -57,156 +57,179 @@ public class EtsScraper { case "-d": dateStr = args[++i]; break; + case "-r": + repeatInterval = Integer.parseInt(args[++i]); + break; default: System.err.println("[-] Unknown option: " + args[i]); printHelp(); return; - } - } + } + } if (proxyHost == null || proxyUser == null || proxyPass == null || dateStr == null) { System.err.println("[-] Missing required arguments"); printHelp(); return; - } + } java.time.LocalDate targetDate; try { targetDate = java.time.LocalDate.parse(dateStr); - } catch (Exception e) { + } catch (Exception e) { System.err.println("[-] Invalid date format: " + dateStr + ", expected yyyy-MM-dd"); return; - } + } + + if (repeatInterval != null) { + // Polling mode: run every N seconds + System.out.println("[*] Polling mode: running every " + repeatInterval + " seconds (target date: " + dateStr + ")"); + while (true) { + try { + runScraper(proxyHost, proxyUser, proxyPass, targetDate, true); + } catch (Exception e) { + System.err.println("[-] Scraper failed: " + e.getMessage()); + } + System.out.println("[*] Sleeping " + repeatInterval + "s before next run..."); + sleep(repeatInterval * 1000L); + } + } else { + // Single-shot mode: run once + runScraper(proxyHost, proxyUser, proxyPass, targetDate, false); + } + } + + private static void runScraper(String proxyHost, String proxyUser, String proxyPass, + java.time.LocalDate targetDate, boolean isPolling) throws Exception { String dateStrFormatted = targetDate.format(java.time.format.DateTimeFormatter.ofPattern("yyyy-MM-dd")); - String dateStrFileName = targetDate.format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmm")); + // In one-shot mode, use target date as filename (for "file exists" check) + // In polling mode, use current time as filename to always re-run + String dateStrFileName; + if (isPolling) { + dateStrFileName = java.time.LocalDateTime.now() + .format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmm")); + } else { + dateStrFileName = targetDate.format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmm")); + } Path savedFile = Path.of("downloads").resolve("三联单列表_" + dateStrFileName + ".xls"); - if (java.nio.file.Files.exists(savedFile) && java.nio.file.Files.size(savedFile) > 0) { + if (!isPolling && java.nio.file.Files.exists(savedFile) && java.nio.file.Files.size(savedFile) > 0) { System.out.println("[+] File already exists: " + savedFile); System.out.println("[+] File size: " + java.nio.file.Files.size(savedFile) + " bytes"); autoImportBill(savedFile, proxyHost, proxyUser, proxyPass); return; - } + } try { createDirectories(SCREENSHOT_DIR); - } catch (Exception e) { + } catch (Exception e) { System.err.println("Failed to create directories: " + e.getMessage()); - } + } + + System.out.println("[*] --- Scraper run at " + LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss")) + " ---"); try (Playwright playwright = Playwright.create()) { Browser browser = playwright.chromium().launch( new BrowserType.LaunchOptions() - .setHeadless(true) - ); + .setHeadless(true) + ); BrowserContext context = browser.newContext( new Browser.NewContextOptions() - .setIgnoreHTTPSErrors(true) - .setViewportSize(1920, 1080) - .setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36") - ); + .setIgnoreHTTPSErrors(true) + .setViewportSize(1920, 1080) + .setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36") + ); Page page = context.newPage(); try { - // Navigate to frame.html first to establish session/cookies + // Navigate to frame.html first to establish session/cookies System.out.println("[*] Establishing session via " + FRAME_URL); page.navigate(FRAME_URL, new Page.NavigateOptions() - .setTimeout(30000) - .setWaitUntil(WaitUntilState.DOMCONTENTLOADED)); + .setTimeout(30000) + .setWaitUntil(WaitUntilState.DOMCONTENTLOADED)); sleep(3000); - // Navigate directly to the login page + // Navigate directly to the login page System.out.println("[*] Navigating to login page: " + LOGIN_URL); page.navigate(LOGIN_URL, new Page.NavigateOptions() - .setTimeout(30000) - .setWaitUntil(WaitUntilState.NETWORKIDLE)); + .setTimeout(30000) + .setWaitUntil(WaitUntilState.NETWORKIDLE)); sleep(2000); - // Close notification dialog FIRST (before filling credentials) + // Close notification dialog FIRST (before filling credentials) closeNotificationDialog(page); -// screenshot(page, "after_close_dialog"); - - // Download captcha image + // Download captcha image downloadCaptcha(page); - // Close dialog again after page reload + // Close dialog again after page reload closeNotificationDialog(page); - // Recognize captcha and perform login + // Recognize captcha and perform login boolean loggedin = doLoginWithCaptcha(page); if (loggedin) { System.out.println("[+] Login successful!"); sleep(2000); -// screenshot(page, "after_login"); - System.out.println("[+] Page title: " + page.title()); System.out.println("[+] Page URL: " + page.url()); - // 点击三联单菜单 + // 点击三联单菜单 System.out.println("[*] Clicking 三联单 menu..."); page.locator("#module_2094F683-C542-4904-B33E-0D227C4DE199").first().click(); sleep(3000); - -// screenshot(page, "after_sanliandan"); System.out.println("[+] 三联单 page title: " + page.title()); - // 设置日期筛选 + // 设置日期筛选 System.out.println("[*] Setting date filter to: " + dateStrFormatted); - // 检查元素是否存在 + // 检查元素是否存在 boolean startDateExists = page.locator("#Search_ThreeBillList_startWdate").count() > 0; boolean endDateExists = page.locator("#Search_ThreeBillList_endWdate").count() > 0; boolean queryBtnExists = page.locator("#Search_ThreeBillList_Button").count() > 0; System.out.println("[*] Elements found - startDate: " + startDateExists + ", endDate: " + endDateExists + ", queryBtn: " + queryBtnExists); - // 直接设置日期值(WdatePicker 类型输入框) + // 直接设置日期值(WdatePicker 类型输入框) if (startDateExists) { System.out.println("[*] Setting start date to: " + dateStrFormatted); page.locator("#Search_ThreeBillList_startWdate").first().fill(dateStrFormatted); sleep(500); - } else { + } else { System.out.println("[!] Start date element not found"); - } + } - // 设置结束日期 + // 设置结束日期 if (endDateExists) { System.out.println("[*] Setting end date to: " + dateStrFormatted); page.locator("#Search_ThreeBillList_endWdate").first().fill(dateStrFormatted); sleep(500); - } else { + } else { System.out.println("[!] End date element not found"); - } + } - // 点击查询按钮,等待列表加载 + // 点击查询按钮,等待列表加载 if (queryBtnExists) { System.out.println("[*] Clicking query button..."); - // 等待列表内容出现 page.waitForResponse("https://101.227.180.215/SHCityEnvCW/Services/CWSServ.asmx/ThreeBillQueryBiTripList", () -> { page.locator("#Search_ThreeBillList_Button").first().click(); - }); + }); page.waitForTimeout(3 * 1000); - } else { + } else { System.out.println("[!] Query button not found"); - } + } -// screenshot(page, "after_query"); - - // 点击导出按钮 + // 点击导出按钮 if (page.locator("#Export_ThreeBillList_Button").count() > 0) { System.out.println("[*] Clicking export button..."); - // 设置下载目录 - // 点击主导出按钮打开对话框,再用 JS click 触发对话框内导出按钮 Download dl = page.waitForDownload(new Page.WaitForDownloadOptions().setTimeout(300000), - () -> { + () -> { page.locator("#Export_ThreeBillList_Button").first().click(); sleep(2000); - }); + }); System.out.println("[*] Waiting for download to complete..."); dl.saveAs(savedFile); @@ -215,22 +238,21 @@ public class EtsScraper { System.out.println("[+] Download saved to: " + savedFile + " (" + totalBytes + " bytes)"); if (totalBytes == 0) { System.out.println("[-] Downloaded file is empty"); - } else { + } else { System.out.println("[+] Download size: " + totalBytes + " bytes"); autoImportBill(savedFile, proxyHost, proxyUser, proxyPass); - } + } } -// screenshot(page, "after_export"); System.out.println("[+] Query and export completed!"); - } else { + } else { System.out.println("[-] Login failed. Check screenshots/ for debugging."); -// screenshot(page, "login_failed"); - } - } finally { + } + } finally { browser.close(); - } - } - } + } + } + } + private static void printHelp() { System.out.println(""" @@ -239,16 +261,18 @@ public class EtsScraper { 用法: java -jar ets-playwright.jar [选项] 选项: - -s ets-proxy 服务器地址 - -u ets-proxy 用户名 - -p ets-proxy 密码 - -d 查询日期,格式 yyyy-MM-dd - -h 显示此帮助信息 + -s ets-proxy 服务器地址 + -u ets-proxy 用户名 + -p ets-proxy 密码 + -d 查询日期,格式 yyyy-MM-dd + -r 定时执行间隔(秒),不传则只执行一次 + -h 显示此帮助信息 示例: - java -jar ets-playwright.jar -s https://api.ets.niko.red -u admin -p 123456 -d 2026-05-04 - """); - } + java -jar ets-playwright.jar -s http://127.0.0.1:8081 -u sccw -p slife@123 -d 2026-05-21 + java -jar ets-playwright.jar -s http://127.0.0.1:8081 -u sccw -p slife@123 -d 2026-05-21 -r 300 + """); + } public static boolean doLoginWithCaptcha(Page page) throws Exception { // Recognize captcha first