package com.ets.scraper;

import com.microsoft.playwright.*;
import com.microsoft.playwright.Frame;
import com.microsoft.playwright.options.LoadState;
import com.microsoft.playwright.options.WaitUntilState;

import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URL;
import java.net.URLEncoder;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.Duration;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.Base64;

import static java.nio.file.Files.createDirectories;

/**
 * ETS (Construction Waste Management Platform) Web Scraper.
 *
 * <p>Uses Playwright to log in to the platform (the captcha is read by an Ollama
 * vision model), exports the bill list for one day as an Excel file, and uploads
 * that file to the ets-proxy import endpoint.
 */
public class EtsScraper {

    private static final String BASE_URL = "https://101.227.180.215/SHCityEnvCW";
    public static final String FRAME_URL = BASE_URL + "/CWS/frame.html";
    public static final String LOGIN_URL = BASE_URL + "/CWS/userlogin.html";
    private static final String CAPTCHA_URL_GLOB = BASE_URL + "/Services/ValiDateImage.ashx*";
    private static final String BILL_QUERY_URL =
            BASE_URL + "/Services/CWSServ.asmx/ThreeBillQueryBiTripList";

    // NOTE(review): credentials are hard-coded in source; consider moving them to
    // configuration or environment variables.
    private static final String USERNAME = "sccw";
    private static final String PASSWORD = "slife@123";

    private static final Path SCREENSHOT_DIR = Path.of("screenshots");
    private static final String OLLAMA_URL = "http://10.0.1.39:11434";
    private static final String OLLAMA_MODEL = "qwen3-vl:4b";
    // Not referenced anywhere in this class; kept in case other tooling relies on it.
    private static final String PROXY_HOST = "http://127.0.0.1:8081";

    /** Minimum width, in pixels, the captcha is upscaled to before recognition. */
    private static final int MIN_CAPTCHA_WIDTH = 200;

    private static final DateTimeFormatter QUERY_DATE_FORMAT = DateTimeFormatter.ofPattern("yyyy-MM-dd");
    private static final DateTimeFormatter FILE_DATE_FORMAT = DateTimeFormatter.ofPattern("yyyyMMdd");
    private static final DateTimeFormatter SCREENSHOT_TS_FORMAT =
            DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss");

    /**
     * Command-line entry point.
     *
     * <p>Required options: {@code -s} proxy host, {@code -u} proxy user, {@code -p} proxy
     * password, {@code -d} query date ({@code yyyy-MM-dd}). {@code -h} prints help.
     * If the export file for the date already exists and is non-empty, the browser
     * step is skipped and the existing file is imported directly.
     *
     * @param args command-line arguments
     * @throws Exception if browser automation or file access fails
     */
    public static void main(String[] args) throws Exception {
        String proxyHost = null;
        String proxyUser = null;
        String proxyPass = null;
        String dateStr = null;

        for (int i = 0; i < args.length; i++) {
            String option = args[i];
            switch (option) {
                case "-h" -> {
                    printHelp();
                    return;
                }
                case "-s", "-u", "-p", "-d" -> {
                    // Guard against a value option given as the last argument.
                    if (i + 1 >= args.length) {
                        System.err.println("[-] Missing value for option: " + option);
                        printHelp();
                        return;
                    }
                    String value = args[++i];
                    switch (option) {
                        case "-s" -> proxyHost = value;
                        case "-u" -> proxyUser = value;
                        case "-p" -> proxyPass = value;
                        default -> dateStr = value;
                    }
                }
                default -> {
                    System.err.println("[-] Unknown option: " + option);
                    printHelp();
                    return;
                }
            }
        }

        if (proxyHost == null || proxyUser == null || proxyPass == null || dateStr == null) {
            System.err.println("[-] Missing required arguments");
            printHelp();
            return;
        }

        LocalDate targetDate;
        try {
            targetDate = LocalDate.parse(dateStr);
        } catch (DateTimeParseException e) {
            System.err.println("[-] Invalid date format: " + dateStr + ", expected yyyy-MM-dd");
            return;
        }

        String dateStrFormatted = targetDate.format(QUERY_DATE_FORMAT);
        String dateStrFileName = targetDate.format(FILE_DATE_FORMAT);
        Path downloadPath = Path.of("downloads").toAbsolutePath().normalize();
        Path savedFile = downloadPath.resolve("三联单列表_" + dateStrFileName + ".xls");

        // Reuse a previous export for the same date instead of scraping again.
        if (Files.exists(savedFile) && Files.size(savedFile) > 0) {
            System.out.println("[+] File already exists: " + savedFile);
            System.out.println("[+] File size: " + Files.size(savedFile) + " bytes");
            autoImportBill(savedFile, proxyHost, proxyUser, proxyPass);
            return;
        }

        try {
            createDirectories(SCREENSHOT_DIR);
        } catch (IOException | SecurityException e) {
            System.err.println("Failed to create directories: " + e.getMessage());
        }

        try (Playwright playwright = Playwright.create()) {
            Browser browser = playwright.chromium().launch(
                    new BrowserType.LaunchOptions().setHeadless(false));
            try {
                BrowserContext context = browser.newContext(
                        new Browser.NewContextOptions()
                                .setIgnoreHTTPSErrors(true)
                                .setViewportSize(1920, 1080)
                                .setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"));
                Page page = context.newPage();
                runExport(page, dateStrFormatted, savedFile, proxyHost, proxyUser, proxyPass);
            } finally {
                browser.close();
            }
        }
    }

    /** Logs in, queries the bill list for {@code queryDate}, exports it and imports the file. */
    private static void runExport(Page page, String queryDate, Path savedFile,
                                  String proxyHost, String proxyUser, String proxyPass)
            throws Exception {
        // Navigate to frame.html first to establish session/cookies
        System.out.println("[*] Establishing session via " + FRAME_URL);
        page.navigate(FRAME_URL, new Page.NavigateOptions()
                .setTimeout(30000)
                .setWaitUntil(WaitUntilState.DOMCONTENTLOADED));
        sleep(3000);

        // Navigate directly to the login page
        System.out.println("[*] Navigating to login page: " + LOGIN_URL);
        page.navigate(LOGIN_URL, new Page.NavigateOptions()
                .setTimeout(30000)
                .setWaitUntil(WaitUntilState.NETWORKIDLE));
        sleep(2000);

        // Close notification dialog FIRST (before filling credentials)
        closeNotificationDialog(page);
        // Download captcha image (this reloads the page)
        downloadCaptcha(page);
        // Close dialog again after page reload
        closeNotificationDialog(page);

        // Recognize captcha and perform login
        if (!doLoginWithCaptcha(page)) {
            System.out.println("[-] Login failed. Check screenshots/ for debugging.");
            // Capture the page so the message above points at something real.
            screenshot(page, "login_failed");
            return;
        }

        System.out.println("[+] Login successful!");
        sleep(2000);
        System.out.println("[+] Page title: " + page.title());
        System.out.println("[+] Page URL: " + page.url());

        // Open the bill list module from the menu
        System.out.println("[*] Clicking 三联单 menu...");
        page.locator("#module_2094F683-C542-4904-B33E-0D227C4DE199").first().click();
        sleep(3000);
        System.out.println("[+] 三联单 page title: " + page.title());

        queryBillsForDate(page, queryDate);
        exportAndImport(page, savedFile, proxyHost, proxyUser, proxyPass);

        System.out.println("[+] Query and export completed!");
    }

    /** Fills the start/end date inputs with {@code queryDate} and runs the query. */
    private static void queryBillsForDate(Page page, String queryDate) {
        System.out.println("[*] Setting date filter to: " + queryDate);

        // Check which controls are present before touching them
        boolean startDateExists = page.locator("#Search_ThreeBillList_startWdate").count() > 0;
        boolean endDateExists = page.locator("#Search_ThreeBillList_endWdate").count() > 0;
        boolean queryBtnExists = page.locator("#Search_ThreeBillList_Button").count() > 0;
        System.out.println("[*] Elements found - startDate: " + startDateExists
                + ", endDate: " + endDateExists + ", queryBtn: " + queryBtnExists);

        // Set the value directly (WdatePicker-style input)
        if (startDateExists) {
            System.out.println("[*] Setting start date to: " + queryDate);
            page.locator("#Search_ThreeBillList_startWdate").first().fill(queryDate);
            sleep(500);
        } else {
            System.out.println("[!] Start date element not found");
        }

        // Set the end date
        if (endDateExists) {
            System.out.println("[*] Setting end date to: " + queryDate);
            page.locator("#Search_ThreeBillList_endWdate").first().fill(queryDate);
            sleep(500);
        } else {
            System.out.println("[!] End date element not found");
        }

        // Click the query button and wait for the list request to complete
        if (queryBtnExists) {
            System.out.println("[*] Clicking query button...");
            page.waitForResponse(BILL_QUERY_URL,
                    () -> page.locator("#Search_ThreeBillList_Button").first().click());
            page.waitForTimeout(3000);
        } else {
            System.out.println("[!] Query button not found");
        }
    }

    /** Clicks the export button, saves the download to {@code savedFile} and imports it. */
    private static void exportAndImport(Page page, Path savedFile,
                                        String proxyHost, String proxyUser, String proxyPass) {
        if (page.locator("#Export_ThreeBillList_Button").count() == 0) {
            return;
        }
        System.out.println("[*] Clicking export button...");
        Download dl = page.waitForDownload(
                new Page.WaitForDownloadOptions().setTimeout(300000),
                () -> {
                    page.locator("#Export_ThreeBillList_Button").first().click();
                    sleep(2000);
                });
        System.out.println("[*] Waiting for download to complete...");
        dl.saveAs(savedFile);

        long totalBytes = savedFile.toFile().length();
        System.out.println("[+] Download saved to: " + savedFile + " (" + totalBytes + " bytes)");
        if (totalBytes == 0) {
            System.out.println("[-] Downloaded file is empty");
        } else {
            System.out.println("[+] Download size: " + totalBytes + " bytes");
            autoImportBill(savedFile, proxyHost, proxyUser, proxyPass);
        }
    }

    /** Prints command-line usage to standard output. */
    private static void printHelp() {
        System.out.println("""
                ETS 三联单爬虫 - 导出并导入三联单 Excel 数据

                用法: java -jar ets-playwright.jar [选项]

                选项:
                  -s  ets-proxy 服务器地址
                  -u  ets-proxy 用户名
                  -p  ets-proxy 密码
                  -d  查询日期,格式 yyyy-MM-dd
                  -h  显示此帮助信息

                示例:
                  java -jar ets-playwright.jar -s https://api.ets.niko.red -u admin -p 123456 -d 2026-05-04
                """);
    }

    /**
     * Recognizes the previously saved captcha image and submits the login form.
     *
     * <p>NOTE(review): this returns {@code true} whenever the form was submitted, even
     * if the load-state wait times out; it does not verify that the site accepted the
     * credentials. {@code false} is returned only when no captcha text was recognized.
     * It also reads {@code screenshots/captcha.png}, which may be left over from an
     * earlier run if {@link #downloadCaptcha(Page)} failed.
     *
     * @param page page currently showing the login form
     * @return {@code false} if the captcha could not be recognized, otherwise {@code true}
     * @throws Exception if the captcha file cannot be read or the Ollama call fails
     */
    public static boolean doLoginWithCaptcha(Page page) throws Exception {
        // Recognize captcha first
        Path captchaPath = SCREENSHOT_DIR.resolve("captcha.png");
        System.out.println("[*] Recognizing captcha with Ollama...");
        String captchaText = recognizeCaptcha(captchaPath);
        if (captchaText == null || captchaText.isEmpty()) {
            System.out.println("[-] Failed to recognize captcha");
            return false;
        }
        System.out.println("[+] Captcha recognized: " + captchaText);

        // Fill using the form's ID selectors
        System.out.println("[*] Filling credentials...");
        page.locator("#inputLoginUser").first().fill(USERNAME);
        sleep(300);
        page.locator("#inputLoginPassWord").first().fill(PASSWORD);
        sleep(300);
        page.locator("#txt_ValidatePic").first().fill(captchaText);
        sleep(500);

        // Click submit button
        System.out.println("[*] Clicking login button...");
        page.locator("#inputLoginButton").first().click();
        try {
            page.waitForLoadState(LoadState.DOMCONTENTLOADED,
                    new Page.WaitForLoadStateOptions().setTimeout(10000));
        } catch (PlaywrightException e) {
            System.out.println("[!] Navigation timed out, but credentials were submitted");
        }
        return true;
    }

    /**
     * Reloads the page and saves the captcha image response to
     * {@code screenshots/captcha.png}. Failures are logged, not thrown.
     *
     * @param page page currently showing the login form
     */
    public static void downloadCaptcha(Page page) {
        try {
            // Register the response wait FIRST, then reload to trigger the request
            Response resp = page.waitForResponse(
                    CAPTCHA_URL_GLOB,
                    () -> page.reload(new Page.ReloadOptions()
                            .setWaitUntil(WaitUntilState.NETWORKIDLE)
                            .setTimeout(10000)));
            if (resp != null) {
                byte[] body = resp.body();
                Path captchaPath = SCREENSHOT_DIR.resolve("captcha.png");
                Files.write(captchaPath, body);
                System.out.println("[+] Captcha saved to: " + captchaPath);
                System.out.println("[+] Captcha size: " + body.length + " bytes");
            }
        } catch (PlaywrightException | IOException e) {
            System.out.println("[-] Failed to download captcha: " + e.getMessage());
        }
    }

    /**
     * Closes the notification dialog ({@code #Div_GG_Box}) if any frame contains it.
     *
     * @param page page whose frames are searched for the dialog
     */
    public static void closeNotificationDialog(Page page) {
        // Find the frame that contains the notification dialog
        Frame dialogFrame = null;
        for (Frame f : page.frames()) {
            try {
                Object hasDialog = f.evaluate(
                        "() => document.getElementById('Div_GG_Box') ? 'FOUND' : 'NOT_HERE'");
                if ("FOUND".equals(hasDialog)) {
                    dialogFrame = f;
                    break;
                }
            } catch (PlaywrightException ignored) {
                // A frame that cannot be evaluated cannot be the one we want; try the next.
            }
        }
        if (dialogFrame == null) {
            System.out.println("[*] No notification dialog found");
            return;
        }
        System.out.println("[*] Closing notification dialog in frame: " + dialogFrame.url());

        // Click the X button in the correct frame. Best effort: if the click fails,
        // the JS fallback below must still run.
        try {
            dialogFrame.locator(".green_popup_close").first().click();
        } catch (PlaywrightException e) {
            System.out.println("[!] Close button click failed: " + e.getMessage());
        }
        sleep(500);

        // Force hide via JS in the correct frame (onclick uses jQuery which may fail)
        dialogFrame.evaluate("document.getElementById('Div_GG_Box').style.display = 'none';");
        sleep(500);
        System.out.println("[*] Notification dialog closed");
    }

    /** Saves a timestamped screenshot named {@code name} under {@code screenshots/}. */
    private static void screenshot(Page page, String name) {
        try {
            String timestamp = LocalDateTime.now().format(SCREENSHOT_TS_FORMAT);
            Path path = SCREENSHOT_DIR.resolve(name + "_" + timestamp + ".png");
            page.screenshot(new Page.ScreenshotOptions().setPath(path));
            System.out.println("[+] Screenshot saved: " + path);
        } catch (PlaywrightException e) {
            System.err.println("[-] Screenshot failed: " + e.getMessage());
        }
    }

    /**
     * Sleeps for {@code ms} milliseconds; on interruption restores the interrupt flag
     * and returns early.
     *
     * @param ms time to sleep in milliseconds
     */
    public static void sleep(long ms) {
        try {
            Thread.sleep(ms);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Sends the captcha image at {@code imagePath} to Ollama and returns the recognized text.
     *
     * <p>If the image can be decoded it is upscaled by an integer factor to at least
     * {@value #MIN_CAPTCHA_WIDTH} px wide and re-encoded as PNG; otherwise the raw
     * bytes are sent unchanged.
     *
     * @param imagePath path of the captcha image file
     * @return normalized captcha text, possibly empty
     * @throws Exception if the file cannot be read or the Ollama request fails
     */
    public static String recognizeCaptcha(Path imagePath) throws Exception {
        byte[] imageBytes = Files.readAllBytes(imagePath);

        BufferedImage srcImage = ImageIO.read(new ByteArrayInputStream(imageBytes));
        if (srcImage == null) {
            // No ImageIO reader for this format: send the bytes as they are.
            return callOllama(Base64.getEncoder().encodeToString(imageBytes));
        }

        // Ceiling division so the scaled width reaches the minimum; floor division
        // would leave e.g. a 120px image unscaled.
        int width = srcImage.getWidth();
        int scale = Math.max(1, (MIN_CAPTCHA_WIDTH + width - 1) / width);
        int newWidth = width * scale;
        int newHeight = srcImage.getHeight() * scale;

        Image scaled = srcImage.getScaledInstance(newWidth, newHeight, Image.SCALE_SMOOTH);
        BufferedImage resized = new BufferedImage(newWidth, newHeight, BufferedImage.TYPE_INT_RGB);
        Graphics2D g2d = resized.createGraphics();
        try {
            g2d.drawImage(scaled, 0, 0, null);
        } finally {
            g2d.dispose();
        }

        ByteArrayOutputStream pngOut = new ByteArrayOutputStream();
        ImageIO.write(resized, "png", pngOut);
        return callOllama(Base64.getEncoder().encodeToString(pngOut.toByteArray()));
    }

    /**
     * Posts the image to Ollama's {@code /api/chat} endpoint and concatenates the
     * {@code content} fragments of the line-delimited streaming response.
     */
    private static String callOllama(String base64Image) throws Exception {
        String json = "{"
                + "\"model\":\"" + OLLAMA_MODEL + "\","
                + "\"messages\":["
                + " {"
                + " \"role\":\"user\","
                + " \"content\":\"识别图中的验证码文字,只返回文字内容\","
                + " \"images\":[\"" + base64Image + "\"]"
                + " }"
                + "]"
                + "}";

        URL url = URI.create(OLLAMA_URL + "/api/chat").toURL();
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        try {
            conn.setRequestMethod("POST");
            conn.setConnectTimeout(15000);
            conn.setReadTimeout(60000);
            conn.setDoOutput(true);
            conn.setRequestProperty("Content-Type", "application/json; charset=utf-8");
            try (OutputStream os = conn.getOutputStream()) {
                os.write(json.getBytes(StandardCharsets.UTF_8));
            }

            StringBuilder fullContent = new StringBuilder();
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    // Each line is a separate JSON object (streaming response)
                    String fragment = extractJsonString(line, "content");
                    if (fragment != null) {
                        fullContent.append(fragment);
                    }
                    // Check for done marker
                    if (line.contains("\"done\":true")) {
                        break;
                    }
                }
            }
            return normalizeCaptcha(fullContent.toString());
        } finally {
            conn.disconnect();
        }
    }

    /**
     * Returns the decoded string value of the first {@code "key": "..."} member found in
     * {@code json}, or {@code null} if the key is absent or its value is not a string.
     * This is a minimal scanner, not a full JSON parser.
     */
    private static String extractJsonString(String json, String key) {
        String needle = "\"" + key + "\"";
        int from = 0;
        while (true) {
            int keyIdx = json.indexOf(needle, from);
            if (keyIdx < 0) {
                return null;
            }
            int pos = skipWhitespace(json, keyIdx + needle.length());
            if (pos < json.length() && json.charAt(pos) == ':') {
                pos = skipWhitespace(json, pos + 1);
                if (pos < json.length() && json.charAt(pos) == '"') {
                    return readJsonString(json, pos + 1);
                }
                return null;
            }
            // This occurrence was not a member name (no colon after it); keep looking.
            from = keyIdx + needle.length();
        }
    }

    /** Returns the index of the first non-whitespace character at or after {@code pos}. */
    private static int skipWhitespace(String s, int pos) {
        while (pos < s.length() && Character.isWhitespace(s.charAt(pos))) {
            pos++;
        }
        return pos;
    }

    /**
     * Decodes a JSON string body starting just after its opening quote; returns
     * {@code null} if the string is unterminated or contains a malformed escape.
     */
    private static String readJsonString(String json, int start) {
        StringBuilder out = new StringBuilder();
        int i = start;
        while (i < json.length()) {
            char c = json.charAt(i++);
            if (c == '"') {
                return out.toString();
            }
            if (c != '\\') {
                out.append(c);
                continue;
            }
            if (i >= json.length()) {
                return null;
            }
            char esc = json.charAt(i++);
            switch (esc) {
                case 'n' -> out.append('\n');
                case 't' -> out.append('\t');
                case 'r' -> out.append('\r');
                case 'b' -> out.append('\b');
                case 'f' -> out.append('\f');
                case 'u' -> {
                    if (i + 4 > json.length()) {
                        return null;
                    }
                    try {
                        out.append((char) Integer.parseInt(json.substring(i, i + 4), 16));
                    } catch (NumberFormatException e) {
                        return null;
                    }
                    i += 4;
                }
                default -> out.append(esc); // \" \\ \/
            }
        }
        return null;
    }

    /** Removes all whitespace and any leading/trailing quote or backtick characters. */
    private static String normalizeCaptcha(String raw) {
        if (raw == null || raw.isBlank()) {
            return "";
        }
        String s = raw.strip().replaceAll("\\s+", "");
        return s.replaceAll("^[`'\"]+|[`'\"]+$", "");
    }

    /**
     * Logs in to the proxy and, on success, uploads {@code filePath} to its import endpoint.
     *
     * @param filePath  exported Excel file to upload
     * @param proxyHost proxy base URL
     * @param username  proxy user name
     * @param password  proxy password
     */
    public static void autoImportBill(Path filePath, String proxyHost, String username, String password) {
        String token = proxyLogin(proxyHost, username, password);
        if (token == null) {
            System.out.println("[-] Proxy login failed, skipping import");
            return;
        }
        proxyImport(filePath, proxyHost, token);
    }

    /**
     * Posts the credentials to {@code /api/auth/login} and extracts the
     * {@code accessToken} string from the response body.
     *
     * @param proxyHost proxy base URL
     * @param username  proxy user name
     * @param password  proxy password
     * @return the access token, or {@code null} if the request failed or the response
     *         did not contain a {@code data} member and a string {@code accessToken}
     */
    public static String proxyLogin(String proxyHost, String username, String password) {
        try {
            // Encode the values so characters such as '&', '#', '%' or spaces cannot
            // corrupt the query string or make URI.create reject the URL.
            String loginUrl = proxyHost + "/api/auth/login?username="
                    + URLEncoder.encode(username, StandardCharsets.UTF_8)
                    + "&password=" + URLEncoder.encode(password, StandardCharsets.UTF_8);

            HttpClient client = HttpClient.newBuilder()
                    .connectTimeout(Duration.ofSeconds(10))
                    .build();
            HttpRequest request = HttpRequest.newBuilder()
                    .uri(URI.create(loginUrl))
                    .POST(HttpRequest.BodyPublishers.noBody())
                    .header("Content-Type", "application/json")
                    .build();
            HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
            String body = response.body();

            if (!body.contains("\"data\"") || !body.contains("\"accessToken\"")) {
                System.out.println("[-] Login response unexpected: "
                        + body.substring(0, Math.min(200, body.length())));
                return null;
            }

            String token = extractJsonString(body, "accessToken");
            if (token == null) {
                return null;
            }
            // NOTE(review): this logs a prefix of the access token; consider removing.
            System.out.println("[+] Proxy login successful, token: "
                    + token.substring(0, Math.min(20, token.length())) + "...");
            return token;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            System.out.println("[-] Proxy login failed: " + e.getMessage());
            return null;
        } catch (Exception e) {
            System.out.println("[-] Proxy login failed: " + e.getMessage());
            return null;
        }
    }

    /**
     * Uploads {@code filePath} to {@code /api/bill/import} as a single-part
     * {@code multipart/form-data} request and prints the response. Failures are
     * logged, not thrown.
     *
     * @param filePath  file to upload
     * @param proxyHost proxy base URL
     * @param token     value sent in the {@code authorization} header
     */
    public static void proxyImport(Path filePath, String proxyHost, String token) {
        try {
            String fileName = filePath.toFile().getName();
            String boundary = "----FormBoundary" + System.currentTimeMillis();
            String boundaryLine = "--" + boundary;

            // Build the whole entity in memory. Headers are encoded as UTF-8 explicitly
            // so a non-ASCII file name does not depend on the platform charset.
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            out.write((boundaryLine + "\r\n").getBytes(StandardCharsets.UTF_8));
            out.write(("Content-Disposition: form-data; name=\"file\"; filename=\""
                    + fileName + "\"\r\n").getBytes(StandardCharsets.UTF_8));
            out.write("Content-Type: application/octet-stream\r\n\r\n".getBytes(StandardCharsets.UTF_8));
            Files.copy(filePath, out);
            out.write(("\r\n" + boundaryLine + "--\r\n").getBytes(StandardCharsets.UTF_8));
            byte[] entityBytes = out.toByteArray();

            HttpClient client = HttpClient.newBuilder()
                    .connectTimeout(Duration.ofSeconds(30))
                    .build();
            HttpRequest request = HttpRequest.newBuilder()
                    .uri(URI.create(proxyHost + "/api/bill/import"))
                    .header("Content-Type", "multipart/form-data; boundary=" + boundary)
                    .header("authorization", token)
                    .POST(HttpRequest.BodyPublishers.ofByteArray(entityBytes))
                    .build();
            HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
            System.out.println("[+] Import response (" + response.statusCode() + "): " + response.body());
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            System.out.println("[-] Import failed: " + e.getMessage());
        } catch (Exception e) {
            System.out.println("[-] Import failed: " + e.getMessage());
            e.printStackTrace();
        }
    }
}