From 882c1b9b38cbbe93b129ffcc5a39e65f2ce908d8 Mon Sep 17 00:00:00 2001 From: Niko <1377382065@qq.com> Date: Mon, 4 May 2026 23:48:20 +0800 Subject: [PATCH] Initial commit: ETS web scraper with Playwright - Maven + Java 25 + Playwright 1.55.0 - Login automation for ETS construction waste management platform - Captcha download via waitForResponse - Notification dialog auto-close Co-Authored-By: Claude Opus 4.7 --- .mvn/java-toolchains.xml | 9 + pom.xml | 47 ++++ src/main/java/com/ets/scraper/EtsScraper.java | 262 ++++++++++++++++++ 3 files changed, 318 insertions(+) create mode 100644 .mvn/java-toolchains.xml create mode 100644 pom.xml create mode 100644 src/main/java/com/ets/scraper/EtsScraper.java diff --git a/.mvn/java-toolchains.xml b/.mvn/java-toolchains.xml new file mode 100644 index 0000000..e2e6ea3 --- /dev/null +++ b/.mvn/java-toolchains.xml @@ -0,0 +1,9 @@ + + + + jdk + 25 + 25.0.2-graalce + /Users/niko/.sdkman/candidates/java/25.0.2-graalce + + diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..9390090 --- /dev/null +++ b/pom.xml @@ -0,0 +1,47 @@ + + + 4.0.0 + + com.ets + ets-playwright + 1.0.0 + jar + + + 25 + 25 + UTF-8 + 1.55.0 + + + + + com.microsoft.playwright + playwright + ${playwright.version} + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.14.0 + + 25 + + + + org.codehaus.mojo + exec-maven-plugin + 3.5.0 + + com.ets.scraper.EtsScraper + + + + + diff --git a/src/main/java/com/ets/scraper/EtsScraper.java b/src/main/java/com/ets/scraper/EtsScraper.java new file mode 100644 index 0000000..bc36184 --- /dev/null +++ b/src/main/java/com/ets/scraper/EtsScraper.java @@ -0,0 +1,262 @@ +package com.ets.scraper; + +import com.microsoft.playwright.*; +import com.microsoft.playwright.options.LoadState; +import com.microsoft.playwright.options.WaitUntilState; + +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.file.Path; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; + 
+import static java.nio.file.Files.createDirectories;
+
+/**
+ * ETS (Construction Waste Management Platform) web scraper.
+ *
+ * <p>Drives a Chromium browser through Playwright: opens the platform's frame page and login
+ * page, dismisses the announcement dialog, saves the captcha image to disk, submits the
+ * credentials and, once the browser has left the login page, prints a short preview of the
+ * page it landed on. Screenshots taken along the way are written to {@code screenshots/}.
+ *
+ * <p>NOTE(review): the captcha image is only saved to {@code screenshots/captcha.png}; nothing
+ * in this class types a captcha answer into the form, so a login that requires one will be
+ * reported as failed.
+ */
+public class EtsScraper {
+    private static final String FRAME_URL = "https://101.227.180.215/SHCityEnvCW/CWS/frame.html";
+    private static final String LOGIN_URL = "https://101.227.180.215/SHCityEnvCW/CWS/userlogin.html";
+    /** File name of the login page; used to tell whether the browser has left it. */
+    private static final String LOGIN_PAGE_NAME = "userlogin.html";
+    /** Glob matching the captcha image request issued by the login page. */
+    private static final String CAPTCHA_URL_GLOB =
+            "https://101.227.180.215/SHCityEnvCW/Services/ValiDateImage.ashx*";
+
+    // Credentials can be supplied through the ETS_USERNAME / ETS_PASSWORD environment variables.
+    // NOTE(review): the fallback literals below are secrets committed to source control; they
+    // are kept only so existing invocations keep working. Rotate them and remove the fallbacks.
+    private static final String USERNAME = envOrDefault("ETS_USERNAME", "sccw");
+    private static final String PASSWORD = envOrDefault("ETS_PASSWORD", "slife@123");
+
+    private static final Path SCREENSHOT_DIR = Path.of("screenshots");
+    /** Timestamp suffix for screenshot file names; built once because it is immutable. */
+    private static final DateTimeFormatter SCREENSHOT_STAMP =
+            DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss");
+    /** Maximum number of characters of page text echoed after a successful login. */
+    private static final int PREVIEW_CHARS = 500;
+
+    /** Candidate selectors for the username field, most specific first. */
+    private static final String[] USERNAME_SELECTORS = {
+        "input[placeholder*='用户名']",
+        "input[placeholder*='username']",
+        "input[placeholder*='账号']",
+        "input[name*='user']",
+        "input[name='username']",
+        "input[type='text']",
+    };
+
+    /** Candidate selectors for the password field, most specific first. */
+    private static final String[] PASSWORD_SELECTORS = {
+        "input[placeholder*='密码']",
+        "input[placeholder*='password']",
+        "input[name*='pass']",
+        "input[name='password']",
+        "input[name='pwd']",
+        "input[type='password']",
+    };
+
+    /** Candidate selectors for the login form's submit control. */
+    private static final String[] SUBMIT_SELECTORS = {
+        "button[type='submit']",
+        "input[type='submit']",
+        "button:has-text('登录')",
+        "button:has-text('Login')",
+        ".login-btn",
+        "#loginBtn",
+    };
+
+    /**
+     * Program entry point: launches a headed Chromium instance and runs the login flow.
+     *
+     * @param args command-line arguments; not used
+     */
+    public static void main(String[] args) {
+        try {
+            createDirectories(SCREENSHOT_DIR);
+        } catch (Exception e) {
+            // Best effort: without the directory only the screenshot/captcha writes fail,
+            // and each of those reports its own error.
+            System.err.println("Failed to create screenshots dir: " + e.getMessage());
+        }
+
+        // Resources are closed in reverse order (context, browser, Playwright) even when the
+        // flow throws. HTTPS certificate errors are ignored for this context.
+        try (Playwright playwright = Playwright.create();
+             Browser browser = playwright.chromium().launch(
+                     new BrowserType.LaunchOptions().setHeadless(false));
+             BrowserContext context = browser.newContext(
+                     new Browser.NewContextOptions().setIgnoreHTTPSErrors(true))) {
+            scrape(context.newPage());
+        }
+    }
+
+    /**
+     * Runs the whole flow on {@code page}: session setup, dialog dismissal, captcha download,
+     * login, and the post-login report.
+     *
+     * @param page a fresh page of the browser context
+     */
+    private static void scrape(Page page) {
+        // Visit frame.html first to establish the session/cookies.
+        System.out.println("[*] Establishing session via " + FRAME_URL);
+        page.navigate(FRAME_URL, new Page.NavigateOptions()
+                .setTimeout(30000)
+                .setWaitUntil(WaitUntilState.DOMCONTENTLOADED));
+        sleep(3000);
+
+        System.out.println("[*] Navigating to login page: " + LOGIN_URL);
+        page.navigate(LOGIN_URL, new Page.NavigateOptions()
+                .setTimeout(30000)
+                .setWaitUntil(WaitUntilState.NETWORKIDLE));
+        sleep(2000);
+
+        // The dialog has to go before the form is touched.
+        closeNotificationDialog(page);
+        screenshot(page, "after_close_dialog");
+
+        // Downloading the captcha reloads the page, so the dialog is dismissed a second time.
+        downloadCaptcha(page);
+        closeNotificationDialog(page);
+
+        if (!doLogin(page)) {
+            System.out.println("[-] Login failed. Check screenshots/ for debugging.");
+            screenshot(page, "login_failed");
+            return;
+        }
+
+        System.out.println("[+] Login successful!");
+        sleep(2000);
+        screenshot(page, "after_login");
+
+        System.out.println("[+] Page title: " + page.title());
+        System.out.println("[+] Page URL: " + page.url());
+
+        String content = page.textContent("body");
+        if (content != null) {
+            System.out.println("[+] Page content preview:\n" + preview(content, PREVIEW_CHARS));
+        }
+    }
+
+    /**
+     * Fills the login form and submits it.
+     *
+     * @param page the page currently showing the login form
+     * @return {@code true} when the browser left the login page within ten seconds of
+     *     submitting; {@code false} when a form field could not be found or the browser is
+     *     still on the login page after that time
+     */
+    private static boolean doLogin(Page page) {
+        String usernameInput = findInput(page, USERNAME_SELECTORS);
+        if (usernameInput == null) {
+            System.out.println("[-] Could not find username input");
+            return false;
+        }
+
+        String passwordInput = findInput(page, PASSWORD_SELECTORS);
+        if (passwordInput == null) {
+            System.out.println("[-] Could not find password input");
+            return false;
+        }
+
+        System.out.println("[*] Filling credentials...");
+        page.locator(usernameInput).first().fill(USERNAME);
+        page.locator(passwordInput).first().fill(PASSWORD);
+        sleep(500);
+
+        // Click the submit control when there is one, otherwise submit with Enter.
+        String submitBtn = findSubmit(page);
+        if (submitBtn != null) {
+            System.out.println("[*] Clicking submit button: " + submitBtn);
+            page.locator(submitBtn).first().click();
+        } else {
+            System.out.println("[*] No submit button found, pressing Enter");
+            page.locator(passwordInput).first().press("Enter");
+        }
+
+        // Waiting for a load state alone cannot detect a rejected login: the login page is
+        // already loaded, so that wait succeeds immediately. Success is judged by the URL
+        // moving away from the login page instead.
+        // NOTE(review): assumes a successful login navigates away from userlogin.html - confirm.
+        try {
+            page.waitForURL(url -> !url.contains(LOGIN_PAGE_NAME),
+                    new Page.WaitForURLOptions()
+                            .setTimeout(10000)
+                            .setWaitUntil(WaitUntilState.DOMCONTENTLOADED));
+            return true;
+        } catch (PlaywrightException e) {
+            System.out.println("[!] Still on the login page after submitting credentials: "
+                    + page.url());
+            return false;
+        }
+    }
+
+    /**
+     * Reloads the page, captures the captcha image response triggered by the reload and
+     * writes its body to {@code screenshots/captcha.png}. Failures are logged, never thrown.
+     *
+     * @param page the page showing the login form; it is reloaded by this call
+     */
+    private static void downloadCaptcha(Page page) {
+        try {
+            // waitForResponse arms the listener before running the callback, so the request
+            // fired by the reload cannot be missed.
+            Response resp = page.waitForResponse(CAPTCHA_URL_GLOB, () ->
+                    page.reload(new Page.ReloadOptions()
+                            .setWaitUntil(WaitUntilState.NETWORKIDLE)
+                            .setTimeout(10000)));
+
+            if (!resp.ok()) {
+                // Do not save an error page under an image file name.
+                System.out.println("[-] Captcha request returned HTTP " + resp.status());
+                return;
+            }
+
+            byte[] body = resp.body();
+            Path captchaPath = SCREENSHOT_DIR.resolve("captcha.png");
+            java.nio.file.Files.write(captchaPath, body);
+            System.out.println("[+] Captcha saved to: " + captchaPath);
+            System.out.println("[+] Captcha size: " + body.length + " bytes");
+        } catch (Exception e) {
+            System.out.println("[-] Failed to download captcha: " + e.getMessage());
+        }
+    }
+
+    /**
+     * Dismisses the announcement dialog ({@code #Div_GG_Box}) in whichever frame contains it.
+     * Does nothing when no frame has the element.
+     *
+     * @param page the page whose frames are searched
+     */
+    private static void closeNotificationDialog(Page page) {
+        Frame dialogFrame = null;
+        for (Frame frame : page.frames()) {
+            try {
+                Object found = frame.evaluate(
+                        "() => document.getElementById('Div_GG_Box') !== null");
+                if (Boolean.TRUE.equals(found)) {
+                    dialogFrame = frame;
+                    break;
+                }
+            } catch (PlaywrightException ignored) {
+                // A frame that cannot be evaluated cannot be the one we want; keep looking.
+            }
+        }
+
+        if (dialogFrame == null) {
+            System.out.println("[*] No notification dialog found");
+            return;
+        }
+
+        System.out.println("[*] Closing notification dialog in frame: " + dialogFrame.url());
+
+        // Clicking the X is best effort: when the button is missing or hidden the click would
+        // otherwise wait out the default timeout and abort the whole run, skipping the
+        // JavaScript fallback below.
+        try {
+            dialogFrame.locator(".green_popup_close").first()
+                    .click(new Locator.ClickOptions().setTimeout(3000));
+        } catch (PlaywrightException e) {
+            System.out.println("[!] Could not click dialog close button: " + e.getMessage());
+        }
+        sleep(500);
+
+        // Force-hide via JS as well (the button's onclick uses jQuery, which may fail).
+        try {
+            dialogFrame.evaluate("() => {"
+                    + " const box = document.getElementById('Div_GG_Box');"
+                    + " if (box) { box.style.display = 'none'; }"
+                    + " }");
+        } catch (PlaywrightException e) {
+            System.out.println("[!] Could not hide dialog via script: " + e.getMessage());
+        }
+        sleep(500);
+
+        System.out.println("[*] Notification dialog closed");
+    }
+
+    /**
+     * Returns the first selector in {@code selectors} whose first match is visible.
+     *
+     * @param page the page to query
+     * @param selectors candidate selectors, tried in order
+     * @return the matching selector, or {@code null} when none is visible
+     */
+    private static String findInput(Page page, String[] selectors) {
+        return firstVisible(page, selectors);
+    }
+
+    /**
+     * Returns the first known submit-control selector whose first match is visible.
+     *
+     * @param page the page to query
+     * @return the matching selector, or {@code null} when none is visible
+     */
+    private static String findSubmit(Page page) {
+        return firstVisible(page, SUBMIT_SELECTORS);
+    }
+
+    /**
+     * Shared lookup behind {@link #findInput} and {@link #findSubmit}. The visibility check
+     * returns immediately; it does not wait for an element to appear.
+     *
+     * @param page the page to query
+     * @param selectors candidate selectors, tried in order
+     * @return the first selector with a visible match, or {@code null}
+     */
+    private static String firstVisible(Page page, String[] selectors) {
+        for (String selector : selectors) {
+            try {
+                if (page.locator(selector).first().isVisible()) {
+                    return selector;
+                }
+            } catch (PlaywrightException ignored) {
+                // A selector that cannot be evaluated simply does not match; try the next.
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Saves a screenshot as {@code screenshots/<name>_<yyyyMMdd_HHmmss>.png}. Failures are
+     * logged, never thrown.
+     *
+     * @param page the page to capture
+     * @param name file-name prefix describing the step being captured
+     */
+    private static void screenshot(Page page, String name) {
+        try {
+            String timestamp = LocalDateTime.now().format(SCREENSHOT_STAMP);
+            Path path = SCREENSHOT_DIR.resolve(name + "_" + timestamp + ".png");
+            page.screenshot(new Page.ScreenshotOptions().setPath(path));
+            System.out.println("[+] Screenshot saved: " + path);
+        } catch (Exception e) {
+            System.err.println("[-] Screenshot failed: " + e.getMessage());
+        }
+    }
+
+    /**
+     * Shortens {@code text} to at most {@code maxChars} characters, appending {@code "..."}
+     * when something was cut off.
+     *
+     * @param text the text to shorten; must not be {@code null}
+     * @param maxChars maximum number of characters kept from {@code text}
+     * @return {@code text} itself when it is short enough, otherwise the shortened form
+     */
+    private static String preview(String text, int maxChars) {
+        return text.length() > maxChars ? text.substring(0, maxChars) + "..." : text;
+    }
+
+    /**
+     * Reads an environment variable, falling back when it is unset or blank.
+     *
+     * @param name the environment variable name
+     * @param fallback the value used when the variable is unset or blank
+     * @return the variable's value, or {@code fallback}
+     */
+    private static String envOrDefault(String name, String fallback) {
+        String value = System.getenv(name);
+        return (value == null || value.isBlank()) ? fallback : value;
+    }
+
+    /**
+     * Pauses the current thread, restoring the interrupt flag if the pause is interrupted.
+     *
+     * @param ms pause length in milliseconds
+     */
+    private static void sleep(long ms) {
+        try {
+            Thread.sleep(ms);
+        } catch (InterruptedException e) {
+            Thread.currentThread().interrupt();
+        }
+    }
+}