Initial commit: ETS web scraper with Playwright
- Maven + Java 25 + Playwright 1.55.0
- Login automation for ETS construction waste management platform
- Captcha download via waitForResponse
- Notification dialog auto-close

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
46529e9de3
commit
882c1b9b38
9
.mvn/java-toolchains.xml
Normal file
9
.mvn/java-toolchains.xml
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<toolchains>
|
||||||
|
<toolchain>
|
||||||
|
<type>jdk</type>
|
||||||
|
<version>25</version>
|
||||||
|
<id>25.0.2-graalce</id>
|
||||||
|
<path>/Users/niko/.sdkman/candidates/java/25.0.2-graalce</path>
|
||||||
|
</toolchain>
|
||||||
|
</toolchains>
|
||||||
47
pom.xml
Normal file
47
pom.xml
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
<!-- Build descriptor for the ETS Playwright scraper: one runtime dependency (Playwright) and an
     exec-maven-plugin configuration that points at the scraper's main class. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.ets</groupId>
    <artifactId>ets-playwright</artifactId>
    <version>1.0.0</version>
    <packaging>jar</packaging>

    <properties>
        <!-- Single source of truth for the Java level. "release" sets source, target and the
             platform API together, so separate source/target properties are not needed. -->
        <maven.compiler.release>25</maven.compiler.release>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <playwright.version>1.55.0</playwright.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>com.microsoft.playwright</groupId>
            <artifactId>playwright</artifactId>
            <version>${playwright.version}</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.14.0</version>
                <configuration>
                    <release>${maven.compiler.release}</release>
                </configuration>
            </plugin>
            <plugin>
                <!-- Allows "mvn exec:java" to start the scraper directly. -->
                <groupId>org.codehaus.mojo</groupId>
                <artifactId>exec-maven-plugin</artifactId>
                <version>3.5.0</version>
                <configuration>
                    <mainClass>com.ets.scraper.EtsScraper</mainClass>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
|
||||||
262
src/main/java/com/ets/scraper/EtsScraper.java
Normal file
262
src/main/java/com/ets/scraper/EtsScraper.java
Normal file
@ -0,0 +1,262 @@
|
|||||||
|
package com.ets.scraper;
|
||||||
|
|
||||||
|
import com.microsoft.playwright.*;
|
||||||
|
import com.microsoft.playwright.options.LoadState;
|
||||||
|
import com.microsoft.playwright.options.WaitUntilState;
|
||||||
|
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
import java.time.format.DateTimeFormatter;
|
||||||
|
|
||||||
|
import static java.nio.file.Files.createDirectories;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* ETS (Construction Waste Management Platform) Web Scraper
|
||||||
|
* Uses Playwright to automate login and data extraction.
|
||||||
|
*/
|
||||||
|
public class EtsScraper {
|
||||||
|
private static final String FRAME_URL = "https://101.227.180.215/SHCityEnvCW/CWS/frame.html";
|
||||||
|
private static final String LOGIN_URL = "https://101.227.180.215/SHCityEnvCW/CWS/userlogin.html";
|
||||||
|
private static final String USERNAME = "sccw";
|
||||||
|
private static final String PASSWORD = "slife@123";
|
||||||
|
private static final Path SCREENSHOT_DIR = Path.of("screenshots");
|
||||||
|
|
||||||
|
public static void main(String[] args) {
|
||||||
|
try {
|
||||||
|
createDirectories(SCREENSHOT_DIR);
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.println("Failed to create screenshots dir: " + e.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
try (Playwright playwright = Playwright.create()) {
|
||||||
|
Browser browser = playwright.chromium().launch(
|
||||||
|
new BrowserType.LaunchOptions().setHeadless(false)
|
||||||
|
);
|
||||||
|
BrowserContext context = browser.newContext(
|
||||||
|
new Browser.NewContextOptions().setIgnoreHTTPSErrors(true)
|
||||||
|
);
|
||||||
|
Page page = context.newPage();
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Navigate to frame.html first to establish session/cookies
|
||||||
|
System.out.println("[*] Establishing session via " + FRAME_URL);
|
||||||
|
page.navigate(FRAME_URL, new Page.NavigateOptions()
|
||||||
|
.setTimeout(30000)
|
||||||
|
.setWaitUntil(WaitUntilState.DOMCONTENTLOADED));
|
||||||
|
sleep(3000);
|
||||||
|
|
||||||
|
// Navigate directly to the login page
|
||||||
|
System.out.println("[*] Navigating to login page: " + LOGIN_URL);
|
||||||
|
page.navigate(LOGIN_URL, new Page.NavigateOptions()
|
||||||
|
.setTimeout(30000)
|
||||||
|
.setWaitUntil(WaitUntilState.NETWORKIDLE));
|
||||||
|
sleep(2000);
|
||||||
|
|
||||||
|
// Close notification dialog FIRST (before filling credentials)
|
||||||
|
closeNotificationDialog(page);
|
||||||
|
|
||||||
|
screenshot(page, "after_close_dialog");
|
||||||
|
|
||||||
|
// Download captcha image
|
||||||
|
downloadCaptcha(page);
|
||||||
|
|
||||||
|
// Close dialog again after page reload
|
||||||
|
closeNotificationDialog(page);
|
||||||
|
|
||||||
|
// Perform login
|
||||||
|
boolean loggedin = doLogin(page);
|
||||||
|
|
||||||
|
if (loggedin) {
|
||||||
|
System.out.println("[+] Login successful!");
|
||||||
|
sleep(2000);
|
||||||
|
|
||||||
|
screenshot(page, "after_login");
|
||||||
|
|
||||||
|
System.out.println("[+] Page title: " + page.title());
|
||||||
|
System.out.println("[+] Page URL: " + page.url());
|
||||||
|
|
||||||
|
String content = page.textContent("body");
|
||||||
|
if (content != null) {
|
||||||
|
String preview = content.length() > 500
|
||||||
|
? content.substring(0, 500) + "..."
|
||||||
|
: content;
|
||||||
|
System.out.println("[+] Page content preview:\n" + preview);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
System.out.println("[-] Login failed. Check screenshots/ for debugging.");
|
||||||
|
screenshot(page, "login_failed");
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
browser.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean doLogin(Page page) {
|
||||||
|
// Find and fill username
|
||||||
|
String usernameInput = findInput(page, new String[]{
|
||||||
|
"input[placeholder*='用户名']",
|
||||||
|
"input[placeholder*='username']",
|
||||||
|
"input[placeholder*='账号']",
|
||||||
|
"input[name*='user']",
|
||||||
|
"input[name='username']",
|
||||||
|
"input[type='text']",
|
||||||
|
});
|
||||||
|
if (usernameInput == null) {
|
||||||
|
System.out.println("[-] Could not find username input");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find and fill password
|
||||||
|
String passwordInput = findInput(page, new String[]{
|
||||||
|
"input[placeholder*='密码']",
|
||||||
|
"input[placeholder*='password']",
|
||||||
|
"input[name*='pass']",
|
||||||
|
"input[name='password']",
|
||||||
|
"input[name='pwd']",
|
||||||
|
"input[type='password']",
|
||||||
|
});
|
||||||
|
if (passwordInput == null) {
|
||||||
|
System.out.println("[-] Could not find password input");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
System.out.println("[*] Filling credentials...");
|
||||||
|
page.locator(usernameInput).first().fill(USERNAME);
|
||||||
|
page.locator(passwordInput).first().fill(PASSWORD);
|
||||||
|
sleep(500);
|
||||||
|
|
||||||
|
// Find and click submit, or press Enter
|
||||||
|
String submitBtn = findSubmit(page);
|
||||||
|
if (submitBtn != null) {
|
||||||
|
System.out.println("[*] Clicking submit button: " + submitBtn);
|
||||||
|
page.locator(submitBtn).first().click();
|
||||||
|
} else {
|
||||||
|
System.out.println("[*] No submit button found, pressing Enter");
|
||||||
|
page.locator(passwordInput).first().press("Enter");
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
page.waitForLoadState(LoadState.DOMCONTENTLOADED,
|
||||||
|
new Page.WaitForLoadStateOptions().setTimeout(10000));
|
||||||
|
return true;
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.out.println("[!] Navigation timed out, but credentials were submitted");
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
private static void downloadCaptcha(Page page) {
|
||||||
|
try {
|
||||||
|
// Set up listener FIRST, then reload to trigger the request
|
||||||
|
Response resp = page.waitForResponse(
|
||||||
|
"https://101.227.180.215/SHCityEnvCW/Services/ValiDateImage.ashx*",
|
||||||
|
() -> {
|
||||||
|
page.reload(new Page.ReloadOptions()
|
||||||
|
.setWaitUntil(WaitUntilState.NETWORKIDLE)
|
||||||
|
.setTimeout(10000));
|
||||||
|
}
|
||||||
|
);
|
||||||
|
if (resp != null) {
|
||||||
|
byte[] body = resp.body();
|
||||||
|
Path captchaPath = SCREENSHOT_DIR.resolve("captcha.png");
|
||||||
|
java.nio.file.Files.write(captchaPath, body);
|
||||||
|
System.out.println("[+] Captcha saved to: " + captchaPath);
|
||||||
|
System.out.println("[+] Captcha size: " + body.length + " bytes");
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.out.println("[-] Failed to download captcha: " + e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
private static void closeNotificationDialog(Page page) {
|
||||||
|
// Find the frame that contains the notification dialog
|
||||||
|
Frame dialogFrame = null;
|
||||||
|
for (Frame f : page.frames()) {
|
||||||
|
try {
|
||||||
|
String hasDialog = (String) f.evaluate(
|
||||||
|
"() => document.getElementById('Div_GG_Box') ? 'FOUND' : 'NOT_HERE'");
|
||||||
|
if ("FOUND".equals(hasDialog)) {
|
||||||
|
dialogFrame = f;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} catch (Exception ignored) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dialogFrame == null) {
|
||||||
|
System.out.println("[*] No notification dialog found");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
System.out.println("[*] Closing notification dialog in frame: " + dialogFrame.url());
|
||||||
|
// Click the X button in the correct frame
|
||||||
|
dialogFrame.locator(".green_popup_close").first().click();
|
||||||
|
sleep(500);
|
||||||
|
|
||||||
|
// Force hide via JS in the correct frame (onclick uses jQuery which may fail)
|
||||||
|
dialogFrame.evaluate("document.getElementById('Div_GG_Box').style.display = 'none';");
|
||||||
|
sleep(500);
|
||||||
|
|
||||||
|
System.out.println("[*] Notification dialog closed");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
private static String findInput(Page page, String[] selectors) {
|
||||||
|
for (String selector : selectors) {
|
||||||
|
try {
|
||||||
|
if (page.locator(selector).first().isVisible(
|
||||||
|
new Locator.IsVisibleOptions().setTimeout(1000))) {
|
||||||
|
return selector;
|
||||||
|
}
|
||||||
|
} catch (Exception ignored) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String findSubmit(Page page) {
|
||||||
|
String[] selectors = new String[]{
|
||||||
|
"button[type='submit']",
|
||||||
|
"input[type='submit']",
|
||||||
|
"button:has-text('登录')",
|
||||||
|
"button:has-text('Login')",
|
||||||
|
".login-btn",
|
||||||
|
"#loginBtn",
|
||||||
|
};
|
||||||
|
for (String selector : selectors) {
|
||||||
|
try {
|
||||||
|
if (page.locator(selector).first().isVisible(
|
||||||
|
new Locator.IsVisibleOptions().setTimeout(1000))) {
|
||||||
|
return selector;
|
||||||
|
}
|
||||||
|
} catch (Exception ignored) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void screenshot(Page page, String name) {
|
||||||
|
try {
|
||||||
|
String timestamp = LocalDateTime.now()
|
||||||
|
.format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss"));
|
||||||
|
Path path = SCREENSHOT_DIR.resolve(name + "_" + timestamp + ".png");
|
||||||
|
page.screenshot(new Page.ScreenshotOptions().setPath(path));
|
||||||
|
System.out.println("[+] Screenshot saved: " + path);
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.println("[-] Screenshot failed: " + e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void sleep(long ms) {
|
||||||
|
try {
|
||||||
|
Thread.sleep(ms);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user