diff --git a/src/main/java/com/ets/scraper/EtsScraper.java b/src/main/java/com/ets/scraper/EtsScraper.java index bc36184..b1e0310 100644 --- a/src/main/java/com/ets/scraper/EtsScraper.java +++ b/src/main/java/com/ets/scraper/EtsScraper.java @@ -4,11 +4,17 @@ import com.microsoft.playwright.*; import com.microsoft.playwright.options.LoadState; import com.microsoft.playwright.options.WaitUntilState; +import java.io.BufferedReader; import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStreamReader; +import java.net.HttpURLConnection; +import java.net.URL; +import java.nio.file.Files; import java.nio.file.Path; import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; +import java.util.Base64; import static java.nio.file.Files.createDirectories; @@ -22,51 +28,53 @@ public class EtsScraper { private static final String USERNAME = "sccw"; private static final String PASSWORD = "slife@123"; private static final Path SCREENSHOT_DIR = Path.of("screenshots"); + private static final String OLLAMA_URL = "http://10.0.1.39:11434"; + private static final String OLLAMA_MODEL = "qwen3-vl:4b"; - public static void main(String[] args) { + public static void main(String[] args) throws Exception { try { createDirectories(SCREENSHOT_DIR); - } catch (Exception e) { + } catch (Exception e) { System.err.println("Failed to create screenshots dir: " + e.getMessage()); - } + } try (Playwright playwright = Playwright.create()) { Browser browser = playwright.chromium().launch( new BrowserType.LaunchOptions().setHeadless(false) - ); + ); BrowserContext context = browser.newContext( new Browser.NewContextOptions().setIgnoreHTTPSErrors(true) - ); + ); Page page = context.newPage(); try { - // Navigate to frame.html first to establish session/cookies + // Navigate to frame.html first to establish session/cookies System.out.println("[*] Establishing session via " + FRAME_URL); page.navigate(FRAME_URL, new Page.NavigateOptions() - .setTimeout(30000) - .setWaitUntil(WaitUntilState.DOMCONTENTLOADED)); + .setTimeout(30000) + .setWaitUntil(WaitUntilState.DOMCONTENTLOADED)); sleep(3000); - // Navigate directly to the login page + // Navigate directly to the login page System.out.println("[*] Navigating to login page: " + LOGIN_URL); page.navigate(LOGIN_URL, new Page.NavigateOptions() - .setTimeout(30000) - .setWaitUntil(WaitUntilState.NETWORKIDLE)); + .setTimeout(30000) + .setWaitUntil(WaitUntilState.NETWORKIDLE)); sleep(2000); - // Close notification dialog FIRST (before filling credentials) + // Close notification dialog FIRST (before filling credentials) closeNotificationDialog(page); screenshot(page, "after_close_dialog"); - // Download captcha image + // Download captcha image downloadCaptcha(page); - // Close dialog again after page reload + // Close dialog again after page reload closeNotificationDialog(page); - // Perform login - boolean loggedin = doLogin(page); + // Recognize captcha and perform login + boolean loggedin = doLoginWithCaptcha(page); if (loggedin) { System.out.println("[+] Login successful!"); @@ -80,131 +88,206 @@ public class EtsScraper { String content = page.textContent("body"); if (content != null) { String preview = content.length() > 500 - ? content.substring(0, 500) + "..." - : content; + ? content.substring(0, 500) + "..." + : content; System.out.println("[+] Page content preview:\n" + preview); - } - } else { + } + } else { System.out.println("[-] Login failed. Check screenshots/ for debugging."); screenshot(page, "login_failed"); - } - } finally { + } + } finally { browser.close(); - } - } - } + } + } + } private static boolean doLogin(Page page) { - // Find and fill username + // Find and fill username String usernameInput = findInput(page, new String[]{ - "input[placeholder*='用户名']", - "input[placeholder*='username']", - "input[placeholder*='账号']", - "input[name*='user']", - "input[name='username']", - "input[type='text']", - }); + "input[placeholder*='用户名']", + "input[placeholder*='username']", + "input[placeholder*='账号']", + "input[name*='user']", + "input[name='username']", + "input[type='text']", + }); if (usernameInput == null) { System.out.println("[-] Could not find username input"); return false; - } + } - // Find and fill password + // Find and fill password String passwordInput = findInput(page, new String[]{ - "input[placeholder*='密码']", - "input[placeholder*='password']", - "input[name*='pass']", - "input[name='password']", - "input[name='pwd']", - "input[type='password']", - }); + "input[placeholder*='密码']", + "input[placeholder*='password']", + "input[name*='pass']", + "input[name='password']", + "input[name='pwd']", + "input[type='password']", + }); if (passwordInput == null) { System.out.println("[-] Could not find password input"); return false; - } + } System.out.println("[*] Filling credentials..."); page.locator(usernameInput).first().fill(USERNAME); page.locator(passwordInput).first().fill(PASSWORD); sleep(500); - // Find and click submit, or press Enter + // Find and click submit, or press Enter String submitBtn = findSubmit(page); if (submitBtn != null) { System.out.println("[*] Clicking submit button: " + submitBtn); page.locator(submitBtn).first().click(); - } else { + } else { System.out.println("[*] No submit button found, pressing Enter"); page.locator(passwordInput).first().press("Enter"); - } + } try { page.waitForLoadState(LoadState.DOMCONTENTLOADED, new Page.WaitForLoadStateOptions().setTimeout(10000)); return true; - } catch (Exception e) { + } catch (Exception e) { System.out.println("[!] Navigation timed out, but credentials were submitted"); return true; - } - } + } + } + + + private static boolean doLoginWithCaptcha(Page page) throws Exception { + // Find and fill username + String usernameInput = findInput(page, new String[]{ + "input[placeholder*='用户名']", + "input[placeholder*='username']", + "input[placeholder*='账号']", + "input[name*='user']", + "input[name='username']", + "input[type='text']", + }); + if (usernameInput == null) { + System.out.println("[-] Could not find username input"); + return false; + } + + // Find and fill password + String passwordInput = findInput(page, new String[]{ + "input[placeholder*='密码']", + "input[placeholder*='password']", + "input[name*='pass']", + "input[name='password']", + "input[name='pwd']", + "input[type='password']", + }); + if (passwordInput == null) { + System.out.println("[-] Could not find password input"); + return false; + } + + // Find and fill captcha + String captchaInput = findInput(page, new String[]{ + "input[placeholder*='验证码']", + "input[placeholder*='captcha']", + "input[name*='captcha']", + "input[name='code']", + "input[type='text']", + }); + if (captchaInput == null) { + System.out.println("[-] Could not find captcha input"); + return false; + } + + // Recognize captcha + Path captchaPath = SCREENSHOT_DIR.resolve("captcha.png"); + System.out.println("[*] Recognizing captcha with Ollama..."); + String captchaText = recognizeCaptcha(captchaPath); + if (captchaText == null || captchaText.isEmpty()) { + System.out.println("[-] Failed to recognize captcha"); + return false; + } + System.out.println("[+] Captcha recognized: " + captchaText); + + System.out.println("[*] Filling credentials..."); + page.locator(usernameInput).first().fill(USERNAME); + page.locator(passwordInput).first().fill(PASSWORD); + page.locator(captchaInput).first().fill(captchaText); + sleep(500); + + // Click submit or press Enter + String submitBtn = findSubmit(page); + if (submitBtn != null) { + System.out.println("[*] Clicking submit button: " + submitBtn); + page.locator(submitBtn).first().click(); + } else { + System.out.println("[*] No submit button found, pressing Enter"); + page.locator(captchaInput).first().press("Enter"); + } + + try { + page.waitForLoadState(LoadState.DOMCONTENTLOADED, + new Page.WaitForLoadStateOptions().setTimeout(10000)); + return true; + } catch (Exception e) { + System.out.println("[!] Navigation timed out, but credentials were submitted"); + return true; + } + } private static void downloadCaptcha(Page page) { try { - // Set up listener FIRST, then reload to trigger the request + // Set up listener FIRST, then reload to trigger the request Response resp = page.waitForResponse( - "https://101.227.180.215/SHCityEnvCW/Services/ValiDateImage.ashx*", - () -> { - page.reload(new Page.ReloadOptions() - .setWaitUntil(WaitUntilState.NETWORKIDLE) - .setTimeout(10000)); - } - ); + "https://101.227.180.215/SHCityEnvCW/Services/ValiDateImage.ashx*", + () -> { + page.reload(new Page.ReloadOptions() + .setWaitUntil(WaitUntilState.NETWORKIDLE) + .setTimeout(10000)); + } + ); if (resp != null) { byte[] body = resp.body(); Path captchaPath = SCREENSHOT_DIR.resolve("captcha.png"); java.nio.file.Files.write(captchaPath, body); System.out.println("[+] Captcha saved to: " + captchaPath); System.out.println("[+] Captcha size: " + body.length + " bytes"); - } - } catch (Exception e) { + } + } catch (Exception e) { System.out.println("[-] Failed to download captcha: " + e.getMessage()); - } - } - - + } + } private static void closeNotificationDialog(Page page) { - // Find the frame that contains the notification dialog + // Find the frame that contains the notification dialog Frame dialogFrame = null; for (Frame f : page.frames()) { try { String hasDialog = (String) f.evaluate( - "() => document.getElementById('Div_GG_Box') ? 'FOUND' : 'NOT_HERE'"); + "() => document.getElementById('Div_GG_Box') ? 'FOUND' : 'NOT_HERE'"); if ("FOUND".equals(hasDialog)) { dialogFrame = f; break; - } - } catch (Exception ignored) { - } - } + } + } catch (Exception ignored) { + } + } if (dialogFrame == null) { System.out.println("[*] No notification dialog found"); return; - } + } System.out.println("[*] Closing notification dialog in frame: " + dialogFrame.url()); - // Click the X button in the correct frame + // Click the X button in the correct frame dialogFrame.locator(".green_popup_close").first().click(); sleep(500); - // Force hide via JS in the correct frame (onclick uses jQuery which may fail) + // Force hide via JS in the correct frame (onclick uses jQuery which may fail) dialogFrame.evaluate("document.getElementById('Div_GG_Box').style.display = 'none';"); sleep(500); System.out.println("[*] Notification dialog closed"); - } - - + } private static String findInput(Page page, String[] selectors) { for (String selector : selectors) { @@ -212,51 +295,101 @@ public class EtsScraper { if (page.locator(selector).first().isVisible( new Locator.IsVisibleOptions().setTimeout(1000))) { return selector; - } - } catch (Exception ignored) { - } - } + } + } catch (Exception ignored) { + } + } return null; - } + } private static String findSubmit(Page page) { String[] selectors = new String[]{ - "button[type='submit']", - "input[type='submit']", - "button:has-text('登录')", - "button:has-text('Login')", - ".login-btn", - "#loginBtn", - }; + "button[type='submit']", + "input[type='submit']", + "button:has-text('登录')", + "button:has-text('Login')", + ".login-btn", + "#loginBtn", + }; for (String selector : selectors) { try { if (page.locator(selector).first().isVisible( new Locator.IsVisibleOptions().setTimeout(1000))) { return selector; - } - } catch (Exception ignored) { - } - } + } + } catch (Exception ignored) { + } + } return null; - } + } private static void screenshot(Page page, String name) { try { String timestamp = LocalDateTime.now() - .format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss")); + .format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss")); Path path = SCREENSHOT_DIR.resolve(name + "_" + timestamp + ".png"); page.screenshot(new Page.ScreenshotOptions().setPath(path)); System.out.println("[+] Screenshot saved: " + path); - } catch (Exception e) { + } catch (Exception e) { System.err.println("[-] Screenshot failed: " + e.getMessage()); - } - } + } + } private static void sleep(long ms) { try { Thread.sleep(ms); - } catch (InterruptedException e) { + } catch (InterruptedException e) { Thread.currentThread().interrupt(); - } - } -} + } + } + + public static String recognizeCaptcha(Path imagePath) throws Exception { + byte[] imageBytes = Files.readAllBytes(imagePath); + String base64 = Base64.getEncoder().encodeToString(imageBytes); + + String json = "{" + + "\"model\":\"" + OLLAMA_MODEL + "\"," + + "\"messages\":[" + + " {" + + " \"role\":\"user\"," + + " \"content\":\"识别图中的验证码文字,只返回文字内容,不要有其他解释\"," + + " \"images\":[\"" + base64 + "\"]" + + " }" + + "]" + + "}"; + + URL url = new URL(OLLAMA_URL + "/api/chat"); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestMethod("POST"); + conn.setConnectTimeout(15000); + conn.setReadTimeout(30000); + conn.setDoOutput(true); + conn.setRequestProperty("Content-Type", "application/json; charset=utf-8"); + + conn.getOutputStream().write(json.getBytes("utf-8")); + conn.getOutputStream().flush(); + conn.getOutputStream().close(); + + try (BufferedReader reader = new BufferedReader( + new InputStreamReader(conn.getInputStream(), "utf-8"))) { + StringBuilder sb = new StringBuilder(); + String line; + while ((line = reader.readLine()) != null) { + sb.append(line); + } + String response = sb.toString(); + // Parse "content":"..." from the JSON response + int contentIdx = response.indexOf("\"content\":"); + if (contentIdx >= 0) { + int start = response.indexOf('"', contentIdx + 10) + 1; + int end = response.indexOf('"', start); + if (start > 0 && end > start) { + return response.substring(start, end).trim(); + } + } + return null; + } finally { + conn.disconnect(); + } + } + }