fix: 修复导出下载逻辑 — 使用 onDownload + saveAs 替代 sleep
之前只 sleep(5000) 就关浏览器,不配置下载目录,也不等待下载完成。现在用 page.onDownload 捕获下载事件,saveAs 阻塞等待完成,并校验文件大小防止空文件。
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
f1d3036c7a
commit
b9e17a2588
Binary file not shown.
|
Before Width: | Height: | Size: 121 KiB |
@ -1,26 +1,21 @@
|
|||||||
package com.ets.scraper;
|
package com.ets.scraper;
|
||||||
|
|
||||||
import com.microsoft.playwright.*;
|
import com.microsoft.playwright.*;
|
||||||
|
import com.microsoft.playwright.Frame;
|
||||||
import com.microsoft.playwright.options.LoadState;
|
import com.microsoft.playwright.options.LoadState;
|
||||||
import com.microsoft.playwright.options.WaitUntilState;
|
import com.microsoft.playwright.options.WaitUntilState;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
import javax.imageio.ImageIO;
|
||||||
import java.io.FileOutputStream;
|
import java.awt.*;
|
||||||
import java.io.IOException;
|
import java.awt.image.BufferedImage;
|
||||||
import java.io.InputStreamReader;
|
import java.io.*;
|
||||||
import java.net.HttpURLConnection;
|
import java.net.HttpURLConnection;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.awt.image.BufferedImage;
|
|
||||||
import java.awt.Image;
|
|
||||||
import java.awt.Graphics2D;
|
|
||||||
import java.io.ByteArrayInputStream;
|
|
||||||
import java.io.ByteArrayOutputStream;
|
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.time.LocalDateTime;
|
import java.time.LocalDateTime;
|
||||||
import java.time.format.DateTimeFormatter;
|
import java.time.format.DateTimeFormatter;
|
||||||
import java.util.Base64;
|
import java.util.Base64;
|
||||||
import javax.imageio.ImageIO;
|
|
||||||
|
|
||||||
import static java.nio.file.Files.createDirectories;
|
import static java.nio.file.Files.createDirectories;
|
||||||
|
|
||||||
@ -41,7 +36,7 @@ public class EtsScraper {
|
|||||||
try {
|
try {
|
||||||
createDirectories(SCREENSHOT_DIR);
|
createDirectories(SCREENSHOT_DIR);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
System.err.println("Failed to create screenshots dir: " + e.getMessage());
|
System.err.println("Failed to create directories: " + e.getMessage());
|
||||||
}
|
}
|
||||||
|
|
||||||
try (Playwright playwright = Playwright.create()) {
|
try (Playwright playwright = Playwright.create()) {
|
||||||
@ -91,6 +86,92 @@ public class EtsScraper {
|
|||||||
System.out.println("[+] Page title: " + page.title());
|
System.out.println("[+] Page title: " + page.title());
|
||||||
System.out.println("[+] Page URL: " + page.url());
|
System.out.println("[+] Page URL: " + page.url());
|
||||||
|
|
||||||
|
// 点击三联单菜单
|
||||||
|
System.out.println("[*] Clicking 三联单 menu...");
|
||||||
|
page.locator("#module_2094F683-C542-4904-B33E-0D227C4DE199").first().click();
|
||||||
|
sleep(3000);
|
||||||
|
|
||||||
|
screenshot(page, "after_sanliandan");
|
||||||
|
System.out.println("[+] 三联单 page title: " + page.title());
|
||||||
|
|
||||||
|
// 设置日期筛选:选择昨天的日期
|
||||||
|
java.time.LocalDate yesterday = java.time.LocalDate.now().minusDays(1);
|
||||||
|
String yesterdayStr = yesterday.format(java.time.format.DateTimeFormatter.ofPattern("yyyy-MM-dd"));
|
||||||
|
System.out.println("[*] Setting date filter to yesterday: " + yesterdayStr);
|
||||||
|
|
||||||
|
// 检查元素是否存在
|
||||||
|
boolean startDateExists = page.locator("#Search_ThreeBillList_startWdate").count() > 0;
|
||||||
|
boolean endDateExists = page.locator("#Search_ThreeBillList_endWdate").count() > 0;
|
||||||
|
boolean queryBtnExists = page.locator("#Search_ThreeBillList_Button").count() > 0;
|
||||||
|
System.out.println("[*] Elements found - startDate: " + startDateExists + ", endDate: " + endDateExists + ", queryBtn: " + queryBtnExists);
|
||||||
|
|
||||||
|
// 直接设置日期值(WdatePicker 类型输入框)
|
||||||
|
if (startDateExists) {
|
||||||
|
System.out.println("[*] Setting start date to: " + yesterdayStr);
|
||||||
|
page.locator("#Search_ThreeBillList_startWdate").first().fill(yesterdayStr);
|
||||||
|
sleep(500);
|
||||||
|
} else {
|
||||||
|
System.out.println("[!] Start date element not found");
|
||||||
|
}
|
||||||
|
|
||||||
|
// 设置结束日期(也用昨天的日期)
|
||||||
|
if (endDateExists) {
|
||||||
|
System.out.println("[*] Setting end date to: " + yesterdayStr);
|
||||||
|
page.locator("#Search_ThreeBillList_endWdate").first().fill(yesterdayStr);
|
||||||
|
sleep(500);
|
||||||
|
} else {
|
||||||
|
System.out.println("[!] End date element not found");
|
||||||
|
}
|
||||||
|
|
||||||
|
// 点击查询按钮,等待列表加载
|
||||||
|
if (queryBtnExists) {
|
||||||
|
System.out.println("[*] Clicking query button...");
|
||||||
|
page.locator("#Search_ThreeBillList_Button").first().click();
|
||||||
|
|
||||||
|
// 等待列表内容出现
|
||||||
|
try {
|
||||||
|
page.waitForSelector("tbody tr", new Page.WaitForSelectorOptions()
|
||||||
|
.setTimeout(30000));
|
||||||
|
System.out.println("[+] Query completed, list loaded");
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.out.println("[!] Wait for list timeout, but query was submitted");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
System.out.println("[!] Query button not found");
|
||||||
|
}
|
||||||
|
|
||||||
|
screenshot(page, "after_query");
|
||||||
|
|
||||||
|
// 点击导出按钮
|
||||||
|
boolean exportBtnExists = page.locator("#Export_ThreeBillList_Button").count() > 0;
|
||||||
|
if (exportBtnExists) {
|
||||||
|
System.out.println("[*] Clicking export button...");
|
||||||
|
Download[] downloadHolder = new Download[1];
|
||||||
|
page.onDownload((java.util.function.Consumer<Download>) download -> {
|
||||||
|
downloadHolder[0] = download;
|
||||||
|
});
|
||||||
|
page.locator("#Export_ThreeBillList_Button").first().click();
|
||||||
|
if (downloadHolder[0] != null) {
|
||||||
|
System.out.println("[*] Waiting for download to complete...");
|
||||||
|
Download dl = downloadHolder[0];
|
||||||
|
Path downloadPath = Path.of("downloads");
|
||||||
|
java.nio.file.Files.createDirectories(downloadPath);
|
||||||
|
Path savedFile = downloadPath.resolve(dl.suggestedFilename());
|
||||||
|
dl.saveAs(savedFile);
|
||||||
|
System.out.println("[+] Downloaded to: " + savedFile);
|
||||||
|
if (java.nio.file.Files.size(savedFile) == 0) {
|
||||||
|
System.out.println("[-] Downloaded file is empty");
|
||||||
|
} else {
|
||||||
|
System.out.println("[+] Download size: " + java.nio.file.Files.size(savedFile) + " bytes");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
System.out.println("[!] Export button not found");
|
||||||
|
}
|
||||||
|
|
||||||
|
screenshot(page, "after_export");
|
||||||
|
System.out.println("[+] Query and export completed!");
|
||||||
|
|
||||||
String content = page.textContent("body");
|
String content = page.textContent("body");
|
||||||
if (content != null) {
|
if (content != null) {
|
||||||
String preview = content.length() > 500
|
String preview = content.length() > 500
|
||||||
@ -108,103 +189,8 @@ public class EtsScraper {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean doLogin(Page page) {
|
|
||||||
// Find and fill username
|
|
||||||
String usernameInput = findInput(page, new String[]{
|
|
||||||
"input[placeholder*='用户名']",
|
|
||||||
"input[placeholder*='username']",
|
|
||||||
"input[placeholder*='账号']",
|
|
||||||
"input[name*='user']",
|
|
||||||
"input[name='username']",
|
|
||||||
"input[type='text']",
|
|
||||||
});
|
|
||||||
if (usernameInput == null) {
|
|
||||||
System.out.println("[-] Could not find username input");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find and fill password
|
|
||||||
String passwordInput = findInput(page, new String[]{
|
|
||||||
"input[placeholder*='密码']",
|
|
||||||
"input[placeholder*='password']",
|
|
||||||
"input[name*='pass']",
|
|
||||||
"input[name='password']",
|
|
||||||
"input[name='pwd']",
|
|
||||||
"input[type='password']",
|
|
||||||
});
|
|
||||||
if (passwordInput == null) {
|
|
||||||
System.out.println("[-] Could not find password input");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
System.out.println("[*] Filling credentials...");
|
|
||||||
page.locator(usernameInput).first().fill(USERNAME);
|
|
||||||
page.locator(passwordInput).first().fill(PASSWORD);
|
|
||||||
sleep(500);
|
|
||||||
|
|
||||||
// Find and click submit, or press Enter
|
|
||||||
String submitBtn = findSubmit(page);
|
|
||||||
if (submitBtn != null) {
|
|
||||||
System.out.println("[*] Clicking submit button: " + submitBtn);
|
|
||||||
page.locator(submitBtn).first().click();
|
|
||||||
} else {
|
|
||||||
System.out.println("[*] No submit button found, pressing Enter");
|
|
||||||
page.locator(passwordInput).first().press("Enter");
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
page.waitForLoadState(LoadState.DOMCONTENTLOADED,
|
|
||||||
new Page.WaitForLoadStateOptions().setTimeout(10000));
|
|
||||||
return true;
|
|
||||||
} catch (Exception e) {
|
|
||||||
System.out.println("[!] Navigation timed out, but credentials were submitted");
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean doLoginWithCaptcha(Page page) throws Exception {
|
public static boolean doLoginWithCaptcha(Page page) throws Exception {
|
||||||
// Find and fill username
|
// Recognize captcha first
|
||||||
String usernameInput = findInput(page, new String[]{
|
|
||||||
"input[placeholder*='用户名']",
|
|
||||||
"input[placeholder*='username']",
|
|
||||||
"input[placeholder*='账号']",
|
|
||||||
"input[name*='user']",
|
|
||||||
"input[name='username']",
|
|
||||||
"input[type='text']",
|
|
||||||
});
|
|
||||||
if (usernameInput == null) {
|
|
||||||
System.out.println("[-] Could not find username input");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find and fill password
|
|
||||||
String passwordInput = findInput(page, new String[]{
|
|
||||||
"input[placeholder*='密码']",
|
|
||||||
"input[placeholder*='password']",
|
|
||||||
"input[name*='pass']",
|
|
||||||
"input[name='password']",
|
|
||||||
"input[name='pwd']",
|
|
||||||
"input[type='password']",
|
|
||||||
});
|
|
||||||
if (passwordInput == null) {
|
|
||||||
System.out.println("[-] Could not find password input");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find and fill captcha
|
|
||||||
String captchaInput = findInput(page, new String[]{
|
|
||||||
"input[placeholder*='验证码']",
|
|
||||||
"input[placeholder*='captcha']",
|
|
||||||
"input[name*='captcha']",
|
|
||||||
"input[name='code']",
|
|
||||||
"input[type='text']",
|
|
||||||
});
|
|
||||||
if (captchaInput == null) {
|
|
||||||
System.out.println("[-] Could not find captcha input");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Recognize captcha
|
|
||||||
Path captchaPath = SCREENSHOT_DIR.resolve("captcha.png");
|
Path captchaPath = SCREENSHOT_DIR.resolve("captcha.png");
|
||||||
System.out.println("[*] Recognizing captcha with Ollama...");
|
System.out.println("[*] Recognizing captcha with Ollama...");
|
||||||
String captchaText = recognizeCaptcha(captchaPath);
|
String captchaText = recognizeCaptcha(captchaPath);
|
||||||
@ -214,21 +200,18 @@ public class EtsScraper {
|
|||||||
}
|
}
|
||||||
System.out.println("[+] Captcha recognized: " + captchaText);
|
System.out.println("[+] Captcha recognized: " + captchaText);
|
||||||
|
|
||||||
|
// Fill using correct ID selectors
|
||||||
System.out.println("[*] Filling credentials...");
|
System.out.println("[*] Filling credentials...");
|
||||||
page.locator(usernameInput).first().fill(USERNAME);
|
page.locator("#inputLoginUser").first().fill(USERNAME);
|
||||||
page.locator(passwordInput).first().fill(PASSWORD);
|
sleep(300);
|
||||||
page.locator(captchaInput).first().fill(captchaText);
|
page.locator("#inputLoginPassWord").first().fill(PASSWORD);
|
||||||
|
sleep(300);
|
||||||
|
page.locator("#txt_ValidatePic").first().fill(captchaText);
|
||||||
sleep(500);
|
sleep(500);
|
||||||
|
|
||||||
// Click submit or press Enter
|
// Click submit button
|
||||||
String submitBtn = findSubmit(page);
|
System.out.println("[*] Clicking login button...");
|
||||||
if (submitBtn != null) {
|
page.locator("#inputLoginButton").first().click();
|
||||||
System.out.println("[*] Clicking submit button: " + submitBtn);
|
|
||||||
page.locator(submitBtn).first().click();
|
|
||||||
} else {
|
|
||||||
System.out.println("[*] No submit button found, pressing Enter");
|
|
||||||
page.locator(captchaInput).first().press("Enter");
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
page.waitForLoadState(LoadState.DOMCONTENTLOADED,
|
page.waitForLoadState(LoadState.DOMCONTENTLOADED,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user