fix: 修复验证码识别模块
- 添加图像放大功能 (按整数倍放大,目标宽度 200px)
- 修复流式响应解析 (逐行解析 JSON,累加 content 字段)
- 添加 Graphics2D 导入用于图像缩放

验证结果:验证码 9347 识别成功
This commit is contained in:
parent
6abec1860b
commit
0bd4065230
@ -11,6 +11,8 @@ import java.io.InputStreamReader;
|
|||||||
import java.net.HttpURLConnection;
|
import java.net.HttpURLConnection;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.awt.image.BufferedImage;
|
import java.awt.image.BufferedImage;
|
||||||
|
import java.awt.Image;
|
||||||
|
import java.awt.Graphics2D;
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.ByteArrayInputStream;
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
@ -92,7 +94,7 @@ public class EtsScraper {
|
|||||||
String content = page.textContent("body");
|
String content = page.textContent("body");
|
||||||
if (content != null) {
|
if (content != null) {
|
||||||
String preview = content.length() > 500
|
String preview = content.length() > 500
|
||||||
? content.substring(0, 500) + "..."
|
? content.substring(0, 500) + ".."
|
||||||
: content;
|
: content;
|
||||||
System.out.println("[+] Page content preview:\n" + preview);
|
System.out.println("[+] Page content preview:\n" + preview);
|
||||||
}
|
}
|
||||||
@ -160,7 +162,6 @@ public class EtsScraper {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private static boolean doLoginWithCaptcha(Page page) throws Exception {
|
private static boolean doLoginWithCaptcha(Page page) throws Exception {
|
||||||
// Find and fill username
|
// Find and fill username
|
||||||
String usernameInput = findInput(page, new String[]{
|
String usernameInput = findInput(page, new String[]{
|
||||||
@ -238,6 +239,7 @@ public class EtsScraper {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void downloadCaptcha(Page page) {
|
private static void downloadCaptcha(Page page) {
|
||||||
try {
|
try {
|
||||||
// Set up listener FIRST, then reload to trigger the request
|
// Set up listener FIRST, then reload to trigger the request
|
||||||
@ -350,16 +352,28 @@ public class EtsScraper {
|
|||||||
public static String recognizeCaptcha(Path imagePath) throws Exception {
|
public static String recognizeCaptcha(Path imagePath) throws Exception {
|
||||||
byte[] imageBytes = Files.readAllBytes(imagePath);
|
byte[] imageBytes = Files.readAllBytes(imagePath);
|
||||||
|
|
||||||
// Convert GIF to PNG (Ollama doesn't support GIF)
|
// Convert GIF to PNG and resize (Ollama qwen3-vl needs larger PNG images)
|
||||||
ByteArrayInputStream bais = new ByteArrayInputStream(imageBytes);
|
ByteArrayInputStream bais = new ByteArrayInputStream(imageBytes);
|
||||||
BufferedImage gifImage = ImageIO.read(bais);
|
BufferedImage srcImage = ImageIO.read(bais);
|
||||||
if (gifImage == null) {
|
if (srcImage == null) {
|
||||||
// Fallback: send raw bytes if conversion fails
|
|
||||||
String base64 = Base64.getEncoder().encodeToString(imageBytes);
|
String base64 = Base64.getEncoder().encodeToString(imageBytes);
|
||||||
return callOllama(base64);
|
return callOllama(base64);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Resize to at least 200px width for better recognition
|
||||||
|
int scale = Math.max(1, 200 / srcImage.getWidth());
|
||||||
|
if (scale < 1) scale = 1;
|
||||||
|
int newWidth = srcImage.getWidth() * scale;
|
||||||
|
int newHeight = srcImage.getHeight() * scale;
|
||||||
|
|
||||||
|
Image scaled = srcImage.getScaledInstance(newWidth, newHeight, Image.SCALE_SMOOTH);
|
||||||
|
BufferedImage resized = new BufferedImage(newWidth, newHeight, BufferedImage.TYPE_INT_RGB);
|
||||||
|
Graphics2D g2d = resized.createGraphics();
|
||||||
|
g2d.drawImage(scaled, 0, 0, null);
|
||||||
|
g2d.dispose();
|
||||||
|
|
||||||
ByteArrayOutputStream pngOut = new ByteArrayOutputStream();
|
ByteArrayOutputStream pngOut = new ByteArrayOutputStream();
|
||||||
ImageIO.write(gifImage, "png", pngOut);
|
ImageIO.write(resized, "png", pngOut);
|
||||||
byte[] pngBytes = pngOut.toByteArray();
|
byte[] pngBytes = pngOut.toByteArray();
|
||||||
String base64 = Base64.getEncoder().encodeToString(pngBytes);
|
String base64 = Base64.getEncoder().encodeToString(pngBytes);
|
||||||
return callOllama(base64);
|
return callOllama(base64);
|
||||||
@ -371,7 +385,7 @@ public class EtsScraper {
|
|||||||
+ "\"messages\":["
|
+ "\"messages\":["
|
||||||
+ " {"
|
+ " {"
|
||||||
+ " \"role\":\"user\","
|
+ " \"role\":\"user\","
|
||||||
+ " \"content\":\"识别图中的验证码文字,只返回文字内容,不要有其他解释\","
|
+ " \"content\":\"识别图中的验证码文字,只返回文字内容\","
|
||||||
+ " \"images\":[\"" + base64Image + "\"]"
|
+ " \"images\":[\"" + base64Image + "\"]"
|
||||||
+ " }"
|
+ " }"
|
||||||
+ "]"
|
+ "]"
|
||||||
@ -381,7 +395,7 @@ public class EtsScraper {
|
|||||||
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
|
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
|
||||||
conn.setRequestMethod("POST");
|
conn.setRequestMethod("POST");
|
||||||
conn.setConnectTimeout(15000);
|
conn.setConnectTimeout(15000);
|
||||||
conn.setReadTimeout(30000);
|
conn.setReadTimeout(60000);
|
||||||
conn.setDoOutput(true);
|
conn.setDoOutput(true);
|
||||||
conn.setRequestProperty("Content-Type", "application/json; charset=utf-8");
|
conn.setRequestProperty("Content-Type", "application/json; charset=utf-8");
|
||||||
|
|
||||||
@ -389,26 +403,45 @@ public class EtsScraper {
|
|||||||
conn.getOutputStream().flush();
|
conn.getOutputStream().flush();
|
||||||
conn.getOutputStream().close();
|
conn.getOutputStream().close();
|
||||||
|
|
||||||
try (BufferedReader reader = new BufferedReader(
|
BufferedReader reader = new BufferedReader(
|
||||||
new InputStreamReader(conn.getInputStream(), "utf-8"))) {
|
new InputStreamReader(conn.getInputStream(), "utf-8"));
|
||||||
StringBuilder sb = new StringBuilder();
|
try {
|
||||||
|
StringBuilder fullContent = new StringBuilder();
|
||||||
String line;
|
String line;
|
||||||
while ((line = reader.readLine()) != null) {
|
while ((line = reader.readLine()) != null) {
|
||||||
sb.append(line);
|
// Parse each line as a separate JSON object (streaming response)
|
||||||
}
|
int contentIdx = line.indexOf("\"content\":");
|
||||||
String response = sb.toString();
|
|
||||||
// Parse "content":"..." from the JSON response
|
|
||||||
int contentIdx = response.indexOf("\"content\":");
|
|
||||||
if (contentIdx >= 0) {
|
if (contentIdx >= 0) {
|
||||||
int start = response.indexOf('"', contentIdx + 10) + 1;
|
int start = line.indexOf('"', contentIdx + 10) + 1;
|
||||||
int end = response.indexOf('"', start);
|
int end = line.indexOf('"', start);
|
||||||
if (start > 0 && end > start) {
|
if (start > 0 && end > start) {
|
||||||
return response.substring(start, end).trim();
|
fullContent.append(line.substring(start, end));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return null;
|
// Check for done marker
|
||||||
|
if (line.contains("\"done\":true")) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return normalizeCaptcha(fullContent.toString());
|
||||||
} finally {
|
} finally {
|
||||||
|
if (reader != null) {
|
||||||
|
try {
|
||||||
|
reader.close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
}
|
||||||
conn.disconnect();
|
conn.disconnect();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static String normalizeCaptcha(String raw) {
|
||||||
|
if (raw == null || raw.isBlank()) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
String s = raw.strip().replaceAll("\\s+", "");
|
||||||
|
s = s.replaceAll("^[`'\\\"]|[`'\\\"]+$", "");
|
||||||
|
return s;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
@ -76,4 +76,15 @@ class EtsScraperTest {
|
|||||||
byte[] decoded = java.util.Base64.getDecoder().decode(base64);
|
byte[] decoded = java.util.Base64.getDecoder().decode(base64);
|
||||||
assertArrayEquals(imageBytes, decoded, "Base64 roundtrip should match original");
|
assertArrayEquals(imageBytes, decoded, "Base64 roundtrip should match original");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testCaptchaRecognition() throws Exception {
|
||||||
|
Path captchaPath = Path.of("screenshots/captcha.png");
|
||||||
|
String captchaText = EtsScraper.recognizeCaptcha(captchaPath);
|
||||||
|
|
||||||
|
System.out.println("[+] Recognized captcha: " + captchaText);
|
||||||
|
assertNotNull(captchaText, "Captcha recognition should return a result");
|
||||||
|
assertFalse(captchaText.isEmpty(), "Captcha text should not be empty");
|
||||||
|
System.out.println("[+] Captcha length: " + captchaText.length() + " chars");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user