fix: 修复验证码识别模块
- 添加图像放大功能 (缩放到至少 200px 宽度)
- 修复流式响应解析 (逐行解析 JSON,累加 content 字段)
- 添加 Graphics2D 导入用于图像缩放

验证结果:验证码 9347 识别成功
This commit is contained in:
parent
6abec1860b
commit
0bd4065230
@ -11,6 +11,8 @@ import java.io.InputStreamReader;
|
||||
import java.net.HttpURLConnection;
|
||||
import java.net.URL;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.awt.Image;
|
||||
import java.awt.Graphics2D;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.nio.file.Files;
|
||||
@ -92,7 +94,7 @@ public class EtsScraper {
|
||||
String content = page.textContent("body");
|
||||
if (content != null) {
|
||||
String preview = content.length() > 500
|
||||
? content.substring(0, 500) + "..."
|
||||
? content.substring(0, 500) + ".."
|
||||
: content;
|
||||
System.out.println("[+] Page content preview:\n" + preview);
|
||||
}
|
||||
@ -160,7 +162,6 @@ public class EtsScraper {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static boolean doLoginWithCaptcha(Page page) throws Exception {
|
||||
// Find and fill username
|
||||
String usernameInput = findInput(page, new String[]{
|
||||
@ -238,6 +239,7 @@ public class EtsScraper {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
private static void downloadCaptcha(Page page) {
|
||||
try {
|
||||
// Set up listener FIRST, then reload to trigger the request
|
||||
@ -350,16 +352,28 @@ public class EtsScraper {
|
||||
public static String recognizeCaptcha(Path imagePath) throws Exception {
|
||||
byte[] imageBytes = Files.readAllBytes(imagePath);
|
||||
|
||||
// Convert GIF to PNG (Ollama doesn't support GIF)
|
||||
// Convert GIF to PNG and resize (Ollama qwen3-vl needs larger PNG images)
|
||||
ByteArrayInputStream bais = new ByteArrayInputStream(imageBytes);
|
||||
BufferedImage gifImage = ImageIO.read(bais);
|
||||
if (gifImage == null) {
|
||||
// Fallback: send raw bytes if conversion fails
|
||||
BufferedImage srcImage = ImageIO.read(bais);
|
||||
if (srcImage == null) {
|
||||
String base64 = Base64.getEncoder().encodeToString(imageBytes);
|
||||
return callOllama(base64);
|
||||
}
|
||||
|
||||
// Resize to at least 200px width for better recognition
|
||||
int scale = Math.max(1, 200 / srcImage.getWidth());
|
||||
if (scale < 1) scale = 1;
|
||||
int newWidth = srcImage.getWidth() * scale;
|
||||
int newHeight = srcImage.getHeight() * scale;
|
||||
|
||||
Image scaled = srcImage.getScaledInstance(newWidth, newHeight, Image.SCALE_SMOOTH);
|
||||
BufferedImage resized = new BufferedImage(newWidth, newHeight, BufferedImage.TYPE_INT_RGB);
|
||||
Graphics2D g2d = resized.createGraphics();
|
||||
g2d.drawImage(scaled, 0, 0, null);
|
||||
g2d.dispose();
|
||||
|
||||
ByteArrayOutputStream pngOut = new ByteArrayOutputStream();
|
||||
ImageIO.write(gifImage, "png", pngOut);
|
||||
ImageIO.write(resized, "png", pngOut);
|
||||
byte[] pngBytes = pngOut.toByteArray();
|
||||
String base64 = Base64.getEncoder().encodeToString(pngBytes);
|
||||
return callOllama(base64);
|
||||
@ -371,7 +385,7 @@ public class EtsScraper {
|
||||
+ "\"messages\":["
|
||||
+ " {"
|
||||
+ " \"role\":\"user\","
|
||||
+ " \"content\":\"识别图中的验证码文字,只返回文字内容,不要有其他解释\","
|
||||
+ " \"content\":\"识别图中的验证码文字,只返回文字内容\","
|
||||
+ " \"images\":[\"" + base64Image + "\"]"
|
||||
+ " }"
|
||||
+ "]"
|
||||
@ -381,7 +395,7 @@ public class EtsScraper {
|
||||
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
|
||||
conn.setRequestMethod("POST");
|
||||
conn.setConnectTimeout(15000);
|
||||
conn.setReadTimeout(30000);
|
||||
conn.setReadTimeout(60000);
|
||||
conn.setDoOutput(true);
|
||||
conn.setRequestProperty("Content-Type", "application/json; charset=utf-8");
|
||||
|
||||
@ -389,26 +403,45 @@ public class EtsScraper {
|
||||
conn.getOutputStream().flush();
|
||||
conn.getOutputStream().close();
|
||||
|
||||
try (BufferedReader reader = new BufferedReader(
|
||||
new InputStreamReader(conn.getInputStream(), "utf-8"))) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
BufferedReader reader = new BufferedReader(
|
||||
new InputStreamReader(conn.getInputStream(), "utf-8"));
|
||||
try {
|
||||
StringBuilder fullContent = new StringBuilder();
|
||||
String line;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
sb.append(line);
|
||||
}
|
||||
String response = sb.toString();
|
||||
// Parse "content":"..." from the JSON response
|
||||
int contentIdx = response.indexOf("\"content\":");
|
||||
// Parse each line as a separate JSON object (streaming response)
|
||||
int contentIdx = line.indexOf("\"content\":");
|
||||
if (contentIdx >= 0) {
|
||||
int start = response.indexOf('"', contentIdx + 10) + 1;
|
||||
int end = response.indexOf('"', start);
|
||||
int start = line.indexOf('"', contentIdx + 10) + 1;
|
||||
int end = line.indexOf('"', start);
|
||||
if (start > 0 && end > start) {
|
||||
return response.substring(start, end).trim();
|
||||
fullContent.append(line.substring(start, end));
|
||||
}
|
||||
}
|
||||
return null;
|
||||
// Check for done marker
|
||||
if (line.contains("\"done\":true")) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return normalizeCaptcha(fullContent.toString());
|
||||
} finally {
|
||||
if (reader != null) {
|
||||
try {
|
||||
reader.close();
|
||||
} catch (IOException e) {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
conn.disconnect();
|
||||
}
|
||||
}
|
||||
|
||||
private static String normalizeCaptcha(String raw) {
|
||||
if (raw == null || raw.isBlank()) {
|
||||
return "";
|
||||
}
|
||||
String s = raw.strip().replaceAll("\\s+", "");
|
||||
s = s.replaceAll("^[`'\\\"]|[`'\\\"]+$", "");
|
||||
return s;
|
||||
}
|
||||
}
|
||||
|
||||
@ -76,4 +76,15 @@ class EtsScraperTest {
|
||||
byte[] decoded = java.util.Base64.getDecoder().decode(base64);
|
||||
assertArrayEquals(imageBytes, decoded, "Base64 roundtrip should match original");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testCaptchaRecognition() throws Exception {
|
||||
Path captchaPath = Path.of("screenshots/captcha.png");
|
||||
String captchaText = EtsScraper.recognizeCaptcha(captchaPath);
|
||||
|
||||
System.out.println("[+] Recognized captcha: " + captchaText);
|
||||
assertNotNull(captchaText, "Captcha recognition should return a result");
|
||||
assertFalse(captchaText.isEmpty(), "Captcha text should not be empty");
|
||||
System.out.println("[+] Captcha length: " + captchaText.length() + " chars");
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user