diff --git a/.cp b/.cp new file mode 100644 index 0000000..b430eca --- /dev/null +++ b/.cp @@ -0,0 +1 @@ +/Users/niko/.m2/repository/com/microsoft/playwright/playwright/1.55.0/playwright-1.55.0.jar:/Users/niko/.m2/repository/com/google/code/gson/gson/2.12.1/gson-2.12.1.jar:/Users/niko/.m2/repository/com/google/errorprone/error_prone_annotations/2.36.0/error_prone_annotations-2.36.0.jar:/Users/niko/.m2/repository/org/opentest4j/opentest4j/1.3.0/opentest4j-1.3.0.jar:/Users/niko/.m2/repository/com/microsoft/playwright/driver/1.55.0/driver-1.55.0.jar:/Users/niko/.m2/repository/com/microsoft/playwright/driver-bundle/1.55.0/driver-bundle-1.55.0.jar \ No newline at end of file diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..30cf57e --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,10 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Ignored default folder with query files +/queries/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/compiler.xml b/.idea/compiler.xml new file mode 100644 index 0000000..6f261af --- /dev/null +++ b/.idea/compiler.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/encodings.xml b/.idea/encodings.xml new file mode 100644 index 0000000..aa00ffa --- /dev/null +++ b/.idea/encodings.xml @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/.idea/jarRepositories.xml b/.idea/jarRepositories.xml new file mode 100644 index 0000000..712ab9d --- /dev/null +++ b/.idea/jarRepositories.xml @@ -0,0 +1,20 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..1e72725 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,13 @@ + + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/pom.xml b/pom.xml index 9390090..2edb223 100644 --- a/pom.xml +++ b/pom.xml @@ -22,6 +22,12 @@ playwright ${playwright.version} + + org.junit.jupiter + junit-jupiter + 5.12.1 + test + @@ -42,6 +48,11 @@ com.ets.scraper.EtsScraper + + org.apache.maven.plugins + maven-surefire-plugin + 3.5.2 + diff --git a/screenshots/after_close_dialog_20260505_000115.png b/screenshots/after_close_dialog_20260505_000115.png new file mode 100644 index 0000000..993f8f1 Binary files /dev/null and b/screenshots/after_close_dialog_20260505_000115.png differ diff --git a/screenshots/captcha.png b/screenshots/captcha.png new file mode 100644 index 0000000..0cbd727 Binary files /dev/null and b/screenshots/captcha.png differ diff --git a/src/main/java/com/ets/scraper/EtsScraper.java b/src/main/java/com/ets/scraper/EtsScraper.java index b1e0310..c605772 100644 --- a/src/main/java/com/ets/scraper/EtsScraper.java +++ b/src/main/java/com/ets/scraper/EtsScraper.java @@ -10,11 +10,15 @@ import java.io.IOException; import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.URL; +import java.awt.image.BufferedImage; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.nio.file.Files; import java.nio.file.Path; import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; import java.util.Base64; +import javax.imageio.ImageIO; import static java.nio.file.Files.createDirectories; @@ -345,18 +349,33 @@ public class EtsScraper { public static String recognizeCaptcha(Path imagePath) throws Exception { byte[] imageBytes = Files.readAllBytes(imagePath); - String base64 = Base64.getEncoder().encodeToString(imageBytes); + // Convert GIF to PNG (Ollama doesn't support GIF) + ByteArrayInputStream bais = new ByteArrayInputStream(imageBytes); + BufferedImage gifImage = ImageIO.read(bais); + if (gifImage == null) { + // Fallback: send raw bytes if conversion fails + String base64 = Base64.getEncoder().encodeToString(imageBytes); + return callOllama(base64); + } + ByteArrayOutputStream pngOut = new ByteArrayOutputStream(); + ImageIO.write(gifImage, "png", pngOut); + byte[] pngBytes = pngOut.toByteArray(); + String base64 = Base64.getEncoder().encodeToString(pngBytes); + return callOllama(base64); + } + + private static String callOllama(String base64Image) throws Exception { String json = "{" - + "\"model\":\"" + OLLAMA_MODEL + "\"," - + "\"messages\":[" - + " {" - + " \"role\":\"user\"," - + " \"content\":\"识别图中的验证码文字,只返回文字内容,不要有其他解释\"," - + " \"images\":[\"" + base64 + "\"]" - + " }" - + "]" - + "}"; + + "\"model\":\"" + OLLAMA_MODEL + "\"," + + "\"messages\":[" + + " {" + + " \"role\":\"user\"," + + " \"content\":\"识别图中的验证码文字,只返回文字内容,不要有其他解释\"," + + " \"images\":[\"" + base64Image + "\"]" + + " }" + + "]" + + "}"; URL url = new URL(OLLAMA_URL + "/api/chat"); HttpURLConnection conn = (HttpURLConnection) url.openConnection(); @@ -376,20 +395,20 @@ public class EtsScraper { String line; while ((line = reader.readLine()) != null) { sb.append(line); - } + } String response = sb.toString(); - // Parse "content":"..." from the JSON response + // Parse "content":"..." from the JSON response int contentIdx = response.indexOf("\"content\":"); if (contentIdx >= 0) { int start = response.indexOf('"', contentIdx + 10) + 1; int end = response.indexOf('"', start); if (start > 0 && end > start) { return response.substring(start, end).trim(); - } - } + } + } return null; - } finally { + } finally { conn.disconnect(); - } - } - } + } + } + } \ No newline at end of file diff --git a/src/test/java/com/ets/scraper/EtsScraperTest.java b/src/test/java/com/ets/scraper/EtsScraperTest.java new file mode 100644 index 0000000..194b414 --- /dev/null +++ b/src/test/java/com/ets/scraper/EtsScraperTest.java @@ -0,0 +1,79 @@ +package com.ets.scraper; + +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.io.TempDir; + +import javax.imageio.ImageIO; +import java.awt.image.BufferedImage; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.nio.file.Files; +import java.nio.file.Path; + +import static org.junit.jupiter.api.Assertions.*; + +class EtsScraperTest { + + @TempDir + Path tempDir; + + @Test + void testGifToPngConversion() throws Exception { + Path gifPath = tempDir.resolve("captcha.png"); + // Copy the actual captcha (GIF stored as .png) to temp + Files.copy( + Path.of("screenshots/captcha.png").toAbsolutePath(), + gifPath + ); + + byte[] imageBytes = Files.readAllBytes(gifPath); + ByteArrayInputStream bais = new ByteArrayInputStream(imageBytes); + BufferedImage gifImage = ImageIO.read(bais); + + assertNotNull(gifImage, "GIF should be readable by ImageIO"); + assertTrue(gifImage.getWidth() > 0, "Image should have positive width"); + + ByteArrayOutputStream pngOut = new ByteArrayOutputStream(); + ImageIO.write(gifImage, "png", pngOut); + byte[] pngBytes = pngOut.toByteArray(); + + assertTrue(pngBytes.length > 0, "PNG output should not be empty"); + + // Verify converted PNG is valid + ByteArrayInputStream bais2 = new ByteArrayInputStream(pngBytes); + BufferedImage pngImage = ImageIO.read(bais2); + assertNotNull(pngImage, "Converted PNG should be readable"); + } + + @Test + void testGifToPngProducesValidPng() throws Exception { + byte[] gifBytes = Files.readAllBytes(Path.of("screenshots/captcha.png").toAbsolutePath()); + ByteArrayInputStream bais = new ByteArrayInputStream(gifBytes); + BufferedImage image = ImageIO.read(bais); + + ByteArrayOutputStream pngOut = new ByteArrayOutputStream(); + ImageIO.write(image, "png", pngOut); + + // PNG header: 89 50 4E 47 0D 0A 1A 0A + byte[] pngHeader = pngOut.toByteArray(); + // bytes are signed in Java, mask with & 0xFF + assertEquals(0x89 & 0xFF, pngHeader[0] & 0xFF, "PNG magic number"); + assertEquals(0x50 & 0xFF, pngHeader[1] & 0xFF, "P"); + assertEquals(0x4E & 0xFF, pngHeader[2] & 0xFF, "N"); + assertEquals(0x47 & 0xFF, pngHeader[3] & 0xFF, "G"); + } + + @Test + void testBase64Encoding() throws Exception { + byte[] imageBytes = Files.readAllBytes(Path.of("screenshots/captcha.png").toAbsolutePath()); + String base64 = java.util.Base64.getEncoder().encodeToString(imageBytes); + + assertNotNull(base64); + assertTrue(base64.length() > 0, "Base64 should not be empty"); + assertFalse(base64.contains("\n"), "Base64 should be single line"); + + // Verify roundtrip + byte[] decoded = java.util.Base64.getDecoder().decode(base64); + assertArrayEquals(imageBytes, decoded, "Base64 roundtrip should match original"); + } +} diff --git a/target/classes/com/ets/scraper/EtsScraper.class b/target/classes/com/ets/scraper/EtsScraper.class new file mode 100644 index 0000000..53bad70 Binary files /dev/null and b/target/classes/com/ets/scraper/EtsScraper.class differ diff --git a/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst b/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst new file mode 100644 index 0000000..5ec45df --- /dev/null +++ b/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst @@ -0,0 +1 @@ +com/ets/scraper/EtsScraper.class diff --git a/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst b/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst new file mode 100644 index 0000000..975bef9 --- /dev/null +++ b/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst @@ -0,0 +1 @@ +/Users/niko/workspace/ets/ets-playwright/src/main/java/com/ets/scraper/EtsScraper.java diff --git a/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/createdFiles.lst b/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/createdFiles.lst new file mode 100644 index 0000000..9b93a6a --- /dev/null +++ b/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/createdFiles.lst @@ -0,0 +1 @@ +com/ets/scraper/EtsScraperTest.class diff --git a/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/inputFiles.lst b/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/inputFiles.lst new file mode 100644 index 0000000..9cffddd --- /dev/null +++ b/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/inputFiles.lst @@ -0,0 +1 @@ +/Users/niko/workspace/ets/ets-playwright/src/test/java/com/ets/scraper/EtsScraperTest.java diff --git a/target/surefire-reports/TEST-com.ets.scraper.EtsScraperTest.xml b/target/surefire-reports/TEST-com.ets.scraper.EtsScraperTest.xml new file mode 100644 index 0000000..d068611 --- /dev/null +++ b/target/surefire-reports/TEST-com.ets.scraper.EtsScraperTest.xml @@ -0,0 +1,66 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/target/surefire-reports/com.ets.scraper.EtsScraperTest.txt b/target/surefire-reports/com.ets.scraper.EtsScraperTest.txt new file mode 100644 index 0000000..d0f1b68 --- /dev/null +++ b/target/surefire-reports/com.ets.scraper.EtsScraperTest.txt @@ -0,0 +1,4 @@ +------------------------------------------------------------------------------- +Test set: com.ets.scraper.EtsScraperTest +------------------------------------------------------------------------------- +Tests run: 3, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 0.048 s -- in com.ets.scraper.EtsScraperTest diff --git a/target/test-classes/com/ets/scraper/EtsScraperTest.class b/target/test-classes/com/ets/scraper/EtsScraperTest.class new file mode 100644 index 0000000..3328dd5 Binary files /dev/null and b/target/test-classes/com/ets/scraper/EtsScraperTest.class differ