From 06d1f934e9bc0bf5f93bd71d429488f9c5dd8e13 Mon Sep 17 00:00:00 2001 From: Arif Burak Demiray Date: Wed, 15 Apr 2026 22:44:08 +0300 Subject: [PATCH 1/4] feat: changelog for deadlock bug --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6573fca2..3c2688d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## XX.XX.XX +* Fixed a bug where a non-JSON server response would cause a permanent networking deadlock, preventing all subsequent requests from being sent. + ## 24.1.4 * ! Minor breaking change ! User properties will now be automatically saved under the following conditions: * When an event is recorded From 0485be854a0d48486192820e40e7c7c8d66be18e Mon Sep 17 00:00:00 2001 From: Arif Burak Demiray Date: Wed, 15 Apr 2026 22:46:18 +0300 Subject: [PATCH 2/4] feat: tests about it --- .../ScenarioNetworkDeadlockTests.java | 202 ++++++++++++ .../internal/TasksExceptionRecoveryTests.java | 290 ++++++++++++++++++ .../sdk/java/internal/TransportTests.java | 138 +++++++++ 3 files changed, 630 insertions(+) create mode 100644 sdk-java/src/test/java/ly/count/sdk/java/internal/ScenarioNetworkDeadlockTests.java create mode 100644 sdk-java/src/test/java/ly/count/sdk/java/internal/TasksExceptionRecoveryTests.java create mode 100644 sdk-java/src/test/java/ly/count/sdk/java/internal/TransportTests.java diff --git a/sdk-java/src/test/java/ly/count/sdk/java/internal/ScenarioNetworkDeadlockTests.java b/sdk-java/src/test/java/ly/count/sdk/java/internal/ScenarioNetworkDeadlockTests.java new file mode 100644 index 00000000..5a2cd31a --- /dev/null +++ b/sdk-java/src/test/java/ly/count/sdk/java/internal/ScenarioNetworkDeadlockTests.java @@ -0,0 +1,202 @@ +package ly.count.sdk.java.internal; + +import com.sun.net.httpserver.HttpServer; +import java.io.OutputStream; +import java.net.InetSocketAddress; +import java.util.concurrent.atomic.AtomicInteger; +import ly.count.sdk.java.Config; +import ly.count.sdk.java.Countly; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** + * End-to-end scenario tests for GitHub issue #264: + * "Non-JSON Server Response Causes Permanent Networking Deadlock" + * + * These tests spin up a local HTTP server that returns non-JSON responses + * (simulating 502/503 error pages), then verify the SDK recovers gracefully + * and resumes sending requests when the server comes back. + */ +@RunWith(JUnit4.class) +public class ScenarioNetworkDeadlockTests { + + private HttpServer server; + private int port; + + @Before + public void setUp() throws Exception { + TestUtils.createCleanTestState(); + } + + @After + public void tearDown() { + Countly.instance().halt(); + if (server != null) { + server.stop(0); + } + } + + /** + * Start a local HTTP server with a custom handler. + * Returns the port number. + */ + private void startServer(ServerBehavior behavior) throws Exception { + server = HttpServer.create(new InetSocketAddress(0), 0); + port = server.getAddress().getPort(); + server.createContext("/", exchange -> { + Response resp = behavior.respond(); + exchange.sendResponseHeaders(resp.code, resp.body.length()); + OutputStream os = exchange.getResponseBody(); + os.write(resp.body.getBytes()); + os.close(); + }); + server.start(); + } + + private Config configForLocalServer() { + return new Config("http://localhost:" + port, TestUtils.SERVER_APP_KEY, TestUtils.getTestSDirectory()) + .setLoggingLevel(Config.LoggingLevel.VERBOSE) + .setDeviceIdStrategy(Config.DeviceIdStrategy.UUID) + .enableFeatures(Config.Feature.Events, Config.Feature.Sessions) + .setEventQueueSizeToSend(1); + } + + // ==================== Scenario tests ==================== + + /** + * Helper: starts a server with the given response, inits the SDK, records an event, + * waits for the request round-trip, and asserts isSending() is false. + */ + private void assertResponseDoesNotDeadlock(int code, String body) throws Exception { + startServer(() -> new Response(code, body)); + + Countly.instance().init(configForLocalServer()); + Countly.session().begin(); + + Countly.instance().events().recordEvent("test_event"); + Thread.sleep(2000); + + Assert.assertFalse( + "SDK networking should NOT be stuck after response [" + code + ": " + body.substring(0, Math.min(body.length(), 40)) + "]", + SDKCore.instance.networking.isSending() + ); + } + + /** + * Server returns HTML 502 error — the primary scenario from issue #264. + * Before the fix: JSONException propagates, executor deadlocks permanently. + */ + @Test + public void html502Response_sdkDoesNotDeadlock() throws Exception { + assertResponseDoesNotDeadlock(502, "

502 Bad Gateway

"); + } + + /** Plain text response from a load balancer should not deadlock. */ + @Test + public void plainTextResponse_sdkDoesNotDeadlock() throws Exception { + assertResponseDoesNotDeadlock(200, "OK"); + } + + /** Empty response body should not deadlock. */ + @Test + public void emptyResponse_sdkDoesNotDeadlock() throws Exception { + assertResponseDoesNotDeadlock(200, ""); + } + + /** Valid JSON with non-2xx code should not deadlock. */ + @Test + public void jsonErrorResponse_sdkDoesNotDeadlock() throws Exception { + assertResponseDoesNotDeadlock(500, "{\"result\":\"Internal Server Error\"}"); + } + + /** + * Scenario: Server initially returns HTML, then starts returning valid JSON. + * Verifies the SDK can resume sending after transient 502 errors. + */ + @Test + public void serverRecovery_sdkResumesSending() throws Exception { + AtomicInteger requestCount = new AtomicInteger(0); + AtomicInteger successCount = new AtomicInteger(0); + + startServer(() -> { + int count = requestCount.incrementAndGet(); + if (count <= 1) { + return new Response(502, "

502 Bad Gateway

"); + } else { + successCount.incrementAndGet(); + return new Response(200, "{\"result\":\"Success\"}"); + } + }); + + Countly.instance().init(configForLocalServer()); + Countly.session().begin(); + Thread.sleep(2000); + + Assert.assertFalse( + "SDK should recover from 502 HTML response", + SDKCore.instance.networking.isSending() + ); + + for (int i = 0; i < 5; i++) { + if (!SDKCore.instance.networking.isSending()) { + SDKCore.instance.networking.check(SDKCore.instance.config); + } + Thread.sleep(1000); + } + + Assert.assertTrue( + "SDK should have sent requests after server recovered (got " + successCount.get() + " successes)", + successCount.get() > 0 + ); + } + + /** + * Scenario: Server closes connection abruptly without sending a response body. + * Transport.response() returns null, which used to cause NPE in processResponse(). + * The catch(Exception) in send() is the last safety net for this path. + */ + @Test + public void connectionReset_sdkDoesNotDeadlock() throws Exception { + server = HttpServer.create(new InetSocketAddress(0), 0); + port = server.getAddress().getPort(); + server.createContext("/", exchange -> { + // Send headers but close immediately — some JVMs produce IOException, + // others may produce unexpected exceptions in the response reader + exchange.sendResponseHeaders(200, 0); + exchange.getResponseBody().close(); + }); + server.start(); + + Countly.instance().init(configForLocalServer()); + Countly.session().begin(); + + Countly.instance().events().recordEvent("test_event"); + Thread.sleep(2000); + + Assert.assertFalse( + "SDK networking should NOT be stuck after abrupt connection close", + SDKCore.instance.networking.isSending() + ); + } + + // ==================== Helpers ==================== + + @FunctionalInterface + interface ServerBehavior { + Response respond(); + } + + static class Response { + final int code; + final String body; + + Response(int code, String body) { + this.code = code; + this.body = body; + } + } +} diff --git a/sdk-java/src/test/java/ly/count/sdk/java/internal/TasksExceptionRecoveryTests.java b/sdk-java/src/test/java/ly/count/sdk/java/internal/TasksExceptionRecoveryTests.java new file mode 100644 index 00000000..af67ec05 --- /dev/null +++ b/sdk-java/src/test/java/ly/count/sdk/java/internal/TasksExceptionRecoveryTests.java @@ -0,0 +1,290 @@ +package ly.count.sdk.java.internal; + +import java.io.IOException; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** + * Tests that the Tasks executor properly recovers from exceptions thrown by tasks. + * These tests verify the fix for issue #264 where an uncaught exception in a task + * would leave the "running" state permanently set, causing a networking deadlock. + */ +@RunWith(JUnit4.class) +public class TasksExceptionRecoveryTests { + private Tasks tasks; + + @Before + public void setUp() { + tasks = new Tasks("test-recovery", null); + } + + @After + public void tearDown() { + tasks.shutdown(); + } + + /** + * When a task throws a RuntimeException, the executor should recover + * and isRunning() should return false after the task completes. + * This is the core scenario from issue #264. + */ + @Test + public void taskRuntimeException_executorRecovers() throws Exception { + tasks.run(new Tasks.Task(0L) { + @Override + public Boolean call() { + throw new RuntimeException("Simulated JSONException from non-JSON response"); + } + }); + + Thread.sleep(200); + Assert.assertFalse("Executor should not be stuck in running state after RuntimeException", tasks.isRunning()); + } + + /** + * After a task throws an exception, the executor should be able to + * successfully run subsequent tasks. + */ + @Test + public void taskException_subsequentTaskSucceeds() throws Exception { + // First task: throws exception + tasks.run(new Tasks.Task(0L) { + @Override + public Boolean call() { + throw new RuntimeException("Simulated failure"); + } + }); + + Thread.sleep(200); + + // Second task: should succeed + final boolean[] secondTaskRan = { false }; + tasks.run(new Tasks.Task(0L) { + @Override + public Boolean call() { + secondTaskRan[0] = true; + return true; + } + }); + + Thread.sleep(200); + Assert.assertTrue("Subsequent task should have executed after previous task threw exception", secondTaskRan[0]); + Assert.assertFalse("Executor should not be running after second task completes", tasks.isRunning()); + } + + /** + * When a task throws a NullPointerException (e.g., from SDKCore.instance being null), + * the executor should recover. + */ + @Test + public void taskNullPointerException_executorRecovers() throws Exception { + tasks.run(new Tasks.Task(0L) { + @Override + public Boolean call() { + String nullStr = null; + nullStr.length(); // throws NPE + return true; + } + }); + + Thread.sleep(200); + Assert.assertFalse("Executor should recover from NullPointerException", tasks.isRunning()); + } + + /** + * When a task with a non-zero ID throws an exception, it should be + * removed from the pending map so new tasks with the same ID can be submitted. + */ + @Test + public void taskWithId_exceptionClearsPending() throws Exception { + Long taskId = 42L; + + // First task with ID: throws exception + tasks.run(new Tasks.Task(taskId) { + @Override + public Boolean call() { + throw new RuntimeException("Simulated failure"); + } + }); + + Thread.sleep(200); + + // Second task with same ID: should be accepted and run (not deduplicated against the failed one) + final boolean[] secondTaskRan = { false }; + Future future = tasks.run(new Tasks.Task(taskId) { + @Override + public Boolean call() { + secondTaskRan[0] = true; + return true; + } + }); + + Thread.sleep(200); + Assert.assertTrue("Task with same ID should run after previous one failed", secondTaskRan[0]); + Assert.assertFalse("Executor should not be running", tasks.isRunning()); + } + + /** + * When a callback throws an exception, the executor should still recover + * and not deadlock. The callback runs inside the try block, so its exception + * is caught by the finally block. + */ + @Test + public void callbackException_executorRecovers() throws Exception { + tasks.run(new Tasks.Task(0L) { + @Override + public Boolean call() { + return true; + } + }, result -> { + throw new RuntimeException("Simulated callback failure"); + }); + + Thread.sleep(200); + Assert.assertFalse("Executor should recover from callback exception", tasks.isRunning()); + } + + /** + * After a callback throws an exception, subsequent tasks should still execute. + */ + @Test + public void callbackException_subsequentTaskSucceeds() throws Exception { + // First task: succeeds but callback throws + tasks.run(new Tasks.Task(0L) { + @Override + public Boolean call() { + return true; + } + }, result -> { + throw new RuntimeException("Callback failure"); + }); + + Thread.sleep(200); + + // Second task: should succeed + final boolean[] secondTaskRan = { false }; + tasks.run(new Tasks.Task(0L) { + @Override + public Boolean call() { + secondTaskRan[0] = true; + return true; + } + }); + + Thread.sleep(200); + Assert.assertTrue("Task should run after previous callback threw exception", secondTaskRan[0]); + } + + /** + * Multiple consecutive failing tasks should not accumulate stuck state. + * The executor should recover after each one. + */ + @Test + public void multipleConsecutiveFailures_executorRecovers() throws Exception { + for (int i = 0; i < 5; i++) { + tasks.run(new Tasks.Task(0L) { + @Override + public Boolean call() { + throw new RuntimeException("Failure #" + System.currentTimeMillis()); + } + }); + } + + Thread.sleep(500); + Assert.assertFalse("Executor should recover after multiple consecutive failures", tasks.isRunning()); + + // Verify executor still works + final boolean[] taskRan = { false }; + tasks.run(new Tasks.Task(0L) { + @Override + public Boolean call() { + taskRan[0] = true; + return true; + } + }); + + Thread.sleep(200); + Assert.assertTrue("Executor should still work after multiple failures", taskRan[0]); + } + + /** + * When a task throws a checked Exception (e.g. IOException), the executor + * should recover. The original bug equally applied to checked exceptions since + * the cleanup code was not in a finally block. The ExecutorService wraps + * checked exceptions in ExecutionException, and running must still be reset. + */ + @Test + public void taskCheckedException_executorRecovers() throws Exception { + tasks.run(new Tasks.Task(0L) { + @Override + public Boolean call() throws Exception { + throw new IOException("Simulated I/O failure during request"); + } + }); + + Thread.sleep(200); + Assert.assertFalse("Executor should recover from checked IOException", tasks.isRunning()); + + // Verify executor still works after checked exception + final boolean[] taskRan = { false }; + tasks.run(new Tasks.Task(0L) { + @Override + public Boolean call() { + taskRan[0] = true; + return true; + } + }); + + Thread.sleep(200); + Assert.assertTrue("Subsequent task should run after checked exception", taskRan[0]); + } + + /** + * Deterministic test for volatile correctness of the "running" field. + * Uses a CountDownLatch instead of Thread.sleep to verify that the calling + * thread sees running=null immediately after the task signals completion. + * Without volatile, a stale cached value could cause isRunning() to return + * true even though the executor thread already set running=null. + */ + @Test + public void volatileCorrectness_isRunningVisibleAcrossThreads() throws Exception { + CountDownLatch taskStarted = new CountDownLatch(1); + CountDownLatch taskCanFinish = new CountDownLatch(1); + + // Submit a task that signals when it starts, then waits for permission to finish + tasks.run(new Tasks.Task(0L) { + @Override + public Boolean call() throws Exception { + taskStarted.countDown(); + taskCanFinish.await(5, TimeUnit.SECONDS); + return true; + } + }); + + // Wait for the task to start executing on the executor thread + Assert.assertTrue("Task should have started", taskStarted.await(2, TimeUnit.SECONDS)); + + // While the task is running, isRunning() must be true + Assert.assertTrue("isRunning() should be true while task is executing", tasks.isRunning()); + + // Allow the task to finish + taskCanFinish.countDown(); + + // Submit a no-op task and wait for it — this guarantees the previous task + // (including its finally block) has fully completed + tasks.await(); + + // Without volatile, this read from the test thread could see the stale value + Assert.assertFalse( + "isRunning() should be false immediately after task completes (volatile visibility)", + tasks.isRunning() + ); + } +} diff --git a/sdk-java/src/test/java/ly/count/sdk/java/internal/TransportTests.java b/sdk-java/src/test/java/ly/count/sdk/java/internal/TransportTests.java new file mode 100644 index 00000000..6afbfd19 --- /dev/null +++ b/sdk-java/src/test/java/ly/count/sdk/java/internal/TransportTests.java @@ -0,0 +1,138 @@ +package ly.count.sdk.java.internal; + +import ly.count.sdk.java.Config; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import java.lang.reflect.Field; + +@RunWith(JUnit4.class) +public class TransportTests { + + private Transport transport; + + @Before + public void setUp() throws Exception { + transport = new Transport(); + Log L = new Log(Config.LoggingLevel.VERBOSE, null); + Field logField = Transport.class.getDeclaredField("L"); + logField.setAccessible(true); + logField.set(transport, L); + } + + // ==================== processResponse tests ==================== + + /** + * Valid JSON response with "result" key and 200 status code + * should return true (success) + */ + @Test + public void processResponse_validJsonSuccess() { + Boolean result = transport.processResponse(200, "{\"result\":\"Success\"}", 1L); + Assert.assertTrue(result); + } + + /** + * Valid JSON response with "result" key and various 2xx status codes + * should return true (success) + */ + @Test + public void processResponse_validJson2xxRange() { + Assert.assertTrue(transport.processResponse(200, "{\"result\":\"ok\"}", 1L)); + Assert.assertTrue(transport.processResponse(201, "{\"result\":\"created\"}", 2L)); + Assert.assertTrue(transport.processResponse(299, "{\"result\":\"ok\"}", 3L)); + } + + /** + * Valid JSON response with "result" key but non-2xx status code + * should return false (failure) + */ + @Test + public void processResponse_validJsonNon2xxCode() { + Assert.assertFalse(transport.processResponse(400, "{\"result\":\"Bad Request\"}", 1L)); + Assert.assertFalse(transport.processResponse(500, "{\"result\":\"Internal Server Error\"}", 2L)); + Assert.assertFalse(transport.processResponse(302, "{\"result\":\"redirect\"}", 3L)); + Assert.assertFalse(transport.processResponse(199, "{\"result\":\"ok\"}", 4L)); + } + + /** + * Valid JSON response but missing "result" key with 200 status code + * should return false (failure) + */ + @Test + public void processResponse_validJsonMissingResultKey() { + Assert.assertFalse(transport.processResponse(200, "{\"error\":\"something\"}", 1L)); + Assert.assertFalse(transport.processResponse(200, "{}", 2L)); + } + + /** + * Null response should return false without throwing NPE + * This was the original bug path — response() returns null on IOException + */ + @Test + public void processResponse_nullResponse() { + Boolean result = transport.processResponse(200, null, 1L); + Assert.assertFalse(result); + } + + /** + * HTML response (e.g., 502/503 error page) should return false + * This is the primary scenario from issue #264 + */ + @Test + public void processResponse_htmlResponse() { + String html502 = "

502 Bad Gateway

"; + Boolean result = transport.processResponse(502, html502, 1L); + Assert.assertFalse(result); + } + + /** + * Plain text non-JSON response should return false + */ + @Test + public void processResponse_plainTextResponse() { + Boolean result = transport.processResponse(200, "OK", 1L); + Assert.assertFalse(result); + } + + /** + * Empty string response should return false + */ + @Test + public void processResponse_emptyStringResponse() { + Boolean result = transport.processResponse(200, "", 1L); + Assert.assertFalse(result); + } + + /** + * Malformed JSON should return false without propagating JSONException + */ + @Test + public void processResponse_malformedJson() { + Assert.assertFalse(transport.processResponse(200, "{invalid json", 1L)); + Assert.assertFalse(transport.processResponse(200, "not json at all", 2L)); + Assert.assertFalse(transport.processResponse(200, "{{{{", 3L)); + } + + /** + * Edge case: JSON array instead of JSON object should return false + */ + @Test + public void processResponse_jsonArrayResponse() { + Boolean result = transport.processResponse(200, "[{\"result\":\"ok\"}]", 1L); + Assert.assertFalse(result); + } + + /** + * Boundary status codes around the 200-299 range + */ + @Test + public void processResponse_boundaryStatusCodes() { + Assert.assertTrue(transport.processResponse(200, "{\"result\":\"ok\"}", 1L)); + Assert.assertTrue(transport.processResponse(299, "{\"result\":\"ok\"}", 2L)); + Assert.assertFalse(transport.processResponse(199, "{\"result\":\"ok\"}", 3L)); + Assert.assertFalse(transport.processResponse(300, "{\"result\":\"ok\"}", 4L)); + } +} From 6c93c73ae10c3cff27cef27fab30de7cafc0a845 Mon Sep 17 00:00:00 2001 From: Arif Burak Demiray Date: Wed, 15 Apr 2026 22:46:37 +0300 Subject: [PATCH 3/4] feat: reproduce issue 264 --- app-java/build.gradle | 2 +- .../ly/count/java/demo/ReproduceIssue264.java | 179 ++++++++++++++++++ 2 files changed, 180 insertions(+), 1 deletion(-) create mode 100644 app-java/src/main/java/ly/count/java/demo/ReproduceIssue264.java diff --git a/app-java/build.gradle b/app-java/build.gradle index 6c0872f6..3b90276a 100644 --- a/app-java/build.gradle +++ b/app-java/build.gradle @@ -16,4 +16,4 @@ dependencies { //implementation "ly.count.sdk:java:${CLY_VERSION}" } -mainClassName = 'ly.count.java.demo.Sample' +mainClassName = 'ly.count.java.demo.ReproduceIssue264' diff --git a/app-java/src/main/java/ly/count/java/demo/ReproduceIssue264.java b/app-java/src/main/java/ly/count/java/demo/ReproduceIssue264.java new file mode 100644 index 00000000..d7b23963 --- /dev/null +++ b/app-java/src/main/java/ly/count/java/demo/ReproduceIssue264.java @@ -0,0 +1,179 @@ +package ly.count.java.demo; + +import com.sun.net.httpserver.HttpServer; +import java.io.File; +import java.io.OutputStream; +import java.lang.reflect.Field; +import java.net.InetSocketAddress; +import java.util.concurrent.atomic.AtomicInteger; +import ly.count.sdk.java.Config; +import ly.count.sdk.java.Countly; + +/** + * Reproduces GitHub Issue #264: + * "Non-JSON Server Response Causes Permanent Networking Deadlock" + * + * This app starts a local HTTP server that returns HTML (simulating a 502 error page), + * initializes the Countly SDK against it, records events, and checks whether the SDK + * gets permanently stuck. + * + * Run with: ./gradlew app-java:run + * (after setting mainClassName = 'ly.count.java.demo.ReproduceIssue264' in app-java/build.gradle) + */ +public class ReproduceIssue264 { + + public static void main(String[] args) throws Exception { + AtomicInteger requestCount = new AtomicInteger(0); + AtomicInteger successCount = new AtomicInteger(0); + + // Start a local HTTP server that returns HTML for the first 3 requests, + // then valid JSON for subsequent requests (simulating server recovery) + HttpServer server = HttpServer.create(new InetSocketAddress(0), 0); + int port = server.getAddress().getPort(); + + server.createContext("/", exchange -> { + int count = requestCount.incrementAndGet(); + String body; + int code; + + if (count <= 3) { + code = 502; + body = "

502 Bad Gateway

The server is temporarily unavailable.

"; + System.out.println("[Mock Server] Request #" + count + " -> returning HTML 502 (simulating outage)"); + } else { + code = 200; + body = "{\"result\":\"Success\"}"; + successCount.incrementAndGet(); + System.out.println("[Mock Server] Request #" + count + " -> returning JSON 200 (server recovered)"); + } + + exchange.sendResponseHeaders(code, body.length()); + OutputStream os = exchange.getResponseBody(); + os.write(body.getBytes()); + os.close(); + }); + + server.start(); + System.out.println("=== Issue #264 Reproduction ==="); + System.out.println("[Mock Server] Started on port " + port); + System.out.println(); + + // Setup SDK storage directory + String[] sdkStorageRootPath = { System.getProperty("user.home"), "__COUNTLY", "java_issue264" }; + File sdkStorageRootDirectory = new File(String.join(File.separator, sdkStorageRootPath)); + if (!(sdkStorageRootDirectory.exists() && sdkStorageRootDirectory.isDirectory())) { + sdkStorageRootDirectory.mkdirs(); + } + + // Initialize SDK pointing to our mock server + Config config = new Config("http://localhost:" + port, "TEST_APP_KEY", sdkStorageRootDirectory) + .setLoggingLevel(Config.LoggingLevel.WARN) + .setDeviceIdStrategy(Config.DeviceIdStrategy.UUID) + .enableFeatures(Config.Feature.Events, Config.Feature.Sessions) + .setEventQueueSizeToSend(1); + + Countly.instance().init(config); + System.out.println("[SDK] Initialized against mock server"); + + // Start session (triggers first request -> will get HTML 502) + Countly.session().begin(); + System.out.println("[SDK] Session started"); + + // Record an event (triggers another request -> will get HTML 502) + Countly.instance().events().recordEvent("test_event_during_outage"); + System.out.println("[SDK] Event recorded"); + + // Wait for requests to be attempted + System.out.println(); + System.out.println("[Test] Waiting 3 seconds for initial requests..."); + Thread.sleep(3000); + + // Check if SDK is deadlocked via reflection (SDKCore.instance.networking is protected) + boolean isSending = isNetworkingSending(); + + System.out.println(); + System.out.println("============================================================"); + if (isSending) { + System.out.println(" BUG REPRODUCED: isSending() = true (DEADLOCKED!)"); + System.out.println(" The SDK is permanently stuck. No further requests"); + System.out.println(" will ever be sent, even when the server recovers."); + } else { + System.out.println(" FIX CONFIRMED: isSending() = false (recovered)"); + System.out.println(" The SDK handled the non-JSON response gracefully."); + } + System.out.println("============================================================"); + System.out.println(); + + // Try to trigger recovery by calling check + System.out.println("[Test] Triggering networking check cycles (server now returns JSON)..."); + triggerNetworkingChecks(5); + + int totalRequests = requestCount.get(); + int successes = successCount.get(); + + System.out.println(); + System.out.println("============================================================"); + System.out.println(" Total requests received by server: " + totalRequests); + System.out.println(" Successful (JSON 200) responses: " + successes); + if (successes > 0) { + System.out.println(" SDK successfully retried after server recovered!"); + } else if (!isNetworkingSending()) { + System.out.println(" SDK recovered from error. Requests will retry on"); + System.out.println(" the next timer tick (no deadlock)."); + } else { + System.out.println(" SDK is STILL deadlocked. Bug confirmed."); + } + System.out.println("============================================================"); + + // Cleanup + Countly.instance().stop(); + server.stop(0); + System.out.println(); + System.out.println("[Done] Cleanup complete."); + } + + /** + * Access SDKCore.instance.networking.isSending() via reflection + * since these fields are protected/package-private. + */ + private static boolean isNetworkingSending() throws Exception { + Class sdkCoreClass = Class.forName("ly.count.sdk.java.internal.SDKCore"); + Field instanceField = sdkCoreClass.getDeclaredField("instance"); + instanceField.setAccessible(true); + Object sdkCore = instanceField.get(null); + + Field networkingField = sdkCoreClass.getDeclaredField("networking"); + networkingField.setAccessible(true); + Object networking = networkingField.get(sdkCore); + + return (boolean) networking.getClass().getMethod("isSending").invoke(networking); + } + + /** + * Trigger SDKCore.instance.networking.check(config) via reflection. + */ + private static void triggerNetworkingChecks(int count) throws Exception { + Class sdkCoreClass = Class.forName("ly.count.sdk.java.internal.SDKCore"); + Field instanceField = sdkCoreClass.getDeclaredField("instance"); + instanceField.setAccessible(true); + Object sdkCore = instanceField.get(null); + + Field networkingField = sdkCoreClass.getDeclaredField("networking"); + networkingField.setAccessible(true); + Object networking = networkingField.get(sdkCore); + + Field configField = sdkCoreClass.getDeclaredField("config"); + configField.setAccessible(true); + Object internalConfig = configField.get(sdkCore); + + java.lang.reflect.Method checkMethod = networking.getClass().getMethod("check", + Class.forName("ly.count.sdk.java.internal.InternalConfig")); + + for (int i = 0; i < count; i++) { + if (!isNetworkingSending()) { + checkMethod.invoke(networking, internalConfig); + } + Thread.sleep(1000); + } + } +} \ No newline at end of file From 25dd55770bfa72f776842d7b6e26ad436fa1c23a Mon Sep 17 00:00:00 2001 From: Arif Burak Demiray Date: Wed, 15 Apr 2026 22:47:09 +0300 Subject: [PATCH 4/4] fix: issue 264 --- .../ly/count/sdk/java/internal/Tasks.java | 24 ++++++++++-------- .../ly/count/sdk/java/internal/Transport.java | 25 ++++++++++++++----- 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/sdk-java/src/main/java/ly/count/sdk/java/internal/Tasks.java b/sdk-java/src/main/java/ly/count/sdk/java/internal/Tasks.java index 9d165970..6eb7cb8c 100644 --- a/sdk-java/src/main/java/ly/count/sdk/java/internal/Tasks.java +++ b/sdk-java/src/main/java/ly/count/sdk/java/internal/Tasks.java @@ -22,7 +22,7 @@ public class Tasks { * Service which runs {@link Callable}s */ private final ExecutorService executor; - private Long running = null; + private volatile Long running = null; /** * Map of {@link Future}s for {@link Callable}s not yet resolved @@ -92,18 +92,20 @@ Future run(final Task task, final Callback callback) { @Override public T call() throws Exception { running = task.id; - T result = task.call(); - synchronized (pending) { - if (!task.id.equals(0L)) { - pending.remove(task.id); + try { + T result = task.call(); + if (callback != null) { + callback.call(result); + } + return result; + } finally { + synchronized (pending) { + if (!task.id.equals(0L)) { + pending.remove(task.id); + } + running = null; } - running = null; - // L.d("pending " + pending.keySet() + ", done running " + task.id); - } - if (callback != null) { - callback.call(result); } - return result; } }); diff --git a/sdk-java/src/main/java/ly/count/sdk/java/internal/Transport.java b/sdk-java/src/main/java/ly/count/sdk/java/internal/Transport.java index b016a4c1..0b7957f7 100644 --- a/sdk-java/src/main/java/ly/count/sdk/java/internal/Transport.java +++ b/sdk-java/src/main/java/ly/count/sdk/java/internal/Transport.java @@ -342,6 +342,9 @@ public Boolean send() { } catch (IOException e) { L.w("[network] Error while sending request " + request + " " + e); return false; + } catch (Exception e) { + L.e("[network] Unexpected error while sending request " + request + " " + e); + return false; } finally { if (connection != null) { connection.disconnect(); @@ -354,12 +357,22 @@ public Boolean send() { Boolean processResponse(int code, String response, Long requestId) { L.i("[network] [processResponse] Code [" + code + "] response [" + response + "] for request[" + requestId + "]"); - JSONObject jsonObject = new JSONObject(response); - if (code >= 200 && code < 300 && jsonObject.has("result")) { - L.d("[network] Success"); - return true; - } else { - L.w("[network] Fail: code :" + code + ", result: " + response); + if (response == null) { + L.w("[network] Null response for request [" + requestId + "]"); + return false; + } + + try { + JSONObject jsonObject = new JSONObject(response); + if (code >= 200 && code < 300 && jsonObject.has("result")) { + L.d("[network] Success"); + return true; + } else { + L.w("[network] Fail: code :" + code + ", result: " + response); + return false; + } + } catch (Exception e) { + L.w("[network] Failed to parse response as JSON for request [" + requestId + "], response: [" + response + "], error: [" + e.getMessage() + "]"); return false; } }