apache · AndrewJSchofield · Nov 20, 2025 · Jun 3, 2025 · Jun 3, 2025 · Jun 3, 2025
diff --git a/...egration-tests/src/test/java/org/apache/kafka/clients/consumer/PlaintextConsumerTest.java b/...egration-tests/src/test/java/org/apache/kafka/clients/consumer/PlaintextConsumerTest.java
@@ -87,6 +87,7 @@
 import static org.apache.kafka.clients.consumer.ConsumerConfig.AUTO_OFFSET_RESET_CONFIG;
 import static org.apache.kafka.clients.consumer.ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG;
 import static org.apache.kafka.clients.consumer.ConsumerConfig.CLIENT_ID_CONFIG;
+import static org.apache.kafka.clients.consumer.ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG;
 import static org.apache.kafka.clients.consumer.ConsumerConfig.GROUP_ID_CONFIG;
 import static org.apache.kafka.clients.consumer.ConsumerConfig.GROUP_INSTANCE_ID_CONFIG;
 import static org.apache.kafka.clients.consumer.ConsumerConfig.GROUP_PROTOCOL_CONFIG;
@@ -109,6 +110,7 @@
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertNull;
 import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTimeoutPreemptively;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.junit.jupiter.api.Assertions.fail;
 
@@ -181,6 +183,63 @@ public void testAsyncConsumeCoordinatorFailover() throws InterruptedException {
         testCoordinatorFailover(cluster, config);
     }
 
+    @ClusterTest(
+        brokers = 1,
+        serverProperties = {
+            @ClusterConfigProperty(key = OFFSETS_TOPIC_PARTITIONS_CONFIG, value = "1"),
+            @ClusterConfigProperty(key = "offsets.topic.replication.factor", value = "1"),
+            @ClusterConfigProperty(key = "transaction.state.log.replication.factor", value = "1"),
+            @ClusterConfigProperty(key = "transaction.state.log.min.isr", value = "1")
+        }
+    )
+    public void testClassicConsumerCloseOnBrokerShutdown() {
+        // Runs the shared close-on-broker-shutdown scenario with the classic
+        // group protocol as the only explicitly configured consumer setting.
+        String classicProtocol = GroupProtocol.CLASSIC.name().toLowerCase(Locale.ROOT);
+        testConsumerCloseOnBrokerShutdown(Map.of(GROUP_PROTOCOL_CONFIG, classicProtocol));
+    }
+
+    @ClusterTest(
+        brokers = 1,
+        serverProperties = {
+            @ClusterConfigProperty(key = OFFSETS_TOPIC_PARTITIONS_CONFIG, value = "1"),
+            @ClusterConfigProperty(key = "offsets.topic.replication.factor", value = "1"),
+            @ClusterConfigProperty(key = "transaction.state.log.replication.factor", value = "1"),
+            @ClusterConfigProperty(key = "transaction.state.log.min.isr", value = "1")
+        }
+    )
+    public void testAsyncConsumerCloseOnBrokerShutdown() {
+        // Auto commit is disabled so that commitSync() does not block the close timeout.
+        Map<String, Object> asyncConsumerProps = Map.of(
+            ENABLE_AUTO_COMMIT_CONFIG, false,
+            GROUP_PROTOCOL_CONFIG, GroupProtocol.CONSUMER.name().toLowerCase(Locale.ROOT)
+        );
+        testConsumerCloseOnBrokerShutdown(asyncConsumerProps);
+    }
+
+    private void testConsumerCloseOnBrokerShutdown(Map<String, Object> consumerConfig) {
+        Duration shortPoll = Duration.ofMillis(100);
+        try (Consumer<byte[], byte[]> groupConsumer = cluster.consumer(consumerConfig)) {
+            groupConsumer.subscribe(List.of(TOPIC));
+            // Poll once while the broker is still up so the consumer can look for its coordinator.
+            groupConsumer.poll(shortPoll);
+
+            // Stop the only broker and wait until it has finished shutting down.
+            assertEquals(1, cluster.brokers().size());
+            KafkaBroker onlyBroker = cluster.brokers().get(0);
+            cluster.shutdownBroker(0);
+            onlyBroker.awaitShutdown();
+
+            // Poll again so the consumer retries the coordinator lookup with the broker gone.
+            groupConsumer.poll(shortPoll);
+
+            // close() has to return promptly rather than hang on retries.
+            assertTimeoutPreemptively(Duration.ofSeconds(5), () -> {
+                groupConsumer.close();
+            }, "Consumer close should not wait for full timeout when broker is already shutdown");
+        }
+    }
+
     @ClusterTest
     public void testClassicConsumerHeaders() throws Exception {
         testHeaders(Map.of(

diff --git a/...-integration-tests/src/test/java/org/apache/kafka/clients/consumer/ShareConsumerTest.java b/...-integration-tests/src/test/java/org/apache/kafka/clients/consumer/ShareConsumerTest.java
@@ -113,6 +113,7 @@
 import static org.junit.jupiter.api.Assertions.assertInstanceOf;
 import static org.junit.jupiter.api.Assertions.assertNotEquals;
 import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTimeoutPreemptively;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.junit.jupiter.api.Assertions.fail;
 
@@ -1252,6 +1253,28 @@ public void testMultipleConsumersWithDifferentGroupIds() throws InterruptedExcep
         }
     }
 
+    @ClusterTest
+    public void testConsumerCloseOnBrokerShutdown() {
+        alterShareAutoOffsetReset("group1", "earliest");
+        // try-with-resources releases the consumer even when a step before the explicit close() throws.
+        try (ShareConsumer<byte[], byte[]> shareConsumer = createShareConsumer("group1")) {
+            shareConsumer.subscribe(Set.of(tp.topic()));
+
+            // Give the consumer a chance to discover the coordinator before shutting down the broker.
+            shareConsumer.poll(Duration.ofMillis(100));
+
+            // Shutdown the broker.
+            assertEquals(1, cluster.brokers().size());
+            KafkaBroker broker = cluster.brokers().get(0);
+            cluster.shutdownBroker(0);
+            broker.awaitShutdown();
+
+            // Assert that close completes in less than 5 seconds, not the full 30-second timeout.
+            assertTimeoutPreemptively(Duration.ofSeconds(5), () -> shareConsumer.close(),
+                "Consumer close should not wait for full timeout when broker is already shutdown");
+        }
+    }
+
     @ClusterTest
     public void testMultipleConsumersInGroupSequentialConsumption() {
         alterShareAutoOffsetReset("group1", "earliest");

diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerNetworkThread.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerNetworkThread.java
@@ -395,7 +395,7 @@ private void sendUnsentRequests(final Timer timer) {
             return;
 
         do {
-            networkClientDelegate.poll(timer.remainingMs(), timer.currentTimeMs());
+            networkClientDelegate.poll(timer.remainingMs(), timer.currentTimeMs(), true);
             timer.update();
         } while (timer.notExpired() && networkClientDelegate.hasAnyPendingRequests());
 

diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/NetworkClientDelegate.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/NetworkClientDelegate.java
@@ -34,6 +34,7 @@
 import org.apache.kafka.common.errors.TimeoutException;
 import org.apache.kafka.common.metrics.Metrics;
 import org.apache.kafka.common.metrics.Sensor;
+import org.apache.kafka.common.protocol.Errors;
 import org.apache.kafka.common.requests.AbstractRequest;
 import org.apache.kafka.common.telemetry.internals.ClientTelemetrySender;
 import org.apache.kafka.common.utils.LogContext;
@@ -137,13 +138,25 @@ public void tryConnect(Node node) {
     }
 
     /**
-     * Returns the responses of the sent requests. This method will try to send the unsent requests, poll for responses,
+     * This method will try to send the unsent requests, poll for responses,
      * and check the disconnected nodes.
      *
      * @param timeoutMs     timeout time
      * @param currentTimeMs current time
      */
     public void poll(final long timeoutMs, final long currentTimeMs) {
+        poll(timeoutMs, currentTimeMs, false);
+    }
+
+    /**
+     * Tries to send the unsent requests, polls for responses, and checks the disconnected nodes.
+     *
+     * @param timeoutMs     timeout time
+     * @param currentTimeMs current time
+     * @param onClose       True when the network thread is closing; unsent requests that have no
+     *                      target node are then removed and failed with a network exception.
+     */
+    public void poll(final long timeoutMs, final long currentTimeMs, boolean onClose) {
         trySend(currentTimeMs);
 
         long pollTimeoutMs = timeoutMs;
@@ -152,7 +165,7 @@ public void poll(final long timeoutMs, final long currentTimeMs) {
         }
         this.client.poll(pollTimeoutMs, currentTimeMs);
         maybePropagateMetadataError();
-        checkDisconnects(currentTimeMs);
+        checkDisconnects(currentTimeMs, onClose);
         asyncConsumerMetrics.recordUnsentRequestsQueueSize(unsentRequests.size(), currentTimeMs);
     }
 
@@ -219,7 +232,7 @@ boolean doSend(final UnsentRequest r, final long currentTimeMs) {
         return true;
     }
 
-    protected void checkDisconnects(final long currentTimeMs) {
+    protected void checkDisconnects(final long currentTimeMs, boolean onClose) {
         // Check the connection of the unsent request. Disconnect the disconnected node if it is unable to be connected.
         Iterator<UnsentRequest> iter = unsentRequests.iterator();
         while (iter.hasNext()) {
@@ -229,6 +242,11 @@ protected void checkDisconnects(final long currentTimeMs) {
                 asyncConsumerMetrics.recordUnsentRequestsQueueTime(time.milliseconds() - u.enqueueTimeMs());
                 AuthenticationException authenticationException = client.authenticationException(u.node.get());
                 u.handler.onFailure(currentTimeMs, authenticationException);
+            } else if (u.node.isEmpty() && onClose) {
+                log.debug("Removing unsent request {} because the client is closing", u);
+                iter.remove();
+                asyncConsumerMetrics.recordUnsentRequestsQueueTime(time.milliseconds() - u.enqueueTimeMs());
+                u.handler.onFailure(currentTimeMs, Errors.NETWORK_EXCEPTION.exception());
 // If coordinator is not known, requests are aborted. 
 // Poll to ensure that request has been written to the socket. Wait until either the timer has expired or until 
 // all requests have received a response. 
 while (timer.notExpired() && !requestFutures.stream().allMatch(RequestFuture::isDone)) { 
     client.poll(timer, null, true); 
     timer.update(); 
 } 
 return unsentRequest.whenComplete((clientResponse, throwable) -> { 
 if (applicationEventHandler != null) 
     closeQuietly(() -> applicationEventHandler.close(Duration.ofMillis(closeTimer.remainingMs())), "Failed shutting down network thread", firstException); 
 closeTimer.update(); 
     Node coordinator = checkAndGetCoordinator(); 
     if (coordinator != null && !client.awaitPendingRequests(coordinator, timer)) 
         log.warn("Close timed out with {} pending requests to coordinator, terminating client connections", 
                 client.pendingRequestCount(coordinator)); 
 } 
 if (fetchTarget == null || isUnavailable(fetchTarget)) { 
     log.debug("Skip sending close session request to broker {} since it is not reachable", fetchTarget); 
     return; 
 if (fetchTarget == null || isUnavailable(fetchTarget)) { 
     log.debug("Skip sending close session request to broker {} since it is not reachable", fetchTarget); 
 // If coordinator is not known, requests are aborted. 
 // Poll to ensure that request has been written to the socket. Wait until either the timer has expired or until 
 // all requests have received a response. 
 while (timer.notExpired() && !requestFutures.stream().allMatch(RequestFuture::isDone)) { 
     client.poll(timer, null, true); 
     timer.update(); 
 } 
 return unsentRequest.whenComplete((clientResponse, throwable) -> { 
 if (applicationEventHandler != null) 
     closeQuietly(() -> applicationEventHandler.close(Duration.ofMillis(closeTimer.remainingMs())), "Failed shutting down network thread", firstException); 
 closeTimer.update(); 
     Node coordinator = checkAndGetCoordinator(); 
     if (coordinator != null && !client.awaitPendingRequests(coordinator, timer)) 
         log.warn("Close timed out with {} pending requests to coordinator, terminating client connections", 
                 client.pendingRequestCount(coordinator)); 
 } 
 if (fetchTarget == null || isUnavailable(fetchTarget)) { 
     log.debug("Skip sending close session request to broker {} since it is not reachable", fetchTarget); 
     return; 
 if (fetchTarget == null || isUnavailable(fetchTarget)) { 
     log.debug("Skip sending close session request to broker {} since it is not reachable", fetchTarget); 
             }
         }
     }

diff --git a/...src/main/java/org/apache/kafka/clients/consumer/internals/ShareConsumeRequestManager.java b/...src/main/java/org/apache/kafka/clients/consumer/internals/ShareConsumeRequestManager.java
@@ -136,6 +136,9 @@ public class ShareConsumeRequestManager implements RequestManager, MemberStateLi
     @Override
     public PollResult poll(long currentTimeMs) {
         if (memberId == null) {
+            if (closing && !closeFuture.isDone()) {
+                closeFuture.complete(null);
+            }
             return PollResult.EMPTY;
         }
 

diff --git a/.../src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerNetworkThreadTest.java b/.../src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerNetworkThreadTest.java
@@ -35,6 +35,7 @@
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.MethodSource;
 import org.junit.jupiter.params.provider.ValueSource;
+import org.mockito.ArgumentMatchers;
 
 import java.time.Duration;
 import java.util.LinkedList;
@@ -203,7 +204,7 @@ public void testRunOnceInvokesReaper() {
     public void testSendUnsentRequests() {
         when(networkClientDelegate.hasAnyPendingRequests()).thenReturn(true).thenReturn(true).thenReturn(false);
         consumerNetworkThread.cleanup();
-        verify(networkClientDelegate, times(2)).poll(anyLong(), anyLong());
+        verify(networkClientDelegate, times(2)).poll(anyLong(), anyLong(), ArgumentMatchers.booleanThat(onClose -> onClose));
     }
 
     @ParameterizedTest

diff --git a/...ts/src/test/java/org/apache/kafka/clients/consumer/internals/FetchRequestManagerTest.java b/...ts/src/test/java/org/apache/kafka/clients/consumer/internals/FetchRequestManagerTest.java
@@ -4266,7 +4266,7 @@ private List<UnsentRequest> removeUnsentRequestByNode(Node node) {
         }
 
         @Override
-        protected void checkDisconnects(final long currentTimeMs) {
+        protected void checkDisconnects(final long currentTimeMs, boolean onClose) {
             // any disconnects affecting requests that have already been transmitted will be handled
             // by NetworkClient, so we just need to check whether connections for any of the unsent
             // requests have been disconnected; if they have, then we complete the corresponding future

diff --git a/.../src/test/java/org/apache/kafka/clients/consumer/internals/NetworkClientDelegateTest.java b/.../src/test/java/org/apache/kafka/clients/consumer/internals/NetworkClientDelegateTest.java
@@ -27,6 +27,7 @@
 import org.apache.kafka.common.Node;
 import org.apache.kafka.common.errors.AuthenticationException;
 import org.apache.kafka.common.errors.DisconnectException;
+import org.apache.kafka.common.errors.NetworkException;
 import org.apache.kafka.common.errors.TimeoutException;
 import org.apache.kafka.common.message.FindCoordinatorRequestData;
 import org.apache.kafka.common.metrics.Metrics;
@@ -282,6 +283,51 @@ public void testRecordUnsentRequestsQueueTime(String groupName) throws Exception
         }
     }
 
+    @Test
+    public void testPollWithOnClose() throws Exception {
+        try (NetworkClientDelegate delegate = newNetworkClientDelegate(false)) {
+            delegate.add(newUnsentFindCoordinatorRequest());
+
+            // A regular poll (no onClose flag) leaves the request pending,
+            // because no response has been provided yet.
+            delegate.poll(0, time.milliseconds());
+            assertTrue(delegate.hasAnyPendingRequests());
+
+            // Polling with onClose=true does not change that.
+            delegate.poll(0, time.milliseconds(), true);
+            assertTrue(delegate.hasAnyPendingRequests());
+
+            // Once a response is supplied, the next onClose poll completes the request.
+            client.respond(FindCoordinatorResponse.prepareResponse(Errors.NONE, GROUP_ID, mockNode()));
+            delegate.poll(0, time.milliseconds(), true);
+            assertFalse(delegate.hasAnyPendingRequests());
+        }
+    }
+
+    @Test
+    public void testCheckDisconnectsWithOnClose() throws Exception {
+        try (NetworkClientDelegate delegate = newNetworkClientDelegate(false)) {
+            NetworkClientDelegate.UnsentRequest pendingRequest = newUnsentFindCoordinatorRequest();
+            delegate.add(pendingRequest);
+
+            // Make the mock node unreachable.
+            client.setUnreachable(mockNode(), REQUEST_TIMEOUT_MS);
+
+            // With the default onClose=false the request is still pending after a poll.
+            delegate.poll(0, time.milliseconds());
+            assertTrue(delegate.hasAnyPendingRequests());
+
+            // Now poll in closing mode.
+            delegate.poll(0, time.milliseconds(), true);
+
+            // The unsent request must have been removed from the delegate ...
+            assertFalse(delegate.hasAnyPendingRequests());
+            // ... and its future completed exceptionally with a NetworkException.
+            assertTrue(pendingRequest.future().isDone());
+            TestUtils.assertFutureThrows(NetworkException.class, pendingRequest.future());
+        }
+    }
+
     public NetworkClientDelegate newNetworkClientDelegate(boolean notifyMetadataErrorsViaErrorQueue) {
         return newNetworkClientDelegate(notifyMetadataErrorsViaErrorQueue, mock(AsyncConsumerMetrics.class));
     }

diff --git a/...test/java/org/apache/kafka/clients/consumer/internals/ShareConsumeRequestManagerTest.java b/...test/java/org/apache/kafka/clients/consumer/internals/ShareConsumeRequestManagerTest.java
@@ -402,7 +402,7 @@ public void testAcknowledgeOnClose() {
         // Remaining acknowledgements sent with close().
         Acknowledgements acknowledgements2 = getAcknowledgements(2, AcknowledgeType.ACCEPT, AcknowledgeType.REJECT);
 
-        shareConsumeRequestManager.acknowledgeOnClose(Map.of(tip0, new NodeAcknowledgements(0, acknowledgements2)),
+        CompletableFuture<Void> closeFuture = shareConsumeRequestManager.acknowledgeOnClose(Map.of(tip0, new NodeAcknowledgements(0, acknowledgements2)),
                 calculateDeadlineMs(time.timer(100)));
 
         assertEquals(1, shareConsumeRequestManager.sendAcknowledgements());
@@ -416,6 +416,25 @@ public void testAcknowledgeOnClose() {
         // Verifying that all 3 offsets were acknowledged as part of the final ShareAcknowledge on close.
         assertEquals(mergedAcks.getAcknowledgementsTypeMap(), completedAcknowledgements.get(0).get(tip0).getAcknowledgementsTypeMap());
         assertTrue(shareConsumeRequestManager.hasCompletedFetches());
+
+        // Polling once more to complete the closeFuture.
+        shareConsumeRequestManager.sendFetches();
+        assertTrue(closeFuture.isDone());
+    }
+
+    @Test
+    public void testCloseFutureCompletedWhenMemberIdIsNull() {
+        // A null memberId builds the request manager without assigning it a member ID.
+        buildRequestManager(new MetricConfig(), new ByteArrayDeserializer(), new ByteArrayDeserializer(), null, ShareAcquireMode.BATCH_OPTIMIZED);
+        assignFromSubscribed(Collections.singleton(tp0));
+
+        CompletableFuture<Void> pendingClose = shareConsumeRequestManager.acknowledgeOnClose(
+                Map.of(), calculateDeadlineMs(time.timer(100)));
+        assertFalse(pendingClose.isDone());
+
+        // With no memberId, the following poll is expected to complete the close future.
+        shareConsumeRequestManager.sendFetches();
+        assertTrue(pendingClose.isDone());
     }
 
     @Test
@@ -2466,7 +2485,7 @@ public void testFetchOneNodeAtATimeForRecordLimitMode() {
                 .setErrorCode(Errors.NONE.code()));
         client.prepareResponseFrom(ShareFetchResponse.of(Errors.NONE, 0, partitionData, List.of(), 0), nodeId1);
         partitionData = buildPartitionDataMap(tip0, records, ShareCompletedFetchTest.acquiredRecords(1L, 1), Errors.NONE, Errors.NONE);
-        
+
         client.prepareResponseFrom(ShareFetchResponse.of(Errors.NONE, 0, partitionData, List.of(), 0), nodeId0);
         networkClientDelegate.poll(time.timer(0));
         assertTrue(shareConsumeRequestManager.hasCompletedFetches());
@@ -2731,25 +2750,27 @@ private void buildRequestManager(ShareAcquireMode shareAcquireMode) {
     private <K, V> void buildRequestManager(Deserializer<K> keyDeserializer,
                                             Deserializer<V> valueDeserializer,
                                             ShareAcquireMode shareAcquireMode) {
-        buildRequestManager(new MetricConfig(), keyDeserializer, valueDeserializer, shareAcquireMode);
+        buildRequestManager(new MetricConfig(), keyDeserializer, valueDeserializer, Uuid.randomUuid().toString(), shareAcquireMode);
     }
 
     private <K, V> void buildRequestManager(MetricConfig metricConfig,
                                             Deserializer<K> keyDeserializer,
                                             Deserializer<V> valueDeserializer,
+                                            String memberId,
                                             ShareAcquireMode shareAcquireMode) {
         LogContext logContext = new LogContext();
         SubscriptionState subscriptionState = new SubscriptionState(logContext, AutoOffsetResetStrategy.EARLIEST);
         buildRequestManager(metricConfig, keyDeserializer, valueDeserializer,
-                subscriptionState, logContext, shareAcquireMode);
+                subscriptionState, logContext, memberId, shareAcquireMode);
     }
 
     private <K, V> void buildRequestManager(MetricConfig metricConfig,
                                             Deserializer<K> keyDeserializer,
                                             Deserializer<V> valueDeserializer,
                                             SubscriptionState subscriptionState,
                                             LogContext logContext,
-                                            ShareAcquireMode shareAcquireMode) {
+                                            String memberId,
+                                                                                   ShareAcquireMode shareAcquireMode) {
         buildDependencies(metricConfig, subscriptionState, logContext);
         Deserializers<K, V> deserializers = new Deserializers<>(keyDeserializer, valueDeserializer, metrics);
         int maxWaitMs = 0;
@@ -2781,7 +2802,8 @@ private <K, V> void buildRequestManager(MetricConfig metricConfig,
                 new ShareFetchBuffer(logContext),
                 acknowledgementEventHandler,
                 metricsManager,
-                shareFetchCollector));
+                shareFetchCollector,
+                memberId));
     }
 
     private void buildDependencies(MetricConfig metricConfig,
@@ -2820,11 +2842,14 @@ public TestableShareConsumeRequestManager(LogContext logContext,
                                                   ShareFetchBuffer shareFetchBuffer,
                                                   ShareAcknowledgementEventHandler acknowledgementEventHandler,
                                                   ShareFetchMetricsManager metricsManager,
-                                                  ShareFetchCollector<K, V> fetchCollector) {
+                                                  ShareFetchCollector<K, V> fetchCollector,
+                                                  String memberId) {
             super(time, logContext, groupId, metadata, subscriptions, shareFetchConfig, shareFetchBuffer,
                 acknowledgementEventHandler, metricsManager, retryBackoffMs, 1000);
             this.shareFetchCollector = fetchCollector;
-            onMemberEpochUpdated(Optional.empty(), Uuid.randomUuid().toString());
+            if (memberId != null) {
+                onMemberEpochUpdated(Optional.empty(), memberId);
+            }
         }
 
         private ShareFetch<K, V> collectFetch() {
@@ -2914,7 +2939,7 @@ private List<UnsentRequest> removeUnsentRequestByNode(Node node) {
         }
 
         @Override
-        protected void checkDisconnects(final long currentTimeMs) {
+        protected void checkDisconnects(final long currentTimeMs, boolean onClose) {
             // any disconnects affecting requests that have already been transmitted will be handled
             // by NetworkClient, so we just need to check whether connections for any of the unsent
             // requests have been disconnected; if they have, then we complete the corresponding future