You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pulsar.apache.org by GitBox <gi...@apache.org> on 2018/04/09 19:39:27 UTC

[GitHub] sijie closed pull request #1525: Compaction considers messages with empty payload as deleting the key

sijie closed pull request #1525: Compaction considers messages with empty payload as deleting the key
URL: https://github.com/apache/incubator-pulsar/pull/1525
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as GitHub does not otherwise display it once the request is merged):

diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/client/impl/RawBatchConverter.java b/pulsar-broker/src/main/java/org/apache/pulsar/client/impl/RawBatchConverter.java
index 4e628bc5f8..9ee31acf42 100644
--- a/pulsar-broker/src/main/java/org/apache/pulsar/client/impl/RawBatchConverter.java
+++ b/pulsar-broker/src/main/java/org/apache/pulsar/client/impl/RawBatchConverter.java
@@ -113,7 +113,8 @@ public static boolean isBatch(RawMessage msg) {
                     messagesRetained++;
                     Commands.serializeSingleMessageInBatchWithPayload(singleMessageMetadataBuilder,
                                                                       singleMessagePayload, batchBuffer);
-                } else if (filter.test(singleMessageMetadataBuilder.getPartitionKey(), id)) {
+                } else if (filter.test(singleMessageMetadataBuilder.getPartitionKey(), id)
+                           && singleMessagePayload.readableBytes() > 0) {
                     messagesRetained++;
                     Commands.serializeSingleMessageInBatchWithPayload(singleMessageMetadataBuilder,
                                                                       singleMessagePayload, batchBuffer);
diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/compaction/TwoPhaseCompactor.java b/pulsar-broker/src/main/java/org/apache/pulsar/compaction/TwoPhaseCompactor.java
index fbad47ea30..2eaa8d0c9c 100644
--- a/pulsar-broker/src/main/java/org/apache/pulsar/compaction/TwoPhaseCompactor.java
+++ b/pulsar-broker/src/main/java/org/apache/pulsar/compaction/TwoPhaseCompactor.java
@@ -38,6 +38,7 @@
 import org.apache.bookkeeper.client.BKException;
 import org.apache.bookkeeper.client.BookKeeper;
 import org.apache.bookkeeper.client.LedgerHandle;
+import org.apache.commons.lang3.tuple.Pair;
 
 import org.apache.pulsar.broker.ServiceConfiguration;
 import org.apache.pulsar.common.api.Commands;
@@ -122,9 +123,9 @@ private void phaseOneLoop(RawReader reader,
                                          id, ioe);
                             }
                         } else {
-                            String key = extractKey(m);
-                            if (key != null) {
-                                latestForKey.put(key, id);
+                            Pair<String,Integer> keyAndSize = extractKeyAndSize(m);
+                            if (keyAndSize != null) {
+                                latestForKey.put(keyAndSize.getLeft(), id);
                             }
                         }
 
@@ -214,10 +215,11 @@ private void phaseTwoLoop(RawReader reader, MessageId to, Map<String, MessageId>
                             messageToAdd = Optional.of(m);
                         }
                     } else {
-                        String key = extractKey(m);
-                        if (key == null) { // pass through messages without a key
+                        Pair<String,Integer> keyAndSize = extractKeyAndSize(m);
+                        if (keyAndSize == null) { // pass through messages without a key
                             messageToAdd = Optional.of(m);
-                        } else if (latestForKey.get(key).equals(id)) {
+                        } else if (latestForKey.get(keyAndSize.getLeft()).equals(id)
+                                   && keyAndSize.getRight() > 0) {
                             messageToAdd = Optional.of(m);
                         } else {
                             m.close();
@@ -307,11 +309,11 @@ private void phaseTwoLoop(RawReader reader, MessageId to, Map<String, MessageId>
         return bkf;
     }
 
-    private static String extractKey(RawMessage m) {
+    private static Pair<String,Integer> extractKeyAndSize(RawMessage m) {
         ByteBuf headersAndPayload = m.getHeadersAndPayload();
         MessageMetadata msgMetadata = Commands.parseMessageMetadata(headersAndPayload);
         if (msgMetadata.hasPartitionKey()) {
-            return msgMetadata.getPartitionKey();
+            return Pair.of(msgMetadata.getPartitionKey(), headersAndPayload.readableBytes());
         } else {
             return null;
         }
diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/compaction/CompactionTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/compaction/CompactionTest.java
index a0f0f972e4..22e74f21a9 100644
--- a/pulsar-broker/src/test/java/org/apache/pulsar/compaction/CompactionTest.java
+++ b/pulsar-broker/src/test/java/org/apache/pulsar/compaction/CompactionTest.java
@@ -512,4 +512,72 @@ public void testKeyLessMessagesPassThrough() throws Exception {
         }
     }
 
+
+    @Test
+    public void testEmptyPayloadDeletes() throws Exception {
+        String topic = "persistent://my-property/use/my-ns/my-topic1";
+
+        // subscribe before sending anything, so that we get all messages
+        pulsarClient.newConsumer().topic(topic).subscriptionName("sub1")
+            .readCompacted(true).subscribe().close();
+
+        try (Producer producerNormal = pulsarClient.newProducer().topic(topic).create();
+             Producer producerBatch = pulsarClient.newProducer().topic(topic).maxPendingMessages(3)
+                .enableBatching(true).batchingMaxMessages(3)
+                .batchingMaxPublishDelay(1, TimeUnit.HOURS).create()) {
+
+            // key0 persists through it all
+            producerNormal.sendAsync(MessageBuilder.create()
+                                     .setKey("key0")
+                                     .setContent("my-message-0".getBytes()).build()).get();
+
+            // key1 is added but then deleted
+            producerNormal.sendAsync(MessageBuilder.create()
+                                     .setKey("key1")
+                                     .setContent("my-message-1".getBytes()).build()).get();
+
+            producerNormal.sendAsync(MessageBuilder.create()
+                                     .setKey("key1").build()).get();
+
+            // key2 is added but deleted in same batch
+            producerBatch.sendAsync(MessageBuilder.create()
+                                    .setKey("key2")
+                                    .setContent("my-message-2".getBytes()).build());
+            producerBatch.sendAsync(MessageBuilder.create()
+                                    .setKey("key3")
+                                    .setContent("my-message-3".getBytes()).build());
+            producerBatch.sendAsync(MessageBuilder.create()
+                                    .setKey("key2").build()).get();
+
+            // key3 is added in previous batch, deleted in this batch
+            producerBatch.sendAsync(MessageBuilder.create()
+                                    .setKey("key3").build());
+            producerBatch.sendAsync(MessageBuilder.create()
+                                    .setKey("key4")
+                                    .setContent("my-message-3".getBytes()).build());
+            producerBatch.sendAsync(MessageBuilder.create()
+                                    .setKey("key4").build()).get();
+
+            // key4 is added, deleted, then resurrected
+            producerNormal.sendAsync(MessageBuilder.create()
+                                     .setKey("key4")
+                                     .setContent("my-message-4".getBytes()).build()).get();
+        }
+
+        // compact the topic
+        Compactor compactor = new TwoPhaseCompactor(conf, pulsarClient, bk, compactionScheduler);
+        compactor.compact(topic).get();
+
+        try (Consumer consumer = pulsarClient.newConsumer().topic(topic)
+                .subscriptionName("sub1").readCompacted(true).subscribe()){
+            Message message1 = consumer.receive();
+            Assert.assertEquals(message1.getKey(), "key0");
+            Assert.assertEquals(new String(message1.getData()), "my-message-0");
+
+            Message message2 = consumer.receive();
+            Assert.assertEquals(message2.getKey(), "key4");
+            Assert.assertEquals(new String(message2.getData()), "my-message-4");
+        }
+    }
+
 }
diff --git a/pulsar-client/src/main/java/org/apache/pulsar/client/impl/MessageBuilderImpl.java b/pulsar-client/src/main/java/org/apache/pulsar/client/impl/MessageBuilderImpl.java
index 7714b993c2..056064a6ca 100644
--- a/pulsar-client/src/main/java/org/apache/pulsar/client/impl/MessageBuilderImpl.java
+++ b/pulsar-client/src/main/java/org/apache/pulsar/client/impl/MessageBuilderImpl.java
@@ -33,10 +33,10 @@
 import com.google.common.base.Preconditions;
 
 public class MessageBuilderImpl<T> implements MessageBuilder<T> {
-
+    private static final ByteBuffer EMPTY_CONTENT = ByteBuffer.allocate(0);
     private final MessageMetadata.Builder msgMetadataBuilder = MessageMetadata.newBuilder();
     private final Schema<T> schema;
-    private ByteBuffer content;
+    private ByteBuffer content = EMPTY_CONTENT;
 
     public MessageBuilderImpl(Schema<T> schema) {
         this.schema = schema;


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services