You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pulsar.apache.org by GitBox <gi...@apache.org> on 2018/09/18 21:46:50 UTC

[GitHub] jerrypeng commented on a change in pull request #2605: implement topic routing on a per record basis

jerrypeng commented on a change in pull request #2605: implement topic routing on a per record basis
URL: https://github.com/apache/incubator-pulsar/pull/2605#discussion_r218606215
 
 

 ##########
 File path: pulsar-functions/instance/src/main/java/org/apache/pulsar/functions/sink/PulsarSink.java
 ##########
 @@ -60,140 +62,166 @@
     private final String fqfn;
 
     private interface PulsarSinkProcessor<T> {
-        void initializeOutputProducer(String outputTopic, Schema<T> schema, String fqfn) throws Exception;
 
         TypedMessageBuilder<T> newMessage(Record<T> record) throws Exception;
 
         void sendOutputMessage(TypedMessageBuilder<T> msg, Record<T> record) throws Exception;
 
-        abstract void close() throws Exception;
+        void close() throws Exception;
     }
 
-    private class PulsarSinkAtMostOnceProcessor implements PulsarSinkProcessor<T> {
-        private Producer<T> producer;
+    private abstract class PulsarSinkProcessorBase implements PulsarSinkProcessor<T> {
+        protected Map<String, Producer<T>> publishProducers = new ConcurrentHashMap<>();
+        protected Schema schema;
 
-        @Override
-        public void initializeOutputProducer(String outputTopic, Schema<T> schema, String fqfn) throws Exception {
-            this.producer = AbstractOneOuputTopicProducers.createProducer(
-                    client, pulsarSinkConfig.getTopic(), null, schema, fqfn);
+        protected PulsarSinkProcessorBase(Schema schema) {
+            this.schema = schema;
         }
 
-        @Override
-        public TypedMessageBuilder<T> newMessage(Record<T> record) {
-            return producer.newMessage();
+        public <T> Producer<T> createProducer(PulsarClient client, String topic, String producerName, Schema<T> schema, String fqfn)
+                throws PulsarClientException {
+            ProducerBuilder<T> builder = client.newProducer(schema)
+                    .blockIfQueueFull(true)
+                    .enableBatching(true)
+                    .batchingMaxPublishDelay(1, TimeUnit.MILLISECONDS)
+                    .compressionType(CompressionType.LZ4)
+                    .hashingScheme(HashingScheme.Murmur3_32Hash) //
+                    .messageRoutingMode(MessageRoutingMode.CustomPartition)
+                    .messageRouter(FunctionResultRouter.of())
+                    .topic(topic);
+            if (producerName != null) {
+                builder.producerName(producerName);
+            }
+
+            return builder
+                    .property("application", "pulsarfunction")
+                    .property("fqfn", fqfn).create();
         }
 
-        @Override
-        public void sendOutputMessage(TypedMessageBuilder<T> msg, Record<T> record) throws Exception {
-            msg.sendAsync();
+        protected Producer<T> getProducer(String destinationTopic) {
+            return getProducer(destinationTopic, null, destinationTopic);
         }
 
-        @Override
-        public void close() throws Exception {
-            if (null != producer) {
+        protected Producer<T> getProducer(String producerId, String producerName, String topicName) {
+
+            Producer<T> producer = publishProducers.get(producerId);
+
+            if (producer == null) {
                 try {
-                    producer.close();
+                    Producer<T> newProducer = createProducer(
+                            client,
+                            topicName,
+                            producerName,
+                            schema,
+                            fqfn);
+
+                    Producer<T> existingProducer = publishProducers.putIfAbsent(producerId, newProducer);
+
+                    if (existingProducer != null) {
+                        // The value in the map was not updated after the concurrent put
+                        newProducer.close();
+                        producer = existingProducer;
+                    } else {
+                        producer = newProducer;
+                    }
+
                 } catch (PulsarClientException e) {
-                    log.warn("Fail to close producer for processor {}", pulsarSinkConfig.getTopic(), e);
+                    log.error("Failed to create Producer while doing user publish", e);
+                    throw new RuntimeException(e);
                 }
             }
+            return producer;
         }
-    }
-
-    private class PulsarSinkAtLeastOnceProcessor implements PulsarSinkProcessor<T> {
-        private Producer<T> producer;
 
         @Override
-        public void initializeOutputProducer(String outputTopic, Schema<T> schema, String fqfn) throws Exception {
-            this.producer = AbstractOneOuputTopicProducers.createProducer(
-                    client, pulsarSinkConfig.getTopic(), null, schema, fqfn);
+        public void close() throws Exception {
+            List<CompletableFuture<Void>> closeFutures = new ArrayList<>(publishProducers.size());
+            for (Map.Entry<String, Producer<T>> entry: publishProducers.entrySet()) {
+                String topicId = entry.getKey();
+                Producer<T> producer = entry.getValue();
+                closeFutures.add(producer.closeAsync().exceptionally(throwable -> {
+                    log.warn("Fail to close producer for output topic {}", topicId, throwable);
+                    return null;
+                }));
+            }
+            try {
+                FutureUtils.result(FutureUtils.collect(closeFutures));
+            } catch (Exception e) {
+                log.warn("Fail to close all the producers", e);
+            }
+        }
+    }
+
+    private class PulsarSinkAtMostOnceProcessor extends PulsarSinkProcessorBase {
+        public PulsarSinkAtMostOnceProcessor(Schema schema) {
+            super(schema);
         }
 
         @Override
         public TypedMessageBuilder<T> newMessage(Record<T> record) {
-            return producer.newMessage();
+            if (record.getDestinationTopic().isPresent()) {
+                return getProducer(record.getDestinationTopic().get()).newMessage();
+            }
+            return getProducer(pulsarSinkConfig.getTopic()).newMessage();
         }
 
         @Override
         public void sendOutputMessage(TypedMessageBuilder<T> msg, Record<T> record) throws Exception {
-            msg.sendAsync().thenAccept(messageId -> record.ack());
+            msg.sendAsync();
+        }
+    }
+
+    private class PulsarSinkAtLeastOnceProcessor extends PulsarSinkAtMostOnceProcessor {
+        public PulsarSinkAtLeastOnceProcessor(Schema schema) {
+            super(schema);
         }
 
         @Override
-        public void close() throws Exception {
-            if (null != producer) {
-                try {
-                    producer.close();
-                } catch (PulsarClientException e) {
-                    log.warn("Fail to close producer for processor {}", pulsarSinkConfig.getTopic(), e);
-                }
-            }
+        public void sendOutputMessage(TypedMessageBuilder<T> msg, Record<T> record) throws Exception {
+            msg.sendAsync().thenAccept(messageId -> record.ack());
         }
     }
 
-    private class PulsarSinkEffectivelyOnceProcessor implements PulsarSinkProcessor<T>, ConsumerEventListener {
+    private class PulsarSinkEffectivelyOnceProcessor extends PulsarSinkProcessorBase {
 
-        @Getter(AccessLevel.PACKAGE)
-        protected Producers<T> outputProducer;
 
-        @Override
-        public void initializeOutputProducer(String outputTopic, Schema<T> schema, String fqfn) throws Exception {
-            outputProducer = new MultiConsumersOneOuputTopicProducers<T>(client, outputTopic, schema, fqfn);
-            outputProducer.initialize();
+        public PulsarSinkEffectivelyOnceProcessor(Schema schema) {
+            super(schema);
         }
 
         @Override
         public TypedMessageBuilder<T> newMessage(Record<T> record) throws Exception {
-            // Route message to appropriate partition producer
-            return outputProducer.getProducer(record.getPartitionId().get()).newMessage();
+            if (!record.getPartitionId().isPresent()) {
+                throw new RuntimeException("PartitionId needs to be specified for every record while in Effectively-once mode");
+            }
+            if (record.getDestinationTopic().isPresent()) {
 
 Review comment:
   Oh I think we can just do this:
   
   ```
   record.getDestinationTopic().orElse(pulsarSinkConfig.getTopic())
   ```

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services