Posted to commits@pinot.apache.org by GitBox <gi...@apache.org> on 2021/01/12 00:27:26 UTC

[GitHub] [incubator-pinot] mcvsubbu commented on a change in pull request #6419: Compatibility test: streamOp

mcvsubbu commented on a change in pull request #6419:
URL: https://github.com/apache/incubator-pinot/pull/6419#discussion_r555378458



##########
File path: compatibility-verifier/compCheck.sh
##########
@@ -202,14 +206,14 @@ startService server "$newTargetDir"
 #$COMPAT_TESTER post-server-upgrade.yaml; if [ $? -ne 0 ]; then exit 1; fi
 
 # Upgrade completed, now do a rollback
-stopService controller "$newTargetDir"
-startService controller "$oldTargetDir"
+stopService server "$newTargetDir"

Review comment:
       thanks for moving this.

##########
File path: compatibility-verifier/compCheck.sh
##########
@@ -72,29 +72,31 @@ function checkoutAndBuild() {
   popd || exit 1
 }
 
-# Given a component and directory, start that version of the specific component 
+# Given a component and directory, start that version of the specific component
 function startService() {
   serviceName=$1
   dirName=$2
   # Upon start, save the pid of the process for a component into a file in /tmp/{component}.pid, which is then used to stop it
   pushd "$dirName"/pinot-tools/target/pinot-tools-pkg/bin  || exit 1
   if [ "$serviceName" = "zookeeper" ]; then
     sh -c 'echo $$ > $0/zookeeper.pid; exec ./pinot-admin.sh StartZookeeper' "${dirName}" &
-  elif [ "$serviceName" = "controller" ]; then 
+  elif [ "$serviceName" = "controller" ]; then
     sh -c 'echo $$ > $0/controller.pid; exec ./pinot-admin.sh StartController' "${dirName}" &
   elif [ "$serviceName" = "broker" ]; then
     sh -c 'echo $$ > $0/broker.pid; exec ./pinot-admin.sh StartBroker' "${dirName}" &
   elif [ "$serviceName" = "server" ]; then
     sh -c 'echo $$ > $0/server.pid; exec ./pinot-admin.sh StartServer' "${dirName}" &
-  fi 
+  elif [ "$serviceName" = "kafka" ]; then
+    sh -c 'echo $$ > $0/kafka.pid; exec ./pinot-admin.sh StartKafka -zkAddress localhost:2181/kafka' "${dirName}" &

Review comment:
       Does it take a default without the `-zkAddress` argument? If so, we should use that.

##########
File path: pinot-integration-tests/src/test/java/org/apache/pinot/compat/tests/AvroSchemaKafkaAvroMessageDecoder.java
##########
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.compat.tests;
+
+import java.io.File;
+import java.util.Map;
+import java.util.Set;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.io.DatumReader;
+import org.apache.avro.io.DecoderFactory;
+import org.apache.pinot.plugin.inputformat.avro.AvroRecordExtractor;
+import org.apache.pinot.spi.data.readers.GenericRow;
+import org.apache.pinot.spi.data.readers.RecordExtractor;
+import org.apache.pinot.spi.stream.StreamMessageDecoder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+public class AvroSchemaKafkaAvroMessageDecoder implements StreamMessageDecoder<byte[]> {

Review comment:
       Can we pull this from ClusterTest?

##########
File path: pinot-integration-tests/src/test/java/org/apache/pinot/compat/tests/AvroRowExtractor.java
##########
@@ -0,0 +1,145 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.compat.tests;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.List;
+import java.util.Map;
+import javax.annotation.Nullable;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.pinot.spi.data.readers.GenericRow;
+
+
+/**
+ * Extract avro record from generic row
+ */
+public class AvroRowExtractor {

Review comment:
       Can we use AvroRecordExtractor? We could add the avro package as a dependency here.
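   Roughly what I have in mind (just a sketch of the record-to-row direction; it assumes AvroRecordExtractor's init/extract signatures from the pinot-avro plugin, and that a null field set means "extract all columns"):

```java
// Sketch only: reuse AvroRecordExtractor (GenericRecord -> GenericRow) instead of a
// custom extractor. Assumes the RecordExtractor init/extract signatures and that a
// null field set means "extract all columns".
import org.apache.avro.generic.GenericRecord;
import org.apache.pinot.plugin.inputformat.avro.AvroRecordExtractor;
import org.apache.pinot.spi.data.readers.GenericRow;

public class AvroRowExtractorSketch {
  public static GenericRow toGenericRow(GenericRecord avroRecord) {
    AvroRecordExtractor extractor = new AvroRecordExtractor();
    extractor.init(null, null); // null fields => extract all columns (assumption)
    return extractor.extract(avroRecord, new GenericRow());
  }
}
```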

##########
File path: pinot-integration-tests/src/test/java/org/apache/pinot/compat/tests/StreamOp.java
##########
@@ -85,4 +116,118 @@ boolean runOp() {
   public void setTableConfigFileNames(List<String> tableConfigFileNames) {
     _tableConfigFileNames = tableConfigFileNames;
   }
+
+  public String getRecordReaderConfigFileName() {
+    return _recordReaderConfigFileName;
+  }
+
+  public void setRecordReaderConfigFileName(String recordReaderConfigFileName) {
+    _recordReaderConfigFileName = recordReaderConfigFileName;
+  }
+
+  @Override
+  boolean runOp() {
+    try {
+      File csvFile = new File(_inputDataFileName);
+      Map<String, String> streamConfigMap = JsonUtils.fileToObject(new File(_streamConfigFileName), HashMap.class);
+      final Map<String, Object> config = new HashMap<>();
+      config.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:" + ClusterDescriptor.KAFKA_PORT);
+      config.put(AdminClientConfig.CLIENT_ID_CONFIG, "Kafka2AdminClient-" + UUID.randomUUID().toString());
+      config.put(AdminClientConfig.REQUEST_TIMEOUT_MS_CONFIG, 15000);
+      AdminClient adminClient = KafkaAdminClient.create(config);
+
+      // create kafka topic
+      String topicName = streamConfigMap.get("stream.kafka.topic.name");
+      int partitions = Integer.parseInt(streamConfigMap.get("stream.kafka.numPartitions"));
+      String partitionColumn = streamConfigMap.get("stream.kafka.partitionColumn");
+      NewTopic newTopic = new NewTopic(topicName, partitions, (short) 1);

Review comment:
       We should create the topic if it is not already there.
   
   Alternatively, create the topic in the shell script. That may be better: we can create as many topics as needed when we start Kafka. Hard-coding the number of partitions is OK if we are doing it this way.
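
   For the first option, something like this (untested sketch; it reuses the adminClient, topicName and partitions variables from the diff above and needs java.util.Set and java.util.Collections imports):

```java
// Sketch: create the topic only if it does not already exist. Uses the adminClient,
// topicName and partitions variables from the surrounding code.
Set<String> existingTopics = adminClient.listTopics().names().get();
if (!existingTopics.contains(topicName)) {
  NewTopic newTopic = new NewTopic(topicName, partitions, (short) 1);
  adminClient.createTopics(Collections.singletonList(newTopic)).all().get();
  LOGGER.info("Created Kafka topic: {}", topicName);
} else {
  LOGGER.info("Kafka topic {} already exists, skipping creation", topicName);
}
```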

##########
File path: pinot-integration-tests/src/test/java/org/apache/pinot/compat/tests/StreamOp.java
##########
@@ -85,4 +116,118 @@ boolean runOp() {
   public void setTableConfigFileNames(List<String> tableConfigFileNames) {
     _tableConfigFileNames = tableConfigFileNames;
   }
+
+  public String getRecordReaderConfigFileName() {
+    return _recordReaderConfigFileName;
+  }
+
+  public void setRecordReaderConfigFileName(String recordReaderConfigFileName) {
+    _recordReaderConfigFileName = recordReaderConfigFileName;
+  }
+
+  @Override
+  boolean runOp() {
+    try {
+      File csvFile = new File(_inputDataFileName);
+      Map<String, String> streamConfigMap = JsonUtils.fileToObject(new File(_streamConfigFileName), HashMap.class);
+      final Map<String, Object> config = new HashMap<>();
+      config.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:" + ClusterDescriptor.KAFKA_PORT);
+      config.put(AdminClientConfig.CLIENT_ID_CONFIG, "Kafka2AdminClient-" + UUID.randomUUID().toString());
+      config.put(AdminClientConfig.REQUEST_TIMEOUT_MS_CONFIG, 15000);
+      AdminClient adminClient = KafkaAdminClient.create(config);
+
+      // create kafka topic
+      String topicName = streamConfigMap.get("stream.kafka.topic.name");
+      int partitions = Integer.parseInt(streamConfigMap.get("stream.kafka.numPartitions"));
+      String partitionColumn = streamConfigMap.get("stream.kafka.partitionColumn");
+      NewTopic newTopic = new NewTopic(topicName, partitions, (short) 1);
+      CreateTopicsResult createTopicsResult = adminClient.createTopics(Arrays.asList(newTopic));
+      try {
+        createTopicsResult.all().get();
+      } catch (InterruptedException | ExecutionException e) {
+        LOGGER.warn("Failed to create Kafka topic: {}, Exception: {}", newTopic.toString(), e);
+      }
+
+      List<Long> existingTotalDocs = new ArrayList<>();
+      List<String> tableNames = new ArrayList<>();
+
+      for (String tableConfigFileName : _tableConfigFileNames) {

Review comment:
       Only one table config name will be given, not an array.
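   i.e., something like this (sketch; `_tableConfigFileName` is a hypothetical single-value field that would replace the list):

```java
// Sketch: read a single table config instead of looping over a list.
// _tableConfigFileName is a hypothetical String field replacing _tableConfigFileNames.
TableConfig tableConfig = JsonUtils.fileToObject(new File(_tableConfigFileName), TableConfig.class);
String tableName = tableConfig.getTableName();
long existingTotalDocs = fetchExistingTotalDocs(tableName);
```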

##########
File path: pinot-integration-tests/src/test/java/org/apache/pinot/compat/tests/StreamOp.java
##########
@@ -85,4 +116,118 @@ boolean runOp() {
   public void setTableConfigFileNames(List<String> tableConfigFileNames) {
     _tableConfigFileNames = tableConfigFileNames;
   }
+
+  public String getRecordReaderConfigFileName() {
+    return _recordReaderConfigFileName;
+  }
+
+  public void setRecordReaderConfigFileName(String recordReaderConfigFileName) {
+    _recordReaderConfigFileName = recordReaderConfigFileName;
+  }
+
+  @Override
+  boolean runOp() {
+    try {
+      File csvFile = new File(_inputDataFileName);
+      Map<String, String> streamConfigMap = JsonUtils.fileToObject(new File(_streamConfigFileName), HashMap.class);

Review comment:
       Better to read this as a Properties class.
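   Sketch of what I mean (java.util.Properties; this assumes the stream config file is, or is changed to be, in key=value format rather than JSON):

```java
// Sketch: read the stream config with java.util.Properties instead of a JSON map.
// Assumes the file is in key=value format; needs java.util.Properties and
// java.io.FileInputStream imports.
Properties streamProps = new Properties();
try (FileInputStream in = new FileInputStream(_streamConfigFileName)) {
  streamProps.load(in);
}
String topicName = streamProps.getProperty("stream.kafka.topic.name");
int partitions = Integer.parseInt(streamProps.getProperty("stream.kafka.numPartitions"));
```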

##########
File path: pinot-integration-tests/src/test/java/org/apache/pinot/compat/tests/StreamOp.java
##########
@@ -85,4 +116,118 @@ boolean runOp() {
   public void setTableConfigFileNames(List<String> tableConfigFileNames) {
     _tableConfigFileNames = tableConfigFileNames;
   }
+
+  public String getRecordReaderConfigFileName() {
+    return _recordReaderConfigFileName;
+  }
+
+  public void setRecordReaderConfigFileName(String recordReaderConfigFileName) {
+    _recordReaderConfigFileName = recordReaderConfigFileName;
+  }
+
+  @Override
+  boolean runOp() {
+    try {
+      File csvFile = new File(_inputDataFileName);
+      Map<String, String> streamConfigMap = JsonUtils.fileToObject(new File(_streamConfigFileName), HashMap.class);
+      final Map<String, Object> config = new HashMap<>();
+      config.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:" + ClusterDescriptor.KAFKA_PORT);
+      config.put(AdminClientConfig.CLIENT_ID_CONFIG, "Kafka2AdminClient-" + UUID.randomUUID().toString());
+      config.put(AdminClientConfig.REQUEST_TIMEOUT_MS_CONFIG, 15000);
+      AdminClient adminClient = KafkaAdminClient.create(config);
+
+      // create kafka topic
+      String topicName = streamConfigMap.get("stream.kafka.topic.name");
+      int partitions = Integer.parseInt(streamConfigMap.get("stream.kafka.numPartitions"));
+      String partitionColumn = streamConfigMap.get("stream.kafka.partitionColumn");
+      NewTopic newTopic = new NewTopic(topicName, partitions, (short) 1);
+      CreateTopicsResult createTopicsResult = adminClient.createTopics(Arrays.asList(newTopic));
+      try {
+        createTopicsResult.all().get();
+      } catch (InterruptedException | ExecutionException e) {
+        LOGGER.warn("Failed to create Kafka topic: {}, Exception: {}", newTopic.toString(), e);
+      }
+
+      List<Long> existingTotalDocs = new ArrayList<>();
+      List<String> tableNames = new ArrayList<>();
+
+      for (String tableConfigFileName : _tableConfigFileNames) {
+        // get table config
+        TableConfig tableConfig = JsonUtils.fileToObject(new File(tableConfigFileName), TableConfig.class);
+
+        // get original rows
+        String tableName = tableConfig.getTableName();
+        tableNames.add(tableName);
+        existingTotalDocs.add(fetchExistingTotalDocs(tableConfig.getTableName()));
+      }
+
+      // push avro file to kafka
+      Schema avroSchema = StreamOpUtils.getAvroSchema(new File(_avroSchemaFileName));
+      StreamOpUtils.pushCsvIntoKafka(
+          csvFile,
+          avroSchema,
+          null,
+          _numRows,
+          getCSVRecordReaderConfig(),
+          "localhost:" + KafkaStarterUtils.DEFAULT_KAFKA_PORT,
+          topicName,
+          10000,
+          null,
+          partitionColumn);
+
+      for (int i = 0; i < tableNames.size(); i++) {
+        // verify number of rows increases as expected
+        String tableName = tableNames.get(i);
+        long targetTotalDocs = existingTotalDocs.get(i) + _numRows;
+        waitForDocsLoaded(tableName, targetTotalDocs, 60_000L);
+        LOGGER.info("Verified {} new rows in table: {}", _numRows, tableName);
+      }
+    } catch (Exception e) {
+      LOGGER.error("Failed to ingest stream data", e);
+      return false;
+    }
+    return true;
+  }
+
+  private RecordReaderConfig getCSVRecordReaderConfig() throws IOException {
+    CSVRecordReaderConfig recordReaderConfig = JsonUtils.fileToObject(new File(_recordReaderConfigFileName), CSVRecordReaderConfig.class);
+    return recordReaderConfig;
+  }
+
+  private long fetchExistingTotalDocs(String tableName) throws Exception  {
+    String query = "SELECT count(*) FROM " + tableName;
+    JsonNode response = ClusterTest.postQuery(query, ClusterDescriptor.BROKER_URL, false, "sql");
+    if (response == null) {
+      String errorMsg = String.format("Failed to query Table: %s", tableName);
+      LOGGER.error(errorMsg);
+      throw new RuntimeException(errorMsg);
+    }
+    if (response.has("hasPartialResults") && response.get("hasPartialResults").asBoolean()) {

Review comment:
       `hasPartialResults`, `totalDocs`, etc. should be defined somewhere. Can you check the class `V1Constants`?
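   If they are not already in `V1Constants`, even a small constants holder would help (sketch; the names are illustrative):

```java
// Sketch: illustrative constants for the broker response keys, in case they are
// not already available in V1Constants or a similar class.
public static final String HAS_PARTIAL_RESULTS_KEY = "hasPartialResults";
public static final String TOTAL_DOCS_KEY = "totalDocs";

// usage in fetchExistingTotalDocs / waitForDocsLoaded:
if (response.has(HAS_PARTIAL_RESULTS_KEY) && response.get(HAS_PARTIAL_RESULTS_KEY).asBoolean()) {
  // handle partial results
}
```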



