You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by jl...@apache.org on 2019/10/02 19:33:34 UTC

[incubator-pinot] 01/01: Modify test to compare the time spent of fastHLL and distinctCountHLL

This is an automated email from the ASF dual-hosted git repository.

jlli pushed a commit to branch check-hll
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 47455e8e48796c5ed85021db32665c094498d58f
Author: jackjlli <jl...@linkedin.com>
AuthorDate: Wed Oct 2 12:33:04 2019 -0700

    Modify test to compare the time spent of fastHLL and distinctCountHLL
---
 .../apache/pinot/queries/FastHllQueriesTest.java   | 59 ++++++++++++++++++++--
 1 file changed, 56 insertions(+), 3 deletions(-)

diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/FastHllQueriesTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/FastHllQueriesTest.java
index 218ed07..7371b49 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/FastHllQueriesTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/FastHllQueriesTest.java
@@ -21,17 +21,23 @@ package org.apache.pinot.queries;
 import com.clearspring.analytics.stream.cardinality.HyperLogLog;
 import java.io.File;
 import java.net.URL;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.List;
 import java.util.concurrent.TimeUnit;
 import org.apache.commons.io.FileUtils;
 import org.apache.pinot.common.data.FieldSpec;
+import org.apache.pinot.common.data.MetricFieldSpec;
 import org.apache.pinot.common.data.Schema;
 import org.apache.pinot.common.response.broker.BrokerResponseNative;
 import org.apache.pinot.common.segment.ReadMode;
+import org.apache.pinot.core.data.GenericRow;
 import org.apache.pinot.core.data.manager.SegmentDataManager;
 import org.apache.pinot.core.data.manager.offline.ImmutableSegmentDataManager;
+import org.apache.pinot.core.data.readers.AvroRecordReader;
+import org.apache.pinot.core.data.readers.GenericRowRecordReader;
+import org.apache.pinot.core.data.readers.RecordReader;
 import org.apache.pinot.core.indexsegment.IndexSegment;
 import org.apache.pinot.core.indexsegment.generator.SegmentGeneratorConfig;
 import org.apache.pinot.core.indexsegment.immutable.ImmutableSegment;
@@ -42,8 +48,8 @@ import org.apache.pinot.core.operator.query.AggregationGroupByOperator;
 import org.apache.pinot.core.operator.query.AggregationOperator;
 import org.apache.pinot.core.query.aggregation.groupby.AggregationGroupByResult;
 import org.apache.pinot.core.query.aggregation.groupby.GroupKeyGenerator;
-import org.apache.pinot.core.segment.creator.SegmentIndexCreationDriver;
 import org.apache.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl;
+import org.apache.pinot.core.startree.hll.HllUtil;
 import org.apache.pinot.startree.hll.HllConfig;
 import org.testng.Assert;
 import org.testng.annotations.Test;
@@ -84,6 +90,9 @@ public class FastHllQueriesTest extends BaseQueriesTest {
       " WHERE column1 > 100000000" + " AND column3 BETWEEN 20000000 AND 1000000000" + " AND column5 = 'gFuH'"
           + " AND (column6 < 500000000 OR column11 NOT IN ('t', 'P'))" + " AND daysSinceEpoch = 126164076";
 
+  private static final String BASE_DISTINCT_COUNT_QUERY =
+      "SELECT DISTINCTCOUNTHLL(newColumn17HLL), DISTINCTCOUNTHLL(newColumn18HLL) FROM testTable";
+
   private IndexSegment _indexSegment;
   // Contains 2 identical index segments
   private List<SegmentDataManager> _segmentDataManagers;
@@ -185,18 +194,41 @@ public class FastHllQueriesTest extends BaseQueriesTest {
     Assert.assertEquals(((HyperLogLog) aggregationGroupByResult.getResultForKey(firstGroupKey, 1)).cardinality(), 691L);
 
     // Test inter segments base query
+    long startTime = System.currentTimeMillis();
     BrokerResponseNative brokerResponse = getBrokerResponseForQuery(BASE_QUERY);
+    long timeForFastHLLQuery = System.currentTimeMillis() - startTime;
     QueriesTestUtils
         .testInterSegmentAggregationResult(brokerResponse, 120000L, 0L, 240000L, 120000L, new String[]{"21", "1762"});
+
+    startTime = System.currentTimeMillis();
+    getBrokerResponseForQuery(BASE_DISTINCT_COUNT_QUERY);
+    long timeForDistinctCountHLLQuery = System.currentTimeMillis() - startTime;
+    Assert.assertTrue(timeForFastHLLQuery > timeForDistinctCountHLLQuery);
+
     // Test inter segments query with filter
+    startTime = System.currentTimeMillis();
     brokerResponse = getBrokerResponseForQueryWithFilter(BASE_QUERY);
+    timeForFastHLLQuery = System.currentTimeMillis() - startTime;
     QueriesTestUtils.testInterSegmentAggregationResult(brokerResponse, 24516L, 336536L, 49032L, 120000L,
         new String[]{"17", "1197"});
+
+    startTime = System.currentTimeMillis();
+    getBrokerResponseForQueryWithFilter(BASE_DISTINCT_COUNT_QUERY);
+    timeForDistinctCountHLLQuery = System.currentTimeMillis() - startTime;
+    Assert.assertTrue(timeForFastHLLQuery > timeForDistinctCountHLLQuery);
+
     // Test inter segments query with group-by
+    startTime = System.currentTimeMillis();
     brokerResponse = getBrokerResponseForQuery(BASE_QUERY + GROUP_BY);
+    timeForFastHLLQuery = System.currentTimeMillis() - startTime;
     QueriesTestUtils
         .testInterSegmentAggregationResult(brokerResponse, 120000L, 0L, 360000L, 120000L, new String[]{"21", "1762"});
 
+    startTime = System.currentTimeMillis();
+    getBrokerResponseForQuery(BASE_DISTINCT_COUNT_QUERY + GROUP_BY);
+    timeForDistinctCountHLLQuery = System.currentTimeMillis() - startTime;
+    Assert.assertTrue(timeForFastHLLQuery > timeForDistinctCountHLLQuery);
+
     deleteSegment();
   }
 
@@ -253,8 +285,29 @@ public class FastHllQueriesTest extends BaseQueriesTest {
     }
 
     // Build the index segment
-    SegmentIndexCreationDriver driver = new SegmentIndexCreationDriverImpl();
-    driver.init(segmentGeneratorConfig);
+    SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
+    RecordReader recordReader = new AvroRecordReader(new File(filePath), segmentGeneratorConfig.getSchema());
+
+    List<GenericRow> segmentRecords = new ArrayList<>();
+    while (recordReader.hasNext()) {
+      GenericRow segmentRecord = recordReader.next();
+      Object column17Hll = segmentRecord.getValue("column17_HLL");
+      HyperLogLog logLog = HllUtil.convertStringToHll((String) column17Hll);
+      segmentRecord.putField("newColumn17HLL", logLog.getBytes());
+
+      Object column18Hll = segmentRecord.getValue("column18_HLL");
+      HyperLogLog logLog2 = HllUtil.convertStringToHll((String) column18Hll);
+      segmentRecord.putField("newColumn18HLL", logLog2.getBytes());
+
+      segmentRecords.add(segmentRecord);
+    }
+
+    FieldSpec column17Hll = new MetricFieldSpec("newColumn17HLL", FieldSpec.DataType.BYTES);
+    segmentGeneratorConfig.getSchema().addField(column17Hll);
+    FieldSpec column18Hll = new MetricFieldSpec("newColumn18HLL", FieldSpec.DataType.BYTES);
+    segmentGeneratorConfig.getSchema().addField(column18Hll);
+
+    driver.init(segmentGeneratorConfig, new GenericRowRecordReader(segmentRecords, segmentGeneratorConfig.getSchema()));
     driver.build();
 
     ImmutableSegment immutableSegment = ImmutableSegmentLoader.load(new File(INDEX_DIR, SEGMENT_NAME), ReadMode.heap);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org