You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by jl...@apache.org on 2019/10/02 19:33:33 UTC

[incubator-pinot] branch check-hll created (now 47455e8)

This is an automated email from the ASF dual-hosted git repository.

jlli pushed a change to branch check-hll
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git.


      at 47455e8  Modify test to compare the time spent of fastHLL and distinctCountHLL

This branch includes the following new commits:

     new 47455e8  Modify test to compare the time spent of fastHLL and distinctCountHLL

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[incubator-pinot] 01/01: Modify test to compare the time spent of fastHLL and distinctCountHLL

Posted by jl...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jlli pushed a commit to branch check-hll
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 47455e8e48796c5ed85021db32665c094498d58f
Author: jackjlli <jl...@linkedin.com>
AuthorDate: Wed Oct 2 12:33:04 2019 -0700

    Modify test to compare the time spent of fastHLL and distinctCountHLL
---
 .../apache/pinot/queries/FastHllQueriesTest.java   | 59 ++++++++++++++++++++--
 1 file changed, 56 insertions(+), 3 deletions(-)

diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/FastHllQueriesTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/FastHllQueriesTest.java
index 218ed07..7371b49 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/FastHllQueriesTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/FastHllQueriesTest.java
@@ -21,17 +21,23 @@ package org.apache.pinot.queries;
 import com.clearspring.analytics.stream.cardinality.HyperLogLog;
 import java.io.File;
 import java.net.URL;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.List;
 import java.util.concurrent.TimeUnit;
 import org.apache.commons.io.FileUtils;
 import org.apache.pinot.common.data.FieldSpec;
+import org.apache.pinot.common.data.MetricFieldSpec;
 import org.apache.pinot.common.data.Schema;
 import org.apache.pinot.common.response.broker.BrokerResponseNative;
 import org.apache.pinot.common.segment.ReadMode;
+import org.apache.pinot.core.data.GenericRow;
 import org.apache.pinot.core.data.manager.SegmentDataManager;
 import org.apache.pinot.core.data.manager.offline.ImmutableSegmentDataManager;
+import org.apache.pinot.core.data.readers.AvroRecordReader;
+import org.apache.pinot.core.data.readers.GenericRowRecordReader;
+import org.apache.pinot.core.data.readers.RecordReader;
 import org.apache.pinot.core.indexsegment.IndexSegment;
 import org.apache.pinot.core.indexsegment.generator.SegmentGeneratorConfig;
 import org.apache.pinot.core.indexsegment.immutable.ImmutableSegment;
@@ -42,8 +48,8 @@ import org.apache.pinot.core.operator.query.AggregationGroupByOperator;
 import org.apache.pinot.core.operator.query.AggregationOperator;
 import org.apache.pinot.core.query.aggregation.groupby.AggregationGroupByResult;
 import org.apache.pinot.core.query.aggregation.groupby.GroupKeyGenerator;
-import org.apache.pinot.core.segment.creator.SegmentIndexCreationDriver;
 import org.apache.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl;
+import org.apache.pinot.core.startree.hll.HllUtil;
 import org.apache.pinot.startree.hll.HllConfig;
 import org.testng.Assert;
 import org.testng.annotations.Test;
@@ -84,6 +90,9 @@ public class FastHllQueriesTest extends BaseQueriesTest {
       " WHERE column1 > 100000000" + " AND column3 BETWEEN 20000000 AND 1000000000" + " AND column5 = 'gFuH'"
           + " AND (column6 < 500000000 OR column11 NOT IN ('t', 'P'))" + " AND daysSinceEpoch = 126164076";
 
+  private static final String BASE_DISTINCT_COUNT_QUERY =
+      "SELECT DISTINCTCOUNTHLL(newColumn17HLL), DISTINCTCOUNTHLL(newColumn18HLL) FROM testTable";
+
   private IndexSegment _indexSegment;
   // Contains 2 identical index segments
   private List<SegmentDataManager> _segmentDataManagers;
@@ -185,18 +194,41 @@ public class FastHllQueriesTest extends BaseQueriesTest {
     Assert.assertEquals(((HyperLogLog) aggregationGroupByResult.getResultForKey(firstGroupKey, 1)).cardinality(), 691L);
 
     // Test inter segments base query
+    long startTime = System.currentTimeMillis();
     BrokerResponseNative brokerResponse = getBrokerResponseForQuery(BASE_QUERY);
+    long timeForFastHLLQuery = System.currentTimeMillis() - startTime;
     QueriesTestUtils
         .testInterSegmentAggregationResult(brokerResponse, 120000L, 0L, 240000L, 120000L, new String[]{"21", "1762"});
+
+    startTime = System.currentTimeMillis();
+    getBrokerResponseForQuery(BASE_DISTINCT_COUNT_QUERY);
+    long timeForDistinctCountHLLQuery = System.currentTimeMillis() - startTime;
+    Assert.assertTrue(timeForFastHLLQuery > timeForDistinctCountHLLQuery);
+
     // Test inter segments query with filter
+    startTime = System.currentTimeMillis();
     brokerResponse = getBrokerResponseForQueryWithFilter(BASE_QUERY);
+    timeForFastHLLQuery = System.currentTimeMillis() - startTime;
     QueriesTestUtils.testInterSegmentAggregationResult(brokerResponse, 24516L, 336536L, 49032L, 120000L,
         new String[]{"17", "1197"});
+
+    startTime = System.currentTimeMillis();
+    getBrokerResponseForQueryWithFilter(BASE_DISTINCT_COUNT_QUERY);
+    timeForDistinctCountHLLQuery = System.currentTimeMillis() - startTime;
+    Assert.assertTrue(timeForFastHLLQuery > timeForDistinctCountHLLQuery);
+
     // Test inter segments query with group-by
+    startTime = System.currentTimeMillis();
     brokerResponse = getBrokerResponseForQuery(BASE_QUERY + GROUP_BY);
+    timeForFastHLLQuery = System.currentTimeMillis() - startTime;
     QueriesTestUtils
         .testInterSegmentAggregationResult(brokerResponse, 120000L, 0L, 360000L, 120000L, new String[]{"21", "1762"});
 
+    startTime = System.currentTimeMillis();
+    getBrokerResponseForQuery(BASE_DISTINCT_COUNT_QUERY + GROUP_BY);
+    timeForDistinctCountHLLQuery = System.currentTimeMillis() - startTime;
+    Assert.assertTrue(timeForFastHLLQuery > timeForDistinctCountHLLQuery);
+
     deleteSegment();
   }
 
@@ -253,8 +285,29 @@ public class FastHllQueriesTest extends BaseQueriesTest {
     }
 
     // Build the index segment
-    SegmentIndexCreationDriver driver = new SegmentIndexCreationDriverImpl();
-    driver.init(segmentGeneratorConfig);
+    SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
+    RecordReader recordReader = new AvroRecordReader(new File(filePath), segmentGeneratorConfig.getSchema());
+
+    List<GenericRow> segmentRecords = new ArrayList<>();
+    while (recordReader.hasNext()) {
+      GenericRow segmentRecord = recordReader.next();
+      Object column17Hll = segmentRecord.getValue("column17_HLL");
+      HyperLogLog logLog = HllUtil.convertStringToHll((String) column17Hll);
+      segmentRecord.putField("newColumn17HLL", logLog.getBytes());
+
+      Object column18Hll = segmentRecord.getValue("column18_HLL");
+      HyperLogLog logLog2 = HllUtil.convertStringToHll((String) column18Hll);
+      segmentRecord.putField("newColumn18HLL", logLog2.getBytes());
+
+      segmentRecords.add(segmentRecord);
+    }
+
+    FieldSpec column17Hll = new MetricFieldSpec("newColumn17HLL", FieldSpec.DataType.BYTES);
+    segmentGeneratorConfig.getSchema().addField(column17Hll);
+    FieldSpec column18Hll = new MetricFieldSpec("newColumn18HLL", FieldSpec.DataType.BYTES);
+    segmentGeneratorConfig.getSchema().addField(column18Hll);
+
+    driver.init(segmentGeneratorConfig, new GenericRowRecordReader(segmentRecords, segmentGeneratorConfig.getSchema()));
     driver.build();
 
     ImmutableSegment immutableSegment = ImmutableSegmentLoader.load(new File(INDEX_DIR, SEGMENT_NAME), ReadMode.heap);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org