You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by GitBox <gi...@apache.org> on 2019/07/31 05:11:25 UTC

[GitHub] [incubator-pinot] jasperjiaguo commented on a change in pull request #4474: An auto recommendation for inverted index

jasperjiaguo commented on a change in pull request #4474: An auto recommendation for inverted index
URL: https://github.com/apache/incubator-pinot/pull/4474#discussion_r309040882
 
 

 ##########
 File path: pinot-tools/src/main/java/org/apache/pinot/tools/tuner/strategy/FrequencyImpl.java
 ##########
 @@ -0,0 +1,198 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.tools.tuner.strategy;
+
+import io.vavr.Tuple2;
+import java.text.MessageFormat;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import javax.annotation.Nonnull;
+import org.apache.commons.math.fraction.BigFraction;
+import org.apache.pinot.tools.tuner.meta.manager.MetaManager;
+import org.apache.pinot.tools.tuner.query.src.stats.wrapper.AbstractQueryStats;
+import org.apache.pinot.tools.tuner.query.src.stats.wrapper.IndexSuggestQueryStatsImpl;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+public class FrequencyImpl implements TuningStrategy {
+  private static final Logger LOGGER = LoggerFactory.getLogger(FrequencyImpl.class);
+
+  private static final String NUM_QUERIES_COUNT = "PINOT_TUNER_COUNT*";
+  /*
+   * Matches a filter predicate in one of two shapes:
+   *   groups 1-3: <column> IN|NOT IN (<parenthesised list>)
+   *   groups 4-6: <column> =|<>|!= <value>, where the value is matched lazily and must be
+   *               followed by one character out of the class [ |$)].
+   * NOTE(review): inside a character class '|' and '$' are literal characters, not alternation
+   * or end-of-input, so end-of-input alone does not terminate the value in the second
+   * alternative -- confirm this is intended.
+   */
+  public final static String DIMENSION_REGEX = "(?:(\\w+) ((?:NOT )?IN) (\\(.+?\\)))|(?:(\\w+) (=|<>|!=) (.+?)[ |$)])";
+  public final static long NO_IN_FILTER_THRESHOLD = 0; // Builder default for _numEntriesScannedThreshold
+  public final static long CARD_THRESHOLD_ONE = 1; // Builder default for _cardinalityThreshold
+  // Builder default for _numQueriesThreshold
+  public final static long NO_PROCESSED_THRESH = 0;
+  // DIMENSION_REGEX compiled once and shared by all instances
+  public final static Pattern _dimensionPattern = Pattern.compile(DIMENSION_REGEX);
+  private HashSet<String> _tableNamesWithoutType; // tables to work on, copied from the Builder
+  private long _numEntriesScannedThreshold; // compared against a query's numEntriesScannedInFilter in filter()
+  private long _cardinalityThreshold; // columns with cardinality below this are ignored (see Builder)
+  private long _numQueriesThreshold; // set through Builder.setNumQueriesThreshold
+  /** Copies the table-name set and the three thresholds out of the given builder; called from {@link Builder#build()}. */
+  private FrequencyImpl(Builder builder) {
+    _tableNamesWithoutType = builder._tableNamesWithoutType;
+    _numEntriesScannedThreshold = builder._numEntriesScannedThreshold;
+    _cardinalityThreshold = builder._cardinalityThreshold;
+    _numQueriesThreshold = builder._numQueriesThreshold;
+  }
+  /** Fluent builder for {@link FrequencyImpl}; each setter returns this builder so calls can be chained. */
+  public static final class Builder {
+    private HashSet<String> _tableNamesWithoutType = new HashSet<>();
+    private long _numEntriesScannedThreshold = NO_IN_FILTER_THRESHOLD;
+    private long _cardinalityThreshold = CARD_THRESHOLD_ONE;
+    private long _numQueriesThreshold = NO_PROCESSED_THRESH;
+
+    public Builder() {
+    }
+    /** Creates a {@link FrequencyImpl} from the values currently held by this builder. */
+    @Nonnull
+    public FrequencyImpl build() {
+      return new FrequencyImpl(this);
+    }
+
+    /**
+     * Sets the tables to work on; other tables will be filtered out.
+     * The given set is stored by reference, not copied.
+     * @param val set of table names without type
+     * @return this builder
+     */
+    @Nonnull
+    public Builder setTableNamesWithoutType(@Nonnull HashSet<String> val) {
+      _tableNamesWithoutType = val;
+      return this;
+    }
+
+    /**
+     * Sets the threshold for _numEntriesScannedInFilter; queries with _numEntriesScannedInFilter
+     * below this will be filtered out.
+     * @param val threshold on numEntriesScannedInFilter, defaults to {@link #NO_IN_FILTER_THRESHOLD} (0)
+     * @return this builder
+     */
+    @Nonnull
+    public Builder setNumEntriesScannedThreshold(long val) {
+      _numEntriesScannedThreshold = val;
+      return this;
+    }
+
+    /**
+     * Sets the cardinality threshold; columns with cardinality below this will be ignored.
+     * Setting a high value will force the system to ignore low-cardinality columns.
+     * @param val cardinality threshold, defaults to {@link #CARD_THRESHOLD_ONE} (1)
+     * @return this builder
+     */
+    @Nonnull
+    public Builder setCardinalityThreshold(long val) {
+      _cardinalityThreshold = val;
+      return this;
+    }
+
+    /**
+     * Sets the minimum number of records scanned to give a recommendation.
+     * NOTE(review): the setter and field names refer to a number of queries while this text says
+     * records scanned -- confirm which quantity is meant.
+     * @param val minimum number of records scanned to give a recommendation,
+     *            defaults to {@link #NO_PROCESSED_THRESH} (0)
+     * @return this builder
+     */
+    @Nonnull
+    public Builder setNumQueriesThreshold(long val) {
+      _numQueriesThreshold = val;
+      return this;
+    }
+  }
+
+  @Override
+  public boolean filter(AbstractQueryStats queryStats) {
+    IndexSuggestQueryStatsImpl indexSuggestQueryStatsImpl = (IndexSuggestQueryStatsImpl) queryStats;
+    long numEntriesScannedInFilter = Long.parseLong(indexSuggestQueryStatsImpl.getNumEntriesScannedInFilter());
+    return (_tableNamesWithoutType == null || _tableNamesWithoutType.isEmpty() || _tableNamesWithoutType
 
 Review comment:
   The numEntriesScannedThreshold check is never skipped, because the condition has the form (_tableNamesWithoutType == null || ... || ...) **&&** (numEntriesScannedInFilter > _numEntriesScannedThreshold): both the table-name test and the threshold test must pass.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org