You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by xi...@apache.org on 2023/12/28 04:52:03 UTC

(pinot) branch master updated: Create DateTimeGenerator and add it to data generator (#12206)

This is an automated email from the ASF dual-hosted git repository.

xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 50912eb0c4 Create DateTimeGenerator and add it to data generator (#12206)
50912eb0c4 is described below

commit 50912eb0c419151bdaa23fa5153d93dfab4bf9c6
Author: Shounak kulkarni <sh...@gmail.com>
AuthorDate: Thu Dec 28 10:21:55 2023 +0530

    Create DateTimeGenerator and add it to data generator (#12206)
---
 .../recommender/data/DataGenerationHelpers.java    | 35 ++++++--
 .../recommender/data/generator/DataGenerator.java  | 18 ++++-
 .../data/generator/DataGeneratorSpec.java          | 92 +++++++++++++++++++++-
 .../data/generator/DateTimeGenerator.java          | 51 ++++++++++++
 .../realtime/provisioning/MemoryEstimator.java     | 12 ++-
 .../tools/admin/command/GenerateDataCommand.java   | 32 ++------
 6 files changed, 201 insertions(+), 39 deletions(-)

diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/DataGenerationHelpers.java b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/DataGenerationHelpers.java
index b4017abfa8..0450d56e87 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/DataGenerationHelpers.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/DataGenerationHelpers.java
@@ -22,8 +22,8 @@ package org.apache.pinot.controller.recommender.data;
 import java.io.File;
 import java.io.IOException;
 import java.util.HashMap;
+import java.util.LinkedList;
 import java.util.List;
-import java.util.Map;
 import java.util.concurrent.TimeUnit;
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.lang.math.IntRange;
@@ -34,6 +34,7 @@ import org.apache.pinot.controller.recommender.data.writer.AvroWriterSpec;
 import org.apache.pinot.controller.recommender.data.writer.CsvWriter;
 import org.apache.pinot.controller.recommender.data.writer.FileWriterSpec;
 import org.apache.pinot.controller.recommender.data.writer.JsonWriter;
+import org.apache.pinot.spi.data.DateTimeFieldSpec;
 import org.apache.pinot.spi.data.FieldSpec;
 import org.apache.pinot.spi.data.Schema;
 import org.apache.pinot.spi.data.TimeFieldSpec;
@@ -82,10 +83,16 @@ public final class DataGenerationHelpers {
     return dir;
   }
 
-  public static DataGeneratorSpec buildDataGeneratorSpec(Schema schema, List<String> columns,
-      HashMap<String, FieldSpec.DataType> dataTypes, HashMap<String, FieldSpec.FieldType> fieldTypes,
-      HashMap<String, TimeUnit> timeUnits, HashMap<String, Integer> cardinality, HashMap<String, IntRange> range,
-      HashMap<String, Map<String, Object>> pattern, Map<String, Double> mvCountMap, Map<String, Integer> lengthMap) {
+  public static DataGeneratorSpec buildDataGeneratorSpec(Schema schema) {
+    final List<String> columns = new LinkedList<>();
+    final HashMap<String, FieldSpec.DataType> dataTypes = new HashMap<>();
+    final HashMap<String, FieldSpec.FieldType> fieldTypes = new HashMap<>();
+    final HashMap<String, TimeUnit> timeUnits = new HashMap<>();
+
+    final HashMap<String, Integer> cardinality = new HashMap<>();
+    final HashMap<String, IntRange> range = new HashMap<>();
+    final HashMap<String, String> granularityMap = new HashMap<>();
+    final HashMap<String, String> formatMap = new HashMap<>();
     for (final FieldSpec fs : schema.getAllFieldSpecs()) {
       String col = fs.getName();
       columns.add(col);
@@ -104,16 +111,28 @@ public final class DataGenerationHelpers {
           TimeFieldSpec tfs = (TimeFieldSpec) fs;
           timeUnits.put(col, tfs.getIncomingGranularitySpec().getTimeType());
           break;
+        case DATE_TIME:
+          DateTimeFieldSpec dtfs = (DateTimeFieldSpec) fs;
+          granularityMap.put(col, dtfs.getGranularity());
+          formatMap.put(col, dtfs.getFormat());
+          break;
 
         // forward compatibility with pattern generator
-        case DATE_TIME:
         case COMPLEX:
           break;
         default:
           throw new RuntimeException("Invalid field type.");
       }
     }
-    return new DataGeneratorSpec(columns, cardinality, range, pattern, mvCountMap, lengthMap, dataTypes, fieldTypes,
-        timeUnits);
+    return new DataGeneratorSpec.Builder()
+        .setColumns(columns)
+        .setDataTypeMap(dataTypes)
+        .setFieldTypeMap(fieldTypes)
+        .setTimeUnitMap(timeUnits)
+        .setCardinalityMap(cardinality)
+        .setRangeMap(range)
+        .setDateTimeGranularityMap(granularityMap)
+        .setDateTimeFormatMap(formatMap)
+        .build();
   }
 }
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/generator/DataGenerator.java b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/generator/DataGenerator.java
index 4c818846ed..8a72ea60d6 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/generator/DataGenerator.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/generator/DataGenerator.java
@@ -28,6 +28,7 @@ import java.util.Map;
 import java.util.concurrent.TimeUnit;
 import org.apache.commons.lang.math.IntRange;
 import org.apache.pinot.controller.recommender.data.DataGenerationHelpers;
+import org.apache.pinot.spi.data.DateTimeFieldSpec;
 import org.apache.pinot.spi.data.DimensionFieldSpec;
 import org.apache.pinot.spi.data.FieldSpec;
 import org.apache.pinot.spi.data.FieldSpec.DataType;
@@ -63,7 +64,11 @@ public class DataGenerator {
       DataType dataType = _genSpec.getDataTypeMap().get(column);
 
       Generator generator;
-      if (_genSpec.getPatternMap().containsKey(column)) {
+      if (_genSpec.getDateTimeFormatMap().containsKey(column)
+          && _genSpec.getDateTimeGranularityMap().containsKey(column)) {
+        generator = new DateTimeGenerator(_genSpec.getDateTimeFormatMap().get(column),
+            _genSpec.getDateTimeGranularityMap().get(column));
+      } else if (_genSpec.getPatternMap().containsKey(column)) {
         generator = GeneratorFactory
             .getGeneratorFor(PatternType.valueOf(_genSpec.getPatternMap().get(column).get("type").toString()),
                 _genSpec.getPatternMap().get(column));
@@ -123,6 +128,12 @@ public class DataGenerator {
         spec = new TimeFieldSpec(new TimeGranularitySpec(dataType, genSpec.getTimeUnitMap().get(column), column));
         break;
 
+      case DATE_TIME:
+        String format = genSpec.getDateTimeFormatMap().get(column);
+        String granularity = genSpec.getDateTimeGranularityMap().get(column);
+        spec = new DateTimeFieldSpec(column, dataType, format, granularity);
+        break;
+
       default:
         throw new RuntimeException("Invalid Field type.");
     }
@@ -148,6 +159,9 @@ public class DataGenerator {
     Map<String, Integer> lengthMap = new HashMap<>();
     List<String> columnNames = new ArrayList<>();
 
+    final Map<String, String> dateTimeFormatMap = new HashMap<>();
+    final Map<String, String> dateTimeGranularityMap = new HashMap<>();
+
     int cardinalityValue = 5;
     int strLength = 5;
 
@@ -199,7 +213,7 @@ public class DataGenerator {
     String outputDir = Paths.get(System.getProperty("java.io.tmpdir"), "csv-data").toString();
     final DataGeneratorSpec spec =
         new DataGeneratorSpec(columnNames, cardinality, range, template, mvCountMap, lengthMap, dataTypes, fieldTypes,
-            timeUnits);
+            timeUnits, dateTimeFormatMap, dateTimeGranularityMap);
 
     final DataGenerator gen = new DataGenerator();
     gen.init(spec);
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/generator/DataGeneratorSpec.java b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/generator/DataGeneratorSpec.java
index f64a7a984b..550730ac9c 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/generator/DataGeneratorSpec.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/generator/DataGeneratorSpec.java
@@ -41,6 +41,8 @@ public class DataGeneratorSpec {
   private final Map<String, DataType> _dataTypeMap;
   private final Map<String, FieldType> _fieldTypeMap;
   private final Map<String, TimeUnit> _timeUnitMap;
+  private final Map<String, String> _dateTimeFormatMap;
+  private final Map<String, String> _dateTimeGranularityMap;
 
   @Deprecated
   private FileFormat _outputFileFormat;
@@ -74,11 +76,15 @@ public class DataGeneratorSpec {
     _dataTypeMap = dataTypesMap;
     _fieldTypeMap = fieldTypesMap;
     _timeUnitMap = timeUnitMap;
+
+    _dateTimeFormatMap = new HashMap<>();
+    _dateTimeGranularityMap = new HashMap<>();
   }
 
   public DataGeneratorSpec(List<String> columns, Map<String, Integer> cardinalityMap, Map<String, IntRange> rangeMap,
       Map<String, Map<String, Object>> patternMap, Map<String, Double> mvCountMap, Map<String, Integer> lengthMap,
-      Map<String, DataType> dataTypesMap, Map<String, FieldType> fieldTypesMap, Map<String, TimeUnit> timeUnitMap) {
+      Map<String, DataType> dataTypesMap, Map<String, FieldType> fieldTypesMap, Map<String, TimeUnit> timeUnitMap,
+      Map<String, String> dateTimeFormatMap, Map<String, String> dateTimeGranularityMap) {
     _columns = columns;
     _cardinalityMap = cardinalityMap;
     _rangeMap = rangeMap;
@@ -89,6 +95,8 @@ public class DataGeneratorSpec {
     _dataTypeMap = dataTypesMap;
     _fieldTypeMap = fieldTypesMap;
     _timeUnitMap = timeUnitMap;
+    _dateTimeGranularityMap = dateTimeGranularityMap;
+    _dateTimeFormatMap = dateTimeFormatMap;
   }
 
   public Map<String, DataType> getDataTypeMap() {
@@ -139,6 +147,14 @@ public class DataGeneratorSpec {
     return _outputDir;
   }
 
+  public Map<String, String> getDateTimeFormatMap() { // column name -> date-time format string
+    return _dateTimeFormatMap; // the stored map itself is returned, not a copy
+  }
+
+  public Map<String, String> getDateTimeGranularityMap() { // column name -> date-time granularity string
+    return _dateTimeGranularityMap; // the stored map itself is returned, not a copy
+  }
+
   @Override
   public String toString() {
     final StringBuilder builder = new StringBuilder();
@@ -156,4 +172,78 @@ public class DataGeneratorSpec {
     builder.append(", output dir : " + _outputDir);
     return builder.toString();
   }
+
+  public static class Builder { // fluent builder for DataGeneratorSpec; unset fields stay empty collections
+    private List<String> _columns = new ArrayList<>();
+    private Map<String, Integer> _cardinalityMap = new HashMap<>();
+    private Map<String, IntRange> _rangeMap = new HashMap<>();
+    private Map<String, Map<String, Object>> _patternMap = new HashMap<>();
+    private Map<String, Double> _mvCountMap = new HashMap<>();
+    private Map<String, Integer> _lengthMap = new HashMap<>();
+    private Map<String, DataType> _dataTypeMap = new HashMap<>();
+    private Map<String, FieldType> _fieldTypeMap = new HashMap<>();
+    private Map<String, TimeUnit> _timeUnitMap = new HashMap<>();
+    private Map<String, String> _dateTimeFormatMap = new HashMap<>();
+    private Map<String, String> _dateTimeGranularityMap = new HashMap<>();
+
+    public DataGeneratorSpec build() { // hands the builder's current references to the 11-argument constructor
+      return new DataGeneratorSpec(_columns, _cardinalityMap, _rangeMap, _patternMap, _mvCountMap, _lengthMap,
+          _dataTypeMap, _fieldTypeMap, _timeUnitMap, _dateTimeFormatMap, _dateTimeGranularityMap);
+    }
+
+    public Builder setColumns(List<String> columns) { // stores the caller's list by reference
+      _columns = columns;
+      return this; // every setter returns this builder so calls can be chained
+    }
+
+    public Builder setCardinalityMap(Map<String, Integer> cardinalityMap) { // column name -> cardinality
+      _cardinalityMap = cardinalityMap;
+      return this;
+    }
+
+    public Builder setRangeMap(Map<String, IntRange> rangeMap) { // column name -> IntRange
+      _rangeMap = rangeMap;
+      return this;
+    }
+
+    public Builder setPatternMap(Map<String, Map<String, Object>> patternMap) { // column name -> pattern config map
+      _patternMap = patternMap;
+      return this;
+    }
+
+    public Builder setMvCountMap(Map<String, Double> mvCountMap) { // NOTE(review): presumably keyed by column; confirm
+      _mvCountMap = mvCountMap;
+      return this;
+    }
+
+    public Builder setLengthMap(Map<String, Integer> lengthMap) { // NOTE(review): presumably keyed by column; confirm
+      _lengthMap = lengthMap;
+      return this;
+    }
+
+    public Builder setDataTypeMap(Map<String, DataType> dataTypeMap) { // column name -> DataType
+      _dataTypeMap = dataTypeMap;
+      return this;
+    }
+
+    public Builder setFieldTypeMap(Map<String, FieldType> fieldTypeMap) { // NOTE(review): presumably keyed by column
+      _fieldTypeMap = fieldTypeMap;
+      return this;
+    }
+
+    public Builder setTimeUnitMap(Map<String, TimeUnit> timeUnitMap) { // column name -> TimeUnit
+      _timeUnitMap = timeUnitMap;
+      return this;
+    }
+
+    public Builder setDateTimeFormatMap(Map<String, String> dateTimeFormatMap) { // column name -> format string
+      _dateTimeFormatMap = dateTimeFormatMap;
+      return this;
+    }
+
+    public Builder setDateTimeGranularityMap(Map<String, String> dateTimeGranularityMap) { // column -> granularity
+      _dateTimeGranularityMap = dateTimeGranularityMap;
+      return this;
+    }
+  }
 }
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/generator/DateTimeGenerator.java b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/generator/DateTimeGenerator.java
new file mode 100644
index 0000000000..b83f95ba3b
--- /dev/null
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/generator/DateTimeGenerator.java
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.pinot.controller.recommender.data.generator;
+
+import java.util.Date;
+import java.util.Random;
+import org.apache.pinot.spi.data.DateTimeFormatSpec;
+import org.apache.pinot.spi.data.DateTimeGranularitySpec;
+
+
+public class DateTimeGenerator implements Generator { // yields formatted timestamps stepped by a granularity
+
+  private static final int MULTIPLIER_CARDINALITY = 5; // exclusive bound for nextInt(): step multiplier is 0..4
+  private final DateTimeFormatSpec _formatSpec; // renders the millisecond value via fromMillisToFormat()
+  private final DateTimeGranularitySpec _granularitySpec; // supplies the step size via granularityToMillis()
+  private long _currentValue; // running epoch-millis value; remains 0 until init() is called
+  private Random _multiplier = new Random(); // no explicit seed, so sequences are not reproducible across runs
+
+  public DateTimeGenerator(String format, String granularity) { // both strings are parsed by the spec constructors
+    _formatSpec = new DateTimeFormatSpec(format);
+    _granularitySpec = new DateTimeGranularitySpec(granularity);
+  }
+
+  @Override
+  public void init() { // anchors the sequence at the current wall-clock time
+    _currentValue = new Date().getTime();
+  }
+
+  @Override
+  public Object next() { // a multiplier of 0 leaves the value unchanged, so consecutive results may repeat
+    _currentValue += _granularitySpec.granularityToMillis() * _multiplier.nextInt(MULTIPLIER_CARDINALITY);
+    return _formatSpec.fromMillisToFormat(_currentValue); // NOTE(review): result type depends on the format spec
+  }
+}
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/realtime/provisioning/MemoryEstimator.java b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/realtime/provisioning/MemoryEstimator.java
index 3c19db74b6..934b197560 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/realtime/provisioning/MemoryEstimator.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/realtime/provisioning/MemoryEstimator.java
@@ -526,9 +526,15 @@ public class MemoryEstimator {
 
       // generate data
       String outputDir = new File(_workingDir, "csv").getAbsolutePath();
-      DataGeneratorSpec spec =
-          new DataGeneratorSpec(colNames, cardinalities, new HashMap<>(), new HashMap<>(), mvCounts, lengths, dataTypes,
-              fieldTypes, timeUnits);
+      DataGeneratorSpec spec = new DataGeneratorSpec.Builder()
+          .setColumns(colNames)
+          .setCardinalityMap(cardinalities)
+          .setMvCountMap(mvCounts)
+          .setLengthMap(lengths)
+          .setDataTypeMap(dataTypes)
+          .setFieldTypeMap(fieldTypes)
+          .setTimeUnitMap(timeUnits)
+          .build();
       DataGenerator dataGenerator = new DataGenerator();
       try {
         dataGenerator.init(spec);
diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/GenerateDataCommand.java b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/GenerateDataCommand.java
index 9a98f652fb..1c06e95853 100644
--- a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/GenerateDataCommand.java
+++ b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/GenerateDataCommand.java
@@ -20,10 +20,7 @@ package org.apache.pinot.tools.admin.command;
 
 import java.io.File;
 import java.io.IOException;
-import java.util.HashMap;
-import java.util.LinkedList;
 import java.util.List;
-import java.util.Map;
 import java.util.concurrent.TimeUnit;
 import org.apache.commons.lang.math.IntRange;
 import org.apache.pinot.controller.recommender.data.DataGenerationHelpers;
@@ -31,7 +28,6 @@ import org.apache.pinot.controller.recommender.data.generator.DataGenerator;
 import org.apache.pinot.controller.recommender.data.generator.DataGeneratorSpec;
 import org.apache.pinot.controller.recommender.data.generator.SchemaAnnotation;
 import org.apache.pinot.spi.data.FieldSpec.DataType;
-import org.apache.pinot.spi.data.FieldSpec.FieldType;
 import org.apache.pinot.spi.data.Schema;
 import org.apache.pinot.spi.data.Schema.SchemaBuilder;
 import org.apache.pinot.spi.data.TimeGranularitySpec;
@@ -124,23 +120,10 @@ public class GenerateDataCommand extends AbstractBaseAdminCommand implements Com
     }
 
     Schema schema = Schema.fromFile(new File(_schemaFile));
-
-    List<String> columns = new LinkedList<>();
-    final HashMap<String, DataType> dataTypes = new HashMap<>();
-    final HashMap<String, FieldType> fieldTypes = new HashMap<>();
-    final HashMap<String, TimeUnit> timeUnits = new HashMap<>();
-
-    final HashMap<String, Integer> cardinality = new HashMap<>();
-    final HashMap<String, IntRange> range = new HashMap<>();
-    final HashMap<String, Map<String, Object>> pattern = new HashMap<>();
-    final HashMap<String, Double> mvCountMap = new HashMap<>();
-    final HashMap<String, Integer> lengthMap = new HashMap<>();
-
-    buildCardinalityRangeMaps(_schemaAnnFile, cardinality, range, pattern);
-
     final DataGeneratorSpec spec =
-        DataGenerationHelpers.buildDataGeneratorSpec(schema, columns, dataTypes, fieldTypes, timeUnits, cardinality,
-            range, pattern, mvCountMap, lengthMap);
+        DataGenerationHelpers.buildDataGeneratorSpec(schema);
+    buildCardinalityRangeMaps(_schemaAnnFile, spec);
+
 
     final DataGenerator gen = new DataGenerator();
     gen.init(spec);
@@ -158,8 +141,7 @@ public class GenerateDataCommand extends AbstractBaseAdminCommand implements Com
     return true;
   }
 
-  private void buildCardinalityRangeMaps(String file, HashMap<String, Integer> cardinality,
-      HashMap<String, IntRange> range, Map<String, Map<String, Object>> pattern)
+  private void buildCardinalityRangeMaps(String file, DataGeneratorSpec spec)
       throws IOException {
     if (file == null) {
       return; // Nothing to do here.
@@ -171,11 +153,11 @@ public class GenerateDataCommand extends AbstractBaseAdminCommand implements Com
       String column = sa.getColumn();
 
       if (sa.isRange()) {
-        range.put(column, new IntRange(sa.getRangeStart(), sa.getRangeEnd()));
+        spec.getRangeMap().put(column, new IntRange(sa.getRangeStart(), sa.getRangeEnd()));
       } else if (sa.getPattern() != null) {
-        pattern.put(column, sa.getPattern());
+        spec.getPatternMap().put(column, sa.getPattern());
       } else {
-        cardinality.put(column, sa.getCardinality());
+        spec.getCardinalityMap().put(column, sa.getCardinality());
       }
     }
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org