You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by xi...@apache.org on 2020/05/12 00:55:28 UTC

[incubator-pinot] 01/01: Revert "Adding template support for Pinot Ingestion Job Spec (#5341)"

This is an automated email from the ASF dual-hosted git repository.

xiangfu pushed a commit to branch revert_PR
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 627de88cc2c07f5f5de10f53e04c346d4a1646cb
Author: Xiang Fu <fx...@gmail.com>
AuthorDate: Mon May 11 17:54:49 2020 -0700

    Revert "Adding template support for Pinot Ingestion Job Spec (#5341)"
    
    This reverts commit c0cdb7218166f1773274c0126f21dd4494a31f07.
---
 pinot-spi/pom.xml                                  |  4 -
 .../spi/ingestion/batch/IngestionJobLauncher.java  | 29 ++------
 .../apache/pinot/spi/utils/JinjaTemplateUtils.java | 75 -------------------
 .../ingestion/batch/IngestionJobLauncherTest.java  | 42 -----------
 .../pinot/spi/utils/JinjaTemplateUtilsTest.java    | 86 ----------------------
 .../test/resources/ingestionJobSpecTemplate.yaml   | 45 -----------
 .../command/LaunchDataIngestionJobCommand.java     | 16 +---
 pom.xml                                            |  6 --
 8 files changed, 10 insertions(+), 293 deletions(-)

diff --git a/pinot-spi/pom.xml b/pinot-spi/pom.xml
index b1a4b8b..8ad0aec 100644
--- a/pinot-spi/pom.xml
+++ b/pinot-spi/pom.xml
@@ -100,10 +100,6 @@
       <artifactId>jsr305</artifactId>
     </dependency>
     <dependency>
-      <groupId>com.hubspot.jinjava</groupId>
-      <artifactId>jinjava</artifactId>
-    </dependency>
-    <dependency>
       <groupId>org.apache.logging.log4j</groupId>
       <artifactId>log4j-slf4j-impl</artifactId>
     </dependency>
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/ingestion/batch/IngestionJobLauncher.java b/pinot-spi/src/main/java/org/apache/pinot/spi/ingestion/batch/IngestionJobLauncher.java
index adbd30f..9bb740a 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/ingestion/batch/IngestionJobLauncher.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/ingestion/batch/IngestionJobLauncher.java
@@ -20,18 +20,13 @@ package org.apache.pinot.spi.ingestion.batch;
 
 import java.io.BufferedReader;
 import java.io.FileReader;
-import java.io.IOException;
+import java.io.Reader;
 import java.io.StringWriter;
-import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
-import org.apache.commons.io.IOUtils;
 import org.apache.pinot.spi.ingestion.batch.runner.IngestionJobRunner;
 import org.apache.pinot.spi.ingestion.batch.spec.ExecutionFrameworkSpec;
 import org.apache.pinot.spi.ingestion.batch.spec.SegmentGenerationJobSpec;
 import org.apache.pinot.spi.plugin.PluginManager;
-import org.apache.pinot.spi.utils.JinjaTemplateUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.yaml.snakeyaml.Yaml;
@@ -41,7 +36,7 @@ public class IngestionJobLauncher {
 
   public static final Logger LOGGER = LoggerFactory.getLogger(IngestionJobLauncher.class);
 
-  private static final String USAGE = "usage: [jobSpec.yaml] [template_key=template_value]...";
+  private static final String USAGE = "usage: [jobSpec.yaml]";
 
   private static void usage() {
     System.err.println(USAGE);
@@ -49,26 +44,16 @@ public class IngestionJobLauncher {
 
   public static void main(String[] args)
       throws Exception {
-    if (args.length < 1) {
+    if (args.length != 1) {
       usage();
       System.exit(1);
     }
     String jobSpecFilePath = args[0];
-    List<String> valueList = new ArrayList<>();
-    for (int i = 1; i < args.length; i++) {
-      valueList.add(args[i]);
-    }
-    SegmentGenerationJobSpec spec =
-        getSegmentGenerationJobSpec(jobSpecFilePath, JinjaTemplateUtils.getTemplateContext(valueList));
-    runIngestionJob(spec);
-  }
 
-  public static SegmentGenerationJobSpec getSegmentGenerationJobSpec(String jobSpecFilePath,
-      Map<String, Object> context)
-      throws IOException {
-    String yamlTemplate = IOUtils.toString(new BufferedReader(new FileReader(jobSpecFilePath)));
-    String yamlStr = JinjaTemplateUtils.renderTemplate(yamlTemplate, context);
-    return new Yaml().loadAs(yamlStr, SegmentGenerationJobSpec.class);
+    try (Reader reader = new BufferedReader(new FileReader(jobSpecFilePath))) {
+      SegmentGenerationJobSpec spec = new Yaml().loadAs(reader, SegmentGenerationJobSpec.class);
+      runIngestionJob(spec);
+    }
   }
 
   public static void runIngestionJob(SegmentGenerationJobSpec spec)
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JinjaTemplateUtils.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JinjaTemplateUtils.java
deleted file mode 100644
index 5d6e15b..0000000
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JinjaTemplateUtils.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.pinot.spi.utils;
-
-import com.hubspot.jinjava.Jinjava;
-import java.text.SimpleDateFormat;
-import java.time.Instant;
-import java.time.temporal.ChronoUnit;
-import java.util.Collections;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.TimeZone;
-
-
-public class JinjaTemplateUtils {
-
-  private static final Jinjava JINJAVA = new Jinjava();
-  private static final SimpleDateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd");
-
-  public static String renderTemplate(String template, Map<String, Object> newContext) {
-    Map<String, Object> contextMap = getDefaultJinjaContextMap();
-    contextMap.putAll(newContext);
-    return JINJAVA.render(template, contextMap);
-  }
-
-  /**
-   Construct default template context:
-   today : today's date in format `yyyy-MM-dd`, example value: '2020-05-06'
-   yesterday : yesterday's date in format `yyyy-MM-dd`, example value: '2020-05-06'
-   */
-  public static Map<String, Object> getDefaultJinjaContextMap() {
-    Map<String, Object> defaultJinjaContextMap = new HashMap<>();
-    Instant now = Instant.now();
-    defaultJinjaContextMap.put("today", DATE_FORMAT.format(new Date(now.toEpochMilli())));
-    defaultJinjaContextMap.put("yesterday", DATE_FORMAT.format(new Date(now.minus(1, ChronoUnit.DAYS).toEpochMilli())));
-    return defaultJinjaContextMap;
-  }
-
-  public static Map<String, Object> getTemplateContext(List<String> values) {
-    Map<String, Object> context = new HashMap<>();
-    for (String value : values) {
-      String[] splits = value.split("=", 2);
-      if (splits.length > 1) {
-        context.put(splits[0], splits[1]);
-      }
-    }
-    return context;
-  }
-
-  public static String renderTemplate(String template) {
-    return renderTemplate(template, Collections.emptyMap());
-  }
-
-  static {
-    DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC"));
-  }
-}
diff --git a/pinot-spi/src/test/java/org/apache/pinot/spi/ingestion/batch/IngestionJobLauncherTest.java b/pinot-spi/src/test/java/org/apache/pinot/spi/ingestion/batch/IngestionJobLauncherTest.java
deleted file mode 100644
index 7d54f0f..0000000
--- a/pinot-spi/src/test/java/org/apache/pinot/spi/ingestion/batch/IngestionJobLauncherTest.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.pinot.spi.ingestion.batch;
-
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Map;
-import org.apache.pinot.spi.ingestion.batch.spec.SegmentGenerationJobSpec;
-import org.apache.pinot.spi.utils.JinjaTemplateUtils;
-import org.testng.Assert;
-import org.testng.annotations.Test;
-
-
-public class IngestionJobLauncherTest {
-
-  @Test
-  public void testIngestionJobLauncherWithTemplate()
-      throws IOException {
-    Map<String, Object> context =
-        JinjaTemplateUtils.getTemplateContext(Arrays.asList("year=2020", "month=05", "day=06"));
-    SegmentGenerationJobSpec spec = IngestionJobLauncher.getSegmentGenerationJobSpec(
-        JinjaTemplateUtils.class.getClassLoader().getResource("ingestionJobSpecTemplate.yaml").getFile(), context);
-    Assert.assertEquals(spec.getInputDirURI(), "file:///path/to/input/2020/05/06");
-    Assert.assertEquals(spec.getOutputDirURI(), "file:///path/to/output/2020/05/06");
-  }
-}
diff --git a/pinot-spi/src/test/java/org/apache/pinot/spi/utils/JinjaTemplateUtilsTest.java b/pinot-spi/src/test/java/org/apache/pinot/spi/utils/JinjaTemplateUtilsTest.java
deleted file mode 100644
index 768ab6f..0000000
--- a/pinot-spi/src/test/java/org/apache/pinot/spi/utils/JinjaTemplateUtilsTest.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.pinot.spi.utils;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.text.SimpleDateFormat;
-import java.time.Instant;
-import java.time.temporal.ChronoUnit;
-import java.util.Arrays;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.TimeZone;
-import org.apache.commons.io.IOUtils;
-import org.apache.pinot.spi.ingestion.batch.spec.SegmentGenerationJobSpec;
-import org.testng.Assert;
-import org.testng.annotations.Test;
-import org.yaml.snakeyaml.Yaml;
-
-
-public class JinjaTemplateUtilsTest {
-
-  @Test
-  public void testDefaultRenderTemplate() {
-    Date today = new Date(Instant.now().toEpochMilli());
-    Date yesterday = new Date(Instant.now().minus(1, ChronoUnit.DAYS).toEpochMilli());
-    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
-    dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-    Assert.assertEquals(JinjaTemplateUtils.renderTemplate("{{ today }}"), dateFormat.format(today));
-    Assert.assertEquals(JinjaTemplateUtils.renderTemplate("{{ yesterday }}"), dateFormat.format(yesterday));
-  }
-
-  @Test
-  public void testRenderTemplateWithGivenContextMap() {
-    Map<String, Object> contextMap = new HashMap<>();
-    contextMap.put("first_date_2020", "2020-01-01");
-    contextMap.put("name", "xiang");
-    contextMap.put("ts", 1577836800);
-    contextMap.put("yyyy", "2020");
-    contextMap.put("YYYY", "1919");
-    contextMap.put("MM", "05");
-    contextMap.put("dd", "06");
-    Assert.assertEquals(JinjaTemplateUtils.renderTemplate("{{ first_date_2020 }}", contextMap), "2020-01-01");
-    Assert.assertEquals(JinjaTemplateUtils.renderTemplate("{{first_date_2020}}", contextMap), "2020-01-01");
-    Assert.assertEquals(JinjaTemplateUtils.renderTemplate("{{ name }}", contextMap), "xiang");
-    Assert.assertEquals(JinjaTemplateUtils.renderTemplate("{{ name|upper }}", contextMap), "XIANG");
-    Assert.assertEquals(JinjaTemplateUtils.renderTemplate("{{ ts }}", contextMap), "1577836800");
-    Assert.assertEquals(JinjaTemplateUtils.renderTemplate("/var/rawdata/{{ yyyy }}/{{ MM }}/{{ dd }}", contextMap),
-        "/var/rawdata/2020/05/06");
-    Assert.assertEquals(JinjaTemplateUtils.renderTemplate("/var/rawdata/{{yyyy}}/{{MM}}/{{dd}}", contextMap),
-        "/var/rawdata/2020/05/06");
-    Assert.assertEquals(JinjaTemplateUtils.renderTemplate("/var/rawdata/{{YYYY}}/{{MM}}/{{dd}}", contextMap),
-        "/var/rawdata/1919/05/06");
-  }
-
-  @Test
-  public void testIngestionJobTemplate()
-      throws IOException {
-    InputStream resourceAsStream =
-        JinjaTemplateUtils.class.getClassLoader().getResourceAsStream("ingestionJobSpecTemplate.yaml");
-    String yamlTemplate = IOUtils.toString(resourceAsStream);
-    Map<String, Object> context =
-        JinjaTemplateUtils.getTemplateContext(Arrays.asList("year=2020", "month=05", "day=06"));
-    String yamlStr = JinjaTemplateUtils.renderTemplate(yamlTemplate, context);
-    SegmentGenerationJobSpec spec = new Yaml().loadAs(yamlStr, SegmentGenerationJobSpec.class);
-    Assert.assertEquals(spec.getInputDirURI(), "file:///path/to/input/2020/05/06");
-    Assert.assertEquals(spec.getOutputDirURI(), "file:///path/to/output/2020/05/06");
-  }
-}
diff --git a/pinot-spi/src/test/resources/ingestionJobSpecTemplate.yaml b/pinot-spi/src/test/resources/ingestionJobSpecTemplate.yaml
deleted file mode 100644
index 5032e17..0000000
--- a/pinot-spi/src/test/resources/ingestionJobSpecTemplate.yaml
+++ /dev/null
@@ -1,45 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-
-executionFrameworkSpec:
-  name: 'standalone'
-  segmentGenerationJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner'
-  segmentTarPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentTarPushJobRunner'
-  segmentUriPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentUriPushJobRunner'
-jobType: SegmentCreationAndTarPush
-inputDirURI: 'file:///path/to/input/{{ year }}/{{ month }}/{{ day }}'
-includeFileNamePattern: 'glob:**/*.parquet'
-excludeFileNamePattern: 'glob:**/*.avro'
-outputDirURI: 'file:///path/to/output/{{year}}/{{month}}/{{day}}'
-overwriteOutput: true
-pinotFSSpecs:
-  - scheme: file
-    className: org.apache.pinot.spi.filesystem.LocalPinotFS
-recordReaderSpec:
-  dataFormat: 'parquet'
-  className: 'org.apache.pinot.parquet.data.readers.ParquetRecordReader'
-tableSpec:
-  tableName: 'myTable'
-  schemaURI: 'http://localhost:9000/tables/myTable/schema'
-  tableConfigURI: 'http://localhost:9000/tables/myTable'
-pinotClusterSpecs:
-  - controllerURI: 'localhost:9000'
-pushJobSpec:
-  pushAttempts: 2
-  pushRetryIntervalMillis: 1000
\ No newline at end of file
diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/LaunchDataIngestionJobCommand.java b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/LaunchDataIngestionJobCommand.java
index bd37ebb..49c31b2 100644
--- a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/LaunchDataIngestionJobCommand.java
+++ b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/LaunchDataIngestionJobCommand.java
@@ -18,13 +18,9 @@
  */
 package org.apache.pinot.tools.admin.command;
 
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
 import org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher;
 import org.apache.pinot.tools.Command;
 import org.kohsuke.args4j.Option;
-import org.kohsuke.args4j.spi.StringArrayOptionHandler;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -39,9 +35,6 @@ public class LaunchDataIngestionJobCommand extends AbstractBaseAdminCommand impl
   @Option(name = "-jobSpecFile", required = true, metaVar = "<string>", usage = "Ingestion job spec file")
   private String _jobSpecFile;
 
-  @Option(name = "-values", required = false, metaVar = "<template context>", handler = StringArrayOptionHandler.class, usage = "Context values set to the job spec template")
-  private List<String> _values;
-
   @Option(name = "-help", required = false, help = true, aliases = {"-h", "--h", "--help"}, usage = "Print this message.")
   private boolean _help = false;
 
@@ -54,12 +47,9 @@ public class LaunchDataIngestionJobCommand extends AbstractBaseAdminCommand impl
   public boolean execute()
       throws Exception {
     try {
-      List<String> arguments = new ArrayList();
-      arguments.add(_jobSpecFile);
-      arguments.addAll(_values);
-      IngestionJobLauncher.main(arguments.toArray(new String[0]));
+      IngestionJobLauncher.main(new String[]{_jobSpecFile});
     } catch (Exception e) {
-      LOGGER.error("Got exception to kick off standalone data ingestion job - ", e);
+      LOGGER.error("Got exception to kick off standalone data ingestion job -", e);
       throw e;
     }
     return true;
@@ -72,7 +62,7 @@ public class LaunchDataIngestionJobCommand extends AbstractBaseAdminCommand impl
 
   @Override
   public String toString() {
-    return ("LaunchDataIngestionJob -jobSpecFile " + _jobSpecFile + " -values " + Arrays.toString(_values.toArray()));
+    return ("LaunchDataIngestionJob -jobSpecFile " + _jobSpecFile);
   }
 
   @Override
diff --git a/pom.xml b/pom.xml
index f7d7f11..55c8b35 100644
--- a/pom.xml
+++ b/pom.xml
@@ -135,7 +135,6 @@
     <scala.binary.version>2.11</scala.binary.version>
     <scala.version>2.11.11</scala.version>
     <antlr.version>4.6</antlr.version>
-    <jinjava.version>2.5.3</jinjava.version>
     <calcite.version>1.19.0</calcite.version>
     <lucene.version>8.2.0</lucene.version>
     <!-- commons-configuration, hadoop-common, hadoop-client use commons-lang -->
@@ -963,11 +962,6 @@
         </exclusions>
       </dependency>
       <dependency>
-        <groupId>com.hubspot.jinjava</groupId>
-        <artifactId>jinjava</artifactId>
-        <version>${jinjava.version}</version>
-      </dependency>
-      <dependency>
         <groupId>org.apache.calcite</groupId>
         <artifactId>calcite-babel</artifactId>
         <version>${calcite.version}</version>


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org