You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by xi...@apache.org on 2020/05/12 00:55:28 UTC
[incubator-pinot] 01/01: Revert "Adding template support for Pinot Ingestion Job Spec (#5341)"
This is an automated email from the ASF dual-hosted git repository.
xiangfu pushed a commit to branch revert_PR
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
commit 627de88cc2c07f5f5de10f53e04c346d4a1646cb
Author: Xiang Fu <fx...@gmail.com>
AuthorDate: Mon May 11 17:54:49 2020 -0700
Revert "Adding template support for Pinot Ingestion Job Spec (#5341)"
This reverts commit c0cdb7218166f1773274c0126f21dd4494a31f07.
---
pinot-spi/pom.xml | 4 -
.../spi/ingestion/batch/IngestionJobLauncher.java | 29 ++------
.../apache/pinot/spi/utils/JinjaTemplateUtils.java | 75 -------------------
.../ingestion/batch/IngestionJobLauncherTest.java | 42 -----------
.../pinot/spi/utils/JinjaTemplateUtilsTest.java | 86 ----------------------
.../test/resources/ingestionJobSpecTemplate.yaml | 45 -----------
.../command/LaunchDataIngestionJobCommand.java | 16 +---
pom.xml | 6 --
8 files changed, 10 insertions(+), 293 deletions(-)
diff --git a/pinot-spi/pom.xml b/pinot-spi/pom.xml
index b1a4b8b..8ad0aec 100644
--- a/pinot-spi/pom.xml
+++ b/pinot-spi/pom.xml
@@ -100,10 +100,6 @@
<artifactId>jsr305</artifactId>
</dependency>
<dependency>
- <groupId>com.hubspot.jinjava</groupId>
- <artifactId>jinjava</artifactId>
- </dependency>
- <dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
</dependency>
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/ingestion/batch/IngestionJobLauncher.java b/pinot-spi/src/main/java/org/apache/pinot/spi/ingestion/batch/IngestionJobLauncher.java
index adbd30f..9bb740a 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/ingestion/batch/IngestionJobLauncher.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/ingestion/batch/IngestionJobLauncher.java
@@ -20,18 +20,13 @@ package org.apache.pinot.spi.ingestion.batch;
import java.io.BufferedReader;
import java.io.FileReader;
-import java.io.IOException;
+import java.io.Reader;
import java.io.StringWriter;
-import java.util.ArrayList;
import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
-import org.apache.commons.io.IOUtils;
import org.apache.pinot.spi.ingestion.batch.runner.IngestionJobRunner;
import org.apache.pinot.spi.ingestion.batch.spec.ExecutionFrameworkSpec;
import org.apache.pinot.spi.ingestion.batch.spec.SegmentGenerationJobSpec;
import org.apache.pinot.spi.plugin.PluginManager;
-import org.apache.pinot.spi.utils.JinjaTemplateUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.yaml.snakeyaml.Yaml;
@@ -41,7 +36,7 @@ public class IngestionJobLauncher {
public static final Logger LOGGER = LoggerFactory.getLogger(IngestionJobLauncher.class);
- private static final String USAGE = "usage: [jobSpec.yaml] [template_key=template_value]...";
+ private static final String USAGE = "usage: [jobSpec.yaml]";
private static void usage() {
System.err.println(USAGE);
@@ -49,26 +44,16 @@ public class IngestionJobLauncher {
public static void main(String[] args)
throws Exception {
- if (args.length < 1) {
+ if (args.length != 1) {
usage();
System.exit(1);
}
String jobSpecFilePath = args[0];
- List<String> valueList = new ArrayList<>();
- for (int i = 1; i < args.length; i++) {
- valueList.add(args[i]);
- }
- SegmentGenerationJobSpec spec =
- getSegmentGenerationJobSpec(jobSpecFilePath, JinjaTemplateUtils.getTemplateContext(valueList));
- runIngestionJob(spec);
- }
- public static SegmentGenerationJobSpec getSegmentGenerationJobSpec(String jobSpecFilePath,
- Map<String, Object> context)
- throws IOException {
- String yamlTemplate = IOUtils.toString(new BufferedReader(new FileReader(jobSpecFilePath)));
- String yamlStr = JinjaTemplateUtils.renderTemplate(yamlTemplate, context);
- return new Yaml().loadAs(yamlStr, SegmentGenerationJobSpec.class);
+ try (Reader reader = new BufferedReader(new FileReader(jobSpecFilePath))) {
+ SegmentGenerationJobSpec spec = new Yaml().loadAs(reader, SegmentGenerationJobSpec.class);
+ runIngestionJob(spec);
+ }
}
public static void runIngestionJob(SegmentGenerationJobSpec spec)
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JinjaTemplateUtils.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JinjaTemplateUtils.java
deleted file mode 100644
index 5d6e15b..0000000
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JinjaTemplateUtils.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.pinot.spi.utils;
-
-import com.hubspot.jinjava.Jinjava;
-import java.text.SimpleDateFormat;
-import java.time.Instant;
-import java.time.temporal.ChronoUnit;
-import java.util.Collections;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.TimeZone;
-
-
-public class JinjaTemplateUtils {
-
- private static final Jinjava JINJAVA = new Jinjava();
- private static final SimpleDateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd");
-
- public static String renderTemplate(String template, Map<String, Object> newContext) {
- Map<String, Object> contextMap = getDefaultJinjaContextMap();
- contextMap.putAll(newContext);
- return JINJAVA.render(template, contextMap);
- }
-
- /**
- Construct default template context:
- today : today's date in format `yyyy-MM-dd`, example value: '2020-05-06'
- yesterday : yesterday's date in format `yyyy-MM-dd`, example value: '2020-05-06'
- */
- public static Map<String, Object> getDefaultJinjaContextMap() {
- Map<String, Object> defaultJinjaContextMap = new HashMap<>();
- Instant now = Instant.now();
- defaultJinjaContextMap.put("today", DATE_FORMAT.format(new Date(now.toEpochMilli())));
- defaultJinjaContextMap.put("yesterday", DATE_FORMAT.format(new Date(now.minus(1, ChronoUnit.DAYS).toEpochMilli())));
- return defaultJinjaContextMap;
- }
-
- public static Map<String, Object> getTemplateContext(List<String> values) {
- Map<String, Object> context = new HashMap<>();
- for (String value : values) {
- String[] splits = value.split("=", 2);
- if (splits.length > 1) {
- context.put(splits[0], splits[1]);
- }
- }
- return context;
- }
-
- public static String renderTemplate(String template) {
- return renderTemplate(template, Collections.emptyMap());
- }
-
- static {
- DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC"));
- }
-}
diff --git a/pinot-spi/src/test/java/org/apache/pinot/spi/ingestion/batch/IngestionJobLauncherTest.java b/pinot-spi/src/test/java/org/apache/pinot/spi/ingestion/batch/IngestionJobLauncherTest.java
deleted file mode 100644
index 7d54f0f..0000000
--- a/pinot-spi/src/test/java/org/apache/pinot/spi/ingestion/batch/IngestionJobLauncherTest.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.pinot.spi.ingestion.batch;
-
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Map;
-import org.apache.pinot.spi.ingestion.batch.spec.SegmentGenerationJobSpec;
-import org.apache.pinot.spi.utils.JinjaTemplateUtils;
-import org.testng.Assert;
-import org.testng.annotations.Test;
-
-
-public class IngestionJobLauncherTest {
-
- @Test
- public void testIngestionJobLauncherWithTemplate()
- throws IOException {
- Map<String, Object> context =
- JinjaTemplateUtils.getTemplateContext(Arrays.asList("year=2020", "month=05", "day=06"));
- SegmentGenerationJobSpec spec = IngestionJobLauncher.getSegmentGenerationJobSpec(
- JinjaTemplateUtils.class.getClassLoader().getResource("ingestionJobSpecTemplate.yaml").getFile(), context);
- Assert.assertEquals(spec.getInputDirURI(), "file:///path/to/input/2020/05/06");
- Assert.assertEquals(spec.getOutputDirURI(), "file:///path/to/output/2020/05/06");
- }
-}
diff --git a/pinot-spi/src/test/java/org/apache/pinot/spi/utils/JinjaTemplateUtilsTest.java b/pinot-spi/src/test/java/org/apache/pinot/spi/utils/JinjaTemplateUtilsTest.java
deleted file mode 100644
index 768ab6f..0000000
--- a/pinot-spi/src/test/java/org/apache/pinot/spi/utils/JinjaTemplateUtilsTest.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.pinot.spi.utils;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.text.SimpleDateFormat;
-import java.time.Instant;
-import java.time.temporal.ChronoUnit;
-import java.util.Arrays;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.TimeZone;
-import org.apache.commons.io.IOUtils;
-import org.apache.pinot.spi.ingestion.batch.spec.SegmentGenerationJobSpec;
-import org.testng.Assert;
-import org.testng.annotations.Test;
-import org.yaml.snakeyaml.Yaml;
-
-
-public class JinjaTemplateUtilsTest {
-
- @Test
- public void testDefaultRenderTemplate() {
- Date today = new Date(Instant.now().toEpochMilli());
- Date yesterday = new Date(Instant.now().minus(1, ChronoUnit.DAYS).toEpochMilli());
- SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
- dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- Assert.assertEquals(JinjaTemplateUtils.renderTemplate("{{ today }}"), dateFormat.format(today));
- Assert.assertEquals(JinjaTemplateUtils.renderTemplate("{{ yesterday }}"), dateFormat.format(yesterday));
- }
-
- @Test
- public void testRenderTemplateWithGivenContextMap() {
- Map<String, Object> contextMap = new HashMap<>();
- contextMap.put("first_date_2020", "2020-01-01");
- contextMap.put("name", "xiang");
- contextMap.put("ts", 1577836800);
- contextMap.put("yyyy", "2020");
- contextMap.put("YYYY", "1919");
- contextMap.put("MM", "05");
- contextMap.put("dd", "06");
- Assert.assertEquals(JinjaTemplateUtils.renderTemplate("{{ first_date_2020 }}", contextMap), "2020-01-01");
- Assert.assertEquals(JinjaTemplateUtils.renderTemplate("{{first_date_2020}}", contextMap), "2020-01-01");
- Assert.assertEquals(JinjaTemplateUtils.renderTemplate("{{ name }}", contextMap), "xiang");
- Assert.assertEquals(JinjaTemplateUtils.renderTemplate("{{ name|upper }}", contextMap), "XIANG");
- Assert.assertEquals(JinjaTemplateUtils.renderTemplate("{{ ts }}", contextMap), "1577836800");
- Assert.assertEquals(JinjaTemplateUtils.renderTemplate("/var/rawdata/{{ yyyy }}/{{ MM }}/{{ dd }}", contextMap),
- "/var/rawdata/2020/05/06");
- Assert.assertEquals(JinjaTemplateUtils.renderTemplate("/var/rawdata/{{yyyy}}/{{MM}}/{{dd}}", contextMap),
- "/var/rawdata/2020/05/06");
- Assert.assertEquals(JinjaTemplateUtils.renderTemplate("/var/rawdata/{{YYYY}}/{{MM}}/{{dd}}", contextMap),
- "/var/rawdata/1919/05/06");
- }
-
- @Test
- public void testIngestionJobTemplate()
- throws IOException {
- InputStream resourceAsStream =
- JinjaTemplateUtils.class.getClassLoader().getResourceAsStream("ingestionJobSpecTemplate.yaml");
- String yamlTemplate = IOUtils.toString(resourceAsStream);
- Map<String, Object> context =
- JinjaTemplateUtils.getTemplateContext(Arrays.asList("year=2020", "month=05", "day=06"));
- String yamlStr = JinjaTemplateUtils.renderTemplate(yamlTemplate, context);
- SegmentGenerationJobSpec spec = new Yaml().loadAs(yamlStr, SegmentGenerationJobSpec.class);
- Assert.assertEquals(spec.getInputDirURI(), "file:///path/to/input/2020/05/06");
- Assert.assertEquals(spec.getOutputDirURI(), "file:///path/to/output/2020/05/06");
- }
-}
diff --git a/pinot-spi/src/test/resources/ingestionJobSpecTemplate.yaml b/pinot-spi/src/test/resources/ingestionJobSpecTemplate.yaml
deleted file mode 100644
index 5032e17..0000000
--- a/pinot-spi/src/test/resources/ingestionJobSpecTemplate.yaml
+++ /dev/null
@@ -1,45 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-
-executionFrameworkSpec:
- name: 'standalone'
- segmentGenerationJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner'
- segmentTarPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentTarPushJobRunner'
- segmentUriPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentUriPushJobRunner'
-jobType: SegmentCreationAndTarPush
-inputDirURI: 'file:///path/to/input/{{ year }}/{{ month }}/{{ day }}'
-includeFileNamePattern: 'glob:**/*.parquet'
-excludeFileNamePattern: 'glob:**/*.avro'
-outputDirURI: 'file:///path/to/output/{{year}}/{{month}}/{{day}}'
-overwriteOutput: true
-pinotFSSpecs:
- - scheme: file
- className: org.apache.pinot.spi.filesystem.LocalPinotFS
-recordReaderSpec:
- dataFormat: 'parquet'
- className: 'org.apache.pinot.parquet.data.readers.ParquetRecordReader'
-tableSpec:
- tableName: 'myTable'
- schemaURI: 'http://localhost:9000/tables/myTable/schema'
- tableConfigURI: 'http://localhost:9000/tables/myTable'
-pinotClusterSpecs:
- - controllerURI: 'localhost:9000'
-pushJobSpec:
- pushAttempts: 2
- pushRetryIntervalMillis: 1000
\ No newline at end of file
diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/LaunchDataIngestionJobCommand.java b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/LaunchDataIngestionJobCommand.java
index bd37ebb..49c31b2 100644
--- a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/LaunchDataIngestionJobCommand.java
+++ b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/LaunchDataIngestionJobCommand.java
@@ -18,13 +18,9 @@
*/
package org.apache.pinot.tools.admin.command;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
import org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher;
import org.apache.pinot.tools.Command;
import org.kohsuke.args4j.Option;
-import org.kohsuke.args4j.spi.StringArrayOptionHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -39,9 +35,6 @@ public class LaunchDataIngestionJobCommand extends AbstractBaseAdminCommand impl
@Option(name = "-jobSpecFile", required = true, metaVar = "<string>", usage = "Ingestion job spec file")
private String _jobSpecFile;
- @Option(name = "-values", required = false, metaVar = "<template context>", handler = StringArrayOptionHandler.class, usage = "Context values set to the job spec template")
- private List<String> _values;
-
@Option(name = "-help", required = false, help = true, aliases = {"-h", "--h", "--help"}, usage = "Print this message.")
private boolean _help = false;
@@ -54,12 +47,9 @@ public class LaunchDataIngestionJobCommand extends AbstractBaseAdminCommand impl
public boolean execute()
throws Exception {
try {
- List<String> arguments = new ArrayList();
- arguments.add(_jobSpecFile);
- arguments.addAll(_values);
- IngestionJobLauncher.main(arguments.toArray(new String[0]));
+ IngestionJobLauncher.main(new String[]{_jobSpecFile});
} catch (Exception e) {
- LOGGER.error("Got exception to kick off standalone data ingestion job - ", e);
+ LOGGER.error("Got exception to kick off standalone data ingestion job -", e);
throw e;
}
return true;
@@ -72,7 +62,7 @@ public class LaunchDataIngestionJobCommand extends AbstractBaseAdminCommand impl
@Override
public String toString() {
- return ("LaunchDataIngestionJob -jobSpecFile " + _jobSpecFile + " -values " + Arrays.toString(_values.toArray()));
+ return ("LaunchDataIngestionJob -jobSpecFile " + _jobSpecFile);
}
@Override
diff --git a/pom.xml b/pom.xml
index f7d7f11..55c8b35 100644
--- a/pom.xml
+++ b/pom.xml
@@ -135,7 +135,6 @@
<scala.binary.version>2.11</scala.binary.version>
<scala.version>2.11.11</scala.version>
<antlr.version>4.6</antlr.version>
- <jinjava.version>2.5.3</jinjava.version>
<calcite.version>1.19.0</calcite.version>
<lucene.version>8.2.0</lucene.version>
<!-- commons-configuration, hadoop-common, hadoop-client use commons-lang -->
@@ -963,11 +962,6 @@
</exclusions>
</dependency>
<dependency>
- <groupId>com.hubspot.jinjava</groupId>
- <artifactId>jinjava</artifactId>
- <version>${jinjava.version}</version>
- </dependency>
- <dependency>
<groupId>org.apache.calcite</groupId>
<artifactId>calcite-babel</artifactId>
<version>${calcite.version}</version>
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org