You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@pinot.apache.org by GitBox <gi...@apache.org> on 2018/11/15 23:50:51 UTC

[GitHub] apucher closed pull request #3491: [TE] dataframe - tolerate complex column names

apucher closed pull request #3491: [TE] dataframe - tolerate complex column names
URL: https://github.com/apache/incubator-pinot/pull/3491
 
 
   

This is a PR merged from a forked repository.
Because GitHub does not display the original diff of a foreign pull
request (one opened from a fork) once it has been merged, the diff is
reproduced below for the sake of provenance:

diff --git a/thirdeye/thirdeye-pinot/src/main/java/com/linkedin/thirdeye/dataframe/DataFrame.java b/thirdeye/thirdeye-pinot/src/main/java/com/linkedin/thirdeye/dataframe/DataFrame.java
index 432319fdf0..081f6cb96b 100644
--- a/thirdeye/thirdeye-pinot/src/main/java/com/linkedin/thirdeye/dataframe/DataFrame.java
+++ b/thirdeye/thirdeye-pinot/src/main/java/com/linkedin/thirdeye/dataframe/DataFrame.java
@@ -37,6 +37,7 @@
 import java.util.regex.Pattern;
 import org.apache.commons.csv.CSVFormat;
 import org.apache.commons.csv.CSVRecord;
+import org.apache.commons.lang.StringUtils;
 import org.joda.time.DateTime;
 import org.joda.time.DateTimeZone;
 import org.joda.time.Period;
@@ -91,11 +92,13 @@ public DataFrame build() {
       for(int i=0; i<seriesNames.size(); i++) {
         String rawName = seriesNames.get(i);
 
-        String[] parts = rawName.split(":", 2);
-        if(parts.length == 2) {
+        String[] parts = rawName.split(":");
+        String typeString = parts[parts.length - 1];
+
+        if(parts.length > 1 && getValidTypes().contains(typeString)) {
           // user specified type
-          String name = parts[0];
-          Series.SeriesType type = Series.SeriesType.valueOf(parts[1].toUpperCase());
+          String name = StringUtils.join(Arrays.copyOf(parts, parts.length - 1), ":");
+          Series.SeriesType type = Series.SeriesType.valueOf(typeString);
           Series series = buildSeries(type, i);
           df.addSeries(name, series);
 
@@ -2560,6 +2563,14 @@ private static Series makeGroupByGroupSeries(ResultSet resultSet, int keyIndex)
     return DataFrame.toSeries(values);
   }
 
+  private static Set<String> getValidTypes() {
+    Set<String> values = new HashSet<>();
+    for (Series.SeriesType type : Series.SeriesType.values()) {
+      values.add(type.name());
+    }
+    return values;
+  }
+
   public static class Tuple implements Comparable<Tuple> {
     private final Object[] values;
 
diff --git a/thirdeye/thirdeye-pinot/src/test/java/com/linkedin/thirdeye/dataframe/DataFrameTest.java b/thirdeye/thirdeye-pinot/src/test/java/com/linkedin/thirdeye/dataframe/DataFrameTest.java
index 617a760884..0c96a17abe 100644
--- a/thirdeye/thirdeye-pinot/src/test/java/com/linkedin/thirdeye/dataframe/DataFrameTest.java
+++ b/thirdeye/thirdeye-pinot/src/test/java/com/linkedin/thirdeye/dataframe/DataFrameTest.java
@@ -347,6 +347,20 @@ public void testDataFrameBuilderStaticTyping() {
     assertEquals(df.getObjects("object"), 1, 2, 3, 4);
   }
 
+  @Test
+  public void testDataFrameBuilderStaticTypingMultiple() {
+    DataFrame df = DataFrame.builder("double:string:LONG").append(2.5d).build();
+    Assert.assertTrue(df.contains("double:string"));
+    Assert.assertEquals(df.get("double:string").type(), Series.SeriesType.LONG);
+  }
+
+  @Test
+  public void testDataFrameBuilderStaticTypingUnknown() {
+    DataFrame df = DataFrame.builder("double:1:2:string").append(1.1d).build();
+    Assert.assertTrue(df.contains("double:1:2:string"));
+    Assert.assertEquals(df.get("double:1:2:string").type(), Series.SeriesType.DOUBLE);
+  }
+
   @Test(expectedExceptions = NumberFormatException.class)
   public void testDataFrameBuilderStaticTypingFailDouble() {
     DataFrame.builder("double:DOUBLE").append("true").build();


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@pinot.apache.org
For additional commands, e-mail: dev-help@pinot.apache.org