You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@gobblin.apache.org by "vikrambohra (via GitHub)" <gi...@apache.org> on 2023/02/10 20:17:56 UTC

[GitHub] [gobblin] vikrambohra commented on a diff in pull request #3632: [GOBBLIN-1774] Util for detecting non optional uniontype columns based on Hive Table metadata

vikrambohra commented on code in PR #3632:
URL: https://github.com/apache/gobblin/pull/3632#discussion_r1103195419


##########
gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/metastore/HiveMetaStoreUtils.java:
##########
@@ -256,6 +262,71 @@ public static SerDeInfo getSerDeInfo(HiveRegistrationUnit unit) {
     return si;
   }
 
+  public static boolean containsNonOptionalUnionTypeColumn(Table t) {
+    return containsNonOptionalUnionTypeColumn(getHiveTable(t));
+  }
+
+  /**
+   * Util for detecting if a hive table has a non-optional union (aka complex unions) column types. A non optional
+   * union is defined as a uniontype with n >= 2 non-null subtypes
+   *
+   * @param hiveTable Hive table
+   * @return if hive table contains non-optional uniontype columns
+   */
+  public static boolean containsNonOptionalUnionTypeColumn(HiveTable hiveTable) {
+    if (hiveTable.getProps().contains("avro.schema.literal")) {

Review Comment:
   Use the `AvroSerdeUtils.SCHEMA_LITERAL` constant here instead of the hard-coded "avro.schema.literal" string.



##########
gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/metastore/HiveMetaStoreUtils.java:
##########
@@ -256,6 +262,71 @@ public static SerDeInfo getSerDeInfo(HiveRegistrationUnit unit) {
     return si;
   }
 
+  public static boolean containsNonOptionalUnionTypeColumn(Table t) {
+    return containsNonOptionalUnionTypeColumn(getHiveTable(t));
+  }
+
+  /**
+   * Util for detecting if a hive table has a non-optional union (aka complex unions) column types. A non optional
+   * union is defined as a uniontype with n >= 2 non-null subtypes
+   *
+   * @param hiveTable Hive table
+   * @return if hive table contains non-optional uniontype columns
+   */
+  public static boolean containsNonOptionalUnionTypeColumn(HiveTable hiveTable) {
+    if (hiveTable.getProps().contains("avro.schema.literal")) {
+      Schema.Parser parser = new Schema.Parser();
+      Schema schema = parser.parse(hiveTable.getProps().getProp("avro.schema.literal"));

Review Comment:
   Use the `AvroSerdeUtils.SCHEMA_LITERAL` constant here instead of the hard-coded "avro.schema.literal" string.



##########
gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/metastore/HiveMetaStoreUtils.java:
##########
@@ -256,6 +262,71 @@ public static SerDeInfo getSerDeInfo(HiveRegistrationUnit unit) {
     return si;
   }
 
+  public static boolean containsNonOptionalUnionTypeColumn(Table t) {
+    return containsNonOptionalUnionTypeColumn(getHiveTable(t));
+  }
+
+  /**
+   * Util for detecting if a hive table has a non-optional union (aka complex unions) column types. A non optional
+   * union is defined as a uniontype with n >= 2 non-null subtypes
+   *
+   * @param hiveTable Hive table
+   * @return if hive table contains non-optional uniontype columns
+   */
+  public static boolean containsNonOptionalUnionTypeColumn(HiveTable hiveTable) {
+    if (hiveTable.getProps().contains("avro.schema.literal")) {
+      Schema.Parser parser = new Schema.Parser();
+      Schema schema = parser.parse(hiveTable.getProps().getProp("avro.schema.literal"));
+      return isNonOptionalUnion(schema);
+    }
+
+    if (isNonAvroFormat(hiveTable)) {
+      return hiveTable.getColumns().stream()
+          .map(HiveRegistrationUnit.Column::getType)
+          .filter(type -> type.contains("uniontype"))
+          .map(type -> TypeDescription.fromString(type))
+          .anyMatch(type -> isNonOptionalUnion(type));
+    }
+
+    throw new RuntimeException("Avro based Hive tables without \"avro.schema.literal\" are not supported");
+  }
+
+  private static boolean isNonOptionalUnion(Schema schema) {
+    switch (schema.getType()) {
+      case UNION:
+        Stream<Schema.Type> nonNullSubTypes = schema.getTypes().stream()
+            .map(Schema::getType).filter(t -> !t.equals(Schema.Type.NULL));
+        if (nonNullSubTypes.count() >= 2)  {
+          return true;
+        }
+        return schema.getTypes().stream().anyMatch(s -> isNonOptionalUnion(s));
+      case MAP: // key is a string and doesn't need to be checked
+        return isNonOptionalUnion(schema.getValueType());
+      case ARRAY:
+        return isNonOptionalUnion(schema.getElementType());
+      case RECORD:
+        return schema.getFields().stream().map(Schema.Field::schema).anyMatch(s -> isNonOptionalUnion(s));
+      default:
+        return false;
+    }
+  }
+
+  private static boolean isNonOptionalUnion(TypeDescription description) {

Review Comment:
   This is a useful method; it can be made public.



##########
gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/metastore/HiveMetaStoreUtils.java:
##########
@@ -256,6 +262,71 @@ public static SerDeInfo getSerDeInfo(HiveRegistrationUnit unit) {
     return si;
   }
 
+  public static boolean containsNonOptionalUnionTypeColumn(Table t) {
+    return containsNonOptionalUnionTypeColumn(getHiveTable(t));
+  }
+
+  /**
+   * Util for detecting if a hive table has a non-optional union (aka complex unions) column types. A non optional
+   * union is defined as a uniontype with n >= 2 non-null subtypes
+   *
+   * @param hiveTable Hive table
+   * @return if hive table contains non-optional uniontype columns
+   */
+  public static boolean containsNonOptionalUnionTypeColumn(HiveTable hiveTable) {
+    if (hiveTable.getProps().contains("avro.schema.literal")) {
+      Schema.Parser parser = new Schema.Parser();
+      Schema schema = parser.parse(hiveTable.getProps().getProp("avro.schema.literal"));
+      return isNonOptionalUnion(schema);
+    }
+
+    if (isNonAvroFormat(hiveTable)) {
+      return hiveTable.getColumns().stream()
+          .map(HiveRegistrationUnit.Column::getType)
+          .filter(type -> type.contains("uniontype"))
+          .map(type -> TypeDescription.fromString(type))
+          .anyMatch(type -> isNonOptionalUnion(type));
+    }
+
+    throw new RuntimeException("Avro based Hive tables without \"avro.schema.literal\" are not supported");
+  }
+
+  private static boolean isNonOptionalUnion(Schema schema) {

Review Comment:
   This is a useful method; it can be made public.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@gobblin.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org