Posted to issues@iceberg.apache.org by GitBox <gi...@apache.org> on 2022/01/09 04:18:01 UTC

[GitHub] [iceberg] smallx commented on a change in pull request #3862: Spark: Supports partition management in V2 Catalog

smallx commented on a change in pull request #3862:
URL: https://github.com/apache/iceberg/pull/3862#discussion_r780732820



##########
File path: spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/sql/TestPartitionedWrites.java
##########
@@ -170,10 +170,25 @@ public void testViewsReturnRecentResults() {
         ImmutableList.of(row(1L, "a")),
         sql("SELECT * FROM tmp"));
 
-    sql("INSERT INTO TABLE %s VALUES (1, 'a')", tableName);
+    sql("INSERT INTO TABLE %s VALUES (1, 'a', CURRENT_TIMESTAMP())", tableName);
 
     assertEquals("View should have expected rows",
         ImmutableList.of(row(1L, "a"), row(1L, "a")),
         sql("SELECT * FROM tmp"));
   }
+
+  @Test
+  public void testAddPartition() {
+    sql("ALTER TABLE %s ADD IF NOT EXISTS PARTITION (id_bucket=2, data_trunc='2022', ts_hour='2022-01-08-23')", tableName);
+  }
+
+  @Test
+  public void testDropPartition() {
+    sql("ALTER TABLE %s DROP IF EXISTS PARTITION (id_bucket=2, data_trunc='2022', ts_hour='2022-01-08-23')", tableName);

Review comment:
       Same as above.
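
For reference, a minimal sketch of what that check could look like for the drop case, reusing the `sql(...)` helper and JUnit asserts already used in this test class, and assuming the `SHOW PARTITIONS` listing this PR enables returns the partition values shown (the row layout is an assumption, not confirmed by this diff):

```java
@Test
public void testDropPartition() {
  sql("ALTER TABLE %s ADD IF NOT EXISTS PARTITION (id_bucket=2, data_trunc='2022', ts_hour='2022-01-08-23')", tableName);
  sql("ALTER TABLE %s DROP IF EXISTS PARTITION (id_bucket=2, data_trunc='2022', ts_hour='2022-01-08-23')", tableName);

  // Hypothetical check: the dropped partition should no longer appear in the listing.
  boolean stillListed = sql("SHOW PARTITIONS %s", tableName).stream()
      .anyMatch(p -> java.util.Arrays.asList(p).contains("2022-01-08-23"));
  Assert.assertFalse("Dropped partition should not be listed", stillListed);
}
```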

##########
File path: spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/source/SparkTable.java
##########
@@ -313,4 +324,77 @@ private static CaseInsensitiveStringMap addSnapshotId(CaseInsensitiveStringMap o
 
     return options;
   }
+
+  @Override
+  public StructType partitionSchema() {
+    Schema schema = icebergTable.spec().schema();
+    List<PartitionField> fields = icebergTable.spec().fields();
+    List<Types.NestedField> structFields = Lists.newArrayListWithExpectedSize(fields.size());
+    fields.forEach(f -> {
+      Type resultType = Types.StringType.get();
+      Type sourceType = schema.findType(f.sourceId());
+      if (!f.name().endsWith("hour") && !f.name().endsWith("month")) {
+        resultType = f.transform().getResultType(sourceType);
+      }
+      structFields.add(Types.NestedField.optional(f.fieldId(), f.name(), resultType));
+    });
+    return (StructType) SparkSchemaUtil.convert(Types.StructType.of(structFields));
+  }
+
+  @Override
+  public void createPartition(InternalRow ident, Map<String, String> properties) throws UnsupportedOperationException {
+    // use Iceberg SQL extensions
+  }
+
+  @Override
+  public boolean dropPartition(InternalRow ident) {
+    // use Iceberg SQL extensions
+    return false;
+  }
+
+  @Override
+  public void replacePartitionMetadata(InternalRow ident, Map<String, String> properties)
+          throws UnsupportedOperationException {
+    throw new UnsupportedOperationException("Iceberg partitions do not support metadata");
+  }
+
+  @Override
+  public Map<String, String> loadPartitionMetadata(InternalRow ident) throws UnsupportedOperationException {
+    throw new UnsupportedOperationException("Iceberg partitions do not support metadata");
+  }
+
+  @Override
+  public InternalRow[] listPartitionIdentifiers(String[] names, InternalRow ident) {
+    // support [show partitions] syntax
+    if (!icebergTable.spec().isUnpartitioned()){
+      if (names.length > 0){
+        return new InternalRow[]{ident};
+      }
+      else {

Review comment:
      Code style: the `else` should go on the same line as the closing brace:
   ```java
   } else {
   ```

##########
File path: spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/source/SparkTable.java
##########
@@ -313,4 +324,77 @@ private static CaseInsensitiveStringMap addSnapshotId(CaseInsensitiveStringMap o
 
     return options;
   }
+
+  @Override
+  public StructType partitionSchema() {
+    Schema schema = icebergTable.spec().schema();
+    List<PartitionField> fields = icebergTable.spec().fields();
+    List<Types.NestedField> structFields = Lists.newArrayListWithExpectedSize(fields.size());
+    fields.forEach(f -> {
+      Type resultType = Types.StringType.get();
+      Type sourceType = schema.findType(f.sourceId());
+      if (!f.name().endsWith("hour") && !f.name().endsWith("month")) {
+        resultType = f.transform().getResultType(sourceType);
+      }
+      structFields.add(Types.NestedField.optional(f.fieldId(), f.name(), resultType));
+    });
+    return (StructType) SparkSchemaUtil.convert(Types.StructType.of(structFields));
+  }
+
+  @Override
+  public void createPartition(InternalRow ident, Map<String, String> properties) throws UnsupportedOperationException {
+    // use Iceberg SQL extensions
+  }
+
+  @Override
+  public boolean dropPartition(InternalRow ident) {
+    // use Iceberg SQL extensions
+    return false;
+  }
+
+  @Override
+  public void replacePartitionMetadata(InternalRow ident, Map<String, String> properties)
+          throws UnsupportedOperationException {
+    throw new UnsupportedOperationException("Iceberg partitions do not support metadata");
+  }
+
+  @Override
+  public Map<String, String> loadPartitionMetadata(InternalRow ident) throws UnsupportedOperationException {
+    throw new UnsupportedOperationException("Iceberg partitions do not support metadata");
+  }
+
+  @Override
+  public InternalRow[] listPartitionIdentifiers(String[] names, InternalRow ident) {
+    // support [show partitions] syntax
+    if (!icebergTable.spec().isUnpartitioned()){
+      if (names.length > 0){
+        return new InternalRow[]{ident};
+      }
+      else {
+        String fileFormat = icebergTable.properties()
+                .getOrDefault(TableProperties.DEFAULT_FILE_FORMAT, TableProperties.DEFAULT_FILE_FORMAT_DEFAULT);
+        List<SparkTableUtil.SparkPartition> partitions = Spark3Util.getPartitions(sparkSession(),
+                new Path(icebergTable.location().concat("\\data")), fileFormat);
+        List<InternalRow> rows = Lists.newArrayList();
+        StructType schema = partitionSchema();
+        StructField[] fields = schema.fields();
+        partitions.forEach(p -> {
+          int i = 0;
+          Map<String, String> values = p.getValues();
+          List<Object> dataTypeVal = Lists.newArrayList();
+          while (i < fields.length) {
+            DataType dataType = schema.apply(fields[i].name()).dataType();
+            dataTypeVal.add(Spark3Util.convertPartitionType(values.get(fields[i].name()), dataType));
+            i += 1;
+          }
+          rows.add(new GenericInternalRow(dataTypeVal.toArray()));
+        });
+        return rows.toArray(new InternalRow[0]);
+      }
+    }
+    else{

Review comment:
       Same as above.

##########
File path: spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/Spark3Util.java
##########
@@ -850,4 +865,46 @@ public String unknown(String sourceName, int sourceId, String transform,
       return String.format("%s(%s) %s %s", transform, sourceName, direction, nullOrder);
     }
   }
+
+  public static Object convertPartitionType(Object value, DataType dataType) {
+    if (value == null && dataType instanceof NullType) {
+      return null;
+    }
+    String old = String.valueOf(value);
+    if (dataType instanceof BooleanType) {
+      return DatatypeConverter.parseBoolean(old);
+    }
+    else if (dataType instanceof ByteType) {

Review comment:
       The code style should be consistent with the existing code:
   ```java
   if (...) {
     ...
   } else if (...) {
     ...
   }
   ```
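
Applied to the quoted fragment, that consistent style would look roughly as follows (only the branches visible in this diff; the remaining type branches are elided, not reconstructed):

```java
public static Object convertPartitionType(Object value, DataType dataType) {
  if (value == null && dataType instanceof NullType) {
    return null;
  }

  String old = String.valueOf(value);
  if (dataType instanceof BooleanType) {
    return DatatypeConverter.parseBoolean(old);
  } else if (dataType instanceof ByteType) {
    // ... remaining branches from the change, unchanged apart from brace placement
  }
  // ...
}
```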

##########
File path: spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/sql/TestPartitionedWrites.java
##########
@@ -170,10 +170,25 @@ public void testViewsReturnRecentResults() {
         ImmutableList.of(row(1L, "a")),
         sql("SELECT * FROM tmp"));
 
-    sql("INSERT INTO TABLE %s VALUES (1, 'a')", tableName);
+    sql("INSERT INTO TABLE %s VALUES (1, 'a', CURRENT_TIMESTAMP())", tableName);
 
     assertEquals("View should have expected rows",
         ImmutableList.of(row(1L, "a"), row(1L, "a")),
         sql("SELECT * FROM tmp"));
   }
+
+  @Test
+  public void testAddPartition() {
+    sql("ALTER TABLE %s ADD IF NOT EXISTS PARTITION (id_bucket=2, data_trunc='2022', ts_hour='2022-01-08-23')", tableName);

Review comment:
       It may be better to check the newly added partition.
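
As with the drop-case sketch earlier in this thread, one hedged way to verify the added partition, assuming the `SHOW PARTITIONS` output contains the partition values as given (the exact row layout depends on `SparkTable#partitionSchema()` and is an assumption here):

```java
@Test
public void testAddPartition() {
  sql("ALTER TABLE %s ADD IF NOT EXISTS PARTITION (id_bucket=2, data_trunc='2022', ts_hour='2022-01-08-23')", tableName);

  // Hypothetical check: the new partition should now be listed.
  boolean listed = sql("SHOW PARTITIONS %s", tableName).stream()
      .anyMatch(p -> java.util.Arrays.asList(p).contains("2022-01-08-23"));
  Assert.assertTrue("Added partition should be listed", listed);
}
```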




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


