You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@iceberg.apache.org by GitBox <gi...@apache.org> on 2020/12/06 21:33:58 UTC

[GitHub] [iceberg] pvary opened a new issue #1881: UUID write requires different record in Parquet and ORC/Avro

pvary opened a new issue #1881:
URL: https://github.com/apache/iceberg/issues/1881


   When writing UUID:
   - Parquet expects that the field value is byte[]
   - ORC/Avro expect UUID object.
   
   Fixing this inconsistency might break backward compatibility for some existing clients, since they already rely on one of the two behaviors.
   
   The behavior can be reproduced with the following `org.apache.iceberg.TestUUID` test:
   ```
   package org.apache.iceberg;
   
   import java.io.IOException;
   import java.util.ArrayList;
   import java.util.Collection;
   import java.util.UUID;
   import org.apache.hadoop.conf.Configuration;
   import org.apache.iceberg.FileFormat;
   import org.apache.iceberg.Schema;
   import org.apache.iceberg.data.GenericAppenderFactory;
   import org.apache.iceberg.data.GenericRecord;
   import org.apache.iceberg.data.Record;
   import org.apache.iceberg.hadoop.HadoopFileIO;
   import org.apache.iceberg.io.FileAppender;
   import org.apache.iceberg.io.OutputFile;
   import org.apache.iceberg.types.Types;
   import org.apache.iceberg.util.UUIDUtil;
   import org.junit.Rule;
   import org.junit.Test;
   import org.junit.rules.TemporaryFolder;
   import org.junit.runner.RunWith;
   import org.junit.runners.Parameterized;
   
   import static org.apache.iceberg.types.Types.NestedField.required;
   
   @RunWith(Parameterized.class)
   public class TestUUID {
     @Rule
     public TemporaryFolder temp = new TemporaryFolder();
   
     @Parameterized.Parameters(name = "fileFormat={0}")
     public static Collection<Object[]> parameters() {
       Collection<Object[]> testParams = new ArrayList<>();
       testParams.add(new Object[] { FileFormat.PARQUET });
       testParams.add(new Object[] { FileFormat.ORC });
       testParams.add(new Object[] { FileFormat.AVRO });
   
       return testParams;
     }
   
     @Parameterized.Parameter(0)
     public FileFormat fileFormat;
   
     @Test
     public void testUUIDWrite() throws IOException {
       Schema schema = new Schema(required(1, "uuid_test", Types.UUIDType.get()));
       GenericAppenderFactory appenderFactory = new GenericAppenderFactory(schema);
       String dataFileLocation = temp.getRoot().getName() + UUID.randomUUID();
       OutputFile dataFile = new HadoopFileIO(new Configuration()).newOutputFile(dataFileLocation);
       FileAppender<Record> appender = appenderFactory.newAppender(dataFile, fileFormat);
       Record record = GenericRecord.create(schema);
       // This one is working for Parquet
   //    record.set(0, UUIDUtil.convert(UUID.randomUUID()));
       // This one is working for ORC and Avro
       record.set(0, UUID.randomUUID());
       appender.add(record);
       appender.close();
     }
   }
   ```


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org