You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ra...@apache.org on 2018/04/02 08:59:12 UTC
carbondata git commit: [CARBONDATA-2276][SDK] Support API to read
schema in data file and schema file
Repository: carbondata
Updated Branches:
refs/heads/master 52f8d7111 -> f910cfa98
[CARBONDATA-2276][SDK] Support API to read schema in data file and schema file
This closes #2099
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/f910cfa9
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/f910cfa9
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/f910cfa9
Branch: refs/heads/master
Commit: f910cfa98d1d558779736e13e534bfff6c981172
Parents: 52f8d71
Author: Jacky Li <ja...@qq.com>
Authored: Sat Mar 24 22:38:20 2018 +0800
Committer: ravipesala <ra...@gmail.com>
Committed: Mon Apr 2 14:28:58 2018 +0530
----------------------------------------------------------------------
.../core/metadata/schema/SchemaReader.java | 2 +-
.../core/reader/CarbonHeaderReader.java | 19 +++
.../apache/carbondata/core/util/CarbonUtil.java | 100 ++++++----------
.../carbondata/sdk/file/CarbonReader.java | 43 +++++++
.../sdk/file/CSVCarbonWriterTest.java | 110 +----------------
.../carbondata/sdk/file/CarbonReaderTest.java | 118 +++++++++++++++++++
.../apache/carbondata/sdk/file/TestUtil.java | 98 +++++++++++++++
7 files changed, 322 insertions(+), 168 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/f910cfa9/core/src/main/java/org/apache/carbondata/core/metadata/schema/SchemaReader.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/schema/SchemaReader.java b/core/src/main/java/org/apache/carbondata/core/metadata/schema/SchemaReader.java
index 787a9b9..54814cd 100644
--- a/core/src/main/java/org/apache/carbondata/core/metadata/schema/SchemaReader.java
+++ b/core/src/main/java/org/apache/carbondata/core/metadata/schema/SchemaReader.java
@@ -87,7 +87,7 @@ public class SchemaReader {
// Convert the ColumnSchema -> TableSchema -> TableInfo.
// Return the TableInfo.
org.apache.carbondata.format.TableInfo tableInfo =
- CarbonUtil.inferSchema(identifier.getTablePath(), identifier, false);
+ CarbonUtil.inferSchema(identifier.getTablePath(), identifier.getTableName());
SchemaConverter schemaConverter = new ThriftWrapperSchemaConverterImpl();
TableInfo wrapperTableInfo = schemaConverter.fromExternalToWrapperTableInfo(
tableInfo, identifier.getDatabaseName(), identifier.getTableName(),
http://git-wip-us.apache.org/repos/asf/carbondata/blob/f910cfa9/core/src/main/java/org/apache/carbondata/core/reader/CarbonHeaderReader.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/reader/CarbonHeaderReader.java b/core/src/main/java/org/apache/carbondata/core/reader/CarbonHeaderReader.java
index ab557cc..9bbdca9 100644
--- a/core/src/main/java/org/apache/carbondata/core/reader/CarbonHeaderReader.java
+++ b/core/src/main/java/org/apache/carbondata/core/reader/CarbonHeaderReader.java
@@ -17,9 +17,14 @@
package org.apache.carbondata.core.reader;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
import org.apache.carbondata.format.FileHeader;
+import static org.apache.carbondata.core.util.CarbonUtil.thriftColumnSchmeaToWrapperColumnSchema;
+
import org.apache.thrift.TBase;
/**
@@ -65,4 +70,18 @@ public class CarbonHeaderReader {
});
}
+ /**
+ * Read and return the schema in the header
+ */
+ public List<ColumnSchema> readSchema() throws IOException {
+ FileHeader fileHeader = readHeader();
+ List<ColumnSchema> columnSchemaList = new ArrayList<>();
+ List<org.apache.carbondata.format.ColumnSchema> table_columns = fileHeader.getColumn_schema();
+ for (org.apache.carbondata.format.ColumnSchema table_column : table_columns) {
+ ColumnSchema col = thriftColumnSchmeaToWrapperColumnSchema(table_column);
+ col.setColumnReferenceId(col.getColumnUniqueId());
+ columnSchemaList.add(col);
+ }
+ return columnSchemaList;
+ }
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/f910cfa9/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
index 3c347db..8e21d46 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
@@ -86,8 +86,6 @@ import org.apache.carbondata.core.util.path.CarbonTablePath;
import org.apache.carbondata.format.BlockletHeader;
import org.apache.carbondata.format.DataChunk2;
import org.apache.carbondata.format.DataChunk3;
-import org.apache.carbondata.format.FileHeader;
-
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
@@ -2336,69 +2334,45 @@ public final class CarbonUtil {
/**
* This method will read the schema file from a given path
*
- * @param schemaFilePath
- * @return
+ * @return table info containing the schema
*/
public static org.apache.carbondata.format.TableInfo inferSchema(
- String carbonDataFilePath, AbsoluteTableIdentifier absoluteTableIdentifier,
- boolean schemaExists) throws IOException {
- TBaseCreator createTBase = new ThriftReader.TBaseCreator() {
- public org.apache.thrift.TBase<org.apache.carbondata.format.TableInfo,
- org.apache.carbondata.format.TableInfo._Fields> create() {
- return new org.apache.carbondata.format.TableInfo();
- }
- };
- if (schemaExists == false) {
- List<String> filePaths =
- getFilePathExternalFilePath(carbonDataFilePath + "/Fact/Part0/Segment_null");
- String fistFilePath = null;
- try {
- fistFilePath = filePaths.get(0);
- } catch (Exception e) {
- LOGGER.error("CarbonData file is not present in the table location");
- }
- CarbonHeaderReader carbonHeaderReader = new CarbonHeaderReader(fistFilePath);
- FileHeader fileHeader = carbonHeaderReader.readHeader();
- List<ColumnSchema> columnSchemaList = new ArrayList<ColumnSchema>();
- List<org.apache.carbondata.format.ColumnSchema> table_columns = fileHeader.getColumn_schema();
- for (int i = 0; i < table_columns.size(); i++) {
- ColumnSchema col = thriftColumnSchmeaToWrapperColumnSchema(table_columns.get(i));
- col.setColumnReferenceId(col.getColumnUniqueId());
- columnSchemaList.add(col);
- }
- TableSchema tableSchema = new TableSchema();
- tableSchema.setTableName(absoluteTableIdentifier.getTableName());
- tableSchema.setBucketingInfo(null);
- tableSchema.setSchemaEvalution(null);
- tableSchema.setTableId(UUID.randomUUID().toString());
- tableSchema.setListOfColumns(columnSchemaList);
-
- ThriftWrapperSchemaConverterImpl thriftWrapperSchemaConverter =
- new ThriftWrapperSchemaConverterImpl();
- SchemaEvolutionEntry schemaEvolutionEntry = new SchemaEvolutionEntry();
- schemaEvolutionEntry.setTimeStamp(System.currentTimeMillis());
- SchemaEvolution schemaEvol = new SchemaEvolution();
- List<SchemaEvolutionEntry> schEntryList = new ArrayList<>();
- schEntryList.add(schemaEvolutionEntry);
- schemaEvol.setSchemaEvolutionEntryList(schEntryList);
- tableSchema.setSchemaEvalution(schemaEvol);
-
- org.apache.carbondata.format.TableSchema thriftFactTable =
- thriftWrapperSchemaConverter.fromWrapperToExternalTableSchema(tableSchema);
- org.apache.carbondata.format.TableInfo tableInfo =
- new org.apache.carbondata.format.TableInfo(thriftFactTable,
- new ArrayList<org.apache.carbondata.format.TableSchema>());
-
- tableInfo.setDataMapSchemas(null);
- return tableInfo;
- } else {
- ThriftReader thriftReader = new ThriftReader(carbonDataFilePath, createTBase);
- thriftReader.open();
- org.apache.carbondata.format.TableInfo tableInfo =
- (org.apache.carbondata.format.TableInfo) thriftReader.read();
- thriftReader.close();
- return tableInfo;
- }
+ String carbonDataFilePath, String tableName) throws IOException {
+ List<String> filePaths =
+ getFilePathExternalFilePath(carbonDataFilePath + "/Fact/Part0/Segment_null");
+ String fistFilePath = null;
+ try {
+ fistFilePath = filePaths.get(0);
+ } catch (Exception e) {
+ LOGGER.error("CarbonData file is not present in the table location");
+ }
+ CarbonHeaderReader carbonHeaderReader = new CarbonHeaderReader(fistFilePath);
+ List<ColumnSchema> columnSchemaList = carbonHeaderReader.readSchema();
+ TableSchema tableSchema = new TableSchema();
+ tableSchema.setTableName(tableName);
+ tableSchema.setBucketingInfo(null);
+ tableSchema.setSchemaEvalution(null);
+ tableSchema.setTableId(UUID.randomUUID().toString());
+ tableSchema.setListOfColumns(columnSchemaList);
+
+ ThriftWrapperSchemaConverterImpl thriftWrapperSchemaConverter =
+ new ThriftWrapperSchemaConverterImpl();
+ SchemaEvolutionEntry schemaEvolutionEntry = new SchemaEvolutionEntry();
+ schemaEvolutionEntry.setTimeStamp(System.currentTimeMillis());
+ SchemaEvolution schemaEvol = new SchemaEvolution();
+ List<SchemaEvolutionEntry> schEntryList = new ArrayList<>();
+ schEntryList.add(schemaEvolutionEntry);
+ schemaEvol.setSchemaEvolutionEntryList(schEntryList);
+ tableSchema.setSchemaEvalution(schemaEvol);
+
+ org.apache.carbondata.format.TableSchema thriftFactTable =
+ thriftWrapperSchemaConverter.fromWrapperToExternalTableSchema(tableSchema);
+ org.apache.carbondata.format.TableInfo tableInfo =
+ new org.apache.carbondata.format.TableInfo(thriftFactTable,
+ new ArrayList<org.apache.carbondata.format.TableSchema>());
+
+ tableInfo.setDataMapSchemas(null);
+ return tableInfo;
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/f910cfa9/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReader.java
----------------------------------------------------------------------
diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReader.java b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReader.java
index 8cb8b2c..210d516 100644
--- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReader.java
+++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReader.java
@@ -20,8 +20,22 @@ package org.apache.carbondata.sdk.file;
import java.io.IOException;
import java.util.List;
+import org.apache.carbondata.common.annotations.InterfaceAudience;
+import org.apache.carbondata.common.annotations.InterfaceStability;
+import org.apache.carbondata.core.metadata.converter.SchemaConverter;
+import org.apache.carbondata.core.metadata.converter.ThriftWrapperSchemaConverterImpl;
+import org.apache.carbondata.core.metadata.schema.table.TableInfo;
+import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
+import org.apache.carbondata.core.reader.CarbonHeaderReader;
+import org.apache.carbondata.core.util.CarbonUtil;
+
import org.apache.hadoop.mapreduce.RecordReader;
+/**
+ * Reader for carbondata file
+ */
+@InterfaceAudience.User
+@InterfaceStability.Evolving
public class CarbonReader<T> {
private List<RecordReader<Void, T>> readers;
@@ -30,6 +44,9 @@ public class CarbonReader<T> {
private int index;
+ /**
+ * Call {@link #builder(String)} to construct an instance
+ */
CarbonReader(List<RecordReader<Void, T>> readers) {
if (readers.size() == 0) {
throw new IllegalArgumentException("no reader");
@@ -39,6 +56,9 @@ public class CarbonReader<T> {
this.currentReader = readers.get(0);
}
+ /**
+ * Return true if has next row
+ */
public boolean hasNext() throws IOException, InterruptedException {
if (currentReader.nextKeyValue()) {
return true;
@@ -54,11 +74,34 @@ public class CarbonReader<T> {
}
}
+ /**
+ * Read and return next row object
+ */
public T readNextRow() throws IOException, InterruptedException {
return currentReader.getCurrentValue();
}
+ /**
+ * Return a new {@link CarbonReaderBuilder} instance
+ */
public static CarbonReaderBuilder builder(String tablePath) {
return new CarbonReaderBuilder(tablePath);
}
+
+ /**
+ * Read carbondata file and return the schema
+ */
+ public static List<ColumnSchema> readSchemaInDataFile(String dataFilePath) throws IOException {
+ CarbonHeaderReader reader = new CarbonHeaderReader(dataFilePath);
+ return reader.readSchema();
+ }
+
+ /**
+ * Read schema file and return table info object
+ */
+ public static TableInfo readSchemaFile(String schemaFilePath) throws IOException {
+ org.apache.carbondata.format.TableInfo tableInfo = CarbonUtil.readSchemaFile(schemaFilePath);
+ SchemaConverter schemaConverter = new ThriftWrapperSchemaConverterImpl();
+ return schemaConverter.fromExternalToWrapperTableInfo(tableInfo, "", "", "");
+ }
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/f910cfa9/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CSVCarbonWriterTest.java
----------------------------------------------------------------------
diff --git a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CSVCarbonWriterTest.java b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CSVCarbonWriterTest.java
index 68663ec..eecbf5f 100644
--- a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CSVCarbonWriterTest.java
+++ b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CSVCarbonWriterTest.java
@@ -47,7 +47,7 @@ public class CSVCarbonWriterTest {
fields[0] = new Field("name", DataTypes.STRING);
fields[1] = new Field("age", DataTypes.INT);
- writeFilesAndVerify(new Schema(fields), path);
+ TestUtil.writeFilesAndVerify(new Schema(fields), path);
FileUtils.deleteDirectory(new File(path));
}
@@ -65,105 +65,7 @@ public class CSVCarbonWriterTest {
.append("]")
.toString();
- writeFilesAndVerify(Schema.parseJson(schema), path);
-
- FileUtils.deleteDirectory(new File(path));
- }
-
- private void writeFilesAndVerify(Schema schema, String path) {
- writeFilesAndVerify(schema, path, null);
- }
-
- private void writeFilesAndVerify(Schema schema, String path, String[] sortColumns) {
- writeFilesAndVerify(100, schema, path, sortColumns, false, -1, -1);
- }
-
- private void writeFilesAndVerify(Schema schema, String path, boolean persistSchema) {
- writeFilesAndVerify(100, schema, path, null, persistSchema, -1, -1);
- }
-
- /**
- * Invoke CarbonWriter API to write carbon files and assert the file is rewritten
- * @param rows number of rows to write
- * @param schema schema of the file
- * @param path local write path
- * @param sortColumns sort columns
- * @param persistSchema true if want to persist schema file
- * @param blockletSize blockletSize in the file, -1 for default size
- * @param blockSize blockSize in the file, -1 for default size
- */
- private void writeFilesAndVerify(int rows, Schema schema, String path, String[] sortColumns,
- boolean persistSchema, int blockletSize, int blockSize) {
- try {
- CarbonWriterBuilder builder = CarbonWriter.builder()
- .withSchema(schema)
- .outputPath(path);
- if (sortColumns != null) {
- builder = builder.sortBy(sortColumns);
- }
- if (persistSchema) {
- builder = builder.persistSchemaFile(true);
- }
- if (blockletSize != -1) {
- builder = builder.withBlockletSize(blockletSize);
- }
- if (blockSize != -1) {
- builder = builder.withBlockSize(blockSize);
- }
-
- CarbonWriter writer = builder.buildWriterForCSVInput();
-
- for (int i = 0; i < rows; i++) {
- writer.write(new String[]{"robot" + (i % 10), String.valueOf(i), String.valueOf((double) i / 2)});
- }
- writer.close();
- } catch (IOException e) {
- e.printStackTrace();
- Assert.fail(e.getMessage());
- } catch (InvalidLoadOptionException l) {
- l.printStackTrace();
- Assert.fail(l.getMessage());
- }
-
- File segmentFolder = new File(CarbonTablePath.getSegmentPath(path, "null"));
- Assert.assertTrue(segmentFolder.exists());
-
- File[] dataFiles = segmentFolder.listFiles(new FileFilter() {
- @Override public boolean accept(File pathname) {
- return pathname.getName().endsWith(CarbonCommonConstants.FACT_FILE_EXT);
- }
- });
- Assert.assertNotNull(dataFiles);
- Assert.assertTrue(dataFiles.length > 0);
- }
-
- @Test
- public void testWriteAndReadFiles() throws IOException, InterruptedException {
- String path = "./testWriteFiles";
- FileUtils.deleteDirectory(new File(path));
-
- Field[] fields = new Field[2];
- fields[0] = new Field("name", DataTypes.STRING);
- fields[1] = new Field("age", DataTypes.INT);
-
- writeFilesAndVerify(new Schema(fields), path, true);
-
- File[] files = new File(path + "/Fact/Part0/Segment_null/").listFiles(new FilenameFilter() {
- @Override public boolean accept(File dir, String name) {
- return name.endsWith("carbondata");
- }
- });
-
- CarbonReader reader = CarbonReader.builder(path)
- .projection(new String[]{"name", "age"}).build();
-
- int i = 0;
- while (reader.hasNext()) {
- Object[] row = (Object[])reader.readNextRow();
- Assert.assertEquals("robot" + (i % 10), row[0]);
- Assert.assertEquals(i, row[1]);
- i++;
- }
+ TestUtil.writeFilesAndVerify(Schema.parseJson(schema), path);
FileUtils.deleteDirectory(new File(path));
}
@@ -234,7 +136,7 @@ public class CSVCarbonWriterTest {
fields[0] = new Field("name", DataTypes.STRING);
fields[1] = new Field("age", DataTypes.INT);
- writeFilesAndVerify(1000 * 1000, new Schema(fields), path, null, false, 1, 100);
+ TestUtil.writeFilesAndVerify(1000 * 1000, new Schema(fields), path, null, false, 1, 100);
// TODO: implement reader to verify the number of blocklet in the file
@@ -250,7 +152,7 @@ public class CSVCarbonWriterTest {
fields[0] = new Field("name", DataTypes.STRING);
fields[1] = new Field("age", DataTypes.INT);
- writeFilesAndVerify(1000 * 1000, new Schema(fields), path, null, false, 2, 2);
+ TestUtil.writeFilesAndVerify(1000 * 1000, new Schema(fields), path, null, false, 2, 2);
File segmentFolder = new File(CarbonTablePath.getSegmentPath(path, "null"));
File[] dataFiles = segmentFolder.listFiles(new FileFilter() {
@@ -273,7 +175,7 @@ public class CSVCarbonWriterTest {
fields[0] = new Field("name", DataTypes.STRING);
fields[1] = new Field("age", DataTypes.INT);
- writeFilesAndVerify(new Schema(fields), path, new String[]{"name"});
+ TestUtil.writeFilesAndVerify(new Schema(fields), path, new String[]{"name"});
// TODO: implement reader and verify the data is sorted
@@ -294,7 +196,7 @@ public class CSVCarbonWriterTest {
fields[0] = new Field("name", DataTypes.STRING);
fields[1] = new Field("age", DataTypes.INT);
- writeFilesAndVerify(new Schema(fields), path, true);
+ TestUtil.writeFilesAndVerify(new Schema(fields), path, true);
String schemaFile = CarbonTablePath.getSchemaFilePath(path);
Assert.assertTrue(new File(schemaFile).exists());
http://git-wip-us.apache.org/repos/asf/carbondata/blob/f910cfa9/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java
----------------------------------------------------------------------
diff --git a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java
new file mode 100644
index 0000000..14802e6
--- /dev/null
+++ b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.sdk.file;
+
+import java.io.File;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.carbondata.core.metadata.datatype.DataTypes;
+import org.apache.carbondata.core.metadata.schema.table.TableInfo;
+import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
+
+import org.apache.commons.io.FileUtils;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class CarbonReaderTest {
+
+ @Test
+ public void testWriteAndReadFiles() throws IOException, InterruptedException {
+ String path = "./testWriteFiles";
+ FileUtils.deleteDirectory(new File(path));
+
+ Field[] fields = new Field[2];
+ fields[0] = new Field("name", DataTypes.STRING);
+ fields[1] = new Field("age", DataTypes.INT);
+
+ TestUtil.writeFilesAndVerify(new Schema(fields), path, true);
+
+ CarbonReader reader = CarbonReader.builder(path)
+ .projection(new String[]{"name", "age"}).build();
+
+ int i = 0;
+ while (reader.hasNext()) {
+ Object[] row = (Object[])reader.readNextRow();
+ Assert.assertEquals("robot" + (i % 10), row[0]);
+ Assert.assertEquals(i, row[1]);
+ i++;
+ }
+
+ FileUtils.deleteDirectory(new File(path));
+ }
+
+ @Test
+ public void testReadSchemaFromDataFile() throws IOException {
+ String path = "./testWriteFiles";
+ FileUtils.deleteDirectory(new File(path));
+
+ Field[] fields = new Field[2];
+ fields[0] = new Field("name", DataTypes.STRING);
+ fields[1] = new Field("age", DataTypes.INT);
+
+ TestUtil.writeFilesAndVerify(new Schema(fields), path, true);
+
+ File[] dataFiles = new File(path + "/Fact/Part0/Segment_null/").listFiles(new FilenameFilter() {
+ @Override public boolean accept(File dir, String name) {
+ return name.endsWith("carbondata");
+ }
+ });
+ Assert.assertTrue(dataFiles != null);
+ Assert.assertTrue(dataFiles.length > 0);
+ List<ColumnSchema> columns = CarbonReader.readSchemaInDataFile(dataFiles[0].getAbsolutePath());
+ Assert.assertTrue(columns.size() == 2);
+ Assert.assertEquals("name", columns.get(0).getColumnName());
+ Assert.assertEquals("age", columns.get(1).getColumnName());
+ Assert.assertEquals(DataTypes.STRING, columns.get(0).getDataType());
+ Assert.assertEquals(DataTypes.INT, columns.get(1).getDataType());
+
+ FileUtils.deleteDirectory(new File(path));
+ }
+
+ @Test
+ public void testReadSchemaFromSchemaFile() throws IOException {
+ String path = "./testWriteFiles";
+ FileUtils.deleteDirectory(new File(path));
+
+ Field[] fields = new Field[2];
+ fields[0] = new Field("name", DataTypes.STRING);
+ fields[1] = new Field("age", DataTypes.INT);
+
+ TestUtil.writeFilesAndVerify(new Schema(fields), path, true);
+
+ File[] dataFiles = new File(path + "/Metadata").listFiles(new FilenameFilter() {
+ @Override public boolean accept(File dir, String name) {
+ return name.endsWith("schema");
+ }
+ });
+ Assert.assertTrue(dataFiles != null);
+ Assert.assertTrue(dataFiles.length > 0);
+ TableInfo tableInfo = CarbonReader.readSchemaFile(dataFiles[0].getAbsolutePath());
+ Assert.assertEquals(2, tableInfo.getFactTable().getListOfColumns().size());
+
+ List<ColumnSchema> columns = tableInfo.getFactTable().getListOfColumns();
+ Assert.assertEquals(2, columns.size());
+ Assert.assertEquals("name", columns.get(0).getColumnName());
+ Assert.assertEquals("age", columns.get(1).getColumnName());
+ Assert.assertEquals(DataTypes.STRING, columns.get(0).getDataType());
+ Assert.assertEquals(DataTypes.INT, columns.get(1).getDataType());
+
+ FileUtils.deleteDirectory(new File(path));
+ }
+}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/f910cfa9/store/sdk/src/test/java/org/apache/carbondata/sdk/file/TestUtil.java
----------------------------------------------------------------------
diff --git a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/TestUtil.java b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/TestUtil.java
new file mode 100644
index 0000000..dcedf10
--- /dev/null
+++ b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/TestUtil.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.sdk.file;
+
+import java.io.File;
+import java.io.FileFilter;
+import java.io.IOException;
+
+import org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.util.path.CarbonTablePath;
+
+import org.junit.Assert;
+
+class TestUtil {
+
+ static void writeFilesAndVerify(Schema schema, String path) {
+ writeFilesAndVerify(schema, path, null);
+ }
+
+ static void writeFilesAndVerify(Schema schema, String path, String[] sortColumns) {
+ writeFilesAndVerify(100, schema, path, sortColumns, false, -1, -1);
+ }
+
+ static void writeFilesAndVerify(Schema schema, String path, boolean persistSchema) {
+ writeFilesAndVerify(100, schema, path, null, persistSchema, -1, -1);
+ }
+
+ /**
+ * Invoke CarbonWriter API to write carbon files and assert the file is rewritten
+ * @param rows number of rows to write
+ * @param schema schema of the file
+ * @param path local write path
+ * @param sortColumns sort columns
+ * @param persistSchema true if want to persist schema file
+ * @param blockletSize blockletSize in the file, -1 for default size
+ * @param blockSize blockSize in the file, -1 for default size
+ */
+ static void writeFilesAndVerify(int rows, Schema schema, String path, String[] sortColumns,
+ boolean persistSchema, int blockletSize, int blockSize) {
+ try {
+ CarbonWriterBuilder builder = CarbonWriter.builder()
+ .withSchema(schema)
+ .outputPath(path);
+ if (sortColumns != null) {
+ builder = builder.sortBy(sortColumns);
+ }
+ if (persistSchema) {
+ builder = builder.persistSchemaFile(true);
+ }
+ if (blockletSize != -1) {
+ builder = builder.withBlockletSize(blockletSize);
+ }
+ if (blockSize != -1) {
+ builder = builder.withBlockSize(blockSize);
+ }
+
+ CarbonWriter writer = builder.buildWriterForCSVInput();
+
+ for (int i = 0; i < rows; i++) {
+ writer.write(new String[]{"robot" + (i % 10), String.valueOf(i), String.valueOf((double) i / 2)});
+ }
+ writer.close();
+ } catch (IOException e) {
+ e.printStackTrace();
+ Assert.fail(e.getMessage());
+ } catch (InvalidLoadOptionException l) {
+ l.printStackTrace();
+ Assert.fail(l.getMessage());
+ }
+
+ File segmentFolder = new File(CarbonTablePath.getSegmentPath(path, "null"));
+ Assert.assertTrue(segmentFolder.exists());
+
+ File[] dataFiles = segmentFolder.listFiles(new FileFilter() {
+ @Override public boolean accept(File pathname) {
+ return pathname.getName().endsWith(CarbonCommonConstants.FACT_FILE_EXT);
+ }
+ });
+ Assert.assertNotNull(dataFiles);
+ Assert.assertTrue(dataFiles.length > 0);
+ }
+}