You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ja...@apache.org on 2020/01/15 08:06:47 UTC
[carbondata] branch master updated: [CARBONDATA-3627] C++ SDK
support write data withSchemaFile
This is an automated email from the ASF dual-hosted git repository.
jackylk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new f40b349 [CARBONDATA-3627] C++ SDK support write data withSchemaFile
f40b349 is described below
commit f40b34901a5c4d80734dd2ef28c2191e6c5d12c8
Author: xubo245 <60...@qq.com>
AuthorDate: Mon Dec 23 00:47:51 2019 +0800
[CARBONDATA-3627] C++ SDK support write data withSchemaFile
The C++ SDK now supports writing data with withSchemaFile, which can be used to add a segment to a transactional table.
This closes #3526
---
store/CSDK/src/CarbonWriter.cpp | 40 +++++++++++++++
store/CSDK/src/CarbonWriter.h | 13 +++++
store/CSDK/test/main.cpp | 105 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 158 insertions(+)
diff --git a/store/CSDK/src/CarbonWriter.cpp b/store/CSDK/src/CarbonWriter.cpp
index 243e533..bad31d6 100644
--- a/store/CSDK/src/CarbonWriter.cpp
+++ b/store/CSDK/src/CarbonWriter.cpp
@@ -90,6 +90,12 @@ void CarbonWriter::sortBy(int argc, char **argv) {
}
}
+/**
+ * configure the schema with json style schema
+ *
+ * @param jsonSchema json style schema
+ * @return updated CarbonWriterBuilder
+ */
void CarbonWriter::withCsvInput(char *jsonSchema) {
if (jsonSchema == NULL) {
throw std::runtime_error("jsonSchema parameter can't be NULL.");
@@ -110,6 +116,20 @@ void CarbonWriter::withCsvInput(char *jsonSchema) {
}
};
+/**
+ * configure the builder for csv input without passing a json schema;
+ * calls the no-argument withCsvInput of the java CarbonWriterBuilder and
+ * stores the builder it returns
+ *
+ * throws std::runtime_error when the java method can't be found, and the
+ * pending java exception (jthrowable) when the java call fails
+ */
+void CarbonWriter::withCsvInput() {
+    checkBuilder();
+    jclass carbonWriterBuilderClass = jniEnv->GetObjectClass(carbonWriterBuilderObject);
+    jmethodID methodID = jniEnv->GetMethodID(carbonWriterBuilderClass, "withCsvInput",
+        "()Lorg/apache/carbondata/sdk/file/CarbonWriterBuilder;");
+    // the class reference is only needed for the lookup above; release it so
+    // repeated builder calls do not accumulate local references
+    jniEnv->DeleteLocalRef(carbonWriterBuilderClass);
+    if (methodID == NULL) {
+        throw std::runtime_error("Can't find the method in java: withCsvInput");
+    }
+    carbonWriterBuilderObject = jniEnv->CallObjectMethod(carbonWriterBuilderObject, methodID);
+    if (jniEnv->ExceptionCheck()) {
+        throw jniEnv->ExceptionOccurred();
+    }
+}
+
void CarbonWriter::withHadoopConf(char *key, char *value) {
if (key == NULL) {
throw std::runtime_error("key parameter can't be NULL.");
@@ -271,6 +291,26 @@ void CarbonWriter::withBlockletSize(int blockletSize) {
}
}
+/**
+ * To set the path of carbon schema file; calls withSchemaFile of the java
+ * CarbonWriterBuilder and stores the builder it returns
+ *
+ * throws std::runtime_error when schemaFilePath is NULL or the java method
+ * can't be found, and the pending java exception (jthrowable) when the java
+ * call fails
+ *
+ * @param schemaFilePath The path of carbon schema file, can't be NULL
+ */
+void CarbonWriter::withSchemaFile(char *schemaFilePath) {
+    // NewStringUTF must not be handed a NULL pointer, so reject it up front
+    if (schemaFilePath == NULL) {
+        throw std::runtime_error("schemaFilePath parameter can't be NULL.");
+    }
+    checkBuilder();
+    jclass carbonWriterBuilderClass = jniEnv->GetObjectClass(carbonWriterBuilderObject);
+    jmethodID methodID = jniEnv->GetMethodID(carbonWriterBuilderClass, "withSchemaFile",
+        "(Ljava/lang/String;)Lorg/apache/carbondata/sdk/file/CarbonWriterBuilder;");
+    // the class reference is only needed for the lookup above
+    jniEnv->DeleteLocalRef(carbonWriterBuilderClass);
+    if (methodID == NULL) {
+        throw std::runtime_error("Can't find the method in java: withSchemaFile");
+    }
+    jvalue args[1];
+    args[0].l = jniEnv->NewStringUTF(schemaFilePath);
+    carbonWriterBuilderObject = jniEnv->CallObjectMethodA(carbonWriterBuilderObject, methodID, args);
+    // the java string argument is no longer needed once the call has returned
+    jniEnv->DeleteLocalRef(args[0].l);
+    if (jniEnv->ExceptionCheck()) {
+        throw jniEnv->ExceptionOccurred();
+    }
+}
+
void CarbonWriter::localDictionaryThreshold(int localDictionaryThreshold) {
if (localDictionaryThreshold < 1) {
throw std::runtime_error("localDictionaryThreshold parameter should be positive number.");
diff --git a/store/CSDK/src/CarbonWriter.h b/store/CSDK/src/CarbonWriter.h
index fdb0bc6..afb695f 100644
--- a/store/CSDK/src/CarbonWriter.h
+++ b/store/CSDK/src/CarbonWriter.h
@@ -96,6 +96,13 @@ public:
void withCsvInput(char *jsonSchema);
/**
+ * configure the builder for csv input without passing a json schema
+ * (calls the no-argument withCsvInput of the java CarbonWriterBuilder)
+ *
+ * The function itself returns nothing; the builder object returned by java
+ * is kept inside this CarbonWriter.
+ */
+ void withCsvInput( );
+
+ /**
* configure parameter, including ak,sk and endpoint
*
* @param key key word
@@ -188,6 +195,12 @@ public:
void withBlockletSize(int blockletSize);
/**
+ * Set the path of the carbon schema file on the writer builder
+ * (forwarded to withSchemaFile of the java CarbonWriterBuilder).
+ *
+ * @param schemaFilePath The path of the carbon schema file
+ */
+ void withSchemaFile(char *schemaFilePath);
+
+ /**
* @param localDictionaryThreshold is localDictionaryThreshold, default is 10000
* @return updated CarbonWriterBuilder
*/
diff --git a/store/CSDK/test/main.cpp b/store/CSDK/test/main.cpp
index 5128c4b..2e1b5e5 100644
--- a/store/CSDK/test/main.cpp
+++ b/store/CSDK/test/main.cpp
@@ -740,6 +740,110 @@ bool testWriteData(JNIEnv *env, char *path, int argc, char *argv[]) {
}
}
+/**
+ * Writes ten rows through a CarbonWriter configured with withCsvInput() and
+ * withSchemaFile(), then reads the output path back with a CarbonReader and
+ * prints column 1 of the first few rows.
+ *
+ * @param env JNI environment
+ * @param path output path of the writer, also used as the path of the reader
+ * @param argc argument count; the S3 settings are applied only when argc > 3
+ * @param argv arguments; argv[1], argv[2], argv[3] are used as access key,
+ *             secret key and endpoint
+ * @return true when no java exception was caught, false otherwise
+ */
+bool testWriteDataWithSchemaFile(JNIEnv *env, char *path, int argc, char *argv[]) {
+
+    CarbonReader carbonReader;
+    CarbonWriter writer;
+    bool succeeded = true;
+    try {
+        writer.builder(env);
+        writer.outputPath(path);
+        writer.withCsvInput();
+        writer.withSchemaFile("../../../integration/spark-common/target/warehouse/add_segment_test/Metadata/schema");
+        writer.writtenBy("CSDK");
+        writer.taskNo(15541554.81);
+        writer.withThreadSafe(1);
+        writer.uniqueIdentifier(1549911814000000);
+        writer.withBlockSize(1);
+        writer.withBlockletSize(16);
+        writer.enableLocalDictionary(true);
+        writer.localDictionaryThreshold(10000);
+        if (argc > 3) {
+            writer.withHadoopConf("fs.s3a.access.key", argv[1]);
+            writer.withHadoopConf("fs.s3a.secret.key", argv[2]);
+            writer.withHadoopConf("fs.s3a.endpoint", argv[3]);
+        }
+        writer.build();
+
+        int rowNum = 10;
+        int size = 14;
+        jclass objClass = env->FindClass("java/lang/String");
+        for (int i = 0; i < rowNum; ++i) {
+            jobjectArray arr = env->NewObjectArray(size, objClass, 0);
+
+            // Text form of the row index and "robot<index>", built with bounded writes.
+            char ctrInt[10];
+            snprintf(ctrInt, sizeof(ctrInt), "%d", i);
+            char a[15];
+            snprintf(a, sizeof(a), "robot%d", i);
+            const char *timestamp = "2019-02-12 03:03:34";
+
+            // One value per column, in column order; must hold exactly `size` entries.
+            const char *fields[] = {
+                ctrInt, a, a, timestamp, ctrInt, a, ctrInt,
+                a, ctrInt, timestamp, " 2019-03-02", ctrInt, ctrInt, ctrInt
+            };
+            for (int col = 0; col < size; ++col) {
+                jstring value = env->NewStringUTF(fields[col]);
+                env->SetObjectArrayElement(arr, col, value);
+                // The array now references the string, so the local reference
+                // can be released right away (every column, none left behind).
+                env->DeleteLocalRef(value);
+            }
+
+            writer.write(arr);
+            env->DeleteLocalRef(arr);
+        }
+
+        carbonReader.builder(env, path);
+        carbonReader.build();
+        int i = 0;
+        int printNum = 10;
+        CarbonRow carbonRow(env);
+        while (carbonReader.hasNext()) {
+            jobject row = carbonReader.readNextRow();
+            i++;
+            carbonRow.setCarbonRow(row);
+            if (i < printNum) {
+                // Exactly one conversion specifier for the single argument.
+                printf("%s\t", carbonRow.getString(1));
+            }
+            env->DeleteLocalRef(row);
+        }
+    } catch (jthrowable ex) {
+        env->ExceptionDescribe();
+        env->ExceptionClear();
+        succeeded = false;
+    }
+    // C++ has no finally; this runs after the try/catch on both paths.
+    carbonReader.close();
+    writer.close();
+    return succeeded;
+}
+
void writeData(JNIEnv *env, CarbonWriter writer, int size, jclass objClass, char *stringField, short shortField) {
jobjectArray arr = env->NewObjectArray(size, objClass, 0);
@@ -932,6 +1036,7 @@ int main(int argc, char *argv[]) {
testValidateEscapeCharWithImproperValue(env, "./test");
testWriteData(env, "./data", 1, argv);
testWriteData(env, "./dataLoadOption", 1, argv);
+ testWriteDataWithSchemaFile(env, "./data122301", 1, argv);
readFromLocalWithoutProjection(env, smallFilePath);
readFromLocalWithProjection(env, smallFilePath);
testWithTableProperty(env, "./dataProperty", 1, argv);