You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2017/05/10 20:02:56 UTC
orc git commit: ORC-183: Add a method in Type to build type
Repository: orc
Updated Branches:
refs/heads/master ec95303b0 -> 90f138b06
ORC-183: Add a method in Type to build type
Added static ORC_UNIQUE_PTR<Type> Type::buildTypeFromString(const std::string& input)
Fixes #115
Signed-off-by: Owen O'Malley <om...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/90f138b0
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/90f138b0
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/90f138b0
Branch: refs/heads/master
Commit: 90f138b06053b18b86b09edcb33bf8dc25d7f659
Parents: ec95303
Author: Gang Wu <ga...@alibaba-inc.com>
Authored: Thu Apr 27 21:31:35 2017 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Wed May 10 13:02:22 2017 -0700
----------------------------------------------------------------------
c++/include/orc/Type.hh | 5 ++
c++/src/TypeImpl.cc | 196 +++++++++++++++++++++++++++++++++++++++++++
c++/src/TypeImpl.hh | 67 +++++++++++++++
c++/test/TestType.cc | 23 +++++
4 files changed, 291 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/90f138b0/c++/include/orc/Type.hh
----------------------------------------------------------------------
diff --git a/c++/include/orc/Type.hh b/c++/include/orc/Type.hh
index 25b8f53..68f5ec2 100644
--- a/c++/include/orc/Type.hh
+++ b/c++/include/orc/Type.hh
@@ -82,6 +82,11 @@ namespace orc {
* @return a reference to the union type
*/
virtual Type* addUnionChild(ORC_UNIQUE_PTR<Type> fieldType) = 0;
+
+ /**
+ * Build a Type object from string text representation, for example
+ * "struct<a:int,b:string,c:decimal(10,2),d:varchar(5)>" or
+ * "map<boolean,float>".
+ * @param input the text to parse; it must describe exactly one type
+ * @return the parsed type, owned by the caller
+ * @throws std::logic_error if the text does not parse to exactly one
+ * type, names an unknown type, or has an unterminated '<' or '('
+ */
+ static ORC_UNIQUE_PTR<Type> buildTypeFromString(const std::string& input);
};
const int64_t DEFAULT_DECIMAL_SCALE = 18;
http://git-wip-us.apache.org/repos/asf/orc/blob/90f138b0/c++/src/TypeImpl.cc
----------------------------------------------------------------------
diff --git a/c++/src/TypeImpl.cc b/c++/src/TypeImpl.cc
index fdf66a0..6074f94 100644
--- a/c++/src/TypeImpl.cc
+++ b/c++/src/TypeImpl.cc
@@ -491,4 +491,200 @@ namespace orc {
return std::unique_ptr<Type>(result);
}
+ /**
+  * Build a Type from its text representation, e.g. "map<boolean,float>".
+  * @param input the text to parse; it must describe exactly one type
+  * @return the parsed type, owned by the caller
+  * @throws std::logic_error if the text does not yield exactly one type
+  *         (errors raised by TypeImpl::parseType propagate unchanged)
+  */
+ ORC_UNIQUE_PTR<Type> Type::buildTypeFromString(const std::string& input) {
+   std::vector<std::pair<std::string, Type*> > res =
+     TypeImpl::parseType(input, 0, input.size());
+   if (res.size() != 1) {
+     // parseType hands back raw owning pointers; free them before
+     // reporting the error so a malformed string does not leak.
+     for (size_t i = 0; i < res.size(); ++i) {
+       delete res[i].second;
+     }
+     throw std::logic_error("Invalid type string.");
+   }
+   return ORC_UNIQUE_PTR<Type>(res[0].second);
+ }
+
+ /**
+  * Parse the element type of an array and wrap it in a LIST type.
+  * @param input text containing the element type
+  * @param start index in input of the first character of the element type
+  * @param end index in input one past the last character of the element type
+  * @return a newly allocated LIST type; the caller takes ownership
+  * @throws std::logic_error unless exactly one element type is found
+  */
+ Type* TypeImpl::parseArrayType(const std::string &input,
+                                size_t start,
+                                size_t end) {
+   // Held by a smart pointer so the container is released if parsing throws.
+   std::unique_ptr<TypeImpl> arrayType(new TypeImpl(LIST));
+   std::vector<std::pair<std::string, Type*> > v =
+     TypeImpl::parseType(input, start, end);
+   if (v.size() != 1) {
+     // The parsed children are raw owning pointers; free them before throwing.
+     for (size_t i = 0; i < v.size(); ++i) {
+       delete v[i].second;
+     }
+     throw std::logic_error("Array type must contain exactly one sub type.");
+   }
+   arrayType->addChildType(ORC_UNIQUE_PTR<Type>(v[0].second));
+   return arrayType.release();
+ }
+
+ /**
+  * Parse the key and value types of a map and wrap them in a MAP type.
+  * @param input text containing "keyType,valueType"
+  * @param start index in input of the first character of the key type
+  * @param end index in input one past the last character of the value type
+  * @return a newly allocated MAP type; the caller takes ownership
+  * @throws std::logic_error unless exactly two sub types are found
+  */
+ Type* TypeImpl::parseMapType(const std::string &input,
+                              size_t start,
+                              size_t end) {
+   // Held by a smart pointer so the container is released if parsing throws.
+   std::unique_ptr<TypeImpl> mapType(new TypeImpl(MAP));
+   std::vector<std::pair<std::string, Type*> > v =
+     TypeImpl::parseType(input, start, end);
+   if (v.size() != 2) {
+     // The parsed children are raw owning pointers; free them before throwing.
+     for (size_t i = 0; i < v.size(); ++i) {
+       delete v[i].second;
+     }
+     throw std::logic_error(
+       "Map type must contain exactly two sub types.");
+   }
+   mapType->addChildType(ORC_UNIQUE_PTR<Type>(v[0].second));
+   mapType->addChildType(ORC_UNIQUE_PTR<Type>(v[1].second));
+   return mapType.release();
+ }
+
+ /**
+  * Parse the "name:type" fields of a struct and wrap them in a STRUCT type.
+  * @param input text containing the comma separated field list
+  * @param start index in input of the first character of the field list
+  * @param end index in input one past the last character of the field list
+  * @return a newly allocated STRUCT type; the caller takes ownership
+  * @throws std::logic_error if no field is found
+  */
+ Type* TypeImpl::parseStructType(const std::string &input,
+                                 size_t start,
+                                 size_t end) {
+   // Held by a smart pointer so the container is released on any throw.
+   std::unique_ptr<TypeImpl> structType(new TypeImpl(STRUCT));
+   std::vector<std::pair<std::string, Type*> > v =
+     TypeImpl::parseType(input, start, end);
+   if (v.size() == 0) {
+     throw std::logic_error(
+       "Struct type must contain at least one sub type.");
+   }
+   for (size_t i = 0; i < v.size(); ++i) {
+     structType->addStructField(v[i].first, ORC_UNIQUE_PTR<Type>(v[i].second));
+   }
+   return structType.release();
+ }
+
+ /**
+  * Parse the alternatives of a union and wrap them in a UNION type.
+  * @param input text containing the comma separated alternative types
+  * @param start index in input of the first character of the list
+  * @param end index in input one past the last character of the list
+  * @return a newly allocated UNION type; the caller takes ownership
+  * @throws std::logic_error if no alternative is found
+  */
+ Type* TypeImpl::parseUnionType(const std::string &input,
+                                size_t start,
+                                size_t end) {
+   // Held by a smart pointer so the container is released on any throw.
+   std::unique_ptr<TypeImpl> unionType(new TypeImpl(UNION));
+   std::vector<std::pair<std::string, Type*> > v =
+     TypeImpl::parseType(input, start, end);
+   if (v.size() == 0) {
+     throw std::logic_error("Union type must contain at least one sub type.");
+   }
+   for (size_t i = 0; i < v.size(); ++i) {
+     unionType->addChildType(ORC_UNIQUE_PTR<Type>(v[i].second));
+   }
+   return unionType.release();
+ }
+
+ /**
+  * Parse "precision,scale" and build a DECIMAL type from the two numbers.
+  * @param input text containing the precision and scale
+  * @param start index in input of the first character of the precision
+  * @param end index in input one past the last character of the scale
+  * @return a newly allocated DECIMAL type; the caller takes ownership
+  * @throws std::logic_error if no comma is found at or after start, or
+  *         if nothing follows the comma before end
+  */
+ Type* TypeImpl::parseDecimalType(const std::string &input,
+                                  size_t start,
+                                  size_t end) {
+   const size_t comma = input.find(',', start);
+   const bool commaMissing = (comma == std::string::npos);
+   if (commaMissing || comma + 1 >= end) {
+     throw std::logic_error("Decimal type must specify precision and scale.");
+   }
+
+   // Split around the comma, then convert each half with atoi.
+   const std::string precisionText = input.substr(start, comma - start);
+   const std::string scaleText = input.substr(comma + 1, end - comma - 1);
+   const uint64_t precision =
+     static_cast<uint64_t>(atoi(precisionText.c_str()));
+   const uint64_t scale =
+     static_cast<uint64_t>(atoi(scaleText.c_str()));
+   return new TypeImpl(DECIMAL, precision, scale);
+ }
+
+ /**
+  * Build the type named by category.
+  * Primitive names ignore the argument range; array, map, struct,
+  * uniontype, decimal, varchar and char read their arguments from
+  * input[start, end).
+  * @param category type name, e.g. "int", "map" or "varchar"
+  * @param input text holding the arguments of the type, if any
+  * @param start index in input of the first argument character
+  * @param end index in input one past the last argument character
+  * @return a newly allocated type; the caller takes ownership
+  * @throws std::logic_error if category is not a known type name
+  */
+ Type* TypeImpl::parseCategory(std::string category,
+                               const std::string &input,
+                               size_t start,
+                               size_t end) {
+   // Primitive types: the name alone determines the result.
+   if (category == "boolean") return new TypeImpl(BOOLEAN);
+   if (category == "tinyint") return new TypeImpl(BYTE);
+   if (category == "smallint") return new TypeImpl(SHORT);
+   if (category == "int") return new TypeImpl(INT);
+   if (category == "bigint") return new TypeImpl(LONG);
+   if (category == "float") return new TypeImpl(FLOAT);
+   if (category == "double") return new TypeImpl(DOUBLE);
+   if (category == "string") return new TypeImpl(STRING);
+   if (category == "binary") return new TypeImpl(BINARY);
+   if (category == "timestamp") return new TypeImpl(TIMESTAMP);
+   if (category == "date") return new TypeImpl(DATE);
+
+   // Compound types: delegate to the dedicated parsers.
+   if (category == "array") return parseArrayType(input, start, end);
+   if (category == "map") return parseMapType(input, start, end);
+   if (category == "struct") return parseStructType(input, start, end);
+   if (category == "uniontype") return parseUnionType(input, start, end);
+   if (category == "decimal") return parseDecimalType(input, start, end);
+
+   // Length-limited character types: the argument is the maximum length.
+   const bool isVarchar = (category == "varchar");
+   if (isVarchar || category == "char") {
+     const std::string lengthText = input.substr(start, end - start);
+     const uint64_t maxLength =
+       static_cast<uint64_t>(atoi(lengthText.c_str()));
+     if (isVarchar) {
+       return new TypeImpl(VARCHAR, maxLength);
+     }
+     return new TypeImpl(CHAR, maxLength);
+   }
+
+   throw std::logic_error("Unknown type " + category);
+ }
+
+ /**
+  * Parse a comma separated list of types, each optionally prefixed with
+  * "fieldName:", from input[start, end).
+  * @param input text containing the type list
+  * @param start index in input of the first character to parse
+  * @param end index in input one past the last character to parse
+  * @return one (fieldName, type) pair per parsed type, in order; fieldName
+  *         is empty when no "name:" prefix was given. The Type pointers are
+  *         newly allocated and owned by the caller.
+  * @throws std::logic_error on an unterminated '<' or '(', on an unexpected
+  *         character after a type name or after a closing '>' / ')', or on
+  *         an unknown type name. Types parsed before the failure are freed.
+  */
+ std::vector<std::pair<std::string, Type *> > TypeImpl::parseType(
+     const std::string &input,
+     size_t start,
+     size_t end) {
+   const std::string types = input.substr(start, end - start);
+   const size_t length = types.size();
+   std::vector<std::pair<std::string, Type *> > res;
+   size_t pos = 0;
+
+   try {
+     while (pos < length) {
+       // Scan the leading identifier: either a field name or a type name.
+       // The cast avoids undefined behaviour for negative char values.
+       size_t endPos = pos;
+       while (endPos < length &&
+              isalnum(static_cast<unsigned char>(types[endPos]))) {
+         ++endPos;
+       }
+
+       // "name:type" - remember the name and scan the type name after it.
+       std::string fieldName;
+       if (endPos < length && types[endPos] == ':') {
+         fieldName = types.substr(pos, endPos - pos);
+         pos = ++endPos;
+         while (endPos < length &&
+                isalpha(static_cast<unsigned char>(types[endPos]))) {
+           ++endPos;
+         }
+       }
+
+       // [argStart, nextPos) is the argument text between the brackets.
+       // When the type name runs to the end of the text both stay at
+       // length, so no index past the end of the string is ever formed.
+       size_t argStart = length;
+       size_t nextPos = length;
+       bool bracketed = false;
+       if (endPos < length) {
+         argStart = endPos + 1;
+         nextPos = endPos + 1;
+         const char delimiter = types[endPos];
+         if (delimiter == '<') {
+           // Find the matching '>' while honouring nested brackets.
+           int count = 1;
+           while (nextPos < length) {
+             if (types[nextPos] == '<') {
+               ++count;
+             } else if (types[nextPos] == '>') {
+               --count;
+             }
+             if (count == 0) {
+               break;
+             }
+             ++nextPos;
+           }
+           if (nextPos == length) {
+             throw std::logic_error(
+               "Invalid type string. Cannot find closing >");
+           }
+           bracketed = true;
+         } else if (delimiter == '(') {
+           while (nextPos < length && types[nextPos] != ')') {
+             ++nextPos;
+           }
+           if (nextPos == length) {
+             throw std::logic_error(
+               "Invalid type string. Cannot find closing )");
+           }
+           bracketed = true;
+         } else if (delimiter != ',') {
+           throw std::logic_error("Unrecognized character.");
+         }
+       }
+
+       const std::string category = types.substr(pos, endPos - pos);
+       // Own the new type until it is safely stored in the result.
+       std::unique_ptr<Type> type(
+         parseCategory(category, types, argStart, nextPos));
+       res.push_back(std::make_pair(fieldName, type.get()));
+       type.release();
+
+       if (bracketed) {
+         // Only a separator (or the end of the text) may follow a closer.
+         if (nextPos + 1 < length && types[nextPos + 1] != ',') {
+           throw std::logic_error("Unrecognized character.");
+         }
+         // Skip the closing bracket and the separator after it.
+         pos = nextPos + 2;
+       } else {
+         pos = nextPos;
+       }
+     }
+   } catch (...) {
+     // The result holds raw owning pointers; free them before rethrowing.
+     for (size_t i = 0; i < res.size(); ++i) {
+       delete res[i].second;
+     }
+     throw;
+   }
+
+   return res;
+ }
+
}
http://git-wip-us.apache.org/repos/asf/orc/blob/90f138b0/c++/src/TypeImpl.hh
----------------------------------------------------------------------
diff --git a/c++/src/TypeImpl.hh b/c++/src/TypeImpl.hh
index e2866e4..3c3f739 100644
--- a/c++/src/TypeImpl.hh
+++ b/c++/src/TypeImpl.hh
@@ -98,6 +98,11 @@ namespace orc {
*/
void addChildType(std::unique_ptr<Type> childType);
+ /**
+ * Parse a comma separated list of types, each optionally prefixed
+ * with "fieldName:", from the range [start, end) of input.
+ * @param input the input string containing the type list
+ * @param start index of the first character to parse
+ * @param end index one past the last character to parse
+ * @return one (fieldName, type) pair per parsed type; the Type
+ * pointers are newly allocated and owned by the caller
+ */
+ static std::vector<std::pair<std::string, Type *> > parseType(
+ const std::string &input,
+ size_t start,
+ size_t end);
+
private:
/**
* Assign ids to this node and its children giving this
@@ -110,6 +115,68 @@ namespace orc {
* Ensure that ids are assigned to all of the nodes.
*/
void ensureIdAssigned() const;
+
+ /**
+ * Parse array type from string
+ * @param input the input string holding the element type of the array
+ * @param start index in input of the first character of the element type
+ * @param end index in input one past the last character of the element type
+ * @return a newly allocated LIST type, owned by the caller
+ * @throws std::logic_error unless exactly one element type is found
+ */
+ static Type* parseArrayType(const std::string &input,
+ size_t start,
+ size_t end);
+
+ /**
+ * Parse map type from string
+ * @param input the input string holding "keyType,valueType"
+ * @param start index in input of the first character of the key type
+ * @param end index in input one past the last character of the value type
+ * @return a newly allocated MAP type, owned by the caller
+ * @throws std::logic_error unless exactly two sub types are found
+ */
+ static Type* parseMapType(const std::string &input,
+ size_t start,
+ size_t end);
+
+ /**
+ * Parse struct type from string
+ * @param input the input string holding the "name:type" field list
+ * @param start index in input of the first character of the field list
+ * @param end index in input one past the last character of the field list
+ * @return a newly allocated STRUCT type, owned by the caller
+ * @throws std::logic_error if no field is found
+ */
+ static Type* parseStructType(const std::string &input,
+ size_t start,
+ size_t end);
+
+ /**
+ * Parse union type from string
+ * @param input the input string holding the list of alternative types
+ * @param start index in input of the first character of the list
+ * @param end index in input one past the last character of the list
+ * @return a newly allocated UNION type, owned by the caller
+ * @throws std::logic_error if no alternative type is found
+ */
+ static Type* parseUnionType(const std::string &input,
+ size_t start,
+ size_t end);
+
+ /**
+ * Parse decimal type from string
+ * @param input the input string holding "precision,scale"
+ * @param start index in input of the first character of the precision
+ * @param end index in input one past the last character of the scale
+ * @return a newly allocated DECIMAL type, owned by the caller
+ * @throws std::logic_error if the comma or the scale is missing
+ */
+ static Type* parseDecimalType(const std::string &input,
+ size_t start,
+ size_t end);
+
+ /**
+ * Parse type for a category
+ * @param category type name, e.g. "int", "map" or "varchar"
+ * @param input the input string holding the arguments of the type;
+ * only read for array, map, struct, uniontype, decimal, varchar and char
+ * @param start index in input of the first argument character
+ * @param end index in input one past the last argument character
+ * @return a newly allocated type, owned by the caller
+ * @throws std::logic_error if category is not a known type name
+ */
+ static Type* parseCategory(std::string category,
+ const std::string &input,
+ size_t start,
+ size_t end);
};
std::unique_ptr<Type> convertType(const proto::Type& type,
http://git-wip-us.apache.org/repos/asf/orc/blob/90f138b0/c++/test/TestType.cc
----------------------------------------------------------------------
diff --git a/c++/test/TestType.cc b/c++/test/TestType.cc
index 3c595d0..8ce9313 100644
--- a/c++/test/TestType.cc
+++ b/c++/test/TestType.cc
@@ -274,4 +274,27 @@ namespace orc {
EXPECT_EQ(13, cutType->getSubtype(1)->getColumnId());
EXPECT_EQ(13, cutType->getSubtype(1)->getMaximumColumnId());
}
+
+ TEST(TestType, buildTypeFromString) {
+   // Each schema must come back unchanged from a parse -> toString round trip.
+   const char* const schemas[] = {
+     "struct<a:int,b:string,c:decimal(10,2),d:varchar(5)>",
+     "map<boolean,float>",
+     "uniontype<bigint,binary,timestamp>",
+     "struct<a:bigint,b:struct<a:binary,b:timestamp>>",
+     "struct<a:bigint,b:struct<a:binary,b:timestamp>,c:map<double,tinyint>>"
+   };
+   const size_t schemaCount = sizeof(schemas) / sizeof(schemas[0]);
+
+   for (size_t i = 0; i < schemaCount; ++i) {
+     const std::string typeStr(schemas[i]);
+     ORC_UNIQUE_PTR<Type> type = Type::buildTypeFromString(typeStr);
+     EXPECT_EQ(typeStr, type->toString());
+   }
+ }
}