You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by zs...@apache.org on 2009/02/25 03:17:36 UTC
svn commit: r747644 - in /hadoop/hive/branches/branch-0.2: ./
serde/src/java/org/apache/hadoop/hive/serde2/lazy/
serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/
serde/src/test/org/apache/hadoop/hive/serde2/lazy/
Author: zshao
Date: Wed Feb 25 02:17:35 2009
New Revision: 747644
URL: http://svn.apache.org/viewvc?rev=747644&view=rev
Log:
HIVE-298. Add LazySimpleSerDe. (zshao)
Added:
hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/
hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyByte.java
hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDouble.java
hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyInteger.java
hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyLong.java
hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java
hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java
hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyShort.java
hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java
hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyString.java
hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java
hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/LazySimpleStructObjectInspector.java
hadoop/hive/branches/branch-0.2/serde/src/test/org/apache/hadoop/hive/serde2/lazy/
hadoop/hive/branches/branch-0.2/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyPrimitive.java
hadoop/hive/branches/branch-0.2/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java
Modified:
hadoop/hive/branches/branch-0.2/CHANGES.txt
hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java
hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
Modified: hadoop/hive/branches/branch-0.2/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.2/CHANGES.txt?rev=747644&r1=747643&r2=747644&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.2/CHANGES.txt (original)
+++ hadoop/hive/branches/branch-0.2/CHANGES.txt Wed Feb 25 02:17:35 2009
@@ -9,6 +9,8 @@
NEW FEATURES
+ HIVE-298. Add LazySimpleSerDe. (zshao)
+
HIVE-269. Add log/exp/pow UDF functions to Hive. (zshao)
HIVE-258. New UDF IF(test, valueTrue, valueFalseOrNull). (zshao)
Added: hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyByte.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyByte.java?rev=747644&view=auto
==============================================================================
--- hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyByte.java (added)
+++ hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyByte.java Wed Feb 25 02:17:35 2009
@@ -0,0 +1,95 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazy;
+
+/**
+ * LazyObject for storing a value of Byte.
+ *
+ * <p>
+ * Part of the code is adapted from Apache Harmony Project.
+ *
+ * As with the specification, this implementation relied on code laid out in <a
+ * href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's
+ * Delight, (Addison Wesley, 2002)</a> as well as <a
+ * href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
+ * </p>
+ *
+ */
+public class LazyByte extends LazyPrimitive<Byte> {
+
+  public LazyByte() {
+    super(Byte.class);
+  }
+
+  /**
+   * Returns the Byte parsed from the current byte range.
+   *
+   * @return the parsed value, or null if no buffer is set or the range does
+   *         not hold a valid byte quantity
+   */
+  @Override
+  public Byte getPrimitiveObject() {
+    if (bytes == null) return null;
+    try {
+      // Slower method: convert to String and then convert to Byte
+      // return Byte.valueOf(LazyUtils.convertToString(bytes, start, length));
+      return Byte.valueOf(parseByte(bytes, start, length));
+    } catch (NumberFormatException e) {
+      return null;
+    }
+  }
+
+  /**
+   * Parses the string argument as if it was a byte value and returns the
+   * result. Throws NumberFormatException if the string does not represent a
+   * single byte quantity. The value is parsed in radix 10.
+   *
+   * @param bytes
+   *          the buffer holding a UTF-8 encoded string representation of a
+   *          single byte quantity.
+   * @param start
+   *          the index in bytes of the first character to parse.
+   * @param length
+   *          the number of bytes to parse, beginning at start.
+   * @return byte the value represented by the argument
+   * @throws NumberFormatException
+   *           if the argument could not be parsed as a byte quantity.
+   */
+  public static byte parseByte(byte[] bytes, int start, int length) throws NumberFormatException {
+    return parseByte(bytes, start, length, 10);
+  }
+
+  /**
+   * Parses the string argument as if it was a byte value and returns the
+   * result. Throws NumberFormatException if the string does not represent a
+   * single byte quantity. The last argument specifies the radix to use when
+   * parsing the value.
+   *
+   * @param bytes
+   *          the buffer holding a UTF-8 encoded string representation of a
+   *          single byte quantity.
+   * @param start
+   *          the index in bytes of the first character to parse.
+   * @param length
+   *          the number of bytes to parse, beginning at start.
+   * @param radix
+   *          the radix to use when parsing.
+   * @return byte the value represented by the argument
+   * @throws NumberFormatException
+   *           if the argument could not be parsed as a byte quantity.
+   */
+  public static byte parseByte(byte[] bytes, int start, int length, int radix)
+      throws NumberFormatException {
+    int intValue = LazyInteger.parseInt(bytes, start, length, radix);
+    byte result = (byte) intValue;
+    // The narrowing cast only round-trips when intValue fits in a byte.
+    if (result == intValue) {
+      return result;
+    }
+    throw new NumberFormatException("Value out of range for byte: " + intValue);
+  }
+
+}
Added: hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDouble.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDouble.java?rev=747644&view=auto
==============================================================================
--- hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDouble.java (added)
+++ hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDouble.java Wed Feb 25 02:17:35 2009
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazy;
+
+import java.nio.charset.CharacterCodingException;
+
+import org.apache.hadoop.io.Text;
+
+/**
+ * LazyObject for storing a value of Double.
+ *
+ */
+public class LazyDouble extends LazyPrimitive<Double> {
+
+ public LazyDouble() {
+ super(Double.class);
+ }
+
+ Text text = new Text();
+
+ @Override
+ public Double getPrimitiveObject() {
+ // TODO: replace this by directly parsing the bytes buffer for better performance.
+ if (bytes == null) return null;
+ try {
+ return Double.valueOf(Text.decode(bytes, start, length));
+ } catch (NumberFormatException e) {
+ return null;
+ } catch (CharacterCodingException e) {
+ return null;
+ }
+ }
+
+}
Added: hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyInteger.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyInteger.java?rev=747644&view=auto
==============================================================================
--- hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyInteger.java (added)
+++ hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyInteger.java Wed Feb 25 02:17:35 2009
@@ -0,0 +1,134 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazy;
+
+/**
+ * LazyObject for storing a value of Integer.
+ *
+ * <p>
+ * Part of the code is adapted from Apache Harmony Project.
+ *
+ * As with the specification, this implementation relied on code laid out in <a
+ * href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's
+ * Delight, (Addison Wesley, 2002)</a> as well as <a
+ * href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
+ * </p>
+ *
+ */
+public class LazyInteger extends LazyPrimitive<Integer> {
+
+  public LazyInteger() {
+    super(Integer.class);
+  }
+
+  /**
+   * Returns the Integer parsed from the current byte range.
+   *
+   * @return the parsed value, or null if no buffer is set or the range does
+   *         not hold a valid int quantity
+   */
+  @Override
+  public Integer getPrimitiveObject() {
+    // Same result as letting parseInt throw on a null buffer, but without
+    // creating and catching an exception for every unset field.
+    if (bytes == null) return null;
+    try {
+      // Slower method: convert to String and then convert to Integer
+      // return Integer.valueOf(LazyUtils.convertToString(bytes, start, length));
+      return Integer.valueOf(parseInt(bytes, start, length));
+    } catch (NumberFormatException e) {
+      return null;
+    }
+  }
+
+  /**
+   * Parses the string argument as if it was an int value and returns the
+   * result. Throws NumberFormatException if the string does not represent an
+   * int quantity. The value is parsed in radix 10.
+   *
+   * @param bytes
+   *          the buffer holding a UTF-8 encoded string representation of an
+   *          int quantity.
+   * @param start
+   *          the index in bytes of the first character to parse.
+   * @param length
+   *          the number of bytes to parse, beginning at start.
+   * @return int the value represented by the argument
+   * @exception NumberFormatException
+   *              if the argument could not be parsed as an int quantity.
+   */
+  public static int parseInt(byte[] bytes, int start, int length) throws NumberFormatException {
+    return parseInt(bytes, start, length, 10);
+  }
+
+  /**
+   * Parses the string argument as if it was an int value and returns the
+   * result. Throws NumberFormatException if the string does not represent an
+   * int quantity. The last argument specifies the radix to use when parsing
+   * the value. A single leading '-' or '+' sign is accepted.
+   *
+   * @param bytes
+   *          the buffer holding a UTF-8 encoded string representation of an
+   *          int quantity.
+   * @param start
+   *          the index in bytes of the first character to parse.
+   * @param length
+   *          the number of bytes to parse, beginning at start.
+   * @param radix
+   *          the base to use for conversion.
+   * @return int the value represented by the argument
+   * @exception NumberFormatException
+   *              if the argument could not be parsed as an int quantity.
+   */
+  public static int parseInt(byte[] bytes, int start, int length, int radix)
+      throws NumberFormatException {
+    if (bytes == null) {
+      throw new NumberFormatException("String is null");
+    }
+    if (radix < Character.MIN_RADIX ||
+        radix > Character.MAX_RADIX) {
+      throw new NumberFormatException("Invalid radix: " + radix);
+    }
+    if (length == 0) {
+      throw new NumberFormatException("Empty string!");
+    }
+    int offset = start;
+    boolean negative = bytes[start] == '-';
+    if (negative || bytes[start] == '+') {
+      offset++;
+      // A lone sign character is not a number.
+      if (length == 1) {
+        throw new NumberFormatException(LazyUtils.convertToString(bytes, start, length));
+      }
+    }
+
+    return parse(bytes, start, length, offset, radix, negative);
+  }
+
+  /**
+   * Accumulates the digits in [offset, start + length) into an int.
+   *
+   * The running value is kept negative and only negated at the end for
+   * non-negative input, so that Integer.MIN_VALUE can be represented.
+   */
+  private static int parse(byte[] bytes, int start, int length, int offset, int radix,
+      boolean negative) throws NumberFormatException {
+    // Smallest running value that can still be multiplied by radix.
+    int max = Integer.MIN_VALUE / radix;
+    int result = 0, end = start + length;
+    while (offset < end) {
+      int digit = LazyUtils.digit(bytes[offset++], radix);
+      if (digit == -1) {
+        throw new NumberFormatException(LazyUtils.convertToString(bytes, start, length));
+      }
+      // Multiplying by radix would overflow.
+      if (max > result) {
+        throw new NumberFormatException(LazyUtils.convertToString(bytes, start, length));
+      }
+      int next = result * radix - digit;
+      // Subtracting the digit wrapped around.
+      if (next > result) {
+        throw new NumberFormatException(LazyUtils.convertToString(bytes, start, length));
+      }
+      result = next;
+    }
+    if (!negative) {
+      result = -result;
+      // Negating Integer.MIN_VALUE stays negative: magnitude too large.
+      if (result < 0) {
+        throw new NumberFormatException(LazyUtils.convertToString(bytes, start, length));
+      }
+    }
+    return result;
+  }
+
+}
Added: hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyLong.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyLong.java?rev=747644&view=auto
==============================================================================
--- hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyLong.java (added)
+++ hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyLong.java Wed Feb 25 02:17:35 2009
@@ -0,0 +1,131 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazy;
+
+/**
+ * LazyObject for storing a value of Long.
+ *
+ * <p>
+ * Part of the code is adapted from Apache Harmony Project.
+ *
+ * As with the specification, this implementation relied on code laid out in <a
+ * href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's
+ * Delight, (Addison Wesley, 2002)</a> as well as <a
+ * href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
+ * </p>
+ *
+ */
+public class LazyLong extends LazyPrimitive<Long> {
+
+  public LazyLong() {
+    super(Long.class);
+  }
+
+  /**
+   * Returns the Long parsed from the current byte range.
+   *
+   * @return the parsed value, or null if no buffer is set or the range does
+   *         not hold a valid long quantity
+   */
+  @Override
+  public Long getPrimitiveObject() {
+    // Same result as letting parseLong throw on a null buffer, but without
+    // creating and catching an exception for every unset field.
+    if (bytes == null) return null;
+    try {
+      // Slower method: convert to String and then convert to Long
+      // return Long.valueOf(LazyUtils.convertToString(bytes, start, length));
+      return Long.valueOf(parseLong(bytes, start, length));
+    } catch (NumberFormatException e) {
+      return null;
+    }
+  }
+
+  /**
+   * Parses the string argument as if it was a long value and returns the
+   * result. Throws NumberFormatException if the string does not represent a
+   * long quantity. The value is parsed in radix 10.
+   *
+   * @param bytes
+   *          the buffer holding a UTF-8 encoded string representation of a
+   *          long quantity.
+   * @param start
+   *          the index in bytes of the first character to parse.
+   * @param length
+   *          the number of bytes to parse, beginning at start.
+   * @return long the value represented by the argument
+   * @exception NumberFormatException
+   *              if the argument could not be parsed as a long quantity.
+   */
+  public static long parseLong(byte[] bytes, int start, int length) throws NumberFormatException {
+    return parseLong(bytes, start, length, 10);
+  }
+
+  /**
+   * Parses the string argument as if it was a long value and returns the
+   * result. Throws NumberFormatException if the string does not represent a
+   * long quantity. The last argument specifies the radix to use when
+   * parsing the value. A single leading '-' or '+' sign is accepted.
+   *
+   * @param bytes
+   *          the buffer holding a UTF-8 encoded string representation of a
+   *          long quantity.
+   * @param start
+   *          the index in bytes of the first character to parse.
+   * @param length
+   *          the number of bytes to parse, beginning at start.
+   * @param radix
+   *          the base to use for conversion.
+   * @return long the value represented by the argument
+   * @exception NumberFormatException
+   *              if the argument could not be parsed as a long quantity.
+   */
+  public static long parseLong(byte[] bytes, int start, int length, int radix)
+      throws NumberFormatException {
+    if (bytes == null) {
+      throw new NumberFormatException("String is null");
+    }
+    if (radix < Character.MIN_RADIX ||
+        radix > Character.MAX_RADIX) {
+      throw new NumberFormatException("Invalid radix: " + radix);
+    }
+    if (length == 0) {
+      throw new NumberFormatException("Empty string!");
+    }
+    int offset = start;
+    boolean negative = bytes[start] == '-';
+    if (negative || bytes[start] == '+') {
+      offset++;
+      // A lone sign character is not a number.
+      if (length == 1) {
+        throw new NumberFormatException(LazyUtils.convertToString(bytes, start, length));
+      }
+    }
+
+    return parse(bytes, start, length, offset, radix, negative);
+  }
+
+  /**
+   * Accumulates the digits in [offset, start + length) into a long.
+   *
+   * The running value is kept negative and only negated at the end for
+   * non-negative input, so that Long.MIN_VALUE can be represented.
+   */
+  private static long parse(byte[] bytes, int start, int length, int offset, int radix,
+      boolean negative) {
+    // Smallest running value that can still be multiplied by radix.
+    long max = Long.MIN_VALUE / radix;
+    long result = 0;
+    int end = start + length;
+    while (offset < end) {
+      int digit = LazyUtils.digit(bytes[offset++], radix);
+      // Reject a non-digit, or a value whose multiplication would overflow.
+      if (digit == -1 || max > result) {
+        throw new NumberFormatException(LazyUtils.convertToString(bytes, start, length));
+      }
+      long next = result * radix - digit;
+      // Subtracting the digit wrapped around.
+      if (next > result) {
+        throw new NumberFormatException(LazyUtils.convertToString(bytes, start, length));
+      }
+      result = next;
+    }
+    if (!negative) {
+      result = -result;
+      // Negating Long.MIN_VALUE stays negative: magnitude too large.
+      if (result < 0) {
+        throw new NumberFormatException(LazyUtils.convertToString(bytes, start, length));
+      }
+    }
+    return result;
+  }
+
+}
Added: hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java?rev=747644&view=auto
==============================================================================
--- hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java (added)
+++ hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java Wed Feb 25 02:17:35 2009
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazy;
+
+import org.apache.hadoop.io.Text;
+
+/**
+ * LazyObject stores an object in a range of bytes in a byte[].
+ *
+ * A LazyObject can represent anything.
+ *
+ */
+public class LazyObject {
+
+  /** Buffer that contains this object's bytes; null when nothing is assigned. */
+  protected byte[] bytes;
+  /** Index in bytes where this object's range begins. */
+  protected int start;
+  /** Number of bytes in this object's range. */
+  protected int length;
+
+  /**
+   * Creates an object that refers to no buffer: bytes is null, start and
+   * length are 0.
+   */
+  protected LazyObject() {
+    // The fields already hold their Java default values (null, 0, 0).
+  }
+
+  /**
+   * Creates an object that refers to the given range of the given buffer.
+   */
+  protected LazyObject(byte[] bytes, int start, int length) {
+    setAll(bytes, start, length);
+  }
+
+  /**
+   * Points this object at a new range. The buffer is stored by reference,
+   * not copied.
+   */
+  protected void setAll(byte[] bytes, int start, int length) {
+    this.length = length;
+    this.start = start;
+    this.bytes = bytes;
+  }
+
+}
Added: hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java?rev=747644&view=auto
==============================================================================
--- hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java (added)
+++ hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java Wed Feb 25 02:17:35 2009
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazy;
+
+
+/**
+ * LazyPrimitive stores a primitive Object in a LazyObject.
+ */
+public abstract class LazyPrimitive<T> extends LazyObject {
+
+  /** Class object for T, as supplied to the constructor. */
+  Class<T> primitiveClass;
+
+  /**
+   * Creates a LazyPrimitive with no bytes assigned.
+   *
+   * @param primitiveClass
+   *          the class of the values returned by getPrimitiveObject().
+   */
+  protected LazyPrimitive(Class<T> primitiveClass) {
+    super();
+    this.primitiveClass = primitiveClass;
+  }
+
+  /**
+   * Returns the actual primitive object represented by this LazyObject.
+   * The subclasses in this package return null when the bytes cannot be
+   * converted.
+   */
+  public abstract T getPrimitiveObject();
+
+}
Added: hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyShort.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyShort.java?rev=747644&view=auto
==============================================================================
--- hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyShort.java (added)
+++ hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyShort.java Wed Feb 25 02:17:35 2009
@@ -0,0 +1,94 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazy;
+
+/**
+ * LazyObject for storing a value of Short.
+ *
+ * <p>
+ * Part of the code is adapted from Apache Harmony Project.
+ *
+ * As with the specification, this implementation relied on code laid out in <a
+ * href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's
+ * Delight, (Addison Wesley, 2002)</a> as well as <a
+ * href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
+ * </p>
+ *
+ */
+public class LazyShort extends LazyPrimitive<Short> {
+
+  public LazyShort() {
+    super(Short.class);
+  }
+
+  /**
+   * Returns the Short parsed from the current byte range.
+   *
+   * @return the parsed value, or null if no buffer is set or the range does
+   *         not hold a valid short quantity
+   */
+  @Override
+  public Short getPrimitiveObject() {
+    // Same result as letting the parser throw on a null buffer, but without
+    // creating and catching an exception for every unset field.
+    if (bytes == null) return null;
+    try {
+      // Slower method: convert to String and then convert to Short
+      // return Short.valueOf(LazyUtils.convertToString(bytes, start, length));
+      return Short.valueOf(parseShort(bytes, start, length));
+    } catch (NumberFormatException e) {
+      return null;
+    }
+  }
+
+  /**
+   * Parses the string argument as if it was a short value and returns the
+   * result. Throws NumberFormatException if the string does not represent a
+   * short quantity. The value is parsed in radix 10.
+   *
+   * @param bytes
+   *          the buffer holding a UTF-8 encoded string representation of a
+   *          short quantity.
+   * @param start
+   *          the index in bytes of the first character to parse.
+   * @param length
+   *          the number of bytes to parse, beginning at start.
+   * @return short the value represented by the argument
+   * @exception NumberFormatException
+   *              if the argument could not be parsed as a short quantity.
+   */
+  public static short parseShort(byte[] bytes, int start, int length)
+      throws NumberFormatException {
+    return parseShort(bytes, start, length, 10);
+  }
+
+  /**
+   * Parses the string argument as if it was a short value and returns the
+   * result. Throws NumberFormatException if the string does not represent a
+   * single short quantity. The last argument specifies the radix to use
+   * when parsing the value.
+   *
+   * @param bytes
+   *          the buffer holding a UTF-8 encoded string representation of a
+   *          short quantity.
+   * @param start
+   *          the index in bytes of the first character to parse.
+   * @param length
+   *          the number of bytes to parse, beginning at start.
+   * @param radix
+   *          the radix to use when parsing.
+   * @return short the value represented by the argument
+   * @exception NumberFormatException
+   *              if the argument could not be parsed as a short quantity.
+   */
+  public static short parseShort(byte[] bytes, int start, int length, int radix)
+      throws NumberFormatException {
+    int intValue = LazyInteger.parseInt(bytes, start, length, radix);
+    short result = (short) intValue;
+    // The narrowing cast only round-trips when intValue fits in a short.
+    if (result == intValue) {
+      return result;
+    }
+    throw new NumberFormatException("Value out of range for short: " + intValue);
+  }
+
+}
Added: hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java?rev=747644&view=auto
==============================================================================
--- hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java (added)
+++ hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java Wed Feb 25 02:17:35 2009
@@ -0,0 +1,231 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.lazy;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * LazySimpleSerDe can be used to read the same data format as
+ * MetadataTypedColumnsetSerDe and TCTLSeparatedProtocol.
+ *
+ * However, LazySimpleSerDe creates Objects in a lazy way, to
+ * provide better performance.
+ *
+ * Also LazySimpleSerDe outputs typed columns instead of treating
+ * all columns as String like MetadataTypedColumnsetSerDe.
+ */
+public class LazySimpleSerDe implements SerDe {
+
+ public static final Log LOG = LogFactory.getLog(LazySimpleSerDe.class.getName());
+
+ final public static byte DefaultSeparator = 1;
+ private byte separator;
+
+
+ private List<String> columnNames;
+ private List<String> columnTypes;
+ private ObjectInspector cachedObjectInspector;
+
+ private String nullString;
+ private boolean lastColumnTakesRest;
+
+ public String toString() {
+ return getClass().toString() + "[" + separator + ":"
+ + columnNames + ":" + columnTypes + "]";
+ }
+
+ public LazySimpleSerDe() throws SerDeException {
+ separator = DefaultSeparator;
+ }
+
+ private byte getByte(String altValue, byte defaultVal) {
+ if (altValue != null && altValue.length() > 0) {
+ try {
+ return Byte.valueOf(altValue).byteValue();
+ } catch(NumberFormatException e) {
+ return (byte)altValue.charAt(0);
+ }
+ }
+ return defaultVal;
+ }
+
+ /**
+ * Initialize the SerDe given the parameters.
+ * serialization.format: separator char or byte code (only supports byte-value up to 127)
+ * columns: ,-separated column names
+ * columns.types: :-separated column types
+ */
+ public void initialize(Configuration job, Properties tbl) throws SerDeException {
+ // Read the separator
+ String alt_sep = tbl.getProperty(Constants.SERIALIZATION_FORMAT);
+ separator = getByte(alt_sep, DefaultSeparator);
+
+ // Read the configuration parameters
+ String columnNameProperty = tbl.getProperty("columns");
+ // NOTE: if "columns.types" is missing, all columns will be of String type
+ String columnTypeProperty = tbl.getProperty("columns.types");
+
+ nullString = tbl.getProperty(Constants.SERIALIZATION_NULL_FORMAT);
+ if (nullString == null) {
+ nullString = "\\N";
+ }
+
+ String lastColumnTakesRestString = tbl.getProperty(Constants.SERIALIZATION_LAST_COLUMN_TAKES_REST);
+ lastColumnTakesRest = (lastColumnTakesRestString != null && lastColumnTakesRestString.equalsIgnoreCase("true"));
+
+ // Parse the configuration parameters
+ if (columnNameProperty != null) {
+ columnNames = Arrays.asList(columnNameProperty.split(","));
+ } else {
+ columnNames = new ArrayList<String>();
+ }
+ if (columnTypeProperty != null) {
+ columnTypes = Arrays.asList(columnTypeProperty.split(":"));
+ } else {
+ // Default type: all string
+ columnTypes = new ArrayList<String>();
+ for (int i = 0; i < columnNames.size(); i++) {
+ columnTypes.add(Constants.STRING_TYPE_NAME);
+ }
+ }
+ if (columnNames.size() != columnTypes.size()) {
+ throw new SerDeException(getClass().toString()
+ + ": columns has " + columnNames.size()
+ + " elements while columns.types has " + columnTypes.size() + " elements!");
+ }
+
+ // Create the LazyObject for storing the rows
+ LazyObject[] lazyPrimitives = new LazyObject[columnNames.size()];
+ // Create the ObjectInspectors for the fields
+ ArrayList<ObjectInspector> columnObjectInspectors
+ = new ArrayList<ObjectInspector>(columnNames.size());
+ for (int i=0; i<columnTypes.size(); i++) {
+ Class<?> primitiveClass = ObjectInspectorUtils.typeNameToClass.get( columnTypes.get(i) );
+ if (primitiveClass == null) {
+ throw new SerDeException(getClass().toString()
+ + ": type " + columnTypes.get(i) + " not supported!");
+ }
+ columnObjectInspectors.add(ObjectInspectorFactory.
+ getStandardPrimitiveObjectInspector(primitiveClass));
+ lazyPrimitives[i] = LazyUtils.createLazyPrimitiveClass(primitiveClass);
+ }
+
+ cachedObjectInspector =
+ ObjectInspectorFactory.getLazySimpleStructObjectInspector(columnNames,
+ columnObjectInspectors);
+
+ cachedLazyStruct = new LazyStruct(lazyPrimitives, separator,
+ new Text(nullString), lastColumnTakesRest);
+
+ LOG.debug("LazySimpleSerDe initialized with: columnNames=" + columnNames + " columnTypes="
+ + columnTypes + " separator=" + separator + " nullstring=" + nullString
+ + " lastColumnTakesRest=" + lastColumnTakesRest);
+ }
+
+ // The object for storing row data
+ LazyStruct cachedLazyStruct;
+
+ /**
+  * Deserialize a row from the Writable to a LazyObject.
+  *
+  * The row bytes are handed to the single cached LazyStruct, and that same
+  * instance is returned on every call.
+  *
+  * @param field the serialized row; must be a BytesWritable or a Text
+  * @return the cached LazyStruct, now pointing at the bytes of this row
+  * @throws SerDeException if field is neither a BytesWritable nor a Text
+  */
+ public Object deserialize(Writable field) throws SerDeException {
+   final byte[] rowBytes;
+   final int rowLength;
+   if (field instanceof BytesWritable) {
+     BytesWritable binaryRow = (BytesWritable) field;
+     // get()/getSize() are used for backward-compatibility with hadoop 0.17
+     rowBytes = binaryRow.get();
+     rowLength = binaryRow.getSize();
+   } else if (field instanceof Text) {
+     Text textRow = (Text) field;
+     rowBytes = textRow.getBytes();
+     rowLength = textRow.getLength();
+   } else {
+     throw new SerDeException(getClass().toString()
+         + ": expects either BytesWritable or Text object!");
+   }
+   cachedLazyStruct.setAll(rowBytes, 0, rowLength);
+   return cachedLazyStruct;
+ }
+
+
+ /**
+ * Returns the ObjectInspector for the row.
+ *
+ * This is the cached struct inspector that initialize() builds from the
+ * configured column names and column types.
+ *
+ * @return the cached ObjectInspector for rows of this SerDe
+ */
+ public ObjectInspector getObjectInspector() throws SerDeException {
+ return cachedObjectInspector;
+ }
+
+ /**
+ * Returns the Writable Class after serialization.
+ *
+ * @return Text.class, matching the Text object that serialize() returns
+ */
+ public Class<? extends Writable> getSerializedClass() {
+ return Text.class;
+ }
+
+ Text serializeCache = new Text();
+ /**
+ * Serialize a row of data.
+ */
+ public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
+
+ // TODO: We can switch the serialization to be directly based on
+ if (objInspector.getCategory() != Category.STRUCT) {
+ throw new SerDeException(getClass().toString()
+ + " can only serialize struct types, but we got: " + objInspector.getTypeName());
+ }
+ StructObjectInspector soi = (StructObjectInspector) objInspector;
+ List<? extends StructField> fields = soi.getAllStructFieldRefs();
+
+ StringBuilder sb = new StringBuilder();
+ for(int i=0; i<fields.size(); i++) {
+ if (i>0) {
+ sb.append((char)separator);
+ }
+ StructField field = fields.get(i);
+ Object fieldData = soi.getStructFieldData(obj, field);
+ if (field.getFieldObjectInspector().getCategory() == Category.PRIMITIVE) {
+ // For primitive object, serialize to plain string
+ sb.append(fieldData == null ? nullString : fieldData.toString());
+ } else {
+ // For complex object, serialize to JSON format
+ sb.append(SerDeUtils.getJSONString(fieldData, field.getFieldObjectInspector()));
+ }
+ }
+ serializeCache.set(sb.toString());
+ return serializeCache;
+ }
+
+}
Added: hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyString.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyString.java?rev=747644&view=auto
==============================================================================
--- hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyString.java (added)
+++ hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyString.java Wed Feb 25 02:17:35 2009
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazy;
+
+import java.nio.charset.CharacterCodingException;
+
+import org.apache.hadoop.io.Text;
+
+/**
+ * LazyObject for storing a value of String.
+ * The String is decoded from the underlying bytes each time it is requested.
+ */
+public class LazyString extends LazyPrimitive<String> {
+
+  public LazyString() {
+    super(String.class);
+  }
+
+  /**
+   * Decodes the current byte range into a String.
+   *
+   * @return the decoded String, or null when no bytes are set or the bytes
+   *         cannot be decoded
+   */
+  @Override
+  public String getPrimitiveObject() {
+    // In the future, we should allow returning a Text Object to save the UTF-8
+    // decoding/encoding, and the creation of new String object.
+    return (bytes == null) ? null : LazyUtils.convertToString(bytes, start, length);
+  }
+
+}
Added: hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java?rev=747644&view=auto
==============================================================================
--- hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java (added)
+++ hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java Wed Feb 25 02:17:35 2009
@@ -0,0 +1,159 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazy;
+
+import java.nio.charset.CharacterCodingException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.io.Text;
+
+
+/**
+ * LazyObject for storing a struct.
+ * The field of a struct can be primitive or non-primitive.
+ *
+ * The row bytes are split into fields only when a field is first requested
+ * after setAll(); until then the struct just remembers the byte range.
+ */
+public class LazyStruct extends LazyObject {
+
+
+  private static final Log LOG = LogFactory.getLog(LazyStruct.class.getName());
+
+  LazyObject[] fields;
+  boolean[] fieldIsPrimitive;
+
+  byte separator;
+  Text nullSequence;
+  boolean lastColumnTakesAll;
+
+  // Whether the current byte range has already been split into fields.
+  boolean parsed;
+
+  /**
+   * Create a new LazyStruct Object.
+   * @param fields The field LazyObjects
+   * @param separator The separator for delimiting the fields in the byte[]
+   * @param nullSequence The sequence for null value
+   * @param lastColumnTakesAll whether the additional fields should be all put into the last column
+   *                           in case the data contains more columns than the schema.
+   */
+  public LazyStruct(LazyObject[] fields, byte separator,
+      Text nullSequence, boolean lastColumnTakesAll) {
+    this.fields = fields;
+    this.separator = separator;
+    this.nullSequence = nullSequence;
+    this.lastColumnTakesAll = lastColumnTakesAll;
+
+    parsed = false;
+    // Remember once which fields are primitives so getField() does not need instanceof.
+    fieldIsPrimitive = new boolean[fields.length];
+    for (int i = 0; i < fields.length; i++) {
+      fieldIsPrimitive[i] = (fields[i] instanceof LazyPrimitive);
+    }
+  }
+
+  /**
+   * Set the row data for this LazyStruct.
+   * The fields are not split here; the next getField() call triggers parsing.
+   */
+  protected void setAll(byte[] bytes, int start, int length) {
+    super.setAll(bytes, start, length);
+    parsed = false;
+  }
+
+
+  // Each warning is logged at most once per LazyStruct instance.
+  boolean missingFieldWarned = false;
+  boolean extraFieldWarned = false;
+
+  /**
+   * Parse the byte[] and fill each field.
+   *
+   * A field whose bytes equal the null sequence is set to null. Fields for
+   * which the row has no data are set to null as well.
+   */
+  private void parse() {
+
+    int structByteEnd = start + length;
+    int fieldId = 0;
+    int fieldByteBegin = start;
+    int fieldByteEnd = start;
+
+    // Go through all bytes in the byte[]
+    while (fieldByteEnd <= structByteEnd) {
+      if (fieldByteEnd == structByteEnd || bytes[fieldByteEnd] == separator) {
+        // end of field reached
+        if (lastColumnTakesAll && fieldId == fields.length - 1) {
+          // The last column swallows everything up to the end of the row.
+          fieldByteEnd = structByteEnd;
+        }
+        // Test the length first so in most cases we avoid doing a byte[] comparison.
+        int fieldLength = fieldByteEnd - fieldByteBegin;
+        if (fieldLength == nullSequence.getLength()
+            && LazyUtils.compare(bytes, fieldByteBegin, fieldLength,
+                nullSequence.getBytes(), 0, nullSequence.getLength()) == 0) {
+          fields[fieldId].setAll(null, 0, 0);
+        } else {
+          fields[fieldId].setAll(bytes, fieldByteBegin, fieldLength);
+        }
+        fieldId++;
+        if (fieldId == fields.length || fieldByteEnd == structByteEnd) {
+          // all fields have been parsed, or all bytes have been parsed
+          break;
+        }
+        fieldByteBegin = fieldByteEnd + 1;
+      }
+      fieldByteEnd++;
+    }
+
+    // Extra bytes at the end?
+    if (!extraFieldWarned && fieldByteEnd < structByteEnd) {
+      extraFieldWarned = true;
+      LOG.warn("Extra bytes detected at the end of the row! Ignoring similar problems.");
+    }
+
+    // Missing fields?
+    if (!missingFieldWarned && fieldId < fields.length) {
+      missingFieldWarned = true;
+      LOG.warn("Missing fields! Expected " + fields.length + " fields but only got "
+          + fieldId + "! Ignoring similar problems.");
+    }
+
+    // Fill all missing fields with nulls.
+    for (; fieldId < fields.length; fieldId++) {
+      fields[fieldId].setAll(null, 0, 0);
+    }
+
+    parsed = true;
+  }
+
+  /**
+   * Get one field out of the struct.
+   *
+   * If the field is a primitive field, return the actual object.
+   * Otherwise return the LazyObject. This is because PrimitiveObjectInspector
+   * does not have control over the object used by the user - the user simply
+   * uses the Object directly instead of going through
+   * Object PrimitiveObjectInspector.get(Object).
+   *
+   * @param i the field ID
+   * @return the primitive value for a primitive field (as returned by
+   *         LazyPrimitive.getPrimitiveObject()), otherwise the field's LazyObject
+   */
+  public Object getField(int i) {
+    if (!parsed) {
+      parse();
+    }
+    if (!fieldIsPrimitive[i]) {
+      return fields[i];
+    } else {
+      return ((LazyPrimitive<?>) fields[i]).getPrimitiveObject();
+    }
+  }
+}
Added: hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java?rev=747644&view=auto
==============================================================================
--- hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java (added)
+++ hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java Wed Feb 25 02:17:35 2009
@@ -0,0 +1,103 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazy;
+
+import java.nio.charset.CharacterCodingException;
+
+import org.apache.hadoop.io.Text;
+
+public class LazyUtils {
+
+ /**
+ * Create a lazy primitive class given the java class.
+ */
+ public static LazyPrimitive<?> createLazyPrimitiveClass(Class<?> c) {
+ if (String.class.equals(c)) {
+ return new LazyString();
+ } else if (Integer.class.equals(c)) {
+ return new LazyInteger();
+ } else if (Double.class.equals(c)) {
+ return new LazyDouble();
+ } else if (Byte.class.equals(c)) {
+ return new LazyByte();
+ } else if (Short.class.equals(c)) {
+ return new LazyShort();
+ } else if (Long.class.equals(c)) {
+ return new LazyLong();
+ } else {
+ return null;
+ }
+ }
+
+ /**
+ * Returns the digit represented by character b.
+ * @param b The ascii code of the character
+ * @param radix The radix
+ * @return -1 if it's invalid
+ */
+ public static int digit(int b, int radix) {
+ int r = -1;
+ if (b >= '0' && b<='9') {
+ r = b - '0';
+ } else if (b >= 'A' && b<='Z') {
+ r = b - 'A' + 10;
+ } else if (b >= 'a' && b <= 'z') {
+ r = b - 'a' + 10;
+ }
+ if (r >= radix) r = -1;
+ return r;
+ }
+
+ /**
+ * Returns -1 if the first byte sequence is lexicographically less than the second;
+ * returns +1 if the second byte sequence is lexicographically less than the first;
+ * otherwise return 0.
+ */
+ public static int compare(byte[] b1, int start1, int length1, byte[] b2, int start2, int length2) {
+
+ int min = Math.min(length1, length2);
+
+ for (int i = 0; i < min; i++) {
+ if (b1[start1 + i] == b2[start2 + i]) {
+ continue;
+ }
+ if (b1[start1 + i] < b2[start2 + i]) {
+ return -1;
+ } else {
+ return 1;
+ }
+ }
+
+ if (length1 < length2) return -1;
+ if (length1 > length2) return 1;
+ return 0;
+ }
+
+ /**
+ * Convert a UTF-8 byte array to String.
+ */
+ public static String convertToString(byte[] bytes, int start, int length) {
+ try {
+ return Text.decode(bytes, start, length);
+ } catch (CharacterCodingException e) {
+ return null;
+ }
+ }
+
+
+}
Added: hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/LazySimpleStructObjectInspector.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/LazySimpleStructObjectInspector.java?rev=747644&view=auto
==============================================================================
--- hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/LazySimpleStructObjectInspector.java (added)
+++ hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/LazySimpleStructObjectInspector.java Wed Feb 25 02:17:35 2009
@@ -0,0 +1,139 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.objectinspector;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde2.lazy.LazyStruct;
+
+/**
+ * LazySimpleStructObjectInspector works on struct data that is stored in LazyStruct.
+ * It only supports primitive types as its fields for simplicity and efficiency.
+ *
+ * The names of the struct fields and the internal structure of the struct fields
+ * are specified in the ctor of the LazySimpleStructObjectInspector.
+ *
+ * Always use the ObjectInspectorFactory to create new ObjectInspector objects, instead
+ * of directly creating an instance of this class.
+ */
+public class LazySimpleStructObjectInspector implements StructObjectInspector {
+
+ public static final Log LOG = LogFactory.getLog(LazySimpleStructObjectInspector.class.getName());
+
+ protected static class MyField implements StructField {
+ protected int fieldID;
+ protected String fieldName;
+ protected ObjectInspector fieldObjectInspector;
+
+ public MyField(int fieldID, String fieldName, ObjectInspector fieldObjectInspector) {
+ this.fieldID = fieldID;
+ this.fieldName = fieldName.toLowerCase();
+ this.fieldObjectInspector = fieldObjectInspector;
+ }
+
+ public int getFieldID() {
+ return fieldID;
+ }
+ public String getFieldName() {
+ return fieldName;
+ }
+ public ObjectInspector getFieldObjectInspector() {
+ return fieldObjectInspector;
+ }
+
+ public String toString() {
+ return "" + fieldID + ":" + fieldName;
+ }
+ }
+
+ protected List<MyField> fields;
+
+ public String getTypeName() {
+ return ObjectInspectorUtils.getStandardStructTypeName(this);
+ }
+
+ /** Call ObjectInspectorFactory.getLazySimpleStructObjectInspector instead.
+ */
+ protected LazySimpleStructObjectInspector(List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors) {
+ init(structFieldNames, structFieldObjectInspectors);
+ }
+ protected void init(List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors) {
+ assert(structFieldNames.size() == structFieldObjectInspectors.size());
+
+ fields = new ArrayList<MyField>(structFieldNames.size());
+ for(int i=0; i<structFieldNames.size(); i++) {
+ fields.add(new MyField(i, structFieldNames.get(i), structFieldObjectInspectors.get(i)));
+ }
+ }
+
+ protected LazySimpleStructObjectInspector(List<StructField> fields) {
+ init(fields);
+ }
+ protected void init(List<StructField> fields) {
+ this.fields = new ArrayList<MyField>(fields.size());
+ for(int i=0; i<fields.size(); i++) {
+ this.fields.add(new MyField(i, fields.get(i).getFieldName(), fields.get(i).getFieldObjectInspector()));
+ }
+ }
+
+
+ public final Category getCategory() {
+ return Category.STRUCT;
+ }
+
+ // Without Data
+ public StructField getStructFieldRef(String fieldName) {
+ return ObjectInspectorUtils.getStandardStructFieldRef(fieldName, fields);
+ }
+ public List<? extends StructField> getAllStructFieldRefs() {
+ return fields;
+ }
+
+ // With Data
+ @SuppressWarnings("unchecked")
+ public Object getStructFieldData(Object data, StructField fieldRef) {
+ if (data == null) {
+ return null;
+ }
+ LazyStruct struct = (LazyStruct)data;
+ MyField f = (MyField) fieldRef;
+
+ int fieldID = f.getFieldID();
+ assert(fieldID >= 0 && fieldID < fields.size());
+
+ return struct.getField(fieldID);
+ }
+
+ @Override
+ public List<Object> getStructFieldsDataAsList(Object data) {
+ if (data == null) {
+ return null;
+ }
+ List<Object> fieldsData = new ArrayList<Object>(fields.size());
+ LazyStruct struct = (LazyStruct)data;
+ for (int i=0; i<fields.size(); i++) {
+ fieldsData.add(struct.getField(i));
+ }
+ return fieldsData;
+ }
+
+}
Modified: hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java?rev=747644&r1=747643&r2=747644&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java (original)
+++ hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java Wed Feb 25 02:17:35 2009
@@ -180,6 +180,20 @@
}
return result;
}
+
+ static HashMap<ArrayList<List<?>>, LazySimpleStructObjectInspector> cachedLazySimpleStructObjectInspector =
+ new HashMap<ArrayList<List<?>>, LazySimpleStructObjectInspector>();
+ public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector(List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors) {
+   // The cache key is the pair (field names, field inspectors).
+   ArrayList<List<?>> cacheKey = new ArrayList<List<?>>();
+   cacheKey.add(structFieldNames);
+   cacheKey.add(structFieldObjectInspectors);
+
+   LazySimpleStructObjectInspector cached = cachedLazySimpleStructObjectInspector.get(cacheKey);
+   if (cached != null) {
+     return cached;
+   }
+   // First request for this signature: build the inspector and remember it.
+   LazySimpleStructObjectInspector created =
+       new LazySimpleStructObjectInspector(structFieldNames, structFieldObjectInspectors);
+   cachedLazySimpleStructObjectInspector.put(cacheKey, created);
+   return created;
+ }
static HashMap<List<StructObjectInspector>, UnionStructObjectInspector> cachedUnionStructObjectInspector =
new HashMap<List<StructObjectInspector>, UnionStructObjectInspector>();
Modified: hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java?rev=747644&r1=747643&r2=747644&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java (original)
+++ hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java Wed Feb 25 02:17:35 2009
@@ -25,6 +25,7 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde.Constants;
/**
* ObjectInspectorFactory is the primary way to create new ObjectInspector instances.
@@ -75,6 +76,26 @@
classToTypeName.put(String.class, org.apache.hadoop.hive.serde.Constants.STRING_TYPE_NAME);
classToTypeName.put(java.sql.Date.class, org.apache.hadoop.hive.serde.Constants.DATE_TYPE_NAME);
}
+
+ /**
+ * The mapping from type name in DDL to the Java class.
+ */
+ public static final Map<String, Class<?>> typeNameToClass = new HashMap<String, Class<?>>();
+ static {
+ typeNameToClass.put(Constants.BOOLEAN_TYPE_NAME, Boolean.class);
+ typeNameToClass.put(Constants.TINYINT_TYPE_NAME, Byte.class);
+ typeNameToClass.put(Constants.SMALLINT_TYPE_NAME, Short.class);
+ typeNameToClass.put(Constants.INT_TYPE_NAME, Integer.class);
+ typeNameToClass.put(Constants.BIGINT_TYPE_NAME, Long.class);
+ typeNameToClass.put(Constants.FLOAT_TYPE_NAME, Float.class);
+ typeNameToClass.put(Constants.DOUBLE_TYPE_NAME, Double.class);
+ typeNameToClass.put(Constants.STRING_TYPE_NAME, String.class);
+ typeNameToClass.put(Constants.DATE_TYPE_NAME, java.sql.Date.class);
+ // These types are not supported yet.
+ // TypeNameToClass.put(Constants.DATETIME_TYPE_NAME);
+ // TypeNameToClass.put(Constants.TIMESTAMP_TYPE_NAME);
+ }
+
/**
* Get the short name for the types
*/
Added: hadoop/hive/branches/branch-0.2/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyPrimitive.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.2/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyPrimitive.java?rev=747644&view=auto
==============================================================================
--- hadoop/hive/branches/branch-0.2/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyPrimitive.java (added)
+++ hadoop/hive/branches/branch-0.2/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyPrimitive.java Wed Feb 25 02:17:35 2009
@@ -0,0 +1,316 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazy;
+
+
+import junit.framework.TestCase;
+
+public class TestLazyPrimitive extends TestCase {
+
+ /**
+  * Test the LazyByte class.
+  * Covers values that must parse (signs, sub-ranges of the array, both range
+  * limits) and inputs that must yield null (non-digits, overflow, a lone sign).
+  */
+ public void testLazyByte() throws Throwable {
+   // No catch-and-rethrow wrapper: the test framework reports the failure itself.
+   LazyByte b = new LazyByte();
+   b.setAll(new byte[]{'0'}, 0, 1);
+   assertEquals(Byte.valueOf((byte)0), b.getPrimitiveObject());
+   b.setAll(new byte[]{'+', '0'}, 0, 2);
+   assertEquals(Byte.valueOf((byte)0), b.getPrimitiveObject());
+   b.setAll(new byte[]{'-', '0'}, 0, 2);
+   assertEquals(Byte.valueOf((byte)0), b.getPrimitiveObject());
+   b.setAll(new byte[]{'a', '1', 'b'}, 1, 1);
+   assertEquals(Byte.valueOf((byte)1), b.getPrimitiveObject());
+   b.setAll(new byte[]{'a', '-', '1'}, 1, 2);
+   assertEquals(Byte.valueOf((byte)-1), b.getPrimitiveObject());
+   b.setAll(new byte[]{'a', '+', '1'}, 1, 2);
+   assertEquals(Byte.valueOf((byte)1), b.getPrimitiveObject());
+   b.setAll(new byte[]{'-', '1', '2', '8'}, 0, 4);
+   assertEquals(Byte.valueOf((byte)-128), b.getPrimitiveObject());
+   b.setAll(new byte[]{'+', '1', '2', '7'}, 0, 4);
+   assertEquals(Byte.valueOf((byte)127), b.getPrimitiveObject());
+
+   // Invalid or out-of-range inputs
+   b.setAll(new byte[]{'a', '1', 'b'}, 1, 2);
+   assertNull(b.getPrimitiveObject());
+   b.setAll(new byte[]{'+', '1', '2', '8'}, 0, 4);
+   assertNull(b.getPrimitiveObject());
+   b.setAll(new byte[]{'-', '1', '2', '9'}, 0, 4);
+   assertNull(b.getPrimitiveObject());
+   b.setAll(new byte[]{'-', '1', '2', '3'}, 0, 1);
+   assertNull(b.getPrimitiveObject());
+   b.setAll(new byte[]{'+', '1', '2', '3'}, 0, 1);
+   assertNull(b.getPrimitiveObject());
+ }
+
+ /**
+  * Test the LazyShort class.
+  * Covers values that must parse (signs, sub-ranges of the array, both range
+  * limits) and inputs that must yield null (non-digits, overflow, a lone sign).
+  */
+ public void testLazyShort() throws Throwable {
+   // No catch-and-rethrow wrapper: the test framework reports the failure itself.
+   LazyShort b = new LazyShort();
+   b.setAll(new byte[]{'0'}, 0, 1);
+   assertEquals(Short.valueOf((short)0), b.getPrimitiveObject());
+   b.setAll(new byte[]{'+', '0'}, 0, 2);
+   assertEquals(Short.valueOf((short)0), b.getPrimitiveObject());
+   b.setAll(new byte[]{'-', '0'}, 0, 2);
+   assertEquals(Short.valueOf((short)0), b.getPrimitiveObject());
+   b.setAll(new byte[]{'a', '1', 'b'}, 1, 1);
+   assertEquals(Short.valueOf((short)1), b.getPrimitiveObject());
+   b.setAll(new byte[]{'a', '-', '1'}, 1, 2);
+   assertEquals(Short.valueOf((short)-1), b.getPrimitiveObject());
+   b.setAll(new byte[]{'a', '+', '1'}, 1, 2);
+   assertEquals(Short.valueOf((short)1), b.getPrimitiveObject());
+   b.setAll(new byte[]{'-', '1', '2', '8'}, 0, 4);
+   assertEquals(Short.valueOf((short)-128), b.getPrimitiveObject());
+   b.setAll(new byte[]{'+', '1', '2', '7'}, 0, 4);
+   assertEquals(Short.valueOf((short)127), b.getPrimitiveObject());
+   b.setAll(new byte[]{'-', '3', '2', '7', '6', '8'}, 0, 6);
+   assertEquals(Short.valueOf((short)-32768), b.getPrimitiveObject());
+   b.setAll(new byte[]{'+', '3', '2', '7', '6', '7'}, 0, 6);
+   assertEquals(Short.valueOf((short)32767), b.getPrimitiveObject());
+
+   // Invalid or out-of-range inputs
+   b.setAll(new byte[]{'a', '1', 'b'}, 1, 2);
+   assertNull(b.getPrimitiveObject());
+   b.setAll(new byte[]{'-', '3', '2', '7', '6', '9'}, 0, 6);
+   assertNull(b.getPrimitiveObject());
+   b.setAll(new byte[]{'+', '3', '2', '7', '6', '8'}, 0, 6);
+   assertNull(b.getPrimitiveObject());
+   b.setAll(new byte[]{'-', '1', '2', '3'}, 0, 1);
+   assertNull(b.getPrimitiveObject());
+   b.setAll(new byte[]{'+', '1', '2', '3'}, 0, 1);
+   assertNull(b.getPrimitiveObject());
+ }
+
+
+ /**
+  * Test the LazyInteger class.
+  * Covers values that must parse (signs, sub-ranges of the array, both range
+  * limits) and inputs that must yield null (non-digits, overflow, a lone sign).
+  */
+ public void testLazyInteger() throws Throwable {
+   // No catch-and-rethrow wrapper: the test framework reports the failure itself.
+   LazyInteger b = new LazyInteger();
+   b.setAll(new byte[]{'0'}, 0, 1);
+   assertEquals(Integer.valueOf((int)0), b.getPrimitiveObject());
+   b.setAll(new byte[]{'+', '0'}, 0, 2);
+   assertEquals(Integer.valueOf((int)0), b.getPrimitiveObject());
+   b.setAll(new byte[]{'-', '0'}, 0, 2);
+   assertEquals(Integer.valueOf((int)0), b.getPrimitiveObject());
+   b.setAll(new byte[]{'a', '1', 'b'}, 1, 1);
+   assertEquals(Integer.valueOf((int)1), b.getPrimitiveObject());
+   b.setAll(new byte[]{'a', '-', '1'}, 1, 2);
+   assertEquals(Integer.valueOf((int)-1), b.getPrimitiveObject());
+   b.setAll(new byte[]{'a', '+', '1'}, 1, 2);
+   assertEquals(Integer.valueOf((int)1), b.getPrimitiveObject());
+   b.setAll(new byte[]{'-', '1', '2', '8'}, 0, 4);
+   assertEquals(Integer.valueOf((int)-128), b.getPrimitiveObject());
+   b.setAll(new byte[]{'+', '1', '2', '7'}, 0, 4);
+   assertEquals(Integer.valueOf((int)127), b.getPrimitiveObject());
+   b.setAll(new byte[]{'-', '3', '2', '7', '6', '8'}, 0, 6);
+   assertEquals(Integer.valueOf((int)-32768), b.getPrimitiveObject());
+   b.setAll(new byte[]{'+', '3', '2', '7', '6', '7'}, 0, 6);
+   assertEquals(Integer.valueOf((int)32767), b.getPrimitiveObject());
+   b.setAll(new byte[]{'-', '2', '1', '4', '7', '4', '8', '3', '6', '4', '8'}, 0, 11);
+   assertEquals(Integer.valueOf((int)-2147483648), b.getPrimitiveObject());
+   b.setAll(new byte[]{'+', '2', '1', '4', '7', '4', '8', '3', '6', '4', '7'}, 0, 11);
+   assertEquals(Integer.valueOf((int)2147483647), b.getPrimitiveObject());
+
+   // Invalid or out-of-range inputs
+   b.setAll(new byte[]{'a', '1', 'b'}, 1, 2);
+   assertNull(b.getPrimitiveObject());
+   b.setAll(new byte[]{'-', '2', '1', '4', '7', '4', '8', '3', '6', '4', '9'}, 0, 11);
+   assertNull(b.getPrimitiveObject());
+   b.setAll(new byte[]{'+', '2', '1', '4', '7', '4', '8', '3', '6', '4', '8'}, 0, 11);
+   assertNull(b.getPrimitiveObject());
+   b.setAll(new byte[]{'-', '1', '2', '3'}, 0, 1);
+   assertNull(b.getPrimitiveObject());
+   b.setAll(new byte[]{'+', '1', '2', '3'}, 0, 1);
+   assertNull(b.getPrimitiveObject());
+ }
+
+
+ /**
+  * Test the LazyLong class.
+  * Covers values that must parse (signs, sub-ranges of the array, both range
+  * limits) and inputs that must yield null (non-digits, overflow, a lone sign).
+  */
+ public void testLazyLong() throws Throwable {
+   // No catch-and-rethrow wrapper: the test framework reports the failure itself.
+   LazyLong b = new LazyLong();
+   b.setAll(new byte[]{'0'}, 0, 1);
+   assertEquals(Long.valueOf((long)0), b.getPrimitiveObject());
+   b.setAll(new byte[]{'+', '0'}, 0, 2);
+   assertEquals(Long.valueOf((long)0), b.getPrimitiveObject());
+   b.setAll(new byte[]{'-', '0'}, 0, 2);
+   assertEquals(Long.valueOf((long)0), b.getPrimitiveObject());
+   b.setAll(new byte[]{'a', '1', 'b'}, 1, 1);
+   assertEquals(Long.valueOf((long)1), b.getPrimitiveObject());
+   b.setAll(new byte[]{'a', '-', '1'}, 1, 2);
+   assertEquals(Long.valueOf((long)-1), b.getPrimitiveObject());
+   b.setAll(new byte[]{'a', '+', '1'}, 1, 2);
+   assertEquals(Long.valueOf((long)1), b.getPrimitiveObject());
+   b.setAll(new byte[]{'-', '1', '2', '8'}, 0, 4);
+   assertEquals(Long.valueOf((long)-128), b.getPrimitiveObject());
+   b.setAll(new byte[]{'+', '1', '2', '7'}, 0, 4);
+   assertEquals(Long.valueOf((long)127), b.getPrimitiveObject());
+   b.setAll(new byte[]{'-', '3', '2', '7', '6', '8'}, 0, 6);
+   assertEquals(Long.valueOf((long)-32768), b.getPrimitiveObject());
+   b.setAll(new byte[]{'+', '3', '2', '7', '6', '7'}, 0, 6);
+   assertEquals(Long.valueOf((long)32767), b.getPrimitiveObject());
+   b.setAll(new byte[]{'-', '2', '1', '4', '7', '4', '8', '3', '6', '4', '8'}, 0, 11);
+   assertEquals(Long.valueOf((long)-2147483648), b.getPrimitiveObject());
+   b.setAll(new byte[]{'+', '2', '1', '4', '7', '4', '8', '3', '6', '4', '7'}, 0, 11);
+   assertEquals(Long.valueOf((long)2147483647), b.getPrimitiveObject());
+   b.setAll(new byte[]{'-', '9', '2', '2', '3', '3', '7', '2', '0', '3', '6', '8', '5',
+       '4', '7', '7', '5', '8', '0', '8'}, 0, 20);
+   assertEquals(Long.valueOf((long)-9223372036854775808L), b.getPrimitiveObject());
+   b.setAll(new byte[]{'+', '9', '2', '2', '3', '3', '7', '2', '0', '3', '6', '8', '5',
+       '4', '7', '7', '5', '8', '0', '7'}, 0, 20);
+   assertEquals(Long.valueOf((long)9223372036854775807L), b.getPrimitiveObject());
+
+   // Invalid or out-of-range inputs
+   b.setAll(new byte[]{'a', '1', 'b'}, 1, 2);
+   assertNull(b.getPrimitiveObject());
+   b.setAll(new byte[]{'-', '9', '2', '2', '3', '3', '7', '2', '0', '3', '6', '8', '5',
+       '4', '7', '7', '5', '8', '0', '9'}, 0, 20);
+   assertNull(b.getPrimitiveObject());
+   b.setAll(new byte[]{'+', '9', '2', '2', '3', '3', '7', '2', '0', '3', '6', '8', '5',
+       '4', '7', '7', '5', '8', '0', '8'}, 0, 20);
+   assertNull(b.getPrimitiveObject());
+   b.setAll(new byte[]{'-', '1', '2', '3'}, 0, 1);
+   assertNull(b.getPrimitiveObject());
+   b.setAll(new byte[]{'+', '1', '2', '3'}, 0, 1);
+   assertNull(b.getPrimitiveObject());
+ }
+
+ /**
+  * Test the LazyDouble class: integers, fractions, exponents, bad input, overflow and underflow.
+  */
+ public void testLazyDouble() throws Throwable {
+   try {
+     LazyDouble b = new LazyDouble();
+     b.setAll(new byte[]{'0'}, 0, 1);
+     assertEquals(Double.valueOf((double)0), b.getPrimitiveObject());
+     b.setAll(new byte[]{'+', '0'}, 0, 2);
+     assertEquals(Double.valueOf((double)0), b.getPrimitiveObject());
+     b.setAll(new byte[]{'-', '0'}, 0, 2);
+     assertEquals(Double.valueOf((double)-0.0), b.getPrimitiveObject());
+     b.setAll(new byte[]{'a', '1', 'b'}, 1, 1);
+     assertEquals(Double.valueOf((double)1), b.getPrimitiveObject());
+     b.setAll(new byte[]{'a', '-', '1'}, 1, 2);
+     assertEquals(Double.valueOf((double)-1), b.getPrimitiveObject());
+     b.setAll(new byte[]{'a', '+', '1'}, 1, 2);
+     assertEquals(Double.valueOf((double)1), b.getPrimitiveObject());
+     b.setAll(new byte[]{'-', '1', '2', '8'}, 0, 4);
+     assertEquals(Double.valueOf((double)-128), b.getPrimitiveObject());
+     b.setAll(new byte[]{'+', '1', '2', '7'}, 0, 4);
+     assertEquals(Double.valueOf((double)127), b.getPrimitiveObject());
+     b.setAll(new byte[]{'-', '3', '2', '7', '6', '8'}, 0, 6);
+     assertEquals(Double.valueOf((double)-32768), b.getPrimitiveObject());
+     b.setAll(new byte[]{'+', '3', '2', '7', '6', '7'}, 0, 6);
+     assertEquals(Double.valueOf((double)32767), b.getPrimitiveObject());
+     b.setAll(new byte[]{'-', '2', '1', '4', '7', '4', '8', '3', '6', '4', '8'}, 0, 11);
+     assertEquals(Double.valueOf((double)-2147483648), b.getPrimitiveObject());
+     b.setAll(new byte[]{'+', '2', '1', '4', '7', '4', '8', '3', '6', '4', '7'}, 0, 11);
+     assertEquals(Double.valueOf((double)2147483647), b.getPrimitiveObject());
+     b.setAll(new byte[]{'-', '9', '2', '2', '3', '3', '7', '2', '0', '3', '6', '8', '5',
+         '4', '7', '7', '5', '8', '0', '8'}, 0, 20);
+     assertEquals(Double.valueOf((double)-9223372036854775808L), b.getPrimitiveObject());
+     b.setAll(new byte[]{'+', '9', '2', '2', '3', '3', '7', '2', '0', '3', '6', '8', '5',
+         '4', '7', '7', '5', '8', '0', '7'}, 0, 20);
+     assertEquals(Double.valueOf((double)9223372036854775807L), b.getPrimitiveObject());
+     // Fractions and exponent notation, including a missing integer or fraction part.
+     b.setAll(new byte[]{'-', '3', '.', '7', '6', '8'}, 0, 6);
+     assertEquals(Double.valueOf((double)-3.768), b.getPrimitiveObject());
+     b.setAll(new byte[]{'+', '3', '.', '7', '6', '7'}, 0, 6);
+     assertEquals(Double.valueOf((double)3.767), b.getPrimitiveObject());
+     b.setAll(new byte[]{'-', '2', '.', '4', '7', '4', '8', '3', '6', 'e', '8'}, 0, 11);
+     assertEquals(Double.valueOf((double)-2.474836e8), b.getPrimitiveObject());
+     b.setAll(new byte[]{'+', '2', '.', '4', '7', '4', '8', '3', 'E', '-', '7'}, 0, 11);
+     assertEquals(Double.valueOf((double)2.47483E-7), b.getPrimitiveObject());
+     b.setAll(new byte[]{'-', '.', '4', '7', '4', '8', '3', '6', 'e', '8'}, 0, 10);
+     assertEquals(Double.valueOf((double)-.474836e8), b.getPrimitiveObject());
+     b.setAll(new byte[]{'+', '.', '4', '7', '4', '8', '3', 'E', '-', '7'}, 0, 10);
+     assertEquals(Double.valueOf((double).47483E-7), b.getPrimitiveObject());
+     b.setAll(new byte[]{'-', '2', '1', '4', '7', '4', '8', '3', '6', '4', '.'}, 0, 11);
+     assertEquals(Double.valueOf((double)-214748364.), b.getPrimitiveObject());
+     b.setAll(new byte[]{'+', '2', '1', '4', '7', '4', '8', '3', '6', '4', '.'}, 0, 11);
+     assertEquals(Double.valueOf((double)+214748364.), b.getPrimitiveObject());
+     // A decimal point may lead or trail the digits.
+     b.setAll(new byte[]{'.', '0'}, 0, 2);
+     assertEquals(Double.valueOf((double).0), b.getPrimitiveObject());
+     b.setAll(new byte[]{'0', '.'}, 0, 2);
+     assertEquals(Double.valueOf((double)0.), b.getPrimitiveObject());
+     // Trailing garbage, a bare decimal point and a bare sign are expected to give null.
+     b.setAll(new byte[]{'a', '1', 'b'}, 1, 2);
+     assertNull(b.getPrimitiveObject());
+     b.setAll(new byte[]{'.', '1', '2', '3'}, 0, 1);
+     assertNull(b.getPrimitiveObject());
+     b.setAll(new byte[]{'-', '1', '2', '3'}, 0, 1);
+     assertNull(b.getPrimitiveObject());
+     b.setAll(new byte[]{'+', '1', '2', '3'}, 0, 1);
+     assertNull(b.getPrimitiveObject());
+
+     // Exponents too large for a double are expected to overflow to signed infinity.
+     b.setAll(new byte[]{'-', '1', 'e', '3', '3', '3', '3', '3', '3'}, 0, 9);
+     assertEquals(Double.NEGATIVE_INFINITY, b.getPrimitiveObject());
+     b.setAll(new byte[]{'+', '1', 'e', '3', '3', '3', '3', '3', '3'}, 0, 9);
+     assertEquals(Double.POSITIVE_INFINITY, b.getPrimitiveObject());
+     // Exponents too small underflow to signed zero; the length covers all nine bytes.
+     b.setAll(new byte[]{'+', '1', 'e', '-', '3', '3', '3', '3', '3'}, 0, 9);
+     assertEquals(Double.valueOf((double)0), b.getPrimitiveObject());
+     b.setAll(new byte[]{'-', '1', 'e', '-', '3', '3', '3', '3', '3'}, 0, 9);
+     assertEquals(Double.valueOf((double)-0.0), b.getPrimitiveObject());
+
+   } catch (Throwable e) {
+     e.printStackTrace();
+     throw e;
+   }
+ }
+
+ /**
+  * Checks that LazyString returns the text of the selected offset/length window of the buffer.
+  */
+ public void testLazyString() throws Throwable {
+   try {
+     LazyString lazyString = new LazyString();
+     lazyString.setAll(new byte[] {'0'}, 0, 1);
+     assertEquals("0", lazyString.getPrimitiveObject());
+     // Only the middle byte of the three-byte buffer is selected here.
+     lazyString.setAll(new byte[] {'0', '1', '2'}, 1, 1);
+     assertEquals("1", lazyString.getPrimitiveObject());
+   } catch (Throwable failure) {
+     failure.printStackTrace();
+     throw failure;
+   }
+ }
+}
Added: hadoop/hive/branches/branch-0.2/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.2/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java?rev=747644&view=auto
==============================================================================
--- hadoop/hive/branches/branch-0.2/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java (added)
+++ hadoop/hive/branches/branch-0.2/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java Wed Feb 25 02:17:35 2009
@@ -0,0 +1,187 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazy;
+
+
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.io.Text;
+
+import junit.framework.TestCase;
+
+/**
+ * Round-trip tests for LazySimpleSerDe: deserialize a row, check its fields, serialize it back.
+ */
+public class TestLazySimpleSerDe extends TestCase {
+
+  /**
+   * Test the LazySimpleSerDe class on a full row; "1." in the int column is expected to be null.
+   */
+  public void testLazySimpleSerDe() throws Throwable {
+    try {
+      // Create the SerDe
+      LazySimpleSerDe serDe = new LazySimpleSerDe();
+      Configuration conf = new Configuration();
+      Properties tbl = createProperties();
+      serDe.initialize(conf, tbl);
+
+      // Data
+      Text t = new Text("123\t456\t789\t1000\t5.3\thive and hadoop\t1.\tNULL");
+      String s = "123\t456\t789\t1000\t5.3\thive and hadoop\tNULL\tNULL";
+      Object[] expectedFieldsData = {
+          Byte.valueOf((byte)123), Short.valueOf((short)456), Integer.valueOf(789),
+          Long.valueOf(1000), Double.valueOf(5.3), "hive and hadoop", null, null
+      };
+
+      // Test
+      deserializeAndSerialize(serDe, t, s, expectedFieldsData);
+    } catch (Throwable e) {
+      e.printStackTrace();
+      throw e;
+    }
+  }
+
+  /**
+   * Deserializes {@code t}, checks every field against {@code expectedFieldsData}, then
+   * serializes the row again and checks that the resulting text equals {@code s}.
+   */
+  private void deserializeAndSerialize(LazySimpleSerDe serDe, Text t, String s,
+      Object[] expectedFieldsData) throws SerDeException {
+    // Get the row structure
+    StructObjectInspector oi = (StructObjectInspector)serDe.getObjectInspector();
+    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
+    assertEquals("Field count", expectedFieldsData.length, fieldRefs.size());
+
+    // Deserialize
+    Object row = serDe.deserialize(t);
+    for (int i = 0; i < fieldRefs.size(); i++) {
+      Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
+      // JUnit's signature is (message, expected, actual); this order keeps failure text right.
+      assertEquals("Field " + i, expectedFieldsData[i], fieldData);
+    }
+    // Serialize
+    assertEquals(Text.class, serDe.getSerializedClass());
+    Text serializedText = (Text)serDe.serialize(row, oi);
+    assertEquals("Serialized data", s, serializedText.toString());
+  }
+
+  /**
+   * Builds the table properties shared by all tests: eight typed columns, serialization
+   * format "9" (presumably the byte code of the tab separator) and "NULL" as the null format.
+   */
+  private Properties createProperties() {
+    Properties tbl = new Properties();
+
+    // Set the configuration parameters
+    tbl.setProperty(Constants.SERIALIZATION_FORMAT, "9");
+    tbl.setProperty("columns",
+        "abyte,ashort,aint,along,adouble,astring,anullint,anullstring");
+    tbl.setProperty("columns.types",
+        "tinyint:smallint:int:bigint:double:string:int:string");
+    tbl.setProperty(Constants.SERIALIZATION_NULL_FORMAT, "NULL");
+    return tbl;
+  }
+
+  /**
+   * Test the LastColumnTakesRest option: the last column keeps the remaining text, tabs included.
+   */
+  public void testLazySimpleSerDeLastColumnTakesRest() throws Throwable {
+    try {
+      // Create the SerDe
+      LazySimpleSerDe serDe = new LazySimpleSerDe();
+      Configuration conf = new Configuration();
+      Properties tbl = createProperties();
+      tbl.setProperty(Constants.SERIALIZATION_LAST_COLUMN_TAKES_REST, "true");
+      serDe.initialize(conf, tbl);
+
+      // Data
+      Text t = new Text("123\t456\t789\t1000\t5.3\thive and hadoop\t1.\ta\tb\t");
+      String s = "123\t456\t789\t1000\t5.3\thive and hadoop\tNULL\ta\tb\t";
+      Object[] expectedFieldsData = {
+          Byte.valueOf((byte)123), Short.valueOf((short)456), Integer.valueOf(789),
+          Long.valueOf(1000), Double.valueOf(5.3), "hive and hadoop", null, "a\tb\t"
+      };
+
+      // Test
+      deserializeAndSerialize(serDe, t, s, expectedFieldsData);
+    } catch (Throwable e) {
+      e.printStackTrace();
+      throw e;
+    }
+  }
+
+  /**
+   * Test extra columns: text after the last declared column is expected to be dropped.
+   */
+  public void testLazySimpleSerDeExtraColumns() throws Throwable {
+    try {
+      // Create the SerDe
+      LazySimpleSerDe serDe = new LazySimpleSerDe();
+      Configuration conf = new Configuration();
+      Properties tbl = createProperties();
+      serDe.initialize(conf, tbl);
+
+      // Data
+      Text t = new Text("123\t456\t789\t1000\t5.3\thive and hadoop\t1.\ta\tb\t");
+      String s = "123\t456\t789\t1000\t5.3\thive and hadoop\tNULL\ta";
+      Object[] expectedFieldsData = {
+          Byte.valueOf((byte)123), Short.valueOf((short)456), Integer.valueOf(789),
+          Long.valueOf(1000), Double.valueOf(5.3), "hive and hadoop", null, "a"
+      };
+
+      // Test
+      deserializeAndSerialize(serDe, t, s, expectedFieldsData);
+    } catch (Throwable e) {
+      e.printStackTrace();
+      throw e;
+    }
+  }
+
+  /**
+   * Test missing columns: columns absent from the end of the row are expected to be null.
+   */
+  public void testLazySimpleSerDeMissingColumns() throws Throwable {
+    try {
+      // Create the SerDe
+      LazySimpleSerDe serDe = new LazySimpleSerDe();
+      Configuration conf = new Configuration();
+      Properties tbl = createProperties();
+      serDe.initialize(conf, tbl);
+
+      // Data
+      Text t = new Text("123\t456\t789\t1000\t5.3\t");
+      String s = "123\t456\t789\t1000\t5.3\t\tNULL\tNULL";
+      Object[] expectedFieldsData = {
+          Byte.valueOf((byte)123), Short.valueOf((short)456), Integer.valueOf(789),
+          Long.valueOf(1000), Double.valueOf(5.3), "", null, null
+      };
+
+      // Test
+      deserializeAndSerialize(serDe, t, s, expectedFieldsData);
+    } catch (Throwable e) {
+      e.printStackTrace();
+      throw e;
+    }
+  }
+}