Posted to commits@hive.apache.org by am...@apache.org on 2013/05/08 10:48:57 UTC
svn commit: r1480183 - in /hive/branches/HIVE-4115: ./
hcatalog/build-support/ant/ hcatalog/src/test/
metastore/src/java/org/apache/hadoop/hive/metastore/
ql/src/java/org/apache/hadoop/hive/ql/io/orc/
ql/src/java/org/apache/hadoop/hive/ql/parse/ ql/src...
Author: amareshwari
Date: Wed May 8 08:48:57 2013
New Revision: 1480183
URL: http://svn.apache.org/r1480183
Log:
Merging r1480158 through r1480180 into HIVE-4115
Added:
hive/branches/HIVE-4115/ql/src/test/queries/clientpositive/ctas_colname.q
- copied unchanged from r1480180, hive/trunk/ql/src/test/queries/clientpositive/ctas_colname.q
hive/branches/HIVE-4115/ql/src/test/results/clientpositive/ctas_colname.q.out
- copied unchanged from r1480180, hive/trunk/ql/src/test/results/clientpositive/ctas_colname.q.out
Removed:
hive/branches/HIVE-4115/hcatalog/src/test/.gitignore
Modified:
hive/branches/HIVE-4115/ (props changed)
hive/branches/HIVE-4115/build-common.xml
hive/branches/HIVE-4115/build.properties
hive/branches/HIVE-4115/build.xml
hive/branches/HIVE-4115/hcatalog/build-support/ant/checkstyle.xml
hive/branches/HIVE-4115/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
hive/branches/HIVE-4115/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
hive/branches/HIVE-4115/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java
hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java
hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MemoryManager.java
hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OutStream.java
hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionedOutputStream.java
hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RedBlackTree.java
hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringRedBlackTree.java
hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java
hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestMemoryManager.java
hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java
hive/branches/HIVE-4115/ql/src/test/resources/orc-file-dump.out
Propchange: hive/branches/HIVE-4115/
------------------------------------------------------------------------------
Merged /hive/trunk:r1480158-1480180
Modified: hive/branches/HIVE-4115/build-common.xml
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/build-common.xml?rev=1480183&r1=1480182&r2=1480183&view=diff
==============================================================================
--- hive/branches/HIVE-4115/build-common.xml (original)
+++ hive/branches/HIVE-4115/build-common.xml Wed May 8 08:48:57 2013
@@ -70,6 +70,10 @@
<property name="hadoop.opts.23" value="-D mapreduce.framework.name=local" />
<property name="hadoop.opts.20" value="" />
+ <condition property="test.halt.on.failure" value="no" else="yes">
+ <equals arg1="${test.continue.on.failure}" arg2="true"/>
+ </condition>
+
<target name="set-test-classpath">
<typedef name="distinctelementsclasspath" classname="org.apache.hadoop.hive.ant.DistinctElementsClassPath"
classpath="${build.dir.hive}/anttasks/hive-anttasks-${version}.jar:${build.ivy.lib.dir}/default/commons-collections-${commons-collections.version}.jar:${build.ivy.lib.dir}/default/commons-lang-${commons-lang.version}.jar"/>
@@ -429,7 +433,8 @@
<echo message="Test Classpath: ${hadoop.testcp}"/>
</then>
</if>
- <junit showoutput="${test.output}" printsummary="yes" haltonfailure="no"
+
+ <junit showoutput="${test.output}" printsummary="yes" haltonfailure="${test.halt.on.failure}"
fork="yes" maxmemory="${test.junit.maxmemory}" dir="${basedir}" timeout="${test.junit.timeout}"
errorProperty="tests.failed" failureProperty="tests.failed" filtertrace="off">
<jvmarg value="-XX:+HeapDumpOnOutOfMemoryError"/>
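With the two hunks above, one user-facing switch now drives JUnit's halting behaviour: test.continue.on.failure=true maps haltonfailure to "no", anything else to "yes". For example (target name as used elsewhere in this build):
  ant test -Dtest.continue.on.failure=false
restores the old stop-at-first-failure behaviour, since build.properties below flips the default to true.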
Modified: hive/branches/HIVE-4115/build.properties
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/build.properties?rev=1480183&r1=1480182&r2=1480183&view=diff
==============================================================================
--- hive/branches/HIVE-4115/build.properties (original)
+++ hive/branches/HIVE-4115/build.properties Wed May 8 08:48:57 2013
@@ -95,7 +95,7 @@ test.junit.timeout=43200000
# ant test -Dtest.junit.exclude="**/Test*CliDriver.class,**/TestPartitions.class"
test.junit.exclude=
-test.continue.on.failure=false
+test.continue.on.failure=true
test.submodule.exclude=
test.junit.maxmemory=512m
@@ -129,10 +129,6 @@ mvn.pom.dir=${build.dir.hive}/maven/poms
mvn.license.dir=${build.dir.hive}/maven/licenses
mvn.deploy.id=apache.snapshots.https
mvn.deploy.url=https://repository.apache.org/content/repositories/snapshots
-#
-# unit test Properties
-#
-failonerror=false
#
# Data nucleus repository - needed for jdo2-api-2.3-ec.jar download
Modified: hive/branches/HIVE-4115/build.xml
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/build.xml?rev=1480183&r1=1480182&r2=1480183&view=diff
==============================================================================
--- hive/branches/HIVE-4115/build.xml (original)
+++ hive/branches/HIVE-4115/build.xml Wed May 8 08:48:57 2013
@@ -70,6 +70,10 @@
<equals arg1="${mvn.publish.repo}" arg2="staging"/>
</condition>
+ <condition property="test.halt.on.failure" value="no" else="yes">
+ <equals arg1="${test.continue.on.failure}" arg2="true"/>
+ </condition>
+
<taskdef resource="net/sf/antcontrib/antcontrib.properties">
<classpath>
<pathelement location="${hive.root}/testlibs/ant-contrib-1.0b3.jar"/>
@@ -293,7 +297,7 @@
<condition property="target.module" value="${module}" else="${iterate.hive.test}">
<isset property="module"/>
</condition>
- <for list="${target.module}" param="module">
+ <for keepgoing="${test.continue.on.failure}" list="${target.module}" param="module">
<sequential>
<ant antfile="@{module}/build.xml" target="test" inheritAll="false" inheritRefs="true">
<property name="build.dir.hive" location="${build.dir.hive}"/>
@@ -305,7 +309,7 @@
<target name="test-shims">
<echo message="Project: ${ant.project.name}"/>
- <subant target="test" failonerror="${failonerror}">
+ <subant target="test" failonerror="${test.halt.on.failure}">
<property name="hadoop.version" value="${hadoop.security.version}"/>
<property name="hadoop.security.version" value="${hadoop.security.version}"/>
<fileset dir="${hive.root}/shims" includes="build.xml"/>
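Two sides of the same switch: ant-contrib's <for> honours keepgoing, so with test.continue.on.failure=true the module loop keeps iterating past a failing module (failing the build only after all iterations), while the shims subant uses the derived test.halt.on.failure value in place of the removed failonerror property.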
Modified: hive/branches/HIVE-4115/hcatalog/build-support/ant/checkstyle.xml
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/hcatalog/build-support/ant/checkstyle.xml?rev=1480183&r1=1480182&r2=1480183&view=diff
==============================================================================
--- hive/branches/HIVE-4115/hcatalog/build-support/ant/checkstyle.xml (original)
+++ hive/branches/HIVE-4115/hcatalog/build-support/ant/checkstyle.xml Wed May 8 08:48:57 2013
@@ -31,6 +31,7 @@
<checkstyle classpathref="checkstyle.class.path"
config="${path.to.basedir}/build-support/checkstyle/coding_style.xml">
<fileset dir="${basedir}">
+ <exclude name="**/.*"/>
<exclude name="**/build/**"/>
<exclude name=".idea/**"/>
<exclude name="historical/**"/>
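The new **/.* exclude keeps hidden files like .gitignore out of the checkstyle fileset (cf. hcatalog/src/test/.gitignore removed at the top of this commit).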
Modified: hive/branches/HIVE-4115/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java?rev=1480183&r1=1480182&r2=1480183&view=diff
==============================================================================
--- hive/branches/HIVE-4115/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java (original)
+++ hive/branches/HIVE-4115/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java Wed May 8 08:48:57 2013
@@ -64,11 +64,14 @@ public class HiveAlterHandler implements
throw new InvalidOperationException("New table is invalid: " + newt);
}
- if (!MetaStoreUtils.validateName(newt.getTableName())
- || !MetaStoreUtils.validateTblColumns(newt.getSd().getCols())) {
+ if (!MetaStoreUtils.validateName(newt.getTableName())) {
throw new InvalidOperationException(newt.getTableName()
+ " is not a valid object name");
}
+ String validate = MetaStoreUtils.validateTblColumns(newt.getSd().getCols());
+ if (validate != null) {
+ throw new InvalidOperationException("Invalid column " + validate);
+ }
Path srcPath = null;
FileSystem srcFs = null;
Modified: hive/branches/HIVE-4115/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java?rev=1480183&r1=1480182&r2=1480183&view=diff
==============================================================================
--- hive/branches/HIVE-4115/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java (original)
+++ hive/branches/HIVE-4115/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java Wed May 8 08:48:57 2013
@@ -80,6 +80,7 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.api.PrivilegeBag;
import org.apache.hadoop.hive.metastore.api.PrivilegeGrantInfo;
import org.apache.hadoop.hive.metastore.api.Role;
+import org.apache.hadoop.hive.metastore.api.SkewedInfo;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore;
import org.apache.hadoop.hive.metastore.api.Type;
@@ -1003,20 +1004,32 @@ public class HiveMetaStore extends Thrif
throws AlreadyExistsException, MetaException,
InvalidObjectException, NoSuchObjectException {
- if (!MetaStoreUtils.validateName(tbl.getTableName())
- || !MetaStoreUtils.validateTblColumns(tbl.getSd().getCols())
- || (tbl.getPartitionKeys() != null && !MetaStoreUtils
- .validateTblColumns(tbl.getPartitionKeys()))
- || !MetaStoreUtils.validateSkewedColNames(
- (null == tbl.getSd().getSkewedInfo()) ?
- null : tbl.getSd().getSkewedInfo().getSkewedColNames())
- || !MetaStoreUtils.validateSkewedColNamesSubsetCol(
- (null == tbl.getSd().getSkewedInfo()) ?
- null : tbl.getSd().getSkewedInfo().getSkewedColNames(),
- tbl.getSd().getCols())) {
+ if (!MetaStoreUtils.validateName(tbl.getTableName())) {
throw new InvalidObjectException(tbl.getTableName()
+ " is not a valid object name");
}
+ String validate = MetaStoreUtils.validateTblColumns(tbl.getSd().getCols());
+ if (validate != null) {
+ throw new InvalidObjectException("Invalid column " + validate);
+ }
+ if (tbl.getPartitionKeys() != null) {
+ validate = MetaStoreUtils.validateTblColumns(tbl.getPartitionKeys());
+ if (validate != null) {
+ throw new InvalidObjectException("Invalid partition column " + validate);
+ }
+ }
+ SkewedInfo skew = tbl.getSd().getSkewedInfo();
+ if (skew != null) {
+ validate = MetaStoreUtils.validateSkewedColNames(skew.getSkewedColNames());
+ if (validate != null) {
+ throw new InvalidObjectException("Invalid skew column " + validate);
+ }
+ validate = MetaStoreUtils.validateSkewedColNamesSubsetCol(
+ skew.getSkewedColNames(), tbl.getSd().getCols());
+ if (validate != null) {
+ throw new InvalidObjectException("Invalid skew column " + validate);
+ }
+ }
Path tblPath = null;
boolean success = false, madeDir = false;
Modified: hive/branches/HIVE-4115/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java?rev=1480183&r1=1480182&r2=1480183&view=diff
==============================================================================
--- hive/branches/HIVE-4115/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java (original)
+++ hive/branches/HIVE-4115/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java Wed May 8 08:48:57 2013
@@ -352,16 +352,16 @@ public class MetaStoreUtils {
return false;
}
- static public boolean validateTblColumns(List<FieldSchema> cols) {
+ static public String validateTblColumns(List<FieldSchema> cols) {
for (FieldSchema fieldSchema : cols) {
if (!validateName(fieldSchema.getName())) {
- return false;
+ return "name: " + fieldSchema.getName();
}
if (!validateColumnType(fieldSchema.getType())) {
- return false;
+ return "type: " + fieldSchema.getType();
}
}
- return true;
+ return null;
}
static void throwExceptionIfIncompatibleColTypeChange(
@@ -434,22 +434,22 @@ public class MetaStoreUtils {
return true;
}
- public static boolean validateSkewedColNames(List<String> cols) {
+ public static String validateSkewedColNames(List<String> cols) {
if (null == cols) {
- return true;
+ return null;
}
for (String col : cols) {
if (!validateName(col)) {
- return false;
+ return col;
}
}
- return true;
+ return null;
}
- public static boolean validateSkewedColNamesSubsetCol(List<String> skewedColNames,
+ public static String validateSkewedColNamesSubsetCol(List<String> skewedColNames,
List<FieldSchema> cols) {
if (null == skewedColNames) {
- return true;
+ return null;
}
List<String> colNames = new ArrayList<String>();
for (FieldSchema fieldSchema : cols) {
@@ -459,7 +459,10 @@ public class MetaStoreUtils {
List<String> copySkewedColNames = new ArrayList<String>(skewedColNames);
// remove valid columns
copySkewedColNames.removeAll(colNames);
- return (copySkewedColNames.size() > 0) ? false : true;
+ if (copySkewedColNames.isEmpty()) {
+ return null;
+ }
+ return copySkewedColNames.toString();
}
public static String getListType(String t) {
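These helpers move from boolean pass/fail to a "null means valid, otherwise describe the offender" convention, which is what lets HiveMetaStore and HiveAlterHandler above report which column was bad. A minimal hedged sketch of the calling pattern (requireValidColumns is a hypothetical helper; the exception text is illustrative):

  static void requireValidColumns(List<FieldSchema> cols)
      throws InvalidObjectException {
    // a null return means every column passed validation
    String bad = MetaStoreUtils.validateTblColumns(cols);
    if (bad != null) {
      // bad describes the first offender, e.g. "name: 2bad" or "type: foo"
      throw new InvalidObjectException("Invalid column " + bad);
    }
  }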
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java?rev=1480183&r1=1480182&r2=1480183&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java Wed May 8 08:48:57 2013
@@ -103,7 +103,7 @@ final class DynamicByteArray {
* @param value the array to copy from
* @param valueOffset the first location to copy from value
* @param valueLength the number of bytes to copy from value
- * @return
+ * @return the offset of the start of the value
*/
public int add(byte[] value, int valueOffset, int valueLength) {
int i = length / chunkSize;
@@ -266,5 +266,12 @@ final class DynamicByteArray {
currentLength = Math.min(length, chunkSize - currentOffset);
}
}
+
+ /**
+ * Get the size of the buffers.
+ */
+ public long getSizeInBytes() {
+ return initializedChunks * chunkSize;
+ }
}
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java?rev=1480183&r1=1480182&r2=1480183&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java Wed May 8 08:48:57 2013
@@ -135,5 +135,8 @@ final class DynamicIntArray {
return sb.toString();
}
+ public int getSizeInBytes() {
+ return 4 * initializedChunks * chunkSize;
+ }
}
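Note that getSizeInBytes reports allocated capacity rather than logical size: with, say, two initialized chunks of 8192 ints each, it returns 4 * 2 * 8192 = 65536 bytes no matter how many slots are actually in use (chunk numbers illustrative).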
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MemoryManager.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MemoryManager.java?rev=1480183&r1=1480182&r2=1480183&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MemoryManager.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MemoryManager.java Wed May 8 08:48:57 2013
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.io.orc;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -33,19 +35,25 @@ import java.util.Map;
* dynamic partitions, it is easy to end up with many writers in the same task.
* By managing the size of each allocation, we try to cut down the size of each
* allocation and keep the task from running out of memory.
+ *
+ * This class is thread safe and uses synchronization around the shared state
+ * to prevent race conditions.
*/
class MemoryManager {
+
+ private static final Log LOG = LogFactory.getLog(MemoryManager.class);
+
/**
- * How much does the pool need to change between notifications?
+ * How often should we check the memory sizes? Measured in rows added
+ * to all of the writers.
*/
- private static final double NOTIFICATION_FACTOR = 1.1;
+ private static final int ROWS_BETWEEN_CHECKS = 5000;
private final long totalMemoryPool;
- private long notificationTrigger;
private final Map<Path, WriterInfo> writerList =
new HashMap<Path, WriterInfo>();
private long totalAllocation = 0;
private double currentScale = 1;
- private double lastNotificationScale = 1;
+ private int rowsAddedSinceCheck = 0;
private static class WriterInfo {
long allocation;
@@ -57,7 +65,13 @@ class MemoryManager {
}
public interface Callback {
- void checkMemory(double newScale) throws IOException;
+ /**
+ * The writer needs to check its memory usage
+ * @param newScale the current scale factor for memory allocations
+ * @return true if the writer was over the limit
+ * @throws IOException
+ */
+ boolean checkMemory(double newScale) throws IOException;
}
/**
@@ -70,22 +84,26 @@ class MemoryManager {
double maxLoad = conf.getFloat(poolVar.varname, poolVar.defaultFloatVal);
totalMemoryPool = Math.round(ManagementFactory.getMemoryMXBean().
getHeapMemoryUsage().getMax() * maxLoad);
- notificationTrigger = Math.round(totalMemoryPool * NOTIFICATION_FACTOR);
}
/**
- * Add a new writer's memory allocation to the pool
+ * Add a new writer's memory allocation to the pool. We use the path
+ * as a unique key to ensure that we don't get duplicates.
* @param path the file that is being written
* @param requestedAllocation the requested buffer size
*/
synchronized void addWriter(Path path, long requestedAllocation,
Callback callback) throws IOException {
WriterInfo oldVal = writerList.get(path);
+ // this should always be null, but we handle the case where the memory
+ // manager wasn't told that a writer was no longer in use and the task
+ // starts writing to the same path.
if (oldVal == null) {
oldVal = new WriterInfo(requestedAllocation, callback);
writerList.put(path, oldVal);
totalAllocation += requestedAllocation;
} else {
+ // handle a new writer that is writing to the same path
totalAllocation += requestedAllocation - oldVal.allocation;
oldVal.allocation = requestedAllocation;
oldVal.callback = callback;
@@ -125,6 +143,31 @@ class MemoryManager {
}
/**
+ * Give the memory manager an opportunity to do a memory check.
+ * @throws IOException
+ */
+ synchronized void addedRow() throws IOException {
+ if (++rowsAddedSinceCheck >= ROWS_BETWEEN_CHECKS) {
+ notifyWriters();
+ }
+ }
+
+ /**
+ * Notify all of the writers that they should check their memory usage.
+ * @throws IOException
+ */
+ private void notifyWriters() throws IOException {
+ LOG.debug("Notifying writers after " + rowsAddedSinceCheck);
+ for(WriterInfo writer: writerList.values()) {
+ boolean flushed = writer.callback.checkMemory(currentScale);
+ if (LOG.isDebugEnabled() && flushed) {
+ LOG.debug("flushed " + writer.toString());
+ }
+ }
+ rowsAddedSinceCheck = 0;
+ }
+
+ /**
* Update the currentScale based on the current allocation and pool size.
* This also updates the notificationTrigger.
* @param isAllocate is this an allocation?
@@ -135,21 +178,5 @@ class MemoryManager {
} else {
currentScale = (double) totalMemoryPool / totalAllocation;
}
- if (!isAllocate) {
- // ensure that we notify if we drop 10% from the high water mark
- notificationTrigger =
- Math.min(notificationTrigger,
- Math.round(totalMemoryPool * NOTIFICATION_FACTOR / currentScale));
- } else {
- // we've allocated a new writer, so check to see if we need to notify
- if (totalAllocation > notificationTrigger) {
- for(WriterInfo writer: writerList.values()) {
- writer.callback.checkMemory(currentScale);
- }
- // set the next notification trigger
- notificationTrigger =
- Math.round(totalMemoryPool * NOTIFICATION_FACTOR / currentScale);
- }
- }
}
}
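The trigger model changes from "pool moved by 10%" to a fixed cadence: every 5000 rows added across all writers, each registered callback gets checkMemory(currentScale) and reports whether it flushed. The scale itself is just pool / totalAllocation once the pool is oversubscribed, so e.g. four writers each requesting half the pool yield a scale of 0.5. A hedged sketch of a callback implementation (baseStripeSize, bufferedBytes and flushStripe are hypothetical stand-ins for a writer's internals):

  MemoryManager.Callback cb = new MemoryManager.Callback() {
    @Override
    public boolean checkMemory(double newScale) throws IOException {
      // shrink this writer's budget by the manager's scale factor
      long limit = Math.round(baseStripeSize * newScale);
      if (bufferedBytes() > limit) {
        flushStripe();   // release the buffered stripe
        return true;     // tell the manager we were over the limit
      }
      return false;
    }
  };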
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OutStream.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OutStream.java?rev=1480183&r1=1480182&r2=1480183&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OutStream.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OutStream.java Wed May 8 08:48:57 2013
@@ -23,15 +23,40 @@ import java.nio.ByteBuffer;
class OutStream extends PositionedOutputStream {
interface OutputReceiver {
+ /**
+ * Output the given buffer to the final destination
+ * @param buffer the buffer to output
+ * @throws IOException
+ */
void output(ByteBuffer buffer) throws IOException;
}
static final int HEADER_SIZE = 3;
private final String name;
private final OutputReceiver receiver;
+
+ /**
+ * Stores the uncompressed bytes that have been serialized, but not
+ * compressed yet. When this fills, we compress the entire buffer.
+ */
+ private ByteBuffer current = null;
+
+ /**
+ * Stores the compressed bytes until we have a full buffer and then outputs
+ * them to the receiver. If no compression is being done, this (and overflow)
+ * will always be null and the current buffer will be sent directly to the
+ * receiver.
+ */
private ByteBuffer compressed = null;
+
+ /**
+ * Since the compressed buffer may start with contents from previous
+ * compression blocks, we allocate an overflow buffer so that the
+ * output of the codec can be split between the two buffers. After the
+ * compressed buffer is sent to the receiver, the overflow buffer becomes
+ * the new compressed buffer.
+ */
private ByteBuffer overflow = null;
- private ByteBuffer current;
private final int bufferSize;
private final CompressionCodec codec;
private long compressedBytes = 0;
@@ -85,9 +110,11 @@ class OutStream extends PositionedOutput
}
}
+ /**
+ * Allocate a new output buffer if we are compressing.
+ */
private ByteBuffer getNewOutputBuffer() throws IOException {
- return ByteBuffer.allocate(bufferSize +
- (codec == null ? 0 : HEADER_SIZE));
+ return ByteBuffer.allocate(bufferSize + HEADER_SIZE);
}
private void flip() throws IOException {
@@ -128,7 +155,8 @@ class OutStream extends PositionedOutput
private void spill() throws java.io.IOException {
// if there isn't anything in the current buffer, don't spill
- if (current == null || current.position() == (codec == null ? 0 : HEADER_SIZE)) {
+ if (current == null ||
+ current.position() == (codec == null ? 0 : HEADER_SIZE)) {
return;
}
flip();
@@ -223,8 +251,18 @@ class OutStream extends PositionedOutput
}
@Override
- public long getSize() {
- return uncompressedBytes + compressedBytes;
+ public long getBufferSize() {
+ long result = 0;
+ if (current != null) {
+ result += current.capacity();
+ }
+ if (compressed != null) {
+ result += compressed.capacity();
+ }
+ if (overflow != null) {
+ result += overflow.capacity();
+ }
+ return result;
}
}
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionedOutputStream.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionedOutputStream.java?rev=1480183&r1=1480182&r2=1480183&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionedOutputStream.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionedOutputStream.java Wed May 8 08:48:57 2013
@@ -21,6 +21,18 @@ import java.io.IOException;
import java.io.OutputStream;
abstract class PositionedOutputStream extends OutputStream {
+
+ /**
+ * Record the current position to the recorder.
+ * @param recorder the object that receives the position
+ * @throws IOException
+ */
abstract void getPosition(PositionRecorder recorder) throws IOException;
- abstract long getSize();
+
+ /**
+ * Get the memory size currently allocated as buffers associated with this
+ * stream.
+ * @return the number of bytes used by buffers.
+ */
+ abstract long getBufferSize();
}
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RedBlackTree.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RedBlackTree.java?rev=1480183&r1=1480182&r2=1480183&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RedBlackTree.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RedBlackTree.java Wed May 8 08:48:57 2013
@@ -25,13 +25,11 @@ package org.apache.hadoop.hive.ql.io.orc
*/
abstract class RedBlackTree {
public static final int NULL = -1;
- private static final int DEFAULT_INITIAL_CAPACITY = 16 * 1024;
// Various values controlling the offset of the data within the array.
private static final int LEFT_OFFSET = 0;
private static final int RIGHT_OFFSET = 1;
- private static final int COUNT_OFFSET = 2;
- private static final int ELEMENT_SIZE = 3;
+ private static final int ELEMENT_SIZE = 2;
protected int size = 0;
private final DynamicIntArray data;
@@ -40,13 +38,6 @@ abstract class RedBlackTree {
private boolean wasAdd = false;
/**
- * Create a set with a default initial capacity.
- */
- public RedBlackTree() {
- data = new DynamicIntArray(DEFAULT_INITIAL_CAPACITY * ELEMENT_SIZE);
- }
-
- /**
* Create a set with the given initial capacity.
*/
public RedBlackTree(int initialCapacity) {
@@ -63,7 +54,6 @@ abstract class RedBlackTree {
size += 1;
setLeft(position, left, isRed);
setRight(position, right);
- setCount(position, 1);
return position;
}
@@ -109,18 +99,6 @@ abstract class RedBlackTree {
return data.get(position * ELEMENT_SIZE + RIGHT_OFFSET);
}
- protected int getCount(int position) {
- return data.get(position * ELEMENT_SIZE + COUNT_OFFSET);
- }
-
- private void setCount(int position, int value) {
- data.set(position * ELEMENT_SIZE + COUNT_OFFSET, value);
- }
-
- private void incrementCount(int position, int value) {
- data.increment(position * ELEMENT_SIZE + COUNT_OFFSET, value);
- }
-
/**
* Set the left field of the given position.
* Note that we are storing the node color in the low bit of the left pointer.
@@ -200,7 +178,6 @@ abstract class RedBlackTree {
} else {
lastAdd = node;
wasAdd = false;
- incrementCount(node, 1);
return false;
}
@@ -322,5 +299,11 @@ abstract class RedBlackTree {
data.clear();
}
+ /**
+ * Get the buffer size in bytes.
+ */
+ public long getSizeInBytes() {
+ return data.getSizeInBytes();
+ }
}
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringRedBlackTree.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringRedBlackTree.java?rev=1480183&r1=1480182&r2=1480183&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringRedBlackTree.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringRedBlackTree.java Wed May 8 08:48:57 2013
@@ -24,19 +24,16 @@ import java.io.OutputStream;
/**
* A red-black tree that stores strings. The strings are stored as UTF-8 bytes
- * and an offset/length for each entry.
+ * and an offset for each entry.
*/
class StringRedBlackTree extends RedBlackTree {
private final DynamicByteArray byteArray = new DynamicByteArray();
- private final DynamicIntArray keySizes = new DynamicIntArray();
+ private final DynamicIntArray keyOffsets;
private final Text newKey = new Text();
- public StringRedBlackTree() {
- // PASS
- }
-
public StringRedBlackTree(int initialCapacity) {
super(initialCapacity);
+ keyOffsets = new DynamicIntArray(initialCapacity);
}
public int add(String value) {
@@ -44,16 +41,22 @@ class StringRedBlackTree extends RedBlac
// if the key is new, add it to our byteArray and store the offset & length
if (add()) {
int len = newKey.getLength();
- keySizes.add(byteArray.add(newKey.getBytes(), 0, len));
- keySizes.add(len);
+ keyOffsets.add(byteArray.add(newKey.getBytes(), 0, len));
}
return lastAdd;
}
@Override
protected int compareValue(int position) {
+ int start = keyOffsets.get(position);
+ int end;
+ if (position + 1 == keyOffsets.size()) {
+ end = byteArray.size();
+ } else {
+ end = keyOffsets.get(position+1);
+ }
return byteArray.compare(newKey.getBytes(), 0, newKey.getLength(),
- keySizes.get(2 * position), keySizes.get(2 * position + 1));
+ start, end - start);
}
/**
@@ -84,12 +87,6 @@ class StringRedBlackTree extends RedBlac
* @return the string's length in bytes
*/
int getLength();
-
- /**
- * Get the count for this key.
- * @return the number of times this key was added
- */
- int getCount();
}
/**
@@ -106,6 +103,8 @@ class StringRedBlackTree extends RedBlac
private class VisitorContextImpl implements VisitorContext {
private int originalPosition;
+ private int start;
+ private int end;
private final Text text = new Text();
public int getOriginalPosition() {
@@ -113,20 +112,26 @@ class StringRedBlackTree extends RedBlac
}
public Text getText() {
- byteArray.setText(text, keySizes.get(originalPosition * 2), getLength());
+ byteArray.setText(text, start, end - start);
return text;
}
public void writeBytes(OutputStream out) throws IOException {
- byteArray.write(out, keySizes.get(originalPosition * 2), getLength());
+ byteArray.write(out, start, end - start);
}
public int getLength() {
- return keySizes.get(originalPosition * 2 + 1);
+ return end - start;
}
- public int getCount() {
- return StringRedBlackTree.this.getCount(originalPosition);
+ void setPosition(int position) {
+ originalPosition = position;
+ start = keyOffsets.get(originalPosition);
+ if (position + 1 == keyOffsets.size()) {
+ end = byteArray.size();
+ } else {
+ end = keyOffsets.get(originalPosition + 1);
+ }
}
}
@@ -134,7 +139,7 @@ class StringRedBlackTree extends RedBlac
) throws IOException {
if (node != NULL) {
recurse(getLeft(node), visitor, context);
- context.originalPosition = node;
+ context.setPosition(node);
visitor.visit(context);
recurse(getRight(node), visitor, context);
}
@@ -155,7 +160,7 @@ class StringRedBlackTree extends RedBlac
public void clear() {
super.clear();
byteArray.clear();
- keySizes.clear();
+ keyOffsets.clear();
}
/**
@@ -170,7 +175,8 @@ class StringRedBlackTree extends RedBlac
* Calculate the approximate size in memory.
* @return the number of bytes used in storing the tree.
*/
- public long getByteSize() {
- return byteArray.size() + 5 * 4 * size();
+ public long getSizeInBytes() {
+ return byteArray.getSizeInBytes() + keyOffsets.getSizeInBytes() +
+ super.getSizeInBytes();
}
}
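Dropping the stored lengths works because keys are appended to byteArray back to back: entry i spans keyOffsets[i] up to keyOffsets[i+1], and the last entry runs to byteArray.size(). For example (hypothetical contents), after adding "owen" then "ashutosh" the byte array holds 12 bytes and keyOffsets is [0, 4], so entry 0 has length 4 - 0 = 4 and entry 1 has length 12 - 4 = 8. Together with the count field removed from RedBlackTree, this cuts the per-key int storage from five ints (three node fields plus offset and length, matching the old 5 * 4 * size() term) to three.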
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1480183&r1=1480182&r2=1480183&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Wed May 8 08:48:57 2013
@@ -27,6 +27,8 @@ import java.util.List;
import java.util.Map;
import java.util.TreeMap;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -62,9 +64,15 @@ import com.google.protobuf.CodedOutputSt
* type of column. TreeWriters may have children TreeWriters that handle the
* sub-types. Each of the TreeWriters writes the column's data as a set of
* streams.
+ *
+ * This class is synchronized so that multi-threaded access is ok. In
+ * particular, because the MemoryManager is shared between writers, this class
+ * assumes that checkMemory may be called from a separate thread.
*/
class WriterImpl implements Writer, MemoryManager.Callback {
+ private static final Log LOG = LogFactory.getLog(WriterImpl.class);
+
private static final int HDFS_BUFFER_SIZE = 256 * 1024;
private static final int MIN_ROW_INDEX_STRIDE = 1000;
@@ -154,10 +162,18 @@ class WriterImpl implements Writer, Memo
}
@Override
- public void checkMemory(double newScale) throws IOException {
- if (estimateStripeSize() > Math.round(stripeSize * newScale)) {
- flushStripe();
+ public synchronized boolean checkMemory(double newScale) throws IOException {
+ long limit = (long) Math.round(stripeSize * newScale);
+ long size = estimateStripeSize();
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("ORC writer " + path + " size = " + size + " limit = " +
+ limit);
+ }
+ if (size > limit) {
+ flushStripe();
+ return true;
}
+ return false;
}
/**
@@ -186,6 +202,18 @@ class WriterImpl implements Writer, Memo
}
/**
+ * Get the number of bytes in buffers that are allocated to this stream.
+ * @return number of bytes in buffers
+ */
+ public long getBufferSize() {
+ long result = 0;
+ for(ByteBuffer buf: output) {
+ result += buf.capacity();
+ }
+ return outStream.getBufferSize() + result;
+ }
+
+ /**
* Flush the stream to the codec.
* @throws IOException
*/
@@ -214,12 +242,9 @@ class WriterImpl implements Writer, Memo
}
}
- /**
- * Get the size of compressed and uncompressed data in the stream's buffers.
- * @return the number of bytes in the buffers.
- */
- long getSize() {
- return outStream.getSize();
+ @Override
+ public String toString() {
+ return outStream.toString();
}
}
@@ -681,11 +706,12 @@ class WriterImpl implements Writer, Memo
}
private static class StringTreeWriter extends TreeWriter {
+ private static final int INITIAL_DICTIONARY_SIZE = 4096;
private final PositionedOutputStream stringOutput;
private final RunLengthIntegerWriter lengthOutput;
private final RunLengthIntegerWriter rowOutput;
- private final RunLengthIntegerWriter countOutput;
- private final StringRedBlackTree dictionary = new StringRedBlackTree();
+ private final StringRedBlackTree dictionary =
+ new StringRedBlackTree(INITIAL_DICTIONARY_SIZE);
private final DynamicIntArray rows = new DynamicIntArray();
private final List<OrcProto.RowIndexEntry> savedRowIndex =
new ArrayList<OrcProto.RowIndexEntry>();
@@ -703,12 +729,6 @@ class WriterImpl implements Writer, Memo
OrcProto.Stream.Kind.LENGTH), false);
rowOutput = new RunLengthIntegerWriter(writer.createStream(id,
OrcProto.Stream.Kind.DATA), false);
- if (writer.buildIndex()) {
- countOutput = new RunLengthIntegerWriter(writer.createStream(id,
- OrcProto.Stream.Kind.DICTIONARY_COUNT), false);
- } else {
- countOutput = null;
- }
recordPosition(rowIndexPosition);
rowIndexValueCount.add(0L);
buildIndex = writer.buildIndex();
@@ -739,9 +759,6 @@ class WriterImpl implements Writer, Memo
context.writeBytes(stringOutput);
lengthOutput.write(context.getLength());
dumpOrder[context.getOriginalPosition()] = currentId++;
- if (countOutput != null) {
- countOutput.write(context.getCount());
- }
}
});
int length = rows.size();
@@ -770,9 +787,6 @@ class WriterImpl implements Writer, Memo
stringOutput.flush();
lengthOutput.flush();
rowOutput.flush();
- if (countOutput != null) {
- countOutput.flush();
- }
// reset all of the fields to be ready for the next stripe.
dictionary.clear();
rows.clear();
@@ -809,7 +823,7 @@ class WriterImpl implements Writer, Memo
@Override
long estimateMemory() {
- return rows.size() * 4 + dictionary.getByteSize();
+ return rows.getSizeInBytes() + dictionary.getSizeInBytes();
}
}
@@ -1434,40 +1448,43 @@ class WriterImpl implements Writer, Memo
private long estimateStripeSize() {
long result = 0;
for(BufferedStream stream: streams.values()) {
- result += stream.getSize();
+ result += stream.getBufferSize();
}
result += treeWriter.estimateMemory();
return result;
}
@Override
- public void addUserMetadata(String name, ByteBuffer value) {
+ public synchronized void addUserMetadata(String name, ByteBuffer value) {
userMetadata.put(name, ByteString.copyFrom(value));
}
@Override
public void addRow(Object row) throws IOException {
- treeWriter.write(row);
- rowsInStripe += 1;
- if (buildIndex) {
- rowsInIndex += 1;
-
- if (rowsInIndex >= rowIndexStride) {
- createRowIndexEntry();
+ synchronized (this) {
+ treeWriter.write(row);
+ rowsInStripe += 1;
+ if (buildIndex) {
+ rowsInIndex += 1;
+
+ if (rowsInIndex >= rowIndexStride) {
+ createRowIndexEntry();
+ }
}
}
- // once every 1000 rows, check the size to see if we should spill
- if (rowsInStripe % 1000 == 0) {
- checkMemory(memoryManager.getAllocationScale());
- }
+ memoryManager.addedRow();
}
@Override
public void close() throws IOException {
- flushStripe();
- int footerLength = writeFooter(rawWriter.getPos());
- rawWriter.writeByte(writePostScript(footerLength));
- rawWriter.close();
+ // remove us from the memory manager so that we don't get any callbacks
memoryManager.removeWriter(path);
+ // actually close the file
+ synchronized (this) {
+ flushStripe();
+ int footerLength = writeFooter(rawWriter.getPos());
+ rawWriter.writeByte(writePostScript(footerLength));
+ rawWriter.close();
+ }
}
}
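The flush decision is now expressed against the shared pool: the effective limit is stripeSize * newScale, so a writer opened with a 200MB stripe size under a scale of 0.5 flushes once its estimated stripe passes 100MB (numbers illustrative). The added synchronization matters because the MemoryManager is shared: the checkMemory callback can arrive on whichever thread happened to cross the 5000-row cadence, not necessarily this writer's own.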
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java?rev=1480183&r1=1480182&r2=1480183&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java Wed May 8 08:48:57 2013
@@ -1050,8 +1050,7 @@ public class PTFTranslator {
}
ColumnInfo cInfo = new ColumnInfo(wFnDef.getAlias(),
TypeInfoUtils.getTypeInfoFromObjectInspector(wFnOI),
- null,
- false);
+ null, true, true);
rr.putExpression(ast, cInfo);
}
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1480183&r1=1480182&r2=1480183&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Wed May 8 08:48:57 2013
@@ -5167,10 +5167,11 @@ public class SemanticAnalyzer extends Ba
if (field_schemas != null) {
FieldSchema col = new FieldSchema();
- if (nm[1] != null) {
- col.setName(unescapeIdentifier(colInfo.getAlias()).toLowerCase()); // remove ``
- } else {
+ if ("".equals(nm[0]) || nm[1] == null) {
+ // ast expression is not a valid column name for table
col.setName(colInfo.getInternalName());
+ } else {
+ col.setName(unescapeIdentifier(colInfo.getAlias()).toLowerCase()); // remove ``
}
col.setType(colInfo.getType().getTypeName());
field_schemas.add(col);
@@ -7385,6 +7386,10 @@ public class SemanticAnalyzer extends Ba
for (ColumnInfo colInfo : rr.getColumnInfos()) {
String name = colInfo.getInternalName();
String[] tmp = rr.reverseLookup(name);
+ if ("".equals(tmp[0]) || tmp[1] == null) {
+ // ast expression is not a valid column name for table
+ tmp[1] = colInfo.getInternalName();
+ }
newRR.put(alias, tmp[1], colInfo);
}
opParseCtx.get(curr).setRowResolver(newRR);
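A hedged illustration of the case being fixed (query shape illustrative; the added ctas_colname.q carries the real coverage): in CREATE TABLE t AS SELECT key, count(value) FROM src GROUP BY key, the unaliased aggregate is not a valid table column name, so the CTAS schema now falls back to the expression's internal name (e.g. _c1) instead of producing an invalid column.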
Modified: hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestMemoryManager.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestMemoryManager.java?rev=1480183&r1=1480182&r2=1480183&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestMemoryManager.java (original)
+++ hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestMemoryManager.java Wed May 8 08:48:57 2013
@@ -39,8 +39,8 @@ public class TestMemoryManager {
private static final double ERROR = 0.000001;
private static class NullCallback implements MemoryManager.Callback {
- public void checkMemory(double newScale) {
- // PASS
+ public boolean checkMemory(double newScale) {
+ return false;
}
}
@@ -120,17 +120,13 @@ public class TestMemoryManager {
calls[i] = mock(MemoryManager.Callback.class);
mgr.addWriter(new Path(Integer.toString(i)), pool/4, calls[i]);
}
- double[] spills = new double[]{0, 0, 0, 0, 0.8, 0.666666666667,
- 0.571428571429, 0.5, 0.444444444444,
- 0.4, 0, 0.333333333333, 0, 0.285714285714,
- 0, 0.25, 0, 0.222222222222, 0, 0.2};
- for(int spill=0; spill < spills.length; ++spill) {
- if (spills[spill] != 0) {
- for(int call=0; call < spill + 1; ++call) {
- verify(calls[call], times(1))
- .checkMemory(doubleThat(closeTo(spills[spill], ERROR)));
- }
- }
+ // add enough rows to get the memory manager to check the limits
+ for(int i=0; i < 10000; ++i) {
+ mgr.addedRow();
+ }
+ for(int call=0; call < calls.length; ++call) {
+ verify(calls[call], times(2))
+ .checkMemory(doubleThat(closeTo(0.2, ERROR)));
}
}
}
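The new expectations fall out of the constants: 10000 addedRow() calls at ROWS_BETWEEN_CHECKS = 5000 produce exactly two notifications, hence times(2), and with every mocked writer requesting pool/4 the pool ends up oversubscribed fivefold, giving the expected 0.2 scale (the writer count sits outside the hunk; twenty writers is consistent with that expectation).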
Modified: hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java?rev=1480183&r1=1480182&r2=1480183&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java (original)
+++ hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java Wed May 8 08:48:57 2013
@@ -52,6 +52,7 @@ import org.junit.Test;
import org.junit.rules.TestName;
import java.io.File;
+import java.io.IOException;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.sql.Timestamp;
@@ -639,7 +640,7 @@ public class TestOrcFile {
row.setFieldValue(0, null);
union.set((byte) 0, new IntWritable(1732050807));
row.setFieldValue(2, null);
- for(int i=0; i < 1000; ++i) {
+ for(int i=0; i < 5000; ++i) {
writer.addRow(row);
}
union.set((byte) 0, new IntWritable(0));
@@ -651,7 +652,7 @@ public class TestOrcFile {
writer.close();
Reader reader = OrcFile.createReader(fs, testFilePath);
assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
- assertEquals(1309, reader.getNumberOfRows());
+ assertEquals(5309, reader.getNumberOfRows());
DecimalColumnStatistics stats =
(DecimalColumnStatistics) reader.getStatistics()[5];
assertEquals(303, stats.getNumberOfValues());
@@ -732,7 +733,7 @@ public class TestOrcFile {
assertEquals(new HiveDecimal(new BigInteger(118, rand),
rand.nextInt(36)), row.getFieldValue(2));
}
- for(int i=0; i < 1000; ++i) {
+ for(int i=0; i < 5000; ++i) {
row = (OrcStruct) rows.next(row);
assertEquals(new IntWritable(1732050807), union.getObject());
}
@@ -942,6 +943,8 @@ public class TestOrcFile {
double rate;
Path path = null;
long lastAllocation = 0;
+ int rows = 0;
+ MemoryManager.Callback callback;
MyMemoryManager(Configuration conf, long totalSpace, double rate) {
super(conf);
@@ -954,6 +957,7 @@ public class TestOrcFile {
MemoryManager.Callback callback) {
this.path = path;
this.lastAllocation = requestedAllocation;
+ this.callback = callback;
}
@Override
@@ -971,6 +975,13 @@ public class TestOrcFile {
double getAllocationScale() {
return rate;
}
+
+ @Override
+ void addedRow() throws IOException {
+ if (++rows % 100 == 0) {
+ callback.checkMemory(rate);
+ }
+ }
}
@Test
@@ -995,9 +1006,9 @@ public class TestOrcFile {
for(StripeInformation stripe: reader.getStripes()) {
i += 1;
assertTrue("stripe " + i + " is too long at " + stripe.getDataLength(),
- stripe.getDataLength() < 10000);
+ stripe.getDataLength() < 5000);
}
- assertEquals(3, i);
+ assertEquals(25, i);
assertEquals(2500, reader.getNumberOfRows());
}
}
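The revised stripe count is likewise arithmetic: MyMemoryManager above fires checkMemory every 100 rows, and with the test's tight limit each check flushes a stripe, so 2500 rows produce 2500 / 100 = 25 stripes, each under the tightened 5000-byte bound.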
Modified: hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java?rev=1480183&r1=1480182&r2=1480183&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java (original)
+++ hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java Wed May 8 08:48:57 2013
@@ -104,7 +104,7 @@ public class TestStringRedBlackTree {
System.err.println(indent + "NULL");
} else {
System.err.println(indent + "Node " + node + " color " +
- (tree.isRed(node) ? "red" : "black") + " count " + tree.getCount(node));
+ (tree.isRed(node) ? "red" : "black"));
printTree(tree, indent + " ", tree.getLeft(node));
printTree(tree, indent + " ", tree.getRight(node));
}
@@ -112,14 +112,12 @@ public class TestStringRedBlackTree {
private static class MyVisitor implements StringRedBlackTree.Visitor {
private final String[] words;
- private final int[] counts;
private final int[] order;
private final DataOutputBuffer buffer = new DataOutputBuffer();
int current = 0;
- MyVisitor(String[] args, int[] counts, int[] order) {
+ MyVisitor(String[] args, int[] order) {
words = args;
- this.counts = counts;
this.order = order;
}
@@ -128,7 +126,6 @@ public class TestStringRedBlackTree {
) throws IOException {
String word = context.getText().toString();
assertEquals("in word " + current, words[current], word);
- assertEquals("in word " + current, counts[current], context.getCount());
assertEquals("in word " + current, order[current],
context.getOriginalPosition());
buffer.reset();
@@ -138,14 +135,14 @@ public class TestStringRedBlackTree {
}
}
- void checkContents(StringRedBlackTree tree, int[] counts, int[] order,
+ void checkContents(StringRedBlackTree tree, int[] order,
String... params
) throws IOException {
- tree.visit(new MyVisitor(params, counts, order));
+ tree.visit(new MyVisitor(params, order));
}
StringRedBlackTree buildTree(String... params) throws IOException {
- StringRedBlackTree result = new StringRedBlackTree();
+ StringRedBlackTree result = new StringRedBlackTree(1000);
for(String word: params) {
result.add(word);
checkTree(result);
@@ -156,7 +153,7 @@ public class TestStringRedBlackTree {
@Test
public void test1() throws Exception {
StringRedBlackTree tree = new StringRedBlackTree(5);
- assertEquals(0, tree.getByteSize());
+ assertEquals(0, tree.getSizeInBytes());
checkTree(tree);
assertEquals(0, tree.add("owen"));
checkTree(tree);
@@ -186,15 +183,12 @@ public class TestStringRedBlackTree {
checkTree(tree);
assertEquals(9, tree.add("z"));
checkTree(tree);
- checkContents(tree, new int[]{2,1,2,1,1,1,1,2,1,1},
- new int[]{2,5,1,4,6,3,7,0,9,8},
+ checkContents(tree, new int[]{2,5,1,4,6,3,7,0,9,8},
"alan", "arun", "ashutosh", "eric", "eric14", "greg",
"o", "owen", "z", "ziggy");
- assertEquals(10*5*4 + 8 + 6 + 5 + 5 * 4 + 2 * 1, tree.getByteSize());
+ assertEquals(32888, tree.getSizeInBytes());
// check that adding greg again bumps the count
- assertEquals(1, tree.getCount(3));
assertEquals(3, tree.add("greg"));
- assertEquals(2, tree.getCount(3));
assertEquals(41, tree.getCharacterSize());
// add some more strings to test the different branches of the
// rebalancing
@@ -210,7 +204,7 @@ public class TestStringRedBlackTree {
checkTree(tree);
tree.clear();
checkTree(tree);
- assertEquals(0, tree.getByteSize());
+ assertEquals(0, tree.getSizeInBytes());
assertEquals(0, tree.getCharacterSize());
}
@@ -220,8 +214,7 @@ public class TestStringRedBlackTree {
buildTree("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l",
"m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
assertEquals(26, tree.size());
- checkContents(tree, new int[]{1,1,1, 1,1,1, 1,1,1, 1,1,1, 1,1,1, 1,1,1,
- 1,1,1, 1,1,1, 1,1}, new int[]{0,1,2, 3,4,5, 6,7,8, 9,10,11, 12,13,14,
+ checkContents(tree, new int[]{0,1,2, 3,4,5, 6,7,8, 9,10,11, 12,13,14,
15,16,17, 18,19,20, 21,22,23, 24,25},
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j","k", "l", "m", "n", "o",
"p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
@@ -233,8 +226,7 @@ public class TestStringRedBlackTree {
buildTree("z", "y", "x", "w", "v", "u", "t", "s", "r", "q", "p", "o", "n",
"m", "l", "k", "j", "i", "h", "g", "f", "e", "d", "c", "b", "a");
assertEquals(26, tree.size());
- checkContents(tree, new int[]{1,1,1, 1,1,1, 1,1,1, 1,1,1, 1,1,1, 1,1,1,
- 1,1,1, 1,1,1, 1,1}, new int[]{25,24,23, 22,21,20, 19,18,17, 16,15,14,
+ checkContents(tree, new int[]{25,24,23, 22,21,20, 19,18,17, 16,15,14,
13,12,11, 10,9,8, 7,6,5, 4,3,2, 1,0},
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
"p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
Modified: hive/branches/HIVE-4115/ql/src/test/resources/orc-file-dump.out
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/test/resources/orc-file-dump.out?rev=1480183&r1=1480182&r2=1480183&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/test/resources/orc-file-dump.out (original)
+++ hive/branches/HIVE-4115/ql/src/test/resources/orc-file-dump.out Wed May 8 08:48:57 2013
@@ -11,74 +11,87 @@ Statistics:
Column 3: count: 21000 min: Darkness, max: worst
Stripes:
- Stripe: offset: 3 data: 83505 rows: 6000 tail: 91 index: 179
+ Stripe: offset: 3 data: 69638 rows: 5000 tail: 85 index: 126
Stream: column 0 section ROW_INDEX start: 3 length 10
Stream: column 1 section ROW_INDEX start: 13 length 38
Stream: column 2 section ROW_INDEX start: 51 length 42
- Stream: column 3 section DICTIONARY_COUNT start: 93 length 53
- Stream: column 3 section ROW_INDEX start: 146 length 36
- Stream: column 1 section PRESENT start: 182 length 11
- Stream: column 1 section DATA start: 193 length 27086
- Stream: column 2 section PRESENT start: 27279 length 11
- Stream: column 2 section DATA start: 27290 length 52124
- Stream: column 3 section PRESENT start: 79414 length 11
- Stream: column 3 section DATA start: 79425 length 4091
- Stream: column 3 section LENGTH start: 83516 length 38
- Stream: column 3 section DICTIONARY_DATA start: 83554 length 133
+ Stream: column 3 section ROW_INDEX start: 93 length 36
+ Stream: column 1 section PRESENT start: 129 length 11
+ Stream: column 1 section DATA start: 140 length 22605
+ Stream: column 2 section PRESENT start: 22745 length 11
+ Stream: column 2 section DATA start: 22756 length 43426
+ Stream: column 3 section PRESENT start: 66182 length 11
+ Stream: column 3 section DATA start: 66193 length 3403
+ Stream: column 3 section LENGTH start: 69596 length 38
+ Stream: column 3 section DICTIONARY_DATA start: 69634 length 133
Encoding column 0: DIRECT
Encoding column 1: DIRECT
Encoding column 2: DIRECT
Encoding column 3: DICTIONARY[35]
- Stripe: offset: 83778 data: 83453 rows: 6000 tail: 91 index: 180
- Stream: column 0 section ROW_INDEX start: 83778 length 10
- Stream: column 1 section ROW_INDEX start: 83788 length 39
- Stream: column 2 section ROW_INDEX start: 83827 length 42
- Stream: column 3 section DICTIONARY_COUNT start: 83869 length 53
- Stream: column 3 section ROW_INDEX start: 83922 length 36
- Stream: column 1 section PRESENT start: 83958 length 11
- Stream: column 1 section DATA start: 83969 length 27093
- Stream: column 2 section PRESENT start: 111062 length 11
- Stream: column 2 section DATA start: 111073 length 52119
- Stream: column 3 section PRESENT start: 163192 length 11
- Stream: column 3 section DATA start: 163203 length 4037
- Stream: column 3 section LENGTH start: 167240 length 38
- Stream: column 3 section DICTIONARY_DATA start: 167278 length 133
+ Stripe: offset: 69852 data: 69617 rows: 5000 tail: 83 index: 124
+ Stream: column 0 section ROW_INDEX start: 69852 length 10
+ Stream: column 1 section ROW_INDEX start: 69862 length 36
+ Stream: column 2 section ROW_INDEX start: 69898 length 42
+ Stream: column 3 section ROW_INDEX start: 69940 length 36
+ Stream: column 1 section PRESENT start: 69976 length 11
+ Stream: column 1 section DATA start: 69987 length 22597
+ Stream: column 2 section PRESENT start: 92584 length 11
+ Stream: column 2 section DATA start: 92595 length 43439
+ Stream: column 3 section PRESENT start: 136034 length 11
+ Stream: column 3 section DATA start: 136045 length 3377
+ Stream: column 3 section LENGTH start: 139422 length 38
+ Stream: column 3 section DICTIONARY_DATA start: 139460 length 133
Encoding column 0: DIRECT
Encoding column 1: DIRECT
Encoding column 2: DIRECT
Encoding column 3: DICTIONARY[35]
- Stripe: offset: 167502 data: 83456 rows: 6000 tail: 92 index: 182
- Stream: column 0 section ROW_INDEX start: 167502 length 10
- Stream: column 1 section ROW_INDEX start: 167512 length 39
- Stream: column 2 section ROW_INDEX start: 167551 length 42
- Stream: column 3 section DICTIONARY_COUNT start: 167593 length 55
- Stream: column 3 section ROW_INDEX start: 167648 length 36
- Stream: column 1 section PRESENT start: 167684 length 11
- Stream: column 1 section DATA start: 167695 length 27080
- Stream: column 2 section PRESENT start: 194775 length 11
- Stream: column 2 section DATA start: 194786 length 52093
- Stream: column 3 section PRESENT start: 246879 length 11
- Stream: column 3 section DATA start: 246890 length 4079
- Stream: column 3 section LENGTH start: 250969 length 38
- Stream: column 3 section DICTIONARY_DATA start: 251007 length 133
+ Stripe: offset: 139676 data: 69603 rows: 5000 tail: 85 index: 127
+ Stream: column 0 section ROW_INDEX start: 139676 length 10
+ Stream: column 1 section ROW_INDEX start: 139686 length 39
+ Stream: column 2 section ROW_INDEX start: 139725 length 42
+ Stream: column 3 section ROW_INDEX start: 139767 length 36
+ Stream: column 1 section PRESENT start: 139803 length 11
+ Stream: column 1 section DATA start: 139814 length 22594
+ Stream: column 2 section PRESENT start: 162408 length 11
+ Stream: column 2 section DATA start: 162419 length 43415
+ Stream: column 3 section PRESENT start: 205834 length 11
+ Stream: column 3 section DATA start: 205845 length 3390
+ Stream: column 3 section LENGTH start: 209235 length 38
+ Stream: column 3 section DICTIONARY_DATA start: 209273 length 133
Encoding column 0: DIRECT
Encoding column 1: DIRECT
Encoding column 2: DIRECT
Encoding column 3: DICTIONARY[35]
- Stripe: offset: 251232 data: 41842 rows: 3000 tail: 90 index: 172
- Stream: column 0 section ROW_INDEX start: 251232 length 10
- Stream: column 1 section ROW_INDEX start: 251242 length 39
- Stream: column 2 section ROW_INDEX start: 251281 length 43
- Stream: column 3 section DICTIONARY_COUNT start: 251324 length 44
- Stream: column 3 section ROW_INDEX start: 251368 length 36
- Stream: column 1 section PRESENT start: 251404 length 9
- Stream: column 1 section DATA start: 251413 length 13544
- Stream: column 2 section PRESENT start: 264957 length 9
- Stream: column 2 section DATA start: 264966 length 26072
- Stream: column 3 section PRESENT start: 291038 length 9
- Stream: column 3 section DATA start: 291047 length 2028
- Stream: column 3 section LENGTH start: 293075 length 38
- Stream: column 3 section DICTIONARY_DATA start: 293113 length 133
+ Stripe: offset: 209491 data: 69584 rows: 5000 tail: 84 index: 126
+ Stream: column 0 section ROW_INDEX start: 209491 length 10
+ Stream: column 1 section ROW_INDEX start: 209501 length 38
+ Stream: column 2 section ROW_INDEX start: 209539 length 42
+ Stream: column 3 section ROW_INDEX start: 209581 length 36
+ Stream: column 1 section PRESENT start: 209617 length 11
+ Stream: column 1 section DATA start: 209628 length 22575
+ Stream: column 2 section PRESENT start: 232203 length 11
+ Stream: column 2 section DATA start: 232214 length 43426
+ Stream: column 3 section PRESENT start: 275640 length 11
+ Stream: column 3 section DATA start: 275651 length 3379
+ Stream: column 3 section LENGTH start: 279030 length 38
+ Stream: column 3 section DICTIONARY_DATA start: 279068 length 133
+ Encoding column 0: DIRECT
+ Encoding column 1: DIRECT
+ Encoding column 2: DIRECT
+ Encoding column 3: DICTIONARY[35]
+ Stripe: offset: 279285 data: 14111 rows: 1000 tail: 80 index: 127
+ Stream: column 0 section ROW_INDEX start: 279285 length 10
+ Stream: column 1 section ROW_INDEX start: 279295 length 39
+ Stream: column 2 section ROW_INDEX start: 279334 length 42
+ Stream: column 3 section ROW_INDEX start: 279376 length 36
+ Stream: column 1 section PRESENT start: 279412 length 5
+ Stream: column 1 section DATA start: 279417 length 4529
+ Stream: column 2 section PRESENT start: 283946 length 5
+ Stream: column 2 section DATA start: 283951 length 8690
+ Stream: column 3 section PRESENT start: 292641 length 5
+ Stream: column 3 section DATA start: 292646 length 706
+ Stream: column 3 section LENGTH start: 293352 length 38
+ Stream: column 3 section DICTIONARY_DATA start: 293390 length 133
Encoding column 0: DIRECT
Encoding column 1: DIRECT
Encoding column 2: DIRECT