You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by ji...@apache.org on 2015/10/29 01:25:05 UTC
[3/7] incubator-asterixdb-hyracks git commit: ASTERIXDB-1102: VarSize Encoding to store length of String and ByteArray
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/AbstractRunGeneratorTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/AbstractRunGeneratorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/AbstractRunGeneratorTest.java
index 1f33915..3cc2a23 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/AbstractRunGeneratorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/AbstractRunGeneratorTest.java
@@ -54,8 +54,8 @@ import org.junit.Test;
public abstract class AbstractRunGeneratorTest {
static TestUtils testUtils = new TestUtils();
- static ISerializerDeserializer[] SerDers = new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE };
+ static ISerializerDeserializer[] SerDers = new ISerializerDeserializer[] {
+ IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() };
static RecordDescriptor RecordDesc = new RecordDescriptor(SerDers);
static Random GRandom = new Random(System.currentTimeMillis());
static int[] SortFields = new int[] { 0, 1 };
@@ -153,7 +153,7 @@ public abstract class AbstractRunGeneratorTest {
for (Map.Entry<Integer, String> entry : specialData.entrySet()) {
tb.reset();
tb.addField(IntegerSerializerDeserializer.INSTANCE, entry.getKey());
- tb.addField(UTF8StringSerializerDeserializer.INSTANCE, entry.getValue());
+ tb.addField(new UTF8StringSerializerDeserializer(), entry.getValue());
VSizeFrame frame = new VSizeFrame(ctx, FrameHelper.calcAlignedFrameSizeToStore(
tb.getFieldEndOffsets().length, tb.getSize(), ctx.getInitialFrameSize()));
@@ -173,7 +173,7 @@ public abstract class AbstractRunGeneratorTest {
if (!keyValuePair.containsKey(key)) {
String value = generateRandomRecord(minRecordSize, maxRecordSize);
tb.addField(IntegerSerializerDeserializer.INSTANCE, key);
- tb.addField(UTF8StringSerializerDeserializer.INSTANCE, value);
+ tb.addField(new UTF8StringSerializerDeserializer(), value);
if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
frameList.add(frame);
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/RunMergingFrameReaderTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/RunMergingFrameReaderTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/RunMergingFrameReaderTest.java
index ca0a6bb..e6d10f2 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/RunMergingFrameReaderTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/RunMergingFrameReaderTest.java
@@ -109,7 +109,7 @@ public class RunMergingFrameReaderTest {
while (true) {
tb.reset();
tb.addField(IntegerSerializerDeserializer.INSTANCE, lastEntry.getKey());
- tb.addField(UTF8StringSerializerDeserializer.INSTANCE, lastEntry.getValue());
+ tb.addField(new UTF8StringSerializerDeserializer(), lastEntry.getValue());
if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
break;
} else {
@@ -148,7 +148,7 @@ public class RunMergingFrameReaderTest {
for (Map.Entry<Integer, String> entry : specialData.entrySet()) {
tb.reset();
tb.addField(IntegerSerializerDeserializer.INSTANCE, entry.getKey());
- tb.addField(UTF8StringSerializerDeserializer.INSTANCE, entry.getValue());
+ tb.addField(new UTF8StringSerializerDeserializer(), entry.getValue());
int size = tb.getSize() + tb.getFieldEndOffsets().length * 4;
datasize += size;
if (size > maxtuple) {
@@ -164,7 +164,7 @@ public class RunMergingFrameReaderTest {
int key = GRandom.nextInt(datasize + 1);
if (!result.containsKey(key)) {
tb.addField(IntegerSerializerDeserializer.INSTANCE, key);
- tb.addField(UTF8StringSerializerDeserializer.INSTANCE, value);
+ tb.addField(new UTF8StringSerializerDeserializer(), value);
int size = tb.getSize() + tb.getFieldEndOffsets().length * 4;
datasize += size;
if (size > maxtuple) {
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/ExternalGroupClient.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/ExternalGroupClient.java b/hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/ExternalGroupClient.java
index 3f9b0e9..965e194 100644
--- a/hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/ExternalGroupClient.java
+++ b/hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/ExternalGroupClient.java
@@ -168,10 +168,10 @@ public class ExternalGroupClient {
IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE,
FloatSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor fileScanner = new FileScanOperatorDescriptor(spec, splitsProvider,
new DelimitedDataTupleParserFactory(new IValueParserFactory[] { IntegerParserFactory.INSTANCE,
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/WordCountMain.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/WordCountMain.java b/hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/WordCountMain.java
index a3e1ee0..cb6006b 100644
--- a/hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/WordCountMain.java
+++ b/hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/WordCountMain.java
@@ -132,14 +132,14 @@ public class WordCountMain {
IFileSplitProvider splitsProvider = new ConstantFileSplitProvider(inSplits);
RecordDescriptor wordDesc = new RecordDescriptor(
- new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
+ new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor wordScanner = new FileScanOperatorDescriptor(spec, splitsProvider,
new WordTupleParserFactory(), wordDesc);
createPartitionConstraint(spec, wordScanner, inSplits);
RecordDescriptor groupResultDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
IOperatorDescriptor gBy;
int[] keys = new int[] { 0 };
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Common.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Common.java b/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Common.java
index 28b62a7..ac172fd 100644
--- a/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Common.java
+++ b/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Common.java
@@ -34,27 +34,27 @@ import org.apache.hyracks.dataflow.std.file.FileSplit;
public class Common {
static RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
static RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
static RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
static IValueParserFactory[] orderParserFactories = new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Join.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Join.java b/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Join.java
index dadb935..b20d2b8 100644
--- a/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Join.java
+++ b/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Join.java
@@ -229,7 +229,7 @@ public class Join {
if (hasGroupBy) {
RecordDescriptor groupResultDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
HashGroupOperatorDescriptor gby = new HashGroupOperatorDescriptor(
spec,
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs/dataflow/DataflowTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs/dataflow/DataflowTest.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs/dataflow/DataflowTest.java
index e36dd06..1515037 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs/dataflow/DataflowTest.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs/dataflow/DataflowTest.java
@@ -141,7 +141,7 @@ public class DataflowTest extends TestCase {
String[] readSchedule = scheduler.getLocationConstraints(splits);
JobSpecification jobSpec = new JobSpecification();
RecordDescriptor recordDesc = new RecordDescriptor(
- new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
+ new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
String[] locations = new String[] { HyracksUtils.NC1_ID, HyracksUtils.NC1_ID, HyracksUtils.NC2_ID,
HyracksUtils.NC2_ID };
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs2/dataflow/DataflowTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs2/dataflow/DataflowTest.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs2/dataflow/DataflowTest.java
index a703e57..0b41b07 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs2/dataflow/DataflowTest.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs2/dataflow/DataflowTest.java
@@ -148,7 +148,7 @@ public class DataflowTest extends TestCase {
String[] readSchedule = scheduler.getLocationConstraints(splits);
JobSpecification jobSpec = new JobSpecification();
RecordDescriptor recordDesc = new RecordDescriptor(
- new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
+ new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
String[] locations = new String[] { HyracksUtils.NC1_ID, HyracksUtils.NC1_ID, HyracksUtils.NC2_ID,
HyracksUtils.NC2_ID };
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-common/pom.xml
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-common/pom.xml b/hyracks/hyracks-storage-am-common/pom.xml
index 727da41..86b0eac 100644
--- a/hyracks/hyracks-storage-am-common/pom.xml
+++ b/hyracks/hyracks-storage-am-common/pom.xml
@@ -68,5 +68,10 @@
<type>jar</type>
<scope>compile</scope>
</dependency>
+ <dependency>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>hyracks-util</artifactId>
+ <version>0.2.17-SNAPSHOT</version>
+ </dependency>
</dependencies>
</project>
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorDescriptor.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorDescriptor.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorDescriptor.java
index c99b1e5..554cd3f 100644
--- a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorDescriptor.java
+++ b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorDescriptor.java
@@ -38,7 +38,7 @@ public class TreeIndexStatsOperatorDescriptor extends AbstractTreeIndexOperatorD
private static final long serialVersionUID = 1L;
private static final RecordDescriptor recDesc = new RecordDescriptor(
- new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
+ new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
public TreeIndexStatsOperatorDescriptor(IOperatorDescriptorRegistry spec, IStorageManagerInterface storageManager,
IIndexLifecycleManagerProvider lifecycleManagerProvider, IFileSplitProvider fileSplitProvider,
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorNodePushable.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorNodePushable.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorNodePushable.java
index 51ca3c2..584418c 100644
--- a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorNodePushable.java
+++ b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorNodePushable.java
@@ -38,6 +38,7 @@ public class TreeIndexStatsOperatorNodePushable extends AbstractUnaryOutputSourc
private final AbstractTreeIndexOperatorDescriptor opDesc;
private final IHyracksTaskContext ctx;
private final TreeIndexDataflowHelper treeIndexHelper;
+ private final UTF8StringSerializerDeserializer utf8SerDer = new UTF8StringSerializerDeserializer();
private TreeIndexStatsGatherer statsGatherer;
public TreeIndexStatsOperatorNodePushable(AbstractTreeIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx,
@@ -77,7 +78,7 @@ public class TreeIndexStatsOperatorNodePushable extends AbstractUnaryOutputSourc
ArrayTupleBuilder tb = new ArrayTupleBuilder(1);
DataOutput dos = tb.getDataOutput();
tb.reset();
- UTF8StringSerializerDeserializer.INSTANCE.serialize(stats.toString(), dos);
+ utf8SerDer.serialize(stats.toString(), dos);
tb.addFieldEndOffset();
if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
throw new HyracksDataException(
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleReference.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleReference.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleReference.java
index 1a17a5a..c6a0035 100644
--- a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleReference.java
+++ b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleReference.java
@@ -19,11 +19,14 @@
package org.apache.hyracks.storage.am.common.tuples;
+import static org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder.VarLenIntDecoder;
+
import java.nio.ByteBuffer;
import org.apache.hyracks.api.dataflow.value.ITypeTraits;
import org.apache.hyracks.storage.am.common.api.ITreeIndexFrame;
import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleReference;
+import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
public class TypeAwareTupleReference implements ITreeIndexTupleReference {
protected ByteBuffer buf;
@@ -34,7 +37,7 @@ public class TypeAwareTupleReference implements ITreeIndexTupleReference {
protected int dataStartOff;
protected ITypeTraits[] typeTraits;
- protected VarLenIntEncoderDecoder encDec = new VarLenIntEncoderDecoder();
+ protected VarLenIntDecoder encDec = VarLenIntEncoderDecoder.createDecoder();
protected int[] decodedFieldSlots;
public TypeAwareTupleReference(ITypeTraits[] typeTraits) {
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleWriter.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleWriter.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleWriter.java
index 73e6e3e..c44cb6b 100644
--- a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleWriter.java
+++ b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleWriter.java
@@ -25,11 +25,12 @@ import org.apache.hyracks.api.dataflow.value.ITypeTraits;
import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleReference;
import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleWriter;
+import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
public class TypeAwareTupleWriter implements ITreeIndexTupleWriter {
protected ITypeTraits[] typeTraits;
- protected VarLenIntEncoderDecoder encDec = new VarLenIntEncoderDecoder();
+ protected VarLenIntEncoderDecoder.VarLenIntDecoder decoder = VarLenIntEncoderDecoder.createDecoder();
public TypeAwareTupleWriter(ITypeTraits[] typeTraits) {
this.typeTraits = typeTraits;
@@ -73,13 +74,11 @@ public class TypeAwareTupleWriter implements ITreeIndexTupleWriter {
}
// write field slots for variable length fields
- encDec.reset(targetBuf, runner);
for (int i = 0; i < tuple.getFieldCount(); i++) {
if (!typeTraits[i].isFixedLength()) {
- encDec.encode(tuple.getFieldLength(i));
+ runner += VarLenIntEncoderDecoder.encode(tuple.getFieldLength(i), targetBuf, runner);
}
}
- runner = encDec.getPos();
// write data fields
for (int i = 0; i < tuple.getFieldCount(); i++) {
@@ -100,13 +99,11 @@ public class TypeAwareTupleWriter implements ITreeIndexTupleWriter {
}
// write field slots for variable length fields
- encDec.reset(targetBuf, runner);
for (int i = startField; i < startField + numFields; i++) {
if (!typeTraits[i].isFixedLength()) {
- encDec.encode(tuple.getFieldLength(i));
+ runner += VarLenIntEncoderDecoder.encode(tuple.getFieldLength(i), targetBuf, runner);
}
}
- runner = encDec.getPos();
for (int i = startField; i < startField + numFields; i++) {
System.arraycopy(tuple.getFieldData(i), tuple.getFieldStart(i), targetBuf, runner, tuple.getFieldLength(i));
@@ -124,7 +121,7 @@ public class TypeAwareTupleWriter implements ITreeIndexTupleWriter {
int fieldSlotBytes = 0;
for (int i = 0; i < tuple.getFieldCount(); i++) {
if (!typeTraits[i].isFixedLength()) {
- fieldSlotBytes += encDec.getBytesRequired(tuple.getFieldLength(i));
+ fieldSlotBytes += VarLenIntEncoderDecoder.getBytesRequired(tuple.getFieldLength(i));
}
}
return fieldSlotBytes;
@@ -138,7 +135,7 @@ public class TypeAwareTupleWriter implements ITreeIndexTupleWriter {
int fieldSlotBytes = 0;
for (int i = startField; i < startField + numFields; i++) {
if (!typeTraits[i].isFixedLength()) {
- fieldSlotBytes += encDec.getBytesRequired(tuple.getFieldLength(i));
+ fieldSlotBytes += VarLenIntEncoderDecoder.getBytesRequired(tuple.getFieldLength(i));
}
}
return fieldSlotBytes;
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/VarLenIntEncoderDecoder.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/VarLenIntEncoderDecoder.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/VarLenIntEncoderDecoder.java
deleted file mode 100644
index cd3d366..0000000
--- a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/VarLenIntEncoderDecoder.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.hyracks.storage.am.common.tuples;
-
-// encodes positive integers in a variable-byte format
-
-public class VarLenIntEncoderDecoder {
- public static final int ENCODE_MASK = 0x0000007F;
- public static final byte CONTINUE_CHUNK = (byte) 0x80;
- public static final byte DECODE_MASK = (byte) 0x7F;
-
- private byte[] encTmp = new byte[5];
-
- private int pos;
- private byte[] bytes;
-
- public void reset(byte[] bytes, int pos) {
- this.bytes = bytes;
- this.pos = pos;
- }
-
- public int encode(int val) {
- int origPos = 0;
- int tmpPos = 0;
- while (val > ENCODE_MASK) {
- encTmp[tmpPos++] = (byte) (val & ENCODE_MASK);
- val = val >>> 7;
- }
- encTmp[tmpPos++] = (byte) (val);
-
- // reverse order to optimize for decoding speed
- for (int i = 0; i < tmpPos - 1; i++) {
- bytes[pos++] = (byte) (encTmp[tmpPos - 1 - i] | CONTINUE_CHUNK);
- }
- bytes[pos++] = encTmp[0];
-
- return pos - origPos;
- }
-
- public int decode() {
- int sum = 0;
- while ((bytes[pos] & CONTINUE_CHUNK) == CONTINUE_CHUNK) {
- sum = (sum + (bytes[pos] & DECODE_MASK)) << 7;
- pos++;
- }
- sum += bytes[pos++];
- return sum;
- }
-
- // calculate the number of bytes needed for encoding
- public int getBytesRequired(int val) {
- int byteCount = 0;
- while (val > ENCODE_MASK) {
- val = val >>> 7;
- byteCount++;
- }
- return byteCount + 1;
- }
-
- public int getPos() {
- return pos;
- }
-
- // fast encoding, slow decoding version
- /*
- * public void encode(int val) { while(val > ENCODE_MASK) { bytes[pos++] =
- * (byte)(((byte)(val & ENCODE_MASK)) | CONTINUE_CHUNK); val = val >>> 7; }
- * bytes[pos++] = (byte)(val); }
- *
- * public int decode() { int sum = 0; int shift = 0; while( (bytes[pos] &
- * CONTINUE_CHUNK) == CONTINUE_CHUNK) { sum = (sum + (bytes[pos] &
- * DECODE_MASK)) << 7 * shift++; pos++; } sum += bytes[pos++] << 7 * shift;
- * return sum; }
- */
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-lsm-invertedindex/pom.xml
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/pom.xml b/hyracks/hyracks-storage-am-lsm-invertedindex/pom.xml
index af70253..dc5282e 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/pom.xml
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/pom.xml
@@ -17,41 +17,46 @@
! under the License.
!-->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
- <modelVersion>4.0.0</modelVersion>
- <artifactId>hyracks-storage-am-lsm-invertedindex</artifactId>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <artifactId>hyracks-storage-am-lsm-invertedindex</artifactId>
- <parent>
- <artifactId>hyracks</artifactId>
- <groupId>org.apache.hyracks</groupId>
- <version>0.2.17-SNAPSHOT</version>
- <relativePath>..</relativePath>
- </parent>
+ <parent>
+ <artifactId>hyracks</artifactId>
+ <groupId>org.apache.hyracks</groupId>
+ <version>0.2.17-SNAPSHOT</version>
+ <relativePath>..</relativePath>
+ </parent>
- <licenses>
- <license>
- <name>Apache License, Version 2.0</name>
- <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
- <distribution>repo</distribution>
- <comments>A business-friendly OSS license</comments>
- </license>
- </licenses>
+ <licenses>
+ <license>
+ <name>Apache License, Version 2.0</name>
+ <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
+ <distribution>repo</distribution>
+ <comments>A business-friendly OSS license</comments>
+ </license>
+ </licenses>
-
- <dependencies>
- <dependency>
- <groupId>org.apache.hyracks</groupId>
- <artifactId>hyracks-storage-am-btree</artifactId>
- <version>0.2.17-SNAPSHOT</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hyracks</groupId>
- <artifactId>hyracks-storage-am-lsm-common</artifactId>
- <version>0.2.17-SNAPSHOT</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- </dependencies>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>hyracks-util</artifactId>
+ <version>0.2.17-SNAPSHOT</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>hyracks-storage-am-btree</artifactId>
+ <version>0.2.17-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>hyracks-storage-am-lsm-common</artifactId>
+ <version>0.2.17-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ </dependencies>
</project>
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/AbstractTOccurrenceSearcher.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/AbstractTOccurrenceSearcher.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/AbstractTOccurrenceSearcher.java
index f536a67..7d34198 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/AbstractTOccurrenceSearcher.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/AbstractTOccurrenceSearcher.java
@@ -50,7 +50,7 @@ import org.apache.hyracks.storage.am.lsm.invertedindex.util.ObjectCache;
public abstract class AbstractTOccurrenceSearcher implements IInvertedIndexSearcher {
protected static final RecordDescriptor QUERY_TOKEN_REC_DESC = new RecordDescriptor(
- new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
+ new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
protected final int OBJECT_CACHE_INIT_SIZE = 10;
protected final int OBJECT_CACHE_EXPAND_SIZE = 10;
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8StringBinaryTokenizer.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8StringBinaryTokenizer.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8StringBinaryTokenizer.java
index 1460857..9d4446f 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8StringBinaryTokenizer.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8StringBinaryTokenizer.java
@@ -19,19 +19,16 @@
package org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.util.string.UTF8StringUtil;
public abstract class AbstractUTF8StringBinaryTokenizer implements IBinaryTokenizer {
- protected byte[] data;
- protected int start;
- protected int length;
- protected int tokenLength;
- protected int index;
- protected int originalIndex;
- protected int utf8Length;
- protected boolean tokenCountCalculated = false;
- protected short tokenCount;
+ protected byte[] sentenceBytes;
+ protected int sentenceStartOffset;
+ protected int sentenceEndOffset;
+ protected int sentenceUtf8Length;
+
+ protected int byteIndex;
protected final IntArray tokensStart;
protected final IntArray tokensLength;
@@ -59,27 +56,27 @@ public abstract class AbstractUTF8StringBinaryTokenizer implements IBinaryTokeni
return token;
}
+ //TODO: This UTF8Tokenizer strongly relies on the Asterix data format,
+ // i.e. both the TypeTag skipping and the byteIndex advancement assume that the given byte[] sentence
+ // is an AString object. A better way (if we want to keep the byte[] interface) would be
+ // to give this tokenizer the pure UTF8 character sequence whose {@code start} is the start
+ // of the first character, and to move the offset shifting to the caller.
@Override
- public void reset(byte[] data, int start, int length) {
- this.start = start;
- index = this.start;
+ public void reset(byte[] sentenceData, int start, int length) {
+ this.sentenceBytes = sentenceData;
+ this.sentenceStartOffset = start;
+ this.sentenceEndOffset = length + start;
+
+ byteIndex = this.sentenceStartOffset;
if (sourceHasTypeTag) {
- index++; // skip type tag
+ byteIndex++; // skip type tag
}
- utf8Length = UTF8StringPointable.getUTFLength(data, index);
- index += 2; // skip utf8 length indicator
- this.data = data;
- this.length = length + start;
+ sentenceUtf8Length = UTF8StringUtil.getUTFLength(sentenceData, byteIndex);
+ byteIndex += UTF8StringUtil.getNumBytesToStoreLength(sentenceUtf8Length); // skip utf8 length indicator
- tokenLength = 0;
if (!ignoreTokenCount) {
tokensStart.reset();
tokensLength.reset();
}
-
- // Needed for calculating the number of tokens
- originalIndex = index;
- tokenCountCalculated = false;
- tokenCount = 0;
}
}
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8Token.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8Token.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8Token.java
index 7aeb6fa..9613fb9 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8Token.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8Token.java
@@ -21,17 +21,18 @@ package org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers;
import java.io.DataOutput;
import java.io.IOException;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
import org.apache.hyracks.data.std.util.GrowableArray;
+import org.apache.hyracks.data.std.util.UTF8StringBuilder;
+import org.apache.hyracks.util.string.UTF8StringUtil;
public abstract class AbstractUTF8Token implements IToken {
public static final int GOLDEN_RATIO_32 = 0x09e3779b9;
- protected int length;
+ protected byte[] data;
+ protected int startOffset;
+ protected int endOffset;
protected int tokenLength;
- protected int start;
protected int tokenCount;
- protected byte[] data;
protected final byte tokenTypeTag;
protected final byte countTypeTag;
@@ -51,24 +52,24 @@ public abstract class AbstractUTF8Token implements IToken {
}
@Override
- public int getLength() {
- return length;
+ public int getEndOffset() {
+ return endOffset;
}
- public int getLowerCaseUTF8Len(int size) {
+ public int getLowerCaseUTF8Len(int limit) {
int lowerCaseUTF8Len = 0;
- int pos = start;
- for (int i = 0; i < size; i++) {
- char c = Character.toLowerCase(UTF8StringPointable.charAt(data, pos));
- lowerCaseUTF8Len += UTF8StringPointable.getModifiedUTF8Len(c);
- pos += UTF8StringPointable.charSize(data, pos);
+ int pos = startOffset;
+ for (int i = 0; i < limit; i++) {
+ char c = Character.toLowerCase(UTF8StringUtil.charAt(data, pos));
+ lowerCaseUTF8Len += UTF8StringUtil.getModifiedUTF8Len(c);
+ pos += UTF8StringUtil.charSize(data, pos);
}
return lowerCaseUTF8Len;
}
@Override
- public int getStart() {
- return start;
+ public int getStartOffset() {
+ return startOffset;
}
@Override
@@ -88,11 +89,20 @@ public abstract class AbstractUTF8Token implements IToken {
}
}
+ /**
+ * Note: the {@code startOffset} is the offset of first character, not the string length offset
+ *
+ * @param data
+ * @param startOffset
+ * @param endOffset
+ * @param tokenLength
+ * @param tokenCount the count of this token in a document, or a record, or something else.
+ */
@Override
- public void reset(byte[] data, int start, int length, int tokenLength, int tokenCount) {
+ public void reset(byte[] data, int startOffset, int endOffset, int tokenLength, int tokenCount) {
this.data = data;
- this.start = start;
- this.length = length;
+ this.startOffset = startOffset;
+ this.endOffset = endOffset;
this.tokenLength = tokenLength;
this.tokenCount = tokenCount;
}
@@ -102,4 +112,38 @@ public abstract class AbstractUTF8Token implements IToken {
handleCountTypeTag(out.getDataOutput());
out.getDataOutput().writeInt(tokenCount);
}
+
+ // The preChar and postChar are required to be a single byte utf8 char, e.g. ASCII char.
+ protected void serializeToken(UTF8StringBuilder builder, GrowableArray out, int numPreChars, int numPostChars,
+ char preChar, char postChar)
+ throws IOException {
+
+ handleTokenTypeTag(out.getDataOutput());
+
+ assert UTF8StringUtil.getModifiedUTF8Len(preChar) == 1 && UTF8StringUtil.getModifiedUTF8Len(postChar) == 1;
+ int actualUtfLen = endOffset - startOffset;
+
+ builder.reset(out, actualUtfLen + numPreChars + numPostChars);
+ // pre chars
+ for (int i = 0; i < numPreChars; i++) {
+ builder.appendChar(preChar);
+ }
+
+ /// regular chars
+ int numRegChars = tokenLength - numPreChars - numPostChars;
+ int pos = startOffset;
+ for (int i = 0; i < numRegChars; i++) {
+ char c = Character.toLowerCase(UTF8StringUtil.charAt(data, pos));
+ builder.appendChar(c);
+ pos += UTF8StringUtil.charSize(data, pos);
+ }
+
+ // post chars
+ for (int i = 0; i < numPostChars; i++) {
+ builder.appendChar(postChar);
+ }
+
+ builder.finish();
+ }
+
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java
index ddf3a43..f6d6be4 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java
@@ -19,54 +19,66 @@
package org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.util.string.UTF8StringUtil;
public class DelimitedUTF8StringBinaryTokenizer extends AbstractUTF8StringBinaryTokenizer {
+ protected short tokenCount;
+ private boolean tokenCountCalculated;
+ private int originalIndex;
+
public DelimitedUTF8StringBinaryTokenizer(boolean ignoreTokenCount, boolean sourceHasTypeTag,
ITokenFactory tokenFactory) {
super(ignoreTokenCount, sourceHasTypeTag, tokenFactory);
}
@Override
+ public void reset(byte[] sentenceData, int start, int length) {
+ super.reset(sentenceData, start, length);
+ // Needed for calculating the number of tokens
+ tokenCount = 0;
+ tokenCountCalculated = false;
+ originalIndex = byteIndex;
+ }
+
+ @Override
public boolean hasNext() {
// skip delimiters
- while (index < length && isSeparator(UTF8StringPointable.charAt(data, index))) {
- index += UTF8StringPointable.charSize(data, index);
+ while (byteIndex < sentenceEndOffset && isSeparator(UTF8StringUtil.charAt(sentenceBytes, byteIndex))) {
+ byteIndex += UTF8StringUtil.charSize(sentenceBytes, byteIndex);
}
- return index < length;
+ return byteIndex < sentenceEndOffset;
}
- private boolean isSeparator(char c) {
- return !(Character.isLetterOrDigit(c) || Character.getType(c) == Character.OTHER_LETTER || Character.getType(c) == Character.OTHER_NUMBER);
+ private static boolean isSeparator(char c) {
+ return !(Character.isLetterOrDigit(c) || Character.getType(c) == Character.OTHER_LETTER
+ || Character.getType(c) == Character.OTHER_NUMBER);
}
@Override
public void next() {
- tokenLength = 0;
- int currentTokenStart = index;
- while (index < length && !isSeparator(UTF8StringPointable.charAt(data, index))) {
- index += UTF8StringPointable.charSize(data, index);
+ int tokenLength = 0;
+ int currentTokenStart = byteIndex;
+ while (byteIndex < sentenceEndOffset && !isSeparator(UTF8StringUtil.charAt(sentenceBytes, byteIndex))) {
+ byteIndex += UTF8StringUtil.charSize(sentenceBytes, byteIndex);
tokenLength++;
}
- int tokenCount = 1;
+ int curTokenCount = 1;
if (tokenLength > 0 && !ignoreTokenCount) {
// search if we got the same token before
for (int i = 0; i < tokensStart.length(); ++i) {
if (tokenLength == tokensLength.get(i)) {
int tokenStart = tokensStart.get(i);
- tokenCount++; // assume we found it
+ curTokenCount++; // assume we found it
int offset = 0;
- int currLength = 0;
- while (currLength < tokenLength) {
+ for (int charPos= 0; charPos < tokenLength; charPos++) {
// case insensitive comparison
- if (Character.toLowerCase(UTF8StringPointable.charAt(data, currentTokenStart + offset)) != Character
- .toLowerCase(UTF8StringPointable.charAt(data, tokenStart + offset))) {
- tokenCount--;
+ if (Character.toLowerCase(UTF8StringUtil.charAt(sentenceBytes, currentTokenStart + offset))
+ != Character.toLowerCase(UTF8StringUtil.charAt(sentenceBytes, tokenStart + offset))) {
+ curTokenCount--;
break;
}
- offset += UTF8StringPointable.charSize(data, currentTokenStart + offset);
- currLength++;
+ offset += UTF8StringUtil.charSize(sentenceBytes, currentTokenStart + offset);
}
}
}
@@ -76,16 +88,19 @@ public class DelimitedUTF8StringBinaryTokenizer extends AbstractUTF8StringBinary
}
// set token
- token.reset(data, currentTokenStart, index, tokenLength, tokenCount);
+ token.reset(sentenceBytes, currentTokenStart, byteIndex, tokenLength, curTokenCount);
+ tokenCount++;
}
+
+ // TODO: Why do we bother to get the tokenCount in advance? It seems to be the caller's problem.
@Override
public short getTokensCount() {
if (!tokenCountCalculated) {
tokenCount = 0;
boolean previousCharIsSeparator = true;
- while (originalIndex < length) {
- if (isSeparator(UTF8StringPointable.charAt(data, originalIndex))) {
+ while (originalIndex < sentenceEndOffset) {
+ if (isSeparator(UTF8StringUtil.charAt(sentenceBytes, originalIndex))) {
previousCharIsSeparator = true;
} else {
if (previousCharIsSeparator) {
@@ -93,7 +108,7 @@ public class DelimitedUTF8StringBinaryTokenizer extends AbstractUTF8StringBinary
previousCharIsSeparator = false;
}
}
- originalIndex += UTF8StringPointable.charSize(data, originalIndex);
+ originalIndex += UTF8StringUtil.charSize(sentenceBytes, originalIndex);
}
}
return tokenCount;
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8NGramToken.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8NGramToken.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8NGramToken.java
index 43ee3c0..8ffd355 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8NGramToken.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8NGramToken.java
@@ -21,8 +21,8 @@ package org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers;
import java.io.IOException;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
import org.apache.hyracks.data.std.util.GrowableArray;
+import org.apache.hyracks.util.string.UTF8StringUtil;
public class HashedUTF8NGramToken extends UTF8NGramToken {
public HashedUTF8NGramToken(byte tokenTypeTag, byte countTypeTag) {
@@ -43,11 +43,11 @@ public class HashedUTF8NGramToken extends UTF8NGramToken {
// regular chars
int numRegGrams = tokenLength - numPreChars - numPostChars;
- int pos = start;
+ int pos = startOffset;
for (int i = 0; i < numRegGrams; i++) {
- hash ^= Character.toLowerCase(UTF8StringPointable.charAt(data, pos));
+ hash ^= Character.toLowerCase(UTF8StringUtil.charAt(data, pos));
hash *= GOLDEN_RATIO_32;
- pos += UTF8StringPointable.charSize(data, pos);
+ pos += UTF8StringUtil.charSize(data, pos);
}
// post chars
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8WordToken.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8WordToken.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8WordToken.java
index 18f958d..150ffd6 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8WordToken.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8WordToken.java
@@ -21,8 +21,8 @@ package org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers;
import java.io.IOException;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
import org.apache.hyracks.data.std.util.GrowableArray;
+import org.apache.hyracks.util.string.UTF8StringUtil;
public class HashedUTF8WordToken extends UTF8WordToken {
@@ -46,11 +46,11 @@ public class HashedUTF8WordToken extends UTF8WordToken {
}
int offset = 0;
for (int i = 0; i < tokenLength; i++) {
- if (UTF8StringPointable.charAt(t.getData(), t.getStart() + offset) != UTF8StringPointable.charAt(data,
- start + offset)) {
+ if (UTF8StringUtil.charAt(t.getData(), t.getStartOffset() + offset) != UTF8StringUtil.charAt(data,
+ startOffset + offset)) {
return false;
}
- offset += UTF8StringPointable.charSize(data, start + offset);
+ offset += UTF8StringUtil.charSize(data, startOffset + offset);
}
return true;
}
@@ -61,16 +61,16 @@ public class HashedUTF8WordToken extends UTF8WordToken {
}
@Override
- public void reset(byte[] data, int start, int length, int tokenLength, int tokenCount) {
- super.reset(data, start, length, tokenLength, tokenCount);
+ public void reset(byte[] data, int startOffset, int endOffset, int tokenLength, int tokenCount) {
+ super.reset(data, startOffset, endOffset, tokenLength, tokenCount);
// pre-compute hash value using JAQL-like string hashing
- int pos = start;
+ int pos = startOffset;
hash = GOLDEN_RATIO_32;
for (int i = 0; i < tokenLength; i++) {
- hash ^= Character.toLowerCase(UTF8StringPointable.charAt(data, pos));
+ hash ^= Character.toLowerCase(UTF8StringUtil.charAt(data, pos));
hash *= GOLDEN_RATIO_32;
- pos += UTF8StringPointable.charSize(data, pos);
+ pos += UTF8StringUtil.charSize(data, pos);
}
hash += tokenCount;
}
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/IToken.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/IToken.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/IToken.java
index d48af44..cb1b098 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/IToken.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/IToken.java
@@ -24,18 +24,26 @@ import java.io.IOException;
import org.apache.hyracks.data.std.util.GrowableArray;
public interface IToken {
- public byte[] getData();
+ public byte[] getData();
- public int getLength();
+ public int getEndOffset();
- public int getStart();
+ public int getStartOffset();
- public int getTokenLength();
+ public int getTokenLength();
- public void reset(byte[] data, int start, int length, int tokenLength,
- int tokenCount);
+ /**
+ * reset the storage byte array.
+ *
+ * @param data
+ * @param startOffset
+ * @param endOffset
+ * @param tokenLength
+ * @param tokenCount the count of this token in a document, or a record, or something else.
+ */
+ public void reset(byte[] data, int startOffset, int endOffset, int tokenLength, int tokenCount);
- public void serializeToken(GrowableArray out) throws IOException;
+ public void serializeToken(GrowableArray out) throws IOException;
- public void serializeTokenCount(GrowableArray out) throws IOException;
+ public void serializeTokenCount(GrowableArray out) throws IOException;
}
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java
index def7ad2..9161a54 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java
@@ -19,7 +19,7 @@
package org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.util.string.UTF8StringUtil;
public class NGramUTF8StringBinaryTokenizer extends AbstractUTF8StringBinaryTokenizer {
@@ -50,7 +50,7 @@ public class NGramUTF8StringBinaryTokenizer extends AbstractUTF8StringBinaryToke
@Override
public void next() {
- int currentTokenStart = index;
+ int currentTokenStart = byteIndex;
int tokenCount = 1;
int numPreChars = 0;
int numPostChars = 0;
@@ -62,46 +62,48 @@ public class NGramUTF8StringBinaryTokenizer extends AbstractUTF8StringBinaryToke
concreteToken.setNumPrePostChars(numPreChars, numPostChars);
if (numPreChars == 0) {
- index += UTF8StringPointable.charSize(data, index);
+ byteIndex += UTF8StringUtil.charSize(sentenceBytes, byteIndex);
}
// compute token count
// ignore pre and post grams for duplicate detection
if (!ignoreTokenCount && numPreChars == 0 && numPostChars == 0) {
- int tmpIndex = start + 2; // skip utf8 length indicator
+ int tmpIndex = sentenceStartOffset;
if (sourceHasTypeTag) {
tmpIndex++; // skip type tag
}
+ int utfLength = UTF8StringUtil.getUTFLength(sentenceBytes, tmpIndex);
+ tmpIndex += UTF8StringUtil.getNumBytesToStoreLength(utfLength); // skip utf8 length indicator
while (tmpIndex < currentTokenStart) {
tokenCount++; // assume found
int offset = 0;
for (int j = 0; j < gramLength; j++) {
- if (Character.toLowerCase(UTF8StringPointable.charAt(data, currentTokenStart + offset)) != Character
- .toLowerCase(UTF8StringPointable.charAt(data, tmpIndex + offset))) {
+ if (Character.toLowerCase(UTF8StringUtil.charAt(sentenceBytes, currentTokenStart + offset))
+ != Character.toLowerCase(UTF8StringUtil.charAt(sentenceBytes, tmpIndex + offset))) {
tokenCount--;
break;
}
- offset += UTF8StringPointable.charSize(data, tmpIndex + offset);
+ offset += UTF8StringUtil.charSize(sentenceBytes, tmpIndex + offset);
}
- tmpIndex += UTF8StringPointable.charSize(data, tmpIndex);
+ tmpIndex += UTF8StringUtil.charSize(sentenceBytes, tmpIndex);
}
}
// set token
- token.reset(data, currentTokenStart, length, gramLength, tokenCount);
+ token.reset(sentenceBytes, currentTokenStart, sentenceEndOffset, gramLength, tokenCount);
}
@Override
- public void reset(byte[] data, int start, int length) {
- super.reset(data, start, length);
+ public void reset(byte[] sentenceData, int start, int length) {
+ super.reset(sentenceData, start, length);
gramNum = 0;
int numChars = 0;
- int pos = index;
- int end = pos + utf8Length;
+ int pos = byteIndex;
+ int end = pos + sentenceUtf8Length;
while (pos < end) {
numChars++;
- pos += UTF8StringPointable.charSize(data, pos);
+ pos += UTF8StringUtil.charSize(sentenceData, pos);
}
if (usePrePost) {
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8NGramToken.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8NGramToken.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8NGramToken.java
index 7d68d6f..259288c 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8NGramToken.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8NGramToken.java
@@ -21,9 +21,8 @@ package org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers;
import java.io.IOException;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
import org.apache.hyracks.data.std.util.GrowableArray;
-import org.apache.hyracks.dataflow.common.data.util.StringUtils;
+import org.apache.hyracks.data.std.util.UTF8StringBuilder;
public class UTF8NGramToken extends AbstractUTF8Token implements INGramToken {
@@ -34,6 +33,8 @@ public class UTF8NGramToken extends AbstractUTF8Token implements INGramToken {
protected int numPreChars;
protected int numPostChars;
+ private UTF8StringBuilder builder = new UTF8StringBuilder();
+
public UTF8NGramToken(byte tokenTypeTag, byte countTypeTag) {
super(tokenTypeTag, countTypeTag);
}
@@ -50,38 +51,7 @@ public class UTF8NGramToken extends AbstractUTF8Token implements INGramToken {
@Override
public void serializeToken(GrowableArray out) throws IOException {
- handleTokenTypeTag(out.getDataOutput());
- int tokenUTF8LenOff = out.getLength();
-
- // regular chars
- int numRegChars = tokenLength - numPreChars - numPostChars;
-
- // assuming pre and post char need 1-byte each in utf8
- int tokenUTF8Len = numPreChars + numPostChars;
-
- // Write dummy UTF length which will be correctly set later.
- out.getDataOutput().writeShort(0);
-
- // pre chars
- for (int i = 0; i < numPreChars; i++) {
- StringUtils.writeCharAsModifiedUTF8(PRECHAR, out.getDataOutput());
- }
-
- int pos = start;
- for (int i = 0; i < numRegChars; i++) {
- char c = Character.toLowerCase(UTF8StringPointable.charAt(data, pos));
- tokenUTF8Len += StringUtils.writeCharAsModifiedUTF8(c, out.getDataOutput());
- pos += UTF8StringPointable.charSize(data, pos);
- }
-
- // post chars
- for (int i = 0; i < numPostChars; i++) {
- StringUtils.writeCharAsModifiedUTF8(POSTCHAR, out.getDataOutput());
- }
-
- // Set UTF length of token.
- out.getByteArray()[tokenUTF8LenOff] = (byte) ((tokenUTF8Len >>> 8) & 0xFF);
- out.getByteArray()[tokenUTF8LenOff + 1] = (byte) ((tokenUTF8Len >>> 0) & 0xFF);
+ super.serializeToken(builder, out, numPreChars, numPostChars, PRECHAR, POSTCHAR);
}
public void setNumPrePostChars(int numPreChars, int numPostChars) {
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8WordToken.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8WordToken.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8WordToken.java
index caaa682..bc7085c 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8WordToken.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8WordToken.java
@@ -21,31 +21,21 @@ package org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers;
import java.io.IOException;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
import org.apache.hyracks.data.std.util.GrowableArray;
-import org.apache.hyracks.dataflow.common.data.util.StringUtils;
+import org.apache.hyracks.data.std.util.UTF8StringBuilder;
public class UTF8WordToken extends AbstractUTF8Token {
+ private static char NULL_PLACEHOLDER = 1; // can't be 0, because modified UTF-8 encodes the char 0 in 2 bytes
+
+ private UTF8StringBuilder builder = new UTF8StringBuilder();
+
public UTF8WordToken(byte tokenTypeTag, byte countTypeTag) {
super(tokenTypeTag, countTypeTag);
}
@Override
public void serializeToken(GrowableArray out) throws IOException {
- handleTokenTypeTag(out.getDataOutput());
- int tokenUTF8LenOff = out.getLength();
- int tokenUTF8Len = 0;
- // Write dummy UTF length which will be correctly set later.
- out.getDataOutput().writeShort(0);
- int pos = start;
- for (int i = 0; i < tokenLength; i++) {
- char c = Character.toLowerCase(UTF8StringPointable.charAt(data, pos));
- tokenUTF8Len += StringUtils.writeCharAsModifiedUTF8(c, out.getDataOutput());
- pos += UTF8StringPointable.charSize(data, pos);
- }
- // Set UTF length of token.
- out.getByteArray()[tokenUTF8LenOff] = (byte) ((tokenUTF8Len >>> 8) & 0xFF);
- out.getByteArray()[tokenUTF8LenOff + 1] = (byte) ((tokenUTF8Len >>> 0) & 0xFF);
+ super.serializeToken(builder, out, 0, 0, NULL_PLACEHOLDER, NULL_PLACEHOLDER);
}
}
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-rtree/src/main/java/org/apache/hyracks/storage/am/rtree/tuples/RTreeTypeAwareTupleWriter.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-rtree/src/main/java/org/apache/hyracks/storage/am/rtree/tuples/RTreeTypeAwareTupleWriter.java b/hyracks/hyracks-storage-am-rtree/src/main/java/org/apache/hyracks/storage/am/rtree/tuples/RTreeTypeAwareTupleWriter.java
index d2332f0..40b0481 100644
--- a/hyracks/hyracks-storage-am-rtree/src/main/java/org/apache/hyracks/storage/am/rtree/tuples/RTreeTypeAwareTupleWriter.java
+++ b/hyracks/hyracks-storage-am-rtree/src/main/java/org/apache/hyracks/storage/am/rtree/tuples/RTreeTypeAwareTupleWriter.java
@@ -24,6 +24,7 @@ import java.nio.ByteBuffer;
import org.apache.hyracks.api.dataflow.value.ITypeTraits;
import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleReference;
import org.apache.hyracks.storage.am.common.tuples.TypeAwareTupleWriter;
+import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
public class RTreeTypeAwareTupleWriter extends TypeAwareTupleWriter {
@@ -41,13 +42,11 @@ public class RTreeTypeAwareTupleWriter extends TypeAwareTupleWriter {
// write field slots for variable length fields
// since the r-tree has fixed length keys, we don't actually need this?
- encDec.reset(targetBuf.array(), runner);
for (int i = startField; i < startField + refs.length; i++) {
if (!typeTraits[i].isFixedLength()) {
- encDec.encode(refs[i].getFieldLength(i));
+ runner += VarLenIntEncoderDecoder.encode(refs[i].getFieldLength(i), targetBuf.array(), runner);
}
}
- runner = encDec.getPos();
// write data
for (int i = 0; i < refs.length; i++) {
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexExamplesTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexExamplesTest.java b/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexExamplesTest.java
index f79997b..b8f2166 100644
--- a/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexExamplesTest.java
+++ b/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexExamplesTest.java
@@ -164,8 +164,8 @@ public abstract class OrderedIndexExamplesTest {
typeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
typeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
// Declare field serdes.
- ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() };
// Declare keys.
int keyFieldCount = 1;
@@ -324,8 +324,8 @@ public abstract class OrderedIndexExamplesTest {
typeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
typeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
// Declare field serdes.
- ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() };
// Declare keys.
int keyFieldCount = 1;
@@ -408,8 +408,8 @@ public abstract class OrderedIndexExamplesTest {
typeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
typeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
// Declare field serdes.
- ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() };
// Declare keys.
int keyFieldCount = 1;
@@ -514,8 +514,8 @@ public abstract class OrderedIndexExamplesTest {
typeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
typeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
// Declare field serdes.
- ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() };
// Declare keys.
int keyFieldCount = 1;
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexMultiThreadTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexMultiThreadTest.java b/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexMultiThreadTest.java
index 160f9bf..e181710 100644
--- a/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexMultiThreadTest.java
+++ b/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexMultiThreadTest.java
@@ -120,7 +120,7 @@ public abstract class OrderedIndexMultiThreadTest {
@Test
public void oneStringKeyAndValue() throws InterruptedException, TreeIndexException, HyracksException {
ISerializerDeserializer[] fieldSerdes = new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE };
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
int numKeys = 1;
String dataMsg = "One String Key And Value";
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexTestDriver.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexTestDriver.java b/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexTestDriver.java
index 0ec313b..b1e8a8c 100644
--- a/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexTestDriver.java
+++ b/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexTestDriver.java
@@ -124,8 +124,8 @@ public abstract class OrderedIndexTestDriver {
LOGGER.info("BTree " + getTestOpName() + " Test With One String Key And Value.");
}
- ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() };
// Range search in ["cbf", cc7"]
ITupleReference lowKey = TupleUtils.createTuple(fieldSerdes, "cbf");
@@ -142,8 +142,8 @@ public abstract class OrderedIndexTestDriver {
LOGGER.info("BTree " + getTestOpName() + " Test With Two String Keys.");
}
- ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() };
// Range search in ["cbf", "ddd", cc7", "eee"]
ITupleReference lowKey = TupleUtils.createTuple(fieldSerdes, "cbf", "ddd");
@@ -164,9 +164,9 @@ public abstract class OrderedIndexTestDriver {
LOGGER.info("BTree " + getTestOpName() + " Test With Two String Keys And Values.");
}
- ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() };
// Range search in ["cbf", "ddd", cc7", "eee"]
ITupleReference lowKey = TupleUtils.createTuple(fieldSerdes, "cbf", "ddd");
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/rtree/AbstractRTreeExamplesTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/rtree/AbstractRTreeExamplesTest.java b/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/rtree/AbstractRTreeExamplesTest.java
index 6cd81c3..a3029f8 100644
--- a/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/rtree/AbstractRTreeExamplesTest.java
+++ b/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/rtree/AbstractRTreeExamplesTest.java
@@ -210,7 +210,7 @@ public abstract class AbstractRTreeExamplesTest {
// Declare field serdes.
ISerializerDeserializer[] fieldSerdes = { IntegerSerializerDeserializer.INSTANCE,
IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE };
+ IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() };
// Declare RTree keys.
int rtreeKeyFieldCount = 4;
@@ -350,7 +350,7 @@ public abstract class AbstractRTreeExamplesTest {
// Declare field serdes.
ISerializerDeserializer[] fieldSerdes = { IntegerSerializerDeserializer.INSTANCE,
IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE };
+ IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() };
// Declare RTree keys.
int rtreeKeyFieldCount = 4;
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/BloomFilterTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/BloomFilterTest.java b/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/BloomFilterTest.java
index 49df30f..80a69c4 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/BloomFilterTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/BloomFilterTest.java
@@ -135,9 +135,9 @@ public class BloomFilterTest extends AbstractBloomFilterTest {
bloomFilterSpec.getNumBucketsPerElements());
int fieldCount = 5;
- ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
ArrayTupleReference tuple = new ArrayTupleReference();
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/MurmurHashForITupleReferenceTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/MurmurHashForITupleReferenceTest.java b/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/MurmurHashForITupleReferenceTest.java
index 1d7aa90..3284f8d 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/MurmurHashForITupleReferenceTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/MurmurHashForITupleReferenceTest.java
@@ -109,7 +109,7 @@ public class MurmurHashForITupleReferenceTest extends AbstractBloomFilterTest {
}
int fieldCount = 2;
- ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer() };
ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
ArrayTupleReference tuple = new ArrayTupleReference();
String s = randomString(100, rnd);
@@ -137,8 +137,8 @@ public class MurmurHashForITupleReferenceTest extends AbstractBloomFilterTest {
}
int fieldCount = 3;
- ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
ArrayTupleReference tuple = new ArrayTupleReference();
String s1 = randomString(40, rnd);
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/org/apache/hyracks/storage/am/lsm/btree/tuples/LSMBTreeTuplesTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/org/apache/hyracks/storage/am/lsm/btree/tuples/LSMBTreeTuplesTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/org/apache/hyracks/storage/am/lsm/btree/tuples/LSMBTreeTuplesTest.java
index a7215a5..d537bf9 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/org/apache/hyracks/storage/am/lsm/btree/tuples/LSMBTreeTuplesTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/org/apache/hyracks/storage/am/lsm/btree/tuples/LSMBTreeTuplesTest.java
@@ -161,14 +161,14 @@ public class LSMBTreeTuplesTest {
testLSMBTreeTuple(intFields);
ISerializerDeserializer[] stringFields = new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE };
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() };
testLSMBTreeTuple(stringFields);
ISerializerDeserializer[] mixedFields = new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
IntegerSerializerDeserializer.INSTANCE };
testLSMBTreeTuple(mixedFields);
}
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/pom.xml
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/pom.xml b/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/pom.xml
index f2896cb..11a57a2 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/pom.xml
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/pom.xml
@@ -17,40 +17,47 @@
! under the License.
!-->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
- <modelVersion>4.0.0</modelVersion>
- <artifactId>hyracks-storage-am-lsm-invertedindex-test</artifactId>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <artifactId>hyracks-storage-am-lsm-invertedindex-test</artifactId>
- <parent>
- <artifactId>hyracks-tests</artifactId>
- <groupId>org.apache.hyracks</groupId>
- <version>0.2.17-SNAPSHOT</version>
- <relativePath>..</relativePath>
- </parent>
+ <parent>
+ <artifactId>hyracks-tests</artifactId>
+ <groupId>org.apache.hyracks</groupId>
+ <version>0.2.17-SNAPSHOT</version>
+ <relativePath>..</relativePath>
+ </parent>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>hyracks-storage-am-lsm-invertedindex</artifactId>
+ <version>0.2.17-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>hyracks-test-support</artifactId>
+ <version>0.2.17-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>hyracks-data-std</artifactId>
+ <version>0.2.17-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>hyracks-util</artifactId>
+ <version>0.2.17-SNAPSHOT</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
- <dependencies>
- <dependency>
- <groupId>org.apache.hyracks</groupId>
- <artifactId>hyracks-storage-am-lsm-invertedindex</artifactId>
- <version>0.2.17-SNAPSHOT</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hyracks</groupId>
- <artifactId>hyracks-test-support</artifactId>
- <version>0.2.17-SNAPSHOT</version>
- <type>jar</type>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hyracks</groupId>
- <artifactId>hyracks-data-std</artifactId>
- <version>0.2.17-SNAPSHOT</version>
- <type>jar</type>
- <scope>test</scope>
- </dependency>
- </dependencies>
-
-</project>
+</project>
\ No newline at end of file