You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by ct...@apache.org on 2019/07/22 21:31:12 UTC
[accumulo] branch 2.0 updated: Fix #1252 Created SequenceLexicoder
(#1257)
This is an automated email from the ASF dual-hosted git repository.
ctubbsii pushed a commit to branch 2.0
in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/2.0 by this push:
new 632a210 Fix #1252 Created SequenceLexicoder (#1257)
632a210 is described below
commit 632a2100fcebc1d0a20e6fe608f8946ea9c37d73
Author: Richard W. Eggert II <ri...@gmail.com>
AuthorDate: Mon Jul 22 17:31:07 2019 -0400
Fix #1252 Created SequenceLexicoder (#1257)
created SequenceLexicoder that supports empty lists
---
.../core/client/lexicoder/ListLexicoder.java | 6 ++
.../core/client/lexicoder/SequenceLexicoder.java | 96 ++++++++++++++++++++++
.../core/client/lexicoder/ListLexicoderTest.java | 6 ++
.../client/lexicoder/SequenceLexicoderTest.java | 78 ++++++++++++++++++
4 files changed, 186 insertions(+)
diff --git a/core/src/main/java/org/apache/accumulo/core/client/lexicoder/ListLexicoder.java b/core/src/main/java/org/apache/accumulo/core/client/lexicoder/ListLexicoder.java
index 2e53976..a2fd35f 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/lexicoder/ListLexicoder.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/lexicoder/ListLexicoder.java
@@ -30,6 +30,9 @@ import org.apache.accumulo.core.clientImpl.lexicoder.AbstractLexicoder;
* A lexicoder to encode/decode a Java List to/from a byte array where the concatenation of each
* encoded element sorts lexicographically.
*
+ * Note: Empty lists are not supported. See {@link SequenceLexicoder} for an encoding that supports
+ * empty lists.
+ *
* @since 1.6.0
*/
public class ListLexicoder<LT> extends AbstractLexicoder<List<LT>> {
@@ -47,6 +50,9 @@ public class ListLexicoder<LT> extends AbstractLexicoder<List<LT>> {
*/
@Override
public byte[] encode(List<LT> v) {
+ if (v.isEmpty()) {
+ throw new IllegalArgumentException("ListLexicoder does not support empty lists");
+ }
byte[][] encElements = new byte[v.size()][];
int index = 0;
diff --git a/core/src/main/java/org/apache/accumulo/core/client/lexicoder/SequenceLexicoder.java b/core/src/main/java/org/apache/accumulo/core/client/lexicoder/SequenceLexicoder.java
new file mode 100644
index 0000000..8baa59b
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/client/lexicoder/SequenceLexicoder.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.lexicoder;
+
+import static java.util.Objects.requireNonNull;
+import static org.apache.accumulo.core.clientImpl.lexicoder.ByteUtils.concat;
+import static org.apache.accumulo.core.clientImpl.lexicoder.ByteUtils.escape;
+import static org.apache.accumulo.core.clientImpl.lexicoder.ByteUtils.split;
+import static org.apache.accumulo.core.clientImpl.lexicoder.ByteUtils.unescape;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.accumulo.core.clientImpl.lexicoder.AbstractLexicoder;
+
+/**
+ * A Lexicoder to encode/decode a Java List to/from a byte array where the concatenation of each
+ * encoded element sorts lexicographically.
+ *
+ * Note: Unlike {@link ListLexicoder}, this implementation supports empty lists.
+ *
+ * The lists are encoded with the elements separated by null (0x0) bytes, which null bytes appearing
+ * in the elements escaped as two 0x1 bytes, and 0x1 bytes appearing in the elements escaped as 0x1
+ * and 0x2 bytes. The list is terminated with a final delimiter after the last element, with no
+ * bytes following it. An empty list is represented as an empty byte array, with no delimiter,
+ * whereas a list with a single empty element is represented as a single terminating delimiter.
+ *
+ * @since 2.0.0
+ * @param <E>
+ * list element type.
+ */
+public class SequenceLexicoder<E> extends AbstractLexicoder<List<E>> {
+
+ private static final byte[] EMPTY_ELEMENT = new byte[0];
+ private final Lexicoder<E> elementLexicoder;
+
+ /**
+ * Primary constructor.
+ *
+ * @param elementLexicoder
+ * Lexicoder to apply to elements.
+ */
+ public SequenceLexicoder(final Lexicoder<E> elementLexicoder) {
+ this.elementLexicoder = requireNonNull(elementLexicoder, "elementLexicoder");
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @return a byte array containing the concatenation of each element in the list encoded.
+ */
+ @Override
+ public byte[] encode(final List<E> v) {
+ final byte[][] encElements = new byte[v.size() + 1][];
+ int index = 0;
+ for (final E element : v) {
+ encElements[index++] = escape(elementLexicoder.encode(element));
+ }
+ encElements[v.size()] = EMPTY_ELEMENT;
+ return concat(encElements);
+ }
+
+ @Override
+ protected List<E> decodeUnchecked(final byte[] b, final int offset, final int len) {
+ final byte[][] escapedElements = split(b, offset, len);
+ assert escapedElements.length
+ > 0 : "ByteUtils.split always returns a minimum of 1 element, even for empty input";
+ // There should be no bytes after the final delimiter. Lack of delimiter indicates empty list.
+ final byte[] lastElement = escapedElements[escapedElements.length - 1];
+ if (lastElement.length > 0) {
+ throw new IllegalArgumentException(
+ lastElement.length + " trailing bytes found at end of list");
+ }
+ final ArrayList<E> decodedElements = new ArrayList<>(escapedElements.length - 1);
+ for (int i = 0; i < escapedElements.length - 1; i++) {
+ final byte[] escapedElement = escapedElements[i];
+ final byte[] unescapedElement = unescape(escapedElement);
+ decodedElements.add(elementLexicoder.decode(unescapedElement));
+ }
+ return decodedElements;
+ }
+}
diff --git a/core/src/test/java/org/apache/accumulo/core/client/lexicoder/ListLexicoderTest.java b/core/src/test/java/org/apache/accumulo/core/client/lexicoder/ListLexicoderTest.java
index 6f9dbed..352a0c3 100644
--- a/core/src/test/java/org/apache/accumulo/core/client/lexicoder/ListLexicoderTest.java
+++ b/core/src/test/java/org/apache/accumulo/core/client/lexicoder/ListLexicoderTest.java
@@ -16,6 +16,7 @@
*/
package org.apache.accumulo.core.client.lexicoder;
+import static java.util.Collections.emptyList;
import static org.junit.Assert.assertEquals;
import java.util.ArrayList;
@@ -92,4 +93,9 @@ public class ListLexicoderTest extends AbstractLexicoderTest {
assertDecodes(new ListLexicoder<>(new LongLexicoder()), data4);
assertDecodes(new ListLexicoder<>(new LongLexicoder()), data5);
}
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testRejectsEmptyLists() {
+ new ListLexicoder<>(new LongLexicoder()).encode(emptyList());
+ }
}
diff --git a/core/src/test/java/org/apache/accumulo/core/client/lexicoder/SequenceLexicoderTest.java b/core/src/test/java/org/apache/accumulo/core/client/lexicoder/SequenceLexicoderTest.java
new file mode 100644
index 0000000..9ef0984
--- /dev/null
+++ b/core/src/test/java/org/apache/accumulo/core/client/lexicoder/SequenceLexicoderTest.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.lexicoder;
+
+import static java.util.Arrays.asList;
+import static java.util.Collections.emptyList;
+import static java.util.Collections.singletonList;
+import static org.junit.Assert.assertEquals;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.TreeSet;
+
+import org.apache.accumulo.core.clientImpl.lexicoder.AbstractLexicoderTest;
+import org.apache.accumulo.core.util.TextUtil;
+import org.apache.hadoop.io.Text;
+import org.junit.Test;
+
+/**
+ * Unit tests for {@link SequenceLexicoder}.
+ */
+public class SequenceLexicoderTest extends AbstractLexicoderTest {
+
+ private final List<String> nodata = emptyList();
+ private final List<String> data0 = singletonList("");
+ private final List<String> data1 = asList("a", "b");
+ private final List<String> data2 = singletonList("a");
+ private final List<String> data3 = asList("a", "c");
+ private final List<String> data4 = asList("a", "b", "c");
+ private final List<String> data5 = asList("b", "a");
+
+ @Test
+ public void testSortOrder() {
+ // expected sort order
+ final List<List<String>> data = asList(nodata, data0, data2, data1, data4, data3, data5);
+ final TreeSet<Text> sortedEnc = new TreeSet<>();
+ final SequenceLexicoder<String> sequenceLexicoder =
+ new SequenceLexicoder<>(new StringLexicoder());
+ for (final List<String> list : data) {
+ sortedEnc.add(new Text(sequenceLexicoder.encode(list)));
+ }
+ final List<List<String>> unenc = new ArrayList<>();
+ for (final Text enc : sortedEnc) {
+ unenc.add(sequenceLexicoder.decode(TextUtil.getBytes(enc)));
+ }
+ assertEquals(data, unenc);
+ }
+
+ @Test
+ public void testDecodes() {
+ assertDecodes(new SequenceLexicoder<>(new StringLexicoder()), nodata);
+ assertDecodes(new SequenceLexicoder<>(new StringLexicoder()), data0);
+ assertDecodes(new SequenceLexicoder<>(new StringLexicoder()), data1);
+ assertDecodes(new SequenceLexicoder<>(new StringLexicoder()), data2);
+ assertDecodes(new SequenceLexicoder<>(new StringLexicoder()), data3);
+ assertDecodes(new SequenceLexicoder<>(new StringLexicoder()), data4);
+ assertDecodes(new SequenceLexicoder<>(new StringLexicoder()), data5);
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void tesRejectsTrailingBytes() {
+ new SequenceLexicoder<>(new StringLexicoder()).decode(new byte[] {10});
+ }
+}