You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by ct...@apache.org on 2019/07/22 21:31:12 UTC

[accumulo] branch 2.0 updated: Fix #1252 Created SequenceLexicoder (#1257)

This is an automated email from the ASF dual-hosted git repository.

ctubbsii pushed a commit to branch 2.0
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/2.0 by this push:
     new 632a210  Fix #1252 Created SequenceLexicoder (#1257)
632a210 is described below

commit 632a2100fcebc1d0a20e6fe608f8946ea9c37d73
Author: Richard W. Eggert II <ri...@gmail.com>
AuthorDate: Mon Jul 22 17:31:07 2019 -0400

    Fix #1252 Created SequenceLexicoder (#1257)
    
    created SequenceLexicoder that supports empty lists
---
 .../core/client/lexicoder/ListLexicoder.java       |  6 ++
 .../core/client/lexicoder/SequenceLexicoder.java   | 96 ++++++++++++++++++++++
 .../core/client/lexicoder/ListLexicoderTest.java   |  6 ++
 .../client/lexicoder/SequenceLexicoderTest.java    | 78 ++++++++++++++++++
 4 files changed, 186 insertions(+)

diff --git a/core/src/main/java/org/apache/accumulo/core/client/lexicoder/ListLexicoder.java b/core/src/main/java/org/apache/accumulo/core/client/lexicoder/ListLexicoder.java
index 2e53976..a2fd35f 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/lexicoder/ListLexicoder.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/lexicoder/ListLexicoder.java
@@ -30,6 +30,9 @@ import org.apache.accumulo.core.clientImpl.lexicoder.AbstractLexicoder;
  * A lexicoder to encode/decode a Java List to/from a byte array where the concatenation of each
  * encoded element sorts lexicographically.
  *
+ * Note: Empty lists are not supported. See {@link SequenceLexicoder} for an encoding that supports
+ * empty lists.
+ *
  * @since 1.6.0
  */
 public class ListLexicoder<LT> extends AbstractLexicoder<List<LT>> {
@@ -47,6 +50,9 @@ public class ListLexicoder<LT> extends AbstractLexicoder<List<LT>> {
    */
   @Override
   public byte[] encode(List<LT> v) {
+    if (v.isEmpty()) {
+      throw new IllegalArgumentException("ListLexicoder does not support empty lists");
+    }
     byte[][] encElements = new byte[v.size()][];
 
     int index = 0;
diff --git a/core/src/main/java/org/apache/accumulo/core/client/lexicoder/SequenceLexicoder.java b/core/src/main/java/org/apache/accumulo/core/client/lexicoder/SequenceLexicoder.java
new file mode 100644
index 0000000..8baa59b
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/client/lexicoder/SequenceLexicoder.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.lexicoder;
+
+import static java.util.Objects.requireNonNull;
+import static org.apache.accumulo.core.clientImpl.lexicoder.ByteUtils.concat;
+import static org.apache.accumulo.core.clientImpl.lexicoder.ByteUtils.escape;
+import static org.apache.accumulo.core.clientImpl.lexicoder.ByteUtils.split;
+import static org.apache.accumulo.core.clientImpl.lexicoder.ByteUtils.unescape;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.accumulo.core.clientImpl.lexicoder.AbstractLexicoder;
+
+/**
+ * A Lexicoder to encode/decode a Java List to/from a byte array where the concatenation of each
+ * encoded element sorts lexicographically.
+ *
+ * Note: Unlike {@link ListLexicoder}, this implementation supports empty lists.
+ *
+ * The lists are encoded with the elements separated by null (0x0) bytes, which null bytes appearing
+ * in the elements escaped as two 0x1 bytes, and 0x1 bytes appearing in the elements escaped as 0x1
+ * and 0x2 bytes. The list is terminated with a final delimiter after the last element, with no
+ * bytes following it. An empty list is represented as an empty byte array, with no delimiter,
+ * whereas a list with a single empty element is represented as a single terminating delimiter.
+ *
+ * @since 2.0.0
+ * @param <E>
+ *          list element type.
+ */
+public class SequenceLexicoder<E> extends AbstractLexicoder<List<E>> {
+
+  private static final byte[] EMPTY_ELEMENT = new byte[0];
+  private final Lexicoder<E> elementLexicoder;
+
+  /**
+   * Primary constructor.
+   *
+   * @param elementLexicoder
+   *          Lexicoder to apply to elements.
+   */
+  public SequenceLexicoder(final Lexicoder<E> elementLexicoder) {
+    this.elementLexicoder = requireNonNull(elementLexicoder, "elementLexicoder");
+  }
+
+  /**
+   * {@inheritDoc}
+   *
+   * @return a byte array containing the concatenation of each element in the list encoded.
+   */
+  @Override
+  public byte[] encode(final List<E> v) {
+    final byte[][] encElements = new byte[v.size() + 1][];
+    int index = 0;
+    for (final E element : v) {
+      encElements[index++] = escape(elementLexicoder.encode(element));
+    }
+    encElements[v.size()] = EMPTY_ELEMENT;
+    return concat(encElements);
+  }
+
+  @Override
+  protected List<E> decodeUnchecked(final byte[] b, final int offset, final int len) {
+    final byte[][] escapedElements = split(b, offset, len);
+    assert escapedElements.length
+        > 0 : "ByteUtils.split always returns a minimum of 1 element, even for empty input";
+    // There should be no bytes after the final delimiter. Lack of delimiter indicates empty list.
+    final byte[] lastElement = escapedElements[escapedElements.length - 1];
+    if (lastElement.length > 0) {
+      throw new IllegalArgumentException(
+          lastElement.length + " trailing bytes found at end of list");
+    }
+    final ArrayList<E> decodedElements = new ArrayList<>(escapedElements.length - 1);
+    for (int i = 0; i < escapedElements.length - 1; i++) {
+      final byte[] escapedElement = escapedElements[i];
+      final byte[] unescapedElement = unescape(escapedElement);
+      decodedElements.add(elementLexicoder.decode(unescapedElement));
+    }
+    return decodedElements;
+  }
+}
diff --git a/core/src/test/java/org/apache/accumulo/core/client/lexicoder/ListLexicoderTest.java b/core/src/test/java/org/apache/accumulo/core/client/lexicoder/ListLexicoderTest.java
index 6f9dbed..352a0c3 100644
--- a/core/src/test/java/org/apache/accumulo/core/client/lexicoder/ListLexicoderTest.java
+++ b/core/src/test/java/org/apache/accumulo/core/client/lexicoder/ListLexicoderTest.java
@@ -16,6 +16,7 @@
  */
 package org.apache.accumulo.core.client.lexicoder;
 
+import static java.util.Collections.emptyList;
 import static org.junit.Assert.assertEquals;
 
 import java.util.ArrayList;
@@ -92,4 +93,9 @@ public class ListLexicoderTest extends AbstractLexicoderTest {
     assertDecodes(new ListLexicoder<>(new LongLexicoder()), data4);
     assertDecodes(new ListLexicoder<>(new LongLexicoder()), data5);
   }
+
+  @Test(expected = IllegalArgumentException.class)
+  public void testRejectsEmptyLists() {
+    new ListLexicoder<>(new LongLexicoder()).encode(emptyList());
+  }
 }
diff --git a/core/src/test/java/org/apache/accumulo/core/client/lexicoder/SequenceLexicoderTest.java b/core/src/test/java/org/apache/accumulo/core/client/lexicoder/SequenceLexicoderTest.java
new file mode 100644
index 0000000..9ef0984
--- /dev/null
+++ b/core/src/test/java/org/apache/accumulo/core/client/lexicoder/SequenceLexicoderTest.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.lexicoder;
+
+import static java.util.Arrays.asList;
+import static java.util.Collections.emptyList;
+import static java.util.Collections.singletonList;
+import static org.junit.Assert.assertEquals;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.TreeSet;
+
+import org.apache.accumulo.core.clientImpl.lexicoder.AbstractLexicoderTest;
+import org.apache.accumulo.core.util.TextUtil;
+import org.apache.hadoop.io.Text;
+import org.junit.Test;
+
+/**
+ * Unit tests for {@link SequenceLexicoder}.
+ */
+public class SequenceLexicoderTest extends AbstractLexicoderTest {
+
+  private final List<String> nodata = emptyList();
+  private final List<String> data0 = singletonList("");
+  private final List<String> data1 = asList("a", "b");
+  private final List<String> data2 = singletonList("a");
+  private final List<String> data3 = asList("a", "c");
+  private final List<String> data4 = asList("a", "b", "c");
+  private final List<String> data5 = asList("b", "a");
+
+  @Test
+  public void testSortOrder() {
+    // expected sort order
+    final List<List<String>> data = asList(nodata, data0, data2, data1, data4, data3, data5);
+    final TreeSet<Text> sortedEnc = new TreeSet<>();
+    final SequenceLexicoder<String> sequenceLexicoder =
+        new SequenceLexicoder<>(new StringLexicoder());
+    for (final List<String> list : data) {
+      sortedEnc.add(new Text(sequenceLexicoder.encode(list)));
+    }
+    final List<List<String>> unenc = new ArrayList<>();
+    for (final Text enc : sortedEnc) {
+      unenc.add(sequenceLexicoder.decode(TextUtil.getBytes(enc)));
+    }
+    assertEquals(data, unenc);
+  }
+
+  @Test
+  public void testDecodes() {
+    assertDecodes(new SequenceLexicoder<>(new StringLexicoder()), nodata);
+    assertDecodes(new SequenceLexicoder<>(new StringLexicoder()), data0);
+    assertDecodes(new SequenceLexicoder<>(new StringLexicoder()), data1);
+    assertDecodes(new SequenceLexicoder<>(new StringLexicoder()), data2);
+    assertDecodes(new SequenceLexicoder<>(new StringLexicoder()), data3);
+    assertDecodes(new SequenceLexicoder<>(new StringLexicoder()), data4);
+    assertDecodes(new SequenceLexicoder<>(new StringLexicoder()), data5);
+  }
+
+  @Test(expected = IllegalArgumentException.class)
+  public void tesRejectsTrailingBytes() {
+    new SequenceLexicoder<>(new StringLexicoder()).decode(new byte[] {10});
+  }
+}