You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2021/10/20 23:50:24 UTC

[datasketches-java] branch tuple_serial_ver3 created (now c07e970)

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a change to branch tuple_serial_ver3
in repository https://gitbox.apache.org/repos/asf/datasketches-java.git.


      at c07e970  serial version 3 for compatibility with C++

This branch includes the following new commits:

     new c07e970  serial version 3 for compatibility with C++

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org


[datasketches-java] 01/01: serial version 3 for compatibility with C++

Posted by al...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch tuple_serial_ver3
in repository https://gitbox.apache.org/repos/asf/datasketches-java.git

commit c07e970029c354f7ee33d090b2c02475f24c8758
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Wed Oct 20 16:50:12 2021 -0700

    serial version 3 for compatibility with C++
---
 src/main/java/org/apache/datasketches/Family.java  |   2 +-
 .../apache/datasketches/tuple/CompactSketch.java   | 176 ++++++++++++---------
 .../datasketches/tuple/QuickSelectSketch.java      |   2 +
 .../datasketches/tuple/SerializerDeserializer.java |   2 +-
 .../apache/datasketches/tuple/IntegerSummary.java  |  80 ++++++++++
 .../tuple/IntegerSummaryDeserializer.java          |  33 ++++
 .../datasketches/tuple/IntegerSummaryFactory.java  |  35 ++++
 .../datasketches/tuple/SerialVersion3Test.java     | 101 ++++++++++++
 .../TupleWithTestIntegerSummary4kTrimmedSerVer2.sk | Bin 0 -> 49169 bytes
 src/test/resources/tuple-int-empty-cpp.sk          | Bin 0 -> 8 bytes
 src/test/resources/tuple-int-est-trim-cpp.sk       | Bin 0 -> 49176 bytes
 src/test/resources/tuple-int-single-cpp.sk         | Bin 0 -> 20 bytes
 src/test/resources/tuple-int-two-cpp.sk            | Bin 0 -> 40 bytes
 13 files changed, 354 insertions(+), 77 deletions(-)

diff --git a/src/main/java/org/apache/datasketches/Family.java b/src/main/java/org/apache/datasketches/Family.java
index e2aec51..3518266 100644
--- a/src/main/java/org/apache/datasketches/Family.java
+++ b/src/main/java/org/apache/datasketches/Family.java
@@ -100,7 +100,7 @@ public enum Family {
    * The Tuple family of sketches is a large family of sketches that are extensions of the
    * Theta Sketch Framework.
    */
-  TUPLE(9, "TUPLE", 1, 1),
+  TUPLE(9, "TUPLE", 1, 3),
 
   /**
    * The Frequency family of sketches. (Not part of TSF.)
diff --git a/src/main/java/org/apache/datasketches/tuple/CompactSketch.java b/src/main/java/org/apache/datasketches/tuple/CompactSketch.java
index 9a76587..70e1b7d 100644
--- a/src/main/java/org/apache/datasketches/tuple/CompactSketch.java
+++ b/src/main/java/org/apache/datasketches/tuple/CompactSketch.java
@@ -41,11 +41,14 @@ import org.apache.datasketches.memory.Memory;
  */
 public class CompactSketch<S extends Summary> extends Sketch<S> {
   private static final byte serialVersionWithSummaryClassNameUID = 1;
-  private static final byte serialVersionUID = 2;
+  private static final byte serialVersionUID2 = 2;
+  private static final byte serialVersionUID = 3;
+  private static final short defaultSeedHash = (short) 37836; // for compatibility with C++
   private long[] hashArr_;
   private S[] summaryArr_;
 
-  private enum Flags { IS_BIG_ENDIAN, IS_EMPTY, HAS_ENTRIES, IS_THETA_INCLUDED }
+  private enum FlagsV2 { IS_BIG_ENDIAN, IS_EMPTY, HAS_ENTRIES, IS_THETA_INCLUDED }
+  private enum Flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDERED }
 
   /**
    * Create a CompactSketch from correct components
@@ -67,7 +70,6 @@ public class CompactSketch<S extends Summary> extends Sketch<S> {
    * @param mem Memory object with serialized CompactSketch
    * @param deserializer the SummaryDeserializer
    */
-  @SuppressWarnings({"unchecked"})
   CompactSketch(final Memory mem, final SummaryDeserializer<S> deserializer) {
     int offset = 0;
     final byte preambleLongs = mem.getByte(offset++);
@@ -80,49 +82,82 @@ public class CompactSketch<S extends Summary> extends Sketch<S> {
     }
     SerializerDeserializer
       .validateType(mem.getByte(offset++), SerializerDeserializer.SketchType.CompactSketch);
-    final byte flags = mem.getByte(offset++);
-    final boolean isBigEndian = (flags & 1 << Flags.IS_BIG_ENDIAN.ordinal()) > 0;
-    if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) {
-      throw new SketchesArgumentException("Byte order mismatch");
-    }
-    empty_ = (flags & 1 << Flags.IS_EMPTY.ordinal()) > 0;
-    final boolean isThetaIncluded = (flags & 1 << Flags.IS_THETA_INCLUDED.ordinal()) > 0;
-    if (isThetaIncluded) {
-      thetaLong_ = mem.getLong(offset);
-      offset += Long.BYTES;
-    } else {
-      thetaLong_ = Long.MAX_VALUE;
-    }
-    final boolean hasEntries = (flags & 1 << Flags.HAS_ENTRIES.ordinal()) > 0;
-    if (hasEntries) {
-      int classNameLength = 0;
-      if (version == serialVersionWithSummaryClassNameUID) {
-        classNameLength = mem.getByte(offset++);
+    if (version <= serialVersionUID2) { // legacy serial format
+      final byte flags = mem.getByte(offset++);
+      final boolean isBigEndian = (flags & 1 << FlagsV2.IS_BIG_ENDIAN.ordinal()) > 0;
+      if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) {
+        throw new SketchesArgumentException("Byte order mismatch");
+      }
+      empty_ = (flags & 1 << FlagsV2.IS_EMPTY.ordinal()) > 0;
+      final boolean isThetaIncluded = (flags & 1 << FlagsV2.IS_THETA_INCLUDED.ordinal()) > 0;
+      if (isThetaIncluded) {
+        thetaLong_ = mem.getLong(offset);
+        offset += Long.BYTES;
+      } else {
+        thetaLong_ = Long.MAX_VALUE;
+      }
+      final boolean hasEntries = (flags & 1 << FlagsV2.HAS_ENTRIES.ordinal()) > 0;
+      if (hasEntries) {
+        int classNameLength = 0;
+        if (version == serialVersionWithSummaryClassNameUID) {
+          classNameLength = mem.getByte(offset++);
+        }
+        final int count = mem.getInt(offset);
+        offset += Integer.BYTES;
+        if (version == serialVersionWithSummaryClassNameUID) {
+          offset += classNameLength;
+        }
+        hashArr_ = new long[count];
+        for (int i = 0; i < count; i++) {
+          hashArr_[i] = mem.getLong(offset);
+          offset += Long.BYTES;
+        }
+        for (int i = 0; i < count; i++) {
+          offset += readSummary(mem, offset, i, count, deserializer);
+        }
       }
-      final int count = mem.getInt(offset);
-      offset += Integer.BYTES;
-      if (version == serialVersionWithSummaryClassNameUID) {
-        offset += classNameLength;
+    } else { // current serial format
+      offset++; // unused
+      final byte flags = mem.getByte(offset++);
+      offset += 2; // unused (seed hash)
+      empty_ = (flags & 1 << Flags.IS_EMPTY.ordinal()) > 0;
+      thetaLong_ = Long.MAX_VALUE;
+      int count = 0;
+      if (!empty_) {
+        if (preambleLongs == 1) {
+          count = 1;
+        } else {
+          count = mem.getInt(offset);
+          offset += Integer.BYTES;
+          offset += 4; // unused
+          if (preambleLongs > 2) {
+            thetaLong_ = mem.getLong(offset);
+            offset += Long.BYTES;
+          }
+        }
       }
       hashArr_ = new long[count];
       for (int i = 0; i < count; i++) {
         hashArr_[i] = mem.getLong(offset);
         offset += Long.BYTES;
-      }
-      for (int i = 0; i < count; i++) {
-        final Memory memRegion = mem.region(offset, mem.getCapacity() - offset);
-        final DeserializeResult<S> result = deserializer.heapifySummary(memRegion);
-        final S summary = result.getObject();
-        final Class<S> summaryType = (Class<S>) result.getObject().getClass();
-        offset += result.getSize();
-        if (summaryArr_ == null) {
-          summaryArr_ = (S[]) Array.newInstance(summaryType, count);
-        }
-        summaryArr_[i] = summary;
+        offset += readSummary(mem, offset, i, count, deserializer);
       }
     }
   }
 
+  @SuppressWarnings({"unchecked"})
+  private int readSummary(final Memory mem, final int offset, final int i, final int count, final SummaryDeserializer<S> deserializer) {
+    final Memory memRegion = mem.region(offset, mem.getCapacity() - offset);
+    final DeserializeResult<S> result = deserializer.heapifySummary(memRegion);
+    final S summary = result.getObject();
+    final Class<S> summaryType = (Class<S>) result.getObject().getClass();
+    if (summaryArr_ == null) {
+      summaryArr_ = (S[]) Array.newInstance(summaryType, count);
+    }
+    summaryArr_[i] = summary;
+    return result.getSize();
+  }
+
   @Override
   public CompactSketch<S> compact() {
     return this;
@@ -150,65 +185,56 @@ public class CompactSketch<S extends Summary> extends Sketch<S> {
   // Long || Start Byte Adr:
   // Adr:
   //      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |     0              |
-  //  0   ||                          |  Flags | SkType | FamID  | SerVer |  Preamble_Longs    |
+  //  0   ||    seed hash    |  Flags | unused | SkType | FamID  | SerVer |  Preamble_Longs    |
   @SuppressWarnings("null")
   @Override
   public byte[] toByteArray() {
-    int summariesBytesLength = 0;
+	final int count = getRetainedEntries();
+    final boolean isSingleItem = count == 1 && !isEstimationMode();
+    final int preambleLongs = isEmpty() || isSingleItem ? 1 : isEstimationMode() ? 3 : 2;
+
+	int summariesSizeBytes = 0;
     byte[][] summariesBytes = null;
-    final int count = getRetainedEntries();
     if (count > 0) {
       summariesBytes = new byte[count][];
       for (int i = 0; i < count; i++) {
         summariesBytes[i] = summaryArr_[i].toByteArray();
-        summariesBytesLength += summariesBytes[i].length;
+        summariesSizeBytes += summariesBytes[i].length;
       }
     }
 
-    int sizeBytes =
-        Byte.BYTES // preamble longs
-      + Byte.BYTES // serial version
-      + Byte.BYTES // family id
-      + Byte.BYTES // sketch type
-      + Byte.BYTES; // flags
-    final boolean isThetaIncluded = thetaLong_ < Long.MAX_VALUE;
-    if (isThetaIncluded) {
-      sizeBytes += Long.BYTES; // theta
-    }
-    if (count > 0) {
-      sizeBytes +=
-        + Integer.BYTES // count
-        + Long.BYTES * count + summariesBytesLength;
-    }
+    int sizeBytes = Long.BYTES * preambleLongs + Long.BYTES * count + summariesSizeBytes;
     final byte[] bytes = new byte[sizeBytes];
     int offset = 0;
-    bytes[offset++] = PREAMBLE_LONGS;
+    bytes[offset++] = (byte) preambleLongs;
     bytes[offset++] = serialVersionUID;
     bytes[offset++] = (byte) Family.TUPLE.getID();
     bytes[offset++] = (byte) SerializerDeserializer.SketchType.CompactSketch.ordinal();
-    final boolean isBigEndian = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN);
+    offset++; // unused
     bytes[offset++] = (byte) (
-      (isBigEndian ? 1 << Flags.IS_BIG_ENDIAN.ordinal() : 0)
-      | (empty_ ? 1 << Flags.IS_EMPTY.ordinal() : 0)
-      | (count > 0 ? 1 << Flags.HAS_ENTRIES.ordinal() : 0)
-      | (isThetaIncluded ? 1 << Flags.IS_THETA_INCLUDED.ordinal() : 0)
+        (1 << Flags.IS_COMPACT.ordinal())
+      | (1 << Flags.IS_READ_ONLY.ordinal())
+      | (isEmpty() ? 1 << Flags.IS_EMPTY.ordinal() : 0)
     );
-    if (isThetaIncluded) {
-      ByteArrayUtil.putLongLE(bytes, offset, thetaLong_);
-      offset += Long.BYTES;
-    }
-    if (count > 0) {
-      ByteArrayUtil.putIntLE(bytes, offset, getRetainedEntries());
-      offset += Integer.BYTES;
-      for (int i = 0; i < count; i++) {
-        ByteArrayUtil.putLongLE(bytes, offset, hashArr_[i]);
-        offset += Long.BYTES;
-      }
-      for (int i = 0; i < count; i++) {
-        System.arraycopy(summariesBytes[i], 0, bytes, offset, summariesBytes[i].length);
-        offset += summariesBytes[i].length;
+    ByteArrayUtil.putShortLE(bytes, offset, defaultSeedHash);
+    offset += Short.BYTES;
+    if (!isEmpty()) {
+      if (!isSingleItem) {
+        ByteArrayUtil.putIntLE(bytes, offset, count);
+        offset += Integer.BYTES;
+        offset += 4; // unused
+        if (isEstimationMode()) {
+    	  ByteArrayUtil.putLongLE(bytes, offset, thetaLong_);
+    	  offset += Long.BYTES;
+        }
       }
     }
+    for (int i = 0; i < count; i++) {
+      ByteArrayUtil.putLongLE(bytes, offset, hashArr_[i]);
+      offset += Long.BYTES;
+      System.arraycopy(summariesBytes[i], 0, bytes, offset, summariesBytes[i].length);
+      offset += summariesBytes[i].length;
+    }
     return bytes;
   }
 
diff --git a/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java
index fd56b06..9295456 100644
--- a/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java
+++ b/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java
@@ -141,6 +141,7 @@ class QuickSelectSketch<S extends Summary> extends Sketch<S> {
    * @param deserializer the SummaryDeserializer
    * @param summaryFactory the SummaryFactory
    */
+  @Deprecated
   QuickSelectSketch(
       final Memory mem,
       final SummaryDeserializer<S> deserializer,
@@ -313,6 +314,7 @@ class QuickSelectSketch<S extends Summary> extends Sketch<S> {
   // Adr:
   //      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |     0              |
   //  0   ||   RF   |  lgArr | lgNom  |  Flags | SkType | FamID  | SerVer |  Preamble_Longs    |
+  @Deprecated
   @SuppressWarnings("null")
   @Override
   public byte[] toByteArray() {
diff --git a/src/main/java/org/apache/datasketches/tuple/SerializerDeserializer.java b/src/main/java/org/apache/datasketches/tuple/SerializerDeserializer.java
index 161d0ce..025058c 100644
--- a/src/main/java/org/apache/datasketches/tuple/SerializerDeserializer.java
+++ b/src/main/java/org/apache/datasketches/tuple/SerializerDeserializer.java
@@ -45,7 +45,7 @@ public final class SerializerDeserializer {
   public static void validateFamily(final byte familyId, final byte preambleLongs) {
     final Family family = Family.idToFamily(familyId);
     if (family.equals(Family.TUPLE)) {
-      if (preambleLongs != Family.TUPLE.getMinPreLongs()) {
+      if (preambleLongs < Family.TUPLE.getMinPreLongs() || preambleLongs > Family.TUPLE.getMaxPreLongs()) {
         throw new SketchesArgumentException(
             "Possible corruption: Invalid PreambleLongs value for family TUPLE: " + preambleLongs);
       }
diff --git a/src/test/java/org/apache/datasketches/tuple/IntegerSummary.java b/src/test/java/org/apache/datasketches/tuple/IntegerSummary.java
new file mode 100644
index 0000000..3d27c2b
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/tuple/IntegerSummary.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple;
+
+import org.apache.datasketches.ByteArrayUtil;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.tuple.DeserializeResult;
+import org.apache.datasketches.tuple.UpdatableSummary;
+
+/**
+ * Summary for generic tuple sketches of type Integer.
+ * This summary keeps an Integer value.
+ */
+public class IntegerSummary implements UpdatableSummary<Integer> {
+  private int value_;
+
+  /**
+   * Creates an instance of IntegerSummary with a given starting value.
+   * @param value starting value
+   */
+  public IntegerSummary(final int value) {
+    value_ = value;
+  }
+
+  @Override
+  public IntegerSummary update(final Integer value) {
+    value_ += value;
+    return this;
+  }
+
+  @Override
+  public IntegerSummary copy() {
+    return new IntegerSummary(value_);
+  }
+
+  /**
+   * @return current value of the IntegerSummary
+   */
+  public int getValue() {
+    return value_;
+  }
+
+  private static final int SERIALIZED_SIZE_BYTES = 4;
+  private static final int VALUE_INDEX = 0;
+
+  @Override
+  public byte[] toByteArray() {
+    final byte[] bytes = new byte[SERIALIZED_SIZE_BYTES];
+    ByteArrayUtil.putIntLE(bytes, VALUE_INDEX, value_);
+    return bytes;
+  }
+
+  /**
+   * Creates an instance of the IntegerSummary given a serialized representation
+   * @param mem Memory object with serialized IntegerSummary
+   * @return DeserializeResult object, which contains an IntegerSummary object and the number of
+   * bytes read from the Memory
+   */
+  public static DeserializeResult<IntegerSummary> fromMemory(final Memory mem) {
+    return new DeserializeResult<>(new IntegerSummary(mem.getInt(VALUE_INDEX)), SERIALIZED_SIZE_BYTES);
+  }
+
+}
diff --git a/src/test/java/org/apache/datasketches/tuple/IntegerSummaryDeserializer.java b/src/test/java/org/apache/datasketches/tuple/IntegerSummaryDeserializer.java
new file mode 100644
index 0000000..a7caa85
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/tuple/IntegerSummaryDeserializer.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple;
+
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.tuple.DeserializeResult;
+import org.apache.datasketches.tuple.SummaryDeserializer;
+
+public class IntegerSummaryDeserializer implements SummaryDeserializer<IntegerSummary> {
+
+  @Override
+  public DeserializeResult<IntegerSummary> heapifySummary(final Memory mem) {
+    return IntegerSummary.fromMemory(mem);
+  }
+
+}
diff --git a/src/test/java/org/apache/datasketches/tuple/IntegerSummaryFactory.java b/src/test/java/org/apache/datasketches/tuple/IntegerSummaryFactory.java
new file mode 100644
index 0000000..30313a3
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/tuple/IntegerSummaryFactory.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple;
+
+import org.apache.datasketches.tuple.IntegerSummary;
+import org.apache.datasketches.tuple.SummaryFactory;
+
+/**
+ * Factory for IntegerSummary.
+ */
+public class IntegerSummaryFactory implements SummaryFactory<IntegerSummary> {
+
+  @Override
+  public IntegerSummary newSummary() {
+    return new IntegerSummary(0);
+  }
+
+}
diff --git a/src/test/java/org/apache/datasketches/tuple/SerialVersion3Test.java b/src/test/java/org/apache/datasketches/tuple/SerialVersion3Test.java
new file mode 100644
index 0000000..9808308
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/tuple/SerialVersion3Test.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import static org.apache.datasketches.Util.getResourceBytes;
+
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.tuple.IntegerSummary;
+
+@SuppressWarnings("javadoc")
+public class SerialVersion3Test {
+
+  @Test
+  public void version2Compatibility() throws Exception {
+    byte[] bytes = getResourceBytes("TupleWithTestIntegerSummary4kTrimmedSerVer2.sk");
+    Sketch<IntegerSummary> sketch1 = Sketches.heapifySketch(Memory.wrap(bytes),
+        new IntegerSummaryDeserializer());
+
+    // construct the same way
+    final int lgK = 12;
+    final int K = 1 << lgK;
+    final UpdatableSketchBuilder<Integer, IntegerSummary> builder =
+            new UpdatableSketchBuilder<>(new IntegerSummaryFactory());
+    final UpdatableSketch<Integer, IntegerSummary> updatableSketch = builder.build();
+    for (int i = 0; i < 2 * K; i++) {
+      updatableSketch.update(i, 1);
+    }
+    updatableSketch.trim();
+    Sketch<IntegerSummary> sketch2 = updatableSketch.compact();
+
+    Assert.assertEquals(sketch1.getRetainedEntries(), sketch2.getRetainedEntries());
+    Assert.assertEquals(sketch1.getThetaLong(), sketch2.getThetaLong());
+    Assert.assertEquals(sketch1.isEmpty(), sketch2.isEmpty());
+    Assert.assertEquals(sketch1.isEstimationMode(), sketch2.isEstimationMode());
+  }
+
+  @Test
+  public void emptyFromCpp() {
+    byte[] bytes = getResourceBytes("tuple-int-empty-cpp.sk");
+    Sketch<IntegerSummary> sketch = Sketches.heapifySketch(Memory.wrap(bytes),
+        new IntegerSummaryDeserializer());
+    Assert.assertTrue(sketch.isEmpty());
+    Assert.assertFalse(sketch.isEstimationMode());
+    Assert.assertEquals(sketch.getRetainedEntries(), 0);
+    Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE);
+  }
+
+  @Test
+  public void singleItemFromCpp() {
+    byte[] bytes = getResourceBytes("tuple-int-single-cpp.sk");
+    Sketch<IntegerSummary> sketch = Sketches.heapifySketch(Memory.wrap(bytes),
+        new IntegerSummaryDeserializer());
+    Assert.assertFalse(sketch.isEmpty());
+    Assert.assertFalse(sketch.isEstimationMode());
+    Assert.assertEquals(sketch.getRetainedEntries(), 1);
+    Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE);
+  }
+
+  @Test
+  public void exactModeFromCpp() {
+    byte[] bytes = getResourceBytes("tuple-int-two-cpp.sk");
+    Sketch<IntegerSummary> sketch = Sketches.heapifySketch(Memory.wrap(bytes),
+        new IntegerSummaryDeserializer());
+    Assert.assertFalse(sketch.isEmpty());
+    Assert.assertFalse(sketch.isEstimationMode());
+    Assert.assertEquals(sketch.getRetainedEntries(), 2);
+    Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE);
+  }
+
+  @Test
+  public void estimationModeFromCpp() {
+    byte[] bytes = getResourceBytes("tuple-int-est-trim-cpp.sk");
+    Sketch<IntegerSummary> sketch = Sketches.heapifySketch(Memory.wrap(bytes),
+        new IntegerSummaryDeserializer());
+    Assert.assertFalse(sketch.isEmpty());
+    Assert.assertTrue(sketch.isEstimationMode());
+    Assert.assertEquals(sketch.getRetainedEntries(), 4096);
+    Assert.assertTrue(sketch.getThetaLong() < Long.MAX_VALUE);
+  }
+
+}
diff --git a/src/test/resources/TupleWithTestIntegerSummary4kTrimmedSerVer2.sk b/src/test/resources/TupleWithTestIntegerSummary4kTrimmedSerVer2.sk
new file mode 100644
index 0000000..41374f7
Binary files /dev/null and b/src/test/resources/TupleWithTestIntegerSummary4kTrimmedSerVer2.sk differ
diff --git a/src/test/resources/tuple-int-empty-cpp.sk b/src/test/resources/tuple-int-empty-cpp.sk
new file mode 100644
index 0000000..a95f163
Binary files /dev/null and b/src/test/resources/tuple-int-empty-cpp.sk differ
diff --git a/src/test/resources/tuple-int-est-trim-cpp.sk b/src/test/resources/tuple-int-est-trim-cpp.sk
new file mode 100644
index 0000000..749cb8d
Binary files /dev/null and b/src/test/resources/tuple-int-est-trim-cpp.sk differ
diff --git a/src/test/resources/tuple-int-single-cpp.sk b/src/test/resources/tuple-int-single-cpp.sk
new file mode 100644
index 0000000..346759d
Binary files /dev/null and b/src/test/resources/tuple-int-single-cpp.sk differ
diff --git a/src/test/resources/tuple-int-two-cpp.sk b/src/test/resources/tuple-int-two-cpp.sk
new file mode 100644
index 0000000..229d92a
Binary files /dev/null and b/src/test/resources/tuple-int-two-cpp.sk differ

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org