You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tinkerpop.apache.org by jo...@apache.org on 2018/12/10 12:49:43 UTC

[tinkerpop] branch TINKERPOP-1942 updated: Char serializer

This is an automated email from the ASF dual-hosted git repository.

jorgebg pushed a commit to branch TINKERPOP-1942
in repository https://gitbox.apache.org/repos/asf/tinkerpop.git


The following commit(s) were added to refs/heads/TINKERPOP-1942 by this push:
     new 2ab861f  Char serializer
2ab861f is described below

commit 2ab861f860217bf152af80125038ea2acb3e42da
Author: Jorge Bay Gondra <jo...@gmail.com>
AuthorDate: Mon Dec 10 13:49:39 2018 +0100

    Char serializer
---
 docs/src/dev/io/graphbinary.asciidoc               |  2 +-
 .../driver/ser/binary/TypeSerializerRegistry.java  | 41 +------------
 .../driver/ser/binary/types/CharSerializer.java    | 68 +++++++++++++++++++++
 .../GraphBinaryReaderWriterRoundTripTest.java      |  1 +
 .../ser/binary/types/CharSerializerTest.java       | 70 ++++++++++++++++++++++
 5 files changed, 142 insertions(+), 40 deletions(-)

diff --git a/docs/src/dev/io/graphbinary.asciidoc b/docs/src/dev/io/graphbinary.asciidoc
index 6ffb428..74bf5ec 100644
--- a/docs/src/dev/io/graphbinary.asciidoc
+++ b/docs/src/dev/io/graphbinary.asciidoc
@@ -537,7 +537,7 @@ Format: one to four bytes representing a single UTF8 char, according to the Unic
 
 For characters `0x00`-`0x7F`, UTF-8 encodes the character as a single byte.
 
-For characters `0x80`-`0x7FF`, UTF-8 uses 2 bytes: the first byte is binary `110` followed by the 5 high bits of the
+For characters `0x80`-`0x07FF`, UTF-8 uses 2 bytes: the first byte is binary `110` followed by the 5 high bits of the
 character, while the second byte is binary 10 followed by the 6 low bits of the character.
 
 The 3 and 4-byte encodings are similar to the 2-byte encoding, except that the first byte of the 3-byte encoding starts
diff --git a/gremlin-driver/src/main/java/org/apache/tinkerpop/gremlin/driver/ser/binary/TypeSerializerRegistry.java b/gremlin-driver/src/main/java/org/apache/tinkerpop/gremlin/driver/ser/binary/TypeSerializerRegistry.java
index ad90f11..ca6e407 100644
--- a/gremlin-driver/src/main/java/org/apache/tinkerpop/gremlin/driver/ser/binary/TypeSerializerRegistry.java
+++ b/gremlin-driver/src/main/java/org/apache/tinkerpop/gremlin/driver/ser/binary/TypeSerializerRegistry.java
@@ -19,43 +19,7 @@
 package org.apache.tinkerpop.gremlin.driver.ser.binary;
 
 import org.apache.tinkerpop.gremlin.driver.ser.SerializationException;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.BigDecimalSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.BigIntegerSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.BindingSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.ByteBufferSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.ByteCodeSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.ClassSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.CustomTypeSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.DateSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.DurationSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.EdgeSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.EnumSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.GraphSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.InetAddressSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.InstantSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.LambdaSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.ListSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.LocalDateSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.LocalDateTimeSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.LocalTimeSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.MapSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.MonthDaySerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.OffsetDateTimeSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.OffsetTimeSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.PSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.PathSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.PeriodSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.PropertySerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.SetSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.SingleTypeSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.StringSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.TraverserSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.UUIDSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.VertexPropertySerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.VertexSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.YearMonthSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.ZoneOffsetSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.ZonedDateTimeSerializer;
+import org.apache.tinkerpop.gremlin.driver.ser.binary.types.*;
 import org.apache.tinkerpop.gremlin.process.traversal.Bytecode;
 import org.apache.tinkerpop.gremlin.process.traversal.Operator;
 import org.apache.tinkerpop.gremlin.process.traversal.Order;
@@ -164,8 +128,7 @@ public class TypeSerializerRegistry {
                 new RegistryEntry<>(Short.class, SingleTypeSerializer.ShortSerializer),
                 new RegistryEntry<>(Boolean.class, SingleTypeSerializer.BooleanSerializer),
 
-                // TODO: char
-
+                new RegistryEntry<>(Character.class, new CharSerializer()),
                 new RegistryEntry<>(Duration.class, new DurationSerializer()),
                 new RegistryEntry<>(InetAddress.class, new InetAddressSerializer()),
                 new RegistryEntry<>(Inet4Address.class, new InetAddressSerializer<>()),
diff --git a/gremlin-driver/src/main/java/org/apache/tinkerpop/gremlin/driver/ser/binary/types/CharSerializer.java b/gremlin-driver/src/main/java/org/apache/tinkerpop/gremlin/driver/ser/binary/types/CharSerializer.java
new file mode 100644
index 0000000..773dd41
--- /dev/null
+++ b/gremlin-driver/src/main/java/org/apache/tinkerpop/gremlin/driver/ser/binary/types/CharSerializer.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.driver.ser.binary.types;
+
+import io.netty.buffer.ByteBuf;
+import io.netty.buffer.ByteBufAllocator;
+import io.netty.buffer.Unpooled;
+import org.apache.tinkerpop.gremlin.driver.ser.SerializationException;
+import org.apache.tinkerpop.gremlin.driver.ser.binary.DataType;
+import org.apache.tinkerpop.gremlin.driver.ser.binary.GraphBinaryReader;
+import org.apache.tinkerpop.gremlin.driver.ser.binary.GraphBinaryWriter;
+
+import java.nio.charset.StandardCharsets;
+
+public class CharSerializer extends SimpleTypeSerializer<Character> { // reads/writes a single Character as UTF-8 bytes
+    public CharSerializer() {
+        super(DataType.CHAR);
+    }
+
+    @Override
+    Character readValue(ByteBuf buffer, GraphBinaryReader context) throws SerializationException {
+        final int firstByte = buffer.readByte() & 0xff; // mask so the lead byte is handled as an unsigned value (0-255)
+        int byteLength = 1;
+        // A lead byte with its highest bit ON (1xxxxxxx) signals that more bytes may be needed to represent the UTF-8 char
+        if ((firstByte & 0x80) > 0) {
+            if ((firstByte & 0xf0) == 0xf0) { // 1111xxxx: treated as the lead byte of a 4-byte sequence
+                byteLength = 4;
+            } else if ((firstByte & 0xe0) == 0xe0) { // 1110xxxx (1111xxxx already handled): lead byte of a 3-byte sequence
+                byteLength = 3;
+            } else if ((firstByte & 0xc0) == 0xc0) { // 110xxxxx (111xxxxx already handled): lead byte of a 2-byte sequence
+                byteLength = 2;
+            } // otherwise (10xxxxxx) byteLength stays 1 and the single byte is decoded on its own
+        }
+
+        byte[] byteArray;
+        if (byteLength == 1) {
+            byteArray = new byte[] { (byte)firstByte };
+        } else {
+            byteArray = new byte[byteLength];
+            byteArray[0] = (byte)firstByte;
+            buffer.readBytes(byteArray, 1, byteLength - 1); // read the remaining bytes of the sequence after the lead byte
+        }
+
+        return new String(byteArray, StandardCharsets.UTF_8).charAt(0); // NOTE(review): only the first UTF-16 code unit is returned; for a 4-byte sequence that is a lone high surrogate - confirm intended
+    }
+
+    @Override
+    public ByteBuf writeValue(Character value, ByteBufAllocator allocator, GraphBinaryWriter context) throws SerializationException {
+        final String stringValue = Character.toString(value); // unboxes value, so a null Character would throw here
+        return Unpooled.wrappedBuffer(stringValue.getBytes(StandardCharsets.UTF_8)); // wraps the UTF-8 bytes; the allocator argument is not used
+    }
+}
diff --git a/gremlin-driver/src/test/java/org/apache/tinkerpop/gremlin/driver/ser/binary/GraphBinaryReaderWriterRoundTripTest.java b/gremlin-driver/src/test/java/org/apache/tinkerpop/gremlin/driver/ser/binary/GraphBinaryReaderWriterRoundTripTest.java
index 468b77a..4871583 100644
--- a/gremlin-driver/src/test/java/org/apache/tinkerpop/gremlin/driver/ser/binary/GraphBinaryReaderWriterRoundTripTest.java
+++ b/gremlin-driver/src/test/java/org/apache/tinkerpop/gremlin/driver/ser/binary/GraphBinaryReaderWriterRoundTripTest.java
@@ -105,6 +105,7 @@ public class GraphBinaryReaderWriterRoundTripTest {
 
         return Arrays.asList(
                 new Object[] {"String", "ABC", null},
+                new Object[] {"Char", '£', null},
 
                 // numerics
                 new Object[] {"Byte", 1, null},
diff --git a/gremlin-driver/src/test/java/org/apache/tinkerpop/gremlin/driver/ser/binary/types/CharSerializerTest.java b/gremlin-driver/src/test/java/org/apache/tinkerpop/gremlin/driver/ser/binary/types/CharSerializerTest.java
new file mode 100644
index 0000000..839ab96
--- /dev/null
+++ b/gremlin-driver/src/test/java/org/apache/tinkerpop/gremlin/driver/ser/binary/types/CharSerializerTest.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.driver.ser.binary.types;
+
+import io.netty.buffer.ByteBuf;
+import io.netty.buffer.ByteBufAllocator;
+import io.netty.buffer.Unpooled;
+import org.apache.tinkerpop.gremlin.driver.ser.SerializationException;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.util.Arrays;
+import java.util.Collection;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+@RunWith(Parameterized.class)
+public class CharSerializerTest { // checks CharSerializer against known character / UTF-8 byte pairs
+    private final ByteBufAllocator allocator = ByteBufAllocator.DEFAULT;
+    private static final CharSerializer serializer = new CharSerializer();
+
+    @Parameterized.Parameters(name = "Character={0}")
+    public static Collection<Object[]> input() {
+        return Arrays.asList( // each entry: { character, expected UTF-8 encoding of that character }
+                new Object[] {'a', new byte[]{ 0x61 }},
+                new Object[] {'b', new byte[]{ 0x62 }},
+                new Object[] {'$', new byte[]{ 0x24 }},
+                new Object[] {'¢', new byte[]{ (byte)0xc2, (byte)0xa2 }},
+                new Object[] {'€', new byte[]{ (byte)0xe2, (byte)0x82, (byte)0xac }},
+                new Object[] {'ह', new byte[]{ (byte)0xe0, (byte)0xa4, (byte)0xb9 }});
+    }
+
+    @Parameterized.Parameter(value = 0)
+    public char charValue;
+
+    @Parameterized.Parameter(value = 1)
+    public byte[] byteArray;
+
+    @Test
+    public void readValueTest() throws SerializationException {
+        final Character actual = serializer.readValue(Unpooled.wrappedBuffer(byteArray), null); // no reader context is supplied
+        assertEquals(charValue, actual.charValue());
+    }
+
+    @Test
+    public void writeValueTest() throws SerializationException {
+        final ByteBuf actual = serializer.writeValue(charValue, allocator, null); // no writer context is supplied
+        final byte[] actualBytes = new byte[actual.readableBytes()]; // size from the output so extra or missing bytes are detected
+        actual.readBytes(actualBytes);
+        assertTrue(Arrays.equals(byteArray, actualBytes)); // compares both length and content
+    }
+}