You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tinkerpop.apache.org by jo...@apache.org on 2018/12/10 12:49:43 UTC
[tinkerpop] branch TINKERPOP-1942 updated: Char serializer
This is an automated email from the ASF dual-hosted git repository.
jorgebg pushed a commit to branch TINKERPOP-1942
in repository https://gitbox.apache.org/repos/asf/tinkerpop.git
The following commit(s) were added to refs/heads/TINKERPOP-1942 by this push:
new 2ab861f Char serializer
2ab861f is described below
commit 2ab861f860217bf152af80125038ea2acb3e42da
Author: Jorge Bay Gondra <jo...@gmail.com>
AuthorDate: Mon Dec 10 13:49:39 2018 +0100
Char serializer
---
docs/src/dev/io/graphbinary.asciidoc | 2 +-
.../driver/ser/binary/TypeSerializerRegistry.java | 41 +------------
.../driver/ser/binary/types/CharSerializer.java | 68 +++++++++++++++++++++
.../GraphBinaryReaderWriterRoundTripTest.java | 1 +
.../ser/binary/types/CharSerializerTest.java | 70 ++++++++++++++++++++++
5 files changed, 142 insertions(+), 40 deletions(-)
diff --git a/docs/src/dev/io/graphbinary.asciidoc b/docs/src/dev/io/graphbinary.asciidoc
index 6ffb428..74bf5ec 100644
--- a/docs/src/dev/io/graphbinary.asciidoc
+++ b/docs/src/dev/io/graphbinary.asciidoc
@@ -537,7 +537,7 @@ Format: one to four bytes representing a single UTF8 char, according to the Unic
For characters `0x00`-`0x7F`, UTF-8 encodes the character as a single byte.
-For characters `0x80`-`0x7FF`, UTF-8 uses 2 bytes: the first byte is binary `110` followed by the 5 high bits of the
+For characters `0x80`-`0x07FF`, UTF-8 uses 2 bytes: the first byte is binary `110` followed by the 5 high bits of the
character, while the second byte is binary 10 followed by the 6 low bits of the character.
The 3 and 4-byte encodings are similar to the 2-byte encoding, except that the first byte of the 3-byte encoding starts
diff --git a/gremlin-driver/src/main/java/org/apache/tinkerpop/gremlin/driver/ser/binary/TypeSerializerRegistry.java b/gremlin-driver/src/main/java/org/apache/tinkerpop/gremlin/driver/ser/binary/TypeSerializerRegistry.java
index ad90f11..ca6e407 100644
--- a/gremlin-driver/src/main/java/org/apache/tinkerpop/gremlin/driver/ser/binary/TypeSerializerRegistry.java
+++ b/gremlin-driver/src/main/java/org/apache/tinkerpop/gremlin/driver/ser/binary/TypeSerializerRegistry.java
@@ -19,43 +19,7 @@
package org.apache.tinkerpop.gremlin.driver.ser.binary;
import org.apache.tinkerpop.gremlin.driver.ser.SerializationException;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.BigDecimalSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.BigIntegerSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.BindingSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.ByteBufferSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.ByteCodeSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.ClassSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.CustomTypeSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.DateSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.DurationSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.EdgeSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.EnumSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.GraphSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.InetAddressSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.InstantSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.LambdaSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.ListSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.LocalDateSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.LocalDateTimeSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.LocalTimeSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.MapSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.MonthDaySerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.OffsetDateTimeSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.OffsetTimeSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.PSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.PathSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.PeriodSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.PropertySerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.SetSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.SingleTypeSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.StringSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.TraverserSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.UUIDSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.VertexPropertySerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.VertexSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.YearMonthSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.ZoneOffsetSerializer;
-import org.apache.tinkerpop.gremlin.driver.ser.binary.types.ZonedDateTimeSerializer;
+import org.apache.tinkerpop.gremlin.driver.ser.binary.types.*;
import org.apache.tinkerpop.gremlin.process.traversal.Bytecode;
import org.apache.tinkerpop.gremlin.process.traversal.Operator;
import org.apache.tinkerpop.gremlin.process.traversal.Order;
@@ -164,8 +128,7 @@ public class TypeSerializerRegistry {
new RegistryEntry<>(Short.class, SingleTypeSerializer.ShortSerializer),
new RegistryEntry<>(Boolean.class, SingleTypeSerializer.BooleanSerializer),
- // TODO: char
-
+ new RegistryEntry<>(Character.class, new CharSerializer()),
new RegistryEntry<>(Duration.class, new DurationSerializer()),
new RegistryEntry<>(InetAddress.class, new InetAddressSerializer()),
new RegistryEntry<>(Inet4Address.class, new InetAddressSerializer<>()),
diff --git a/gremlin-driver/src/main/java/org/apache/tinkerpop/gremlin/driver/ser/binary/types/CharSerializer.java b/gremlin-driver/src/main/java/org/apache/tinkerpop/gremlin/driver/ser/binary/types/CharSerializer.java
new file mode 100644
index 0000000..773dd41
--- /dev/null
+++ b/gremlin-driver/src/main/java/org/apache/tinkerpop/gremlin/driver/ser/binary/types/CharSerializer.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.driver.ser.binary.types;
+
+import io.netty.buffer.ByteBuf;
+import io.netty.buffer.ByteBufAllocator;
+import io.netty.buffer.Unpooled;
+import org.apache.tinkerpop.gremlin.driver.ser.SerializationException;
+import org.apache.tinkerpop.gremlin.driver.ser.binary.DataType;
+import org.apache.tinkerpop.gremlin.driver.ser.binary.GraphBinaryReader;
+import org.apache.tinkerpop.gremlin.driver.ser.binary.GraphBinaryWriter;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Serializer for {@link Character} values, transported as the UTF-8 encoding of the character.
+ */
+public class CharSerializer extends SimpleTypeSerializer<Character> {
+    public CharSerializer() {
+        super(DataType.CHAR);
+    }
+
+    /**
+     * Reads a single UTF-8 encoded character from the buffer.
+     *
+     * The lead byte determines how many bytes belong to the sequence; the remaining bytes are then
+     * consumed from the buffer and the whole sequence is decoded as UTF-8.
+     *
+     * NOTE: only the first UTF-16 code unit of the decoded text is returned, so a 4-byte sequence
+     * (which decodes to a surrogate pair) yields just its first unit.
+     *
+     * @param buffer  the buffer positioned at the first byte of the encoded character
+     * @param context the reader context (not used by this serializer)
+     * @return the decoded character
+     */
+    @Override
+    Character readValue(ByteBuf buffer, GraphBinaryReader context) throws SerializationException {
+        final int leadByte = buffer.readByte() & 0xff;
+        final byte[] encoded = new byte[sequenceLength(leadByte)];
+        encoded[0] = (byte) leadByte;
+
+        // Pull the trailing bytes of a multi-byte sequence, if any
+        if (encoded.length > 1) {
+            buffer.readBytes(encoded, 1, encoded.length - 1);
+        }
+
+        final String decoded = new String(encoded, StandardCharsets.UTF_8);
+        return decoded.charAt(0);
+    }
+
+    /**
+     * Writes the character as its UTF-8 byte sequence.
+     *
+     * @param value     the character to encode
+     * @param allocator the buffer allocator (not used, the encoded bytes are wrapped directly)
+     * @param context   the writer context (not used by this serializer)
+     * @return a buffer wrapping the UTF-8 bytes of the character
+     */
+    @Override
+    public ByteBuf writeValue(Character value, ByteBufAllocator allocator, GraphBinaryWriter context) throws SerializationException {
+        final byte[] encoded = String.valueOf(value.charValue()).getBytes(StandardCharsets.UTF_8);
+        return Unpooled.wrappedBuffer(encoded);
+    }
+
+    /**
+     * Gets the total length in bytes of a UTF-8 sequence from its (unsigned) lead byte.
+     *
+     * A lead byte with the high bit OFF (0xxxxxxx) is a complete single-byte character. Otherwise,
+     * the number of leading ON bits signals the length: 110xxxxx (2), 1110xxxx (3), 1111xxxx (4).
+     * Any other value with the high bit ON (10xxxxxx) is treated as a single byte.
+     */
+    private static int sequenceLength(final int leadByte) {
+        if (leadByte >= 0xf0) { // 11110000
+            return 4;
+        }
+        if (leadByte >= 0xe0) { // 11100000
+            return 3;
+        }
+        if (leadByte >= 0xc0) { // 11000000
+            return 2;
+        }
+        return 1;
+    }
+}
diff --git a/gremlin-driver/src/test/java/org/apache/tinkerpop/gremlin/driver/ser/binary/GraphBinaryReaderWriterRoundTripTest.java b/gremlin-driver/src/test/java/org/apache/tinkerpop/gremlin/driver/ser/binary/GraphBinaryReaderWriterRoundTripTest.java
index 468b77a..4871583 100644
--- a/gremlin-driver/src/test/java/org/apache/tinkerpop/gremlin/driver/ser/binary/GraphBinaryReaderWriterRoundTripTest.java
+++ b/gremlin-driver/src/test/java/org/apache/tinkerpop/gremlin/driver/ser/binary/GraphBinaryReaderWriterRoundTripTest.java
@@ -105,6 +105,7 @@ public class GraphBinaryReaderWriterRoundTripTest {
return Arrays.asList(
new Object[] {"String", "ABC", null},
+ new Object[] {"Char", '£', null},
// numerics
new Object[] {"Byte", 1, null},
diff --git a/gremlin-driver/src/test/java/org/apache/tinkerpop/gremlin/driver/ser/binary/types/CharSerializerTest.java b/gremlin-driver/src/test/java/org/apache/tinkerpop/gremlin/driver/ser/binary/types/CharSerializerTest.java
new file mode 100644
index 0000000..839ab96
--- /dev/null
+++ b/gremlin-driver/src/test/java/org/apache/tinkerpop/gremlin/driver/ser/binary/types/CharSerializerTest.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.driver.ser.binary.types;
+
+import io.netty.buffer.ByteBuf;
+import io.netty.buffer.ByteBufAllocator;
+import io.netty.buffer.Unpooled;
+import org.apache.tinkerpop.gremlin.driver.ser.SerializationException;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.util.Arrays;
+import java.util.Collection;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Verifies that {@link CharSerializer} reads and writes characters as their UTF-8 byte sequences,
+ * covering 1, 2 and 3 byte encodings.
+ */
+@RunWith(Parameterized.class)
+public class CharSerializerTest {
+    private final ByteBufAllocator allocator = ByteBufAllocator.DEFAULT;
+    private static final CharSerializer serializer = new CharSerializer();
+
+    /**
+     * Provides pairs of a character and its expected UTF-8 representation.
+     */
+    @Parameterized.Parameters(name = "Character={0}")
+    public static Collection<Object[]> input() {
+        return Arrays.asList(
+                new Object[] {'a', new byte[]{ 0x61 }},
+                new Object[] {'b', new byte[]{ 0x62 }},
+                new Object[] {'$', new byte[]{ 0x24 }},
+                new Object[] {'¢', new byte[]{ (byte)0xc2, (byte)0xa2 }},
+                new Object[] {'€', new byte[]{ (byte)0xe2, (byte)0x82, (byte)0xac }},
+                new Object[] {'ह', new byte[]{ (byte)0xe0, (byte)0xa4, (byte)0xb9 }});
+    }
+
+    @Parameterized.Parameter(value = 0)
+    public char charValue;
+
+    @Parameterized.Parameter(value = 1)
+    public byte[] byteArray;
+
+    @Test
+    public void readValueTest() throws SerializationException {
+        final ByteBuf buffer = Unpooled.wrappedBuffer(byteArray);
+        final Character actual = serializer.readValue(buffer, null);
+        assertEquals(charValue, actual.charValue());
+        // The serializer must consume the whole sequence, leaving the buffer ready for the next value
+        assertEquals(0, buffer.readableBytes());
+    }
+
+    @Test
+    public void writeValueTest() throws SerializationException {
+        final ByteBuf actual = serializer.writeValue(charValue, allocator, null);
+        // Check the length first: reading only the expected amount would hide trailing extra bytes
+        assertEquals(byteArray.length, actual.readableBytes());
+        final byte[] actualBytes = new byte[byteArray.length];
+        actual.readBytes(actualBytes);
+        assertTrue(Arrays.equals(byteArray, actualBytes));
+    }
+}