You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pulsar.apache.org by si...@apache.org on 2019/06/17 19:38:23 UTC

[pulsar] branch master updated: [schema] record charset in properties for string schema (#4542)

This is an automated email from the ASF dual-hosted git repository.

sijie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pulsar.git


The following commit(s) were added to refs/heads/master by this push:
     new 9eb7cc6  [schema] record charset in properties for string schema (#4542)
9eb7cc6 is described below

commit 9eb7cc63771486902fc84356f22357422a8ac04d
Author: Sijie Guo <si...@apache.org>
AuthorDate: Mon Jun 17 12:38:18 2019 -0700

    [schema] record charset in properties for string schema (#4542)
    
    ### Motivation
    
    Make sure we record the charset in the schema properties, so that we know which charset to use for encoding and decoding strings.
    
    ### Modifications
    
    Add the charset to the schema info properties of the string schema.
---
 .../pulsar/client/impl/schema/StringSchema.java    |  34 +++++-
 .../client/impl/schema/StringSchemaTest.java       | 122 +++++++++++++++++++++
 2 files changed, 152 insertions(+), 4 deletions(-)

diff --git a/pulsar-client/src/main/java/org/apache/pulsar/client/impl/schema/StringSchema.java b/pulsar-client/src/main/java/org/apache/pulsar/client/impl/schema/StringSchema.java
index 11b5c5f..6859503 100644
--- a/pulsar-client/src/main/java/org/apache/pulsar/client/impl/schema/StringSchema.java
+++ b/pulsar-client/src/main/java/org/apache/pulsar/client/impl/schema/StringSchema.java
@@ -18,6 +18,10 @@
  */
 package org.apache.pulsar.client.impl.schema;
 
+import static com.google.common.base.Preconditions.checkArgument;
+
+import java.util.HashMap;
+import java.util.Map;
 import org.apache.pulsar.client.api.Schema;
 import org.apache.pulsar.common.schema.SchemaInfo;
 import org.apache.pulsar.common.schema.SchemaType;
@@ -30,25 +34,47 @@ import java.nio.charset.StandardCharsets;
  */
 public class StringSchema implements Schema<String> {
 
+    static final String CHARSET_KEY = "__charset";
+
     public static StringSchema utf8() {
         return UTF8;
     }
 
-    private static final StringSchema UTF8 = new StringSchema(StandardCharsets.UTF_8);
-    private static final SchemaInfo SCHEMA_INFO = new SchemaInfo()
+    public static StringSchema fromSchemaInfo(SchemaInfo schemaInfo) {
+        checkArgument(SchemaType.STRING == schemaInfo.getType(), "Not a string schema");
+        String charsetName = schemaInfo.getProperties().get(CHARSET_KEY);
+        if (null == charsetName) {
+            return UTF8;
+        } else {
+            return new StringSchema(Charset.forName(charsetName));
+        }
+    }
+
+    private static final SchemaInfo DEFAULT_SCHEMA_INFO = new SchemaInfo()
         .setName("String")
         .setType(SchemaType.STRING)
         .setSchema(new byte[0]);
+    private static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8;
 
+    // UTF8 must be declared last: its constructor reads DEFAULT_SCHEMA_INFO during static initialization
+    private static final StringSchema UTF8 = new StringSchema(StandardCharsets.UTF_8);
     private final Charset charset;
-    private static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8;
+    private final SchemaInfo schemaInfo;
 
     public StringSchema() {
         this.charset = DEFAULT_CHARSET;
+        this.schemaInfo = DEFAULT_SCHEMA_INFO;
     }
 
     public StringSchema(Charset charset) {
         this.charset = charset;
+        Map<String, String> properties = new HashMap<>();
+        properties.put(CHARSET_KEY, charset.name());
+        this.schemaInfo = new SchemaInfo()
+            .setName(DEFAULT_SCHEMA_INFO.getName())
+            .setType(SchemaType.STRING)
+            .setSchema(DEFAULT_SCHEMA_INFO.getSchema())
+            .setProperties(properties);
     }
 
     public byte[] encode(String message) {
@@ -68,6 +94,6 @@ public class StringSchema implements Schema<String> {
     }
 
     public SchemaInfo getSchemaInfo() {
-        return SCHEMA_INFO;
+        return schemaInfo;
     }
 }
diff --git a/pulsar-client/src/test/java/org/apache/pulsar/client/impl/schema/StringSchemaTest.java b/pulsar-client/src/test/java/org/apache/pulsar/client/impl/schema/StringSchemaTest.java
new file mode 100644
index 0000000..6aa9dd7
--- /dev/null
+++ b/pulsar-client/src/test/java/org/apache/pulsar/client/impl/schema/StringSchemaTest.java
@@ -0,0 +1,122 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pulsar.client.impl.schema;
+
+import static java.nio.charset.StandardCharsets.US_ASCII;
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
+
+import java.nio.charset.Charset;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.pulsar.common.schema.SchemaInfo;
+import org.apache.pulsar.common.schema.SchemaType;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+/**
+ * Unit tests for {@link StringSchema}.
+ */
+public class StringSchemaTest {
+
+    @Test
+    public void testUtf8Charset() {
+        StringSchema schema = new StringSchema();
+        SchemaInfo si = schema.getSchemaInfo();
+        assertFalse(si.getProperties().containsKey(StringSchema.CHARSET_KEY));
+
+        String myString = "my string for test";
+        byte[] data = schema.encode(myString);
+        assertArrayEquals(data, myString.getBytes(UTF_8));
+
+        String decodedString = schema.decode(data);
+        assertEquals(decodedString, myString);
+    }
+
+    @Test
+    public void testAsciiCharset() {
+        StringSchema schema = new StringSchema(US_ASCII);
+        SchemaInfo si = schema.getSchemaInfo();
+        assertTrue(si.getProperties().containsKey(StringSchema.CHARSET_KEY));
+        assertEquals(
+            si.getProperties().get(StringSchema.CHARSET_KEY),
+            US_ASCII.name()
+        );
+
+        String myString = "my string for test";
+        byte[] data = schema.encode(myString);
+        assertArrayEquals(data, myString.getBytes(US_ASCII));
+
+        String decodedString = schema.decode(data);
+        assertEquals(decodedString, myString);
+    }
+
+    @Test
+    public void testSchemaInfoWithoutCharset() {
+        SchemaInfo si = new SchemaInfo()
+            .setName("test-schema-info-without-charset")
+            .setType(SchemaType.STRING)
+            .setSchema(new byte[0])
+            .setProperties(Collections.emptyMap());
+        StringSchema schema = StringSchema.fromSchemaInfo(si);
+
+        String myString = "my string for test";
+        byte[] data = schema.encode(myString);
+        assertArrayEquals(data, myString.getBytes(UTF_8));
+
+        String decodedString = schema.decode(data);
+        assertEquals(decodedString, myString);
+    }
+
+    @DataProvider(name = "charsets")
+    public Object[][] charsets() {
+        return new Object[][] {
+            {
+                UTF_8
+            },
+            {
+                US_ASCII
+            }
+        };
+    }
+
+    @Test(dataProvider = "charsets")
+    public void testSchemaInfoWithCharset(Charset charset) {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(StringSchema.CHARSET_KEY, charset.name());
+        SchemaInfo si = new SchemaInfo()
+            .setName("test-schema-info-without-charset")
+            .setType(SchemaType.STRING)
+            .setSchema(new byte[0])
+            .setProperties(properties);
+        StringSchema schema = StringSchema.fromSchemaInfo(si);
+
+        String myString = "my string for test";
+        byte[] data = schema.encode(myString);
+        assertArrayEquals(data, myString.getBytes(charset));
+
+        String decodedString = schema.decode(data);
+        assertEquals(decodedString, myString);
+    }
+
+}