You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pulsar.apache.org by si...@apache.org on 2019/06/17 19:38:23 UTC
[pulsar] branch master updated: [schema] record charset in
properties for string schema (#4542)
This is an automated email from the ASF dual-hosted git repository.
sijie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pulsar.git
The following commit(s) were added to refs/heads/master by this push:
new 9eb7cc6 [schema] record charset in properties for string schema (#4542)
9eb7cc6 is described below
commit 9eb7cc63771486902fc84356f22357422a8ac04d
Author: Sijie Guo <si...@apache.org>
AuthorDate: Mon Jun 17 12:38:18 2019 -0700
[schema] record charset in properties for string schema (#4542)
### Motivation
Make sure we record the charset in the schema properties, so that we know which charset to use for encoding and decoding strings.
### Modifications
Add charset to the properties
---
.../pulsar/client/impl/schema/StringSchema.java | 34 +++++-
.../client/impl/schema/StringSchemaTest.java | 122 +++++++++++++++++++++
2 files changed, 152 insertions(+), 4 deletions(-)
diff --git a/pulsar-client/src/main/java/org/apache/pulsar/client/impl/schema/StringSchema.java b/pulsar-client/src/main/java/org/apache/pulsar/client/impl/schema/StringSchema.java
index 11b5c5f..6859503 100644
--- a/pulsar-client/src/main/java/org/apache/pulsar/client/impl/schema/StringSchema.java
+++ b/pulsar-client/src/main/java/org/apache/pulsar/client/impl/schema/StringSchema.java
@@ -18,6 +18,10 @@
*/
package org.apache.pulsar.client.impl.schema;
+import static com.google.common.base.Preconditions.checkArgument;
+
+import java.util.HashMap;
+import java.util.Map;
import org.apache.pulsar.client.api.Schema;
import org.apache.pulsar.common.schema.SchemaInfo;
import org.apache.pulsar.common.schema.SchemaType;
@@ -30,25 +34,47 @@ import java.nio.charset.StandardCharsets;
*/
public class StringSchema implements Schema<String> {
+ static final String CHARSET_KEY = "__charset";
+
public static StringSchema utf8() {
return UTF8;
}
- private static final StringSchema UTF8 = new StringSchema(StandardCharsets.UTF_8);
- private static final SchemaInfo SCHEMA_INFO = new SchemaInfo()
+ public static StringSchema fromSchemaInfo(SchemaInfo schemaInfo) {
+ checkArgument(SchemaType.STRING == schemaInfo.getType(), "Not a string schema");
+ String charsetName = schemaInfo.getProperties().get(CHARSET_KEY);
+ if (null == charsetName) {
+ return UTF8;
+ } else {
+ return new StringSchema(Charset.forName(charsetName));
+ }
+ }
+
+ private static final SchemaInfo DEFAULT_SCHEMA_INFO = new SchemaInfo()
.setName("String")
.setType(SchemaType.STRING)
.setSchema(new byte[0]);
+ private static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8;
+ // make sure other static fields are initialized before this field
+ private static final StringSchema UTF8 = new StringSchema(StandardCharsets.UTF_8);
private final Charset charset;
- private static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8;
+ private final SchemaInfo schemaInfo;
public StringSchema() {
this.charset = DEFAULT_CHARSET;
+ this.schemaInfo = DEFAULT_SCHEMA_INFO;
}
public StringSchema(Charset charset) {
this.charset = charset;
+ Map<String, String> properties = new HashMap<>();
+ properties.put(CHARSET_KEY, charset.name());
+ this.schemaInfo = new SchemaInfo()
+ .setName(DEFAULT_SCHEMA_INFO.getName())
+ .setType(SchemaType.STRING)
+ .setSchema(DEFAULT_SCHEMA_INFO.getSchema())
+ .setProperties(properties);
}
public byte[] encode(String message) {
@@ -68,6 +94,6 @@ public class StringSchema implements Schema<String> {
}
public SchemaInfo getSchemaInfo() {
- return SCHEMA_INFO;
+ return schemaInfo;
}
}
diff --git a/pulsar-client/src/test/java/org/apache/pulsar/client/impl/schema/StringSchemaTest.java b/pulsar-client/src/test/java/org/apache/pulsar/client/impl/schema/StringSchemaTest.java
new file mode 100644
index 0000000..6aa9dd7
--- /dev/null
+++ b/pulsar-client/src/test/java/org/apache/pulsar/client/impl/schema/StringSchemaTest.java
@@ -0,0 +1,122 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pulsar.client.impl.schema;
+
+import static java.nio.charset.StandardCharsets.US_ASCII;
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
+
+import java.nio.charset.Charset;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.pulsar.common.schema.SchemaInfo;
+import org.apache.pulsar.common.schema.SchemaType;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+/**
+ * Unit test {@link StringSchema}.
+ */
+public class StringSchemaTest {
+
+ @Test
+ public void testUtf8Charset() {
+ StringSchema schema = new StringSchema();
+ SchemaInfo si = schema.getSchemaInfo();
+ assertFalse(si.getProperties().containsKey(StringSchema.CHARSET_KEY));
+
+ String myString = "my string for test";
+ byte[] data = schema.encode(myString);
+ assertArrayEquals(data, myString.getBytes(UTF_8));
+
+ String decodedString = schema.decode(data);
+ assertEquals(decodedString, myString);
+ }
+
+ @Test
+ public void testAsciiCharset() {
+ StringSchema schema = new StringSchema(US_ASCII);
+ SchemaInfo si = schema.getSchemaInfo();
+ assertTrue(si.getProperties().containsKey(StringSchema.CHARSET_KEY));
+ assertEquals(
+ si.getProperties().get(StringSchema.CHARSET_KEY),
+ US_ASCII.name()
+ );
+
+ String myString = "my string for test";
+ byte[] data = schema.encode(myString);
+ assertArrayEquals(data, myString.getBytes(US_ASCII));
+
+ String decodedString = schema.decode(data);
+ assertEquals(decodedString, myString);
+ }
+
+ @Test
+ public void testSchemaInfoWithoutCharset() {
+ SchemaInfo si = new SchemaInfo()
+ .setName("test-schema-info-without-charset")
+ .setType(SchemaType.STRING)
+ .setSchema(new byte[0])
+ .setProperties(Collections.emptyMap());
+ StringSchema schema = StringSchema.fromSchemaInfo(si);
+
+ String myString = "my string for test";
+ byte[] data = schema.encode(myString);
+ assertArrayEquals(data, myString.getBytes(UTF_8));
+
+ String decodedString = schema.decode(data);
+ assertEquals(decodedString, myString);
+ }
+
+ @DataProvider(name = "charsets")
+ public Object[][] charsets() {
+ return new Object[][] {
+ {
+ UTF_8
+ },
+ {
+ US_ASCII
+ }
+ };
+ }
+
+ @Test(dataProvider = "charsets")
+ public void testSchemaInfoWithCharset(Charset charset) {
+ Map<String, String> properties = new HashMap<>();
+ properties.put(StringSchema.CHARSET_KEY, charset.name());
+ SchemaInfo si = new SchemaInfo()
+ .setName("test-schema-info-without-charset")
+ .setType(SchemaType.STRING)
+ .setSchema(new byte[0])
+ .setProperties(properties);
+ StringSchema schema = StringSchema.fromSchemaInfo(si);
+
+ String myString = "my string for test";
+ byte[] data = schema.encode(myString);
+ assertArrayEquals(data, myString.getBytes(charset));
+
+ String decodedString = schema.decode(data);
+ assertEquals(decodedString, myString);
+ }
+
+}