You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@daffodil.apache.org by GitBox <gi...@apache.org> on 2022/03/08 14:18:05 UTC

[GitHub] [daffodil] mbeckerle commented on a change in pull request #768: Daffodil 2663 pulggable charsets

mbeckerle commented on a change in pull request #768:
URL: https://github.com/apache/daffodil/pull/768#discussion_r821681000



##########
File path: daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/EvEncoding.scala
##########
@@ -95,10 +96,23 @@ abstract class CharsetEvBase(encodingEv: EncodingEvBase, tci: DPathCompileInfo)
     val encString = encodingEv.evaluate(state)
     val cs = CharsetUtils.getCharset(encString)
     if (cs == null) {
-      tci.schemaDefinitionError("Unsupported encoding: %s. Supported encodings: %s", encString, CharsetUtils.supportedEncodingsString)
+      val cs2 = CharsetCompilerRegistry
+        .find(encString, tci)
+        .compileCharset
+        .newInstance()
+      if (cs2 == null) {
+        tci.schemaDefinitionError(
+          "Unsupported encoding: %s. Supported encodings: %s",

Review comment:
       I think you should just `Assert.invariant(cs2 ne null)` because the find() call above will SDE if the encoding cannot be found, and the other calls are never supposed to return null.  So this schemaDefinitionError call will never be hit. 
   
   That will get rid of your codeCov errors. 

##########
File path: daffodil-runtime1/src/main/scala/org/apache/daffodil/charsets/CharsetCompilerRegistry.scala
##########
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.daffodil.charsets
+
+import org.apache.daffodil.exceptions.ThrowsSDE
+import org.apache.daffodil.util.SimpleNamedServiceLoader
+import org.apache.daffodil.processors.charset.CharsetUtils
+
+
+/**
+ * Transformers have factories. This lets you find the transformer factory
+ * by the name obtained from dfdlx:layerTransform.
+ */
+object CharsetCompilerRegistry {
+
+  private lazy val charsetCompilerMap: Map[String, CharsetCompiler] =
+    SimpleNamedServiceLoader.loadClass[CharsetCompiler](classOf[CharsetCompiler])
+
+  /**
+   * Given name, finds the factory for the transformer. SDE otherwise.
+   *
+   * The state is passed in order to provide diagnostic context if not found.
+   */
+  def find(name: String, context: ThrowsSDE): CharsetCompiler = {
+    val maybeCompiler: Option[CharsetCompiler] = charsetCompilerMap.get(name)
+    if (maybeCompiler.isEmpty) {
+      val choices = charsetCompilerMap.keySet.mkString(", ")
+      context.SDE("The charset '%s' was not found. Available choices are: %s", name, choices + ", " + CharsetUtils.supportedEncodingsString)
+    } else {
+      maybeCompiler.get
+    }
+  }
+}

Review comment:
       missing final NL

##########
File path: daffodil-test/src/test/resources/META-INF/services/org.apache.daffodil.charsets.CharsetCompiler
##########
@@ -0,0 +1,31 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+# PLEASE NOTE:
+#
+# These are in src/test/resources not src/main/resources because they're not
+# "real" in the sense of being fully featured. E.g., AIS only does the payload
+# armoring, not everything else about AIS. IPv4 just does the IPv4 header, not
+# TCP, UDP, ICMP, etc. all of which have related checksum algorithms that should
+# share code. A real CheckDigits has to have different layer length kinds supported
+# so that one can compute check digits from elements with different
+# of length kinds.
+#
+# These are partial/subsets of "real" functionality that give us test coverage
+# of the layering features of Daffodil. They can be reused to create "real"
+# layering, but one should expect to have to adapt their code substantially.
+#
+org.apache.daffodil.charsets.CustomNonByteSizeCharsetCompiler
+org.apache.daffodil.charsets.CustomJavaCharsetCompiler

Review comment:
       Add missing final line ending

##########
File path: daffodil-runtime1/src/main/scala/org/apache/daffodil/charsets/CharsetTransformer.scala
##########
@@ -0,0 +1,33 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.charsets
+
+import org.apache.daffodil.processors.charset.BitsCharset
+
+/**
+ * Shared functionality of all CharsetTransformers.
+ *
+ * A charset transformer is created at runtime as part of a single parse/unparse call.
+ * Hence, they can be stateful without causing thread-safety issues.
+ */
+abstract class CharsetTransformer(charsetName: String){

Review comment:
       I think you can collapse CharsetTransformer and have a CharsetTransformer just be a BitsCharset. 
   
   Unless there is a need for CharsetTransformer, the added indirection is misleading, as people will do what I just did, which is search for the reason it is needed. 

##########
File path: daffodil-runtime1/src/main/scala/org/apache/daffodil/charsets/CharsetCompilerRegistry.scala
##########
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.daffodil.charsets
+
+import org.apache.daffodil.exceptions.ThrowsSDE
+import org.apache.daffodil.util.SimpleNamedServiceLoader
+import org.apache.daffodil.processors.charset.CharsetUtils
+
+
+/**
+ * Transformers have factories. This lets you find the transformer factory
+ * by the name obtained from dfdlx:layerTransform.
+ */
+object CharsetCompilerRegistry {
+
+  private lazy val charsetCompilerMap: Map[String, CharsetCompiler] =
+    SimpleNamedServiceLoader.loadClass[CharsetCompiler](classOf[CharsetCompiler])
+
+  /**
+   * Given name, finds the factory for the transformer. SDE otherwise.
+   *
+   * The state is passed in order to provide diagnostic context if not found.
+   */
+  def find(name: String, context: ThrowsSDE): CharsetCompiler = {
+    val maybeCompiler: Option[CharsetCompiler] = charsetCompilerMap.get(name)

Review comment:
       We have the standard scala Option type, and our own more efficient but less rich Maybe type.
   In the fast paths of the runtime we try to use Maybe, but elsewhere we use Option. You are using Option here which is right. I just suggest name this variable optCompiler for consistency. 

##########
File path: daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/EvEncoding.scala
##########
@@ -95,10 +96,23 @@ abstract class CharsetEvBase(encodingEv: EncodingEvBase, tci: DPathCompileInfo)
     val encString = encodingEv.evaluate(state)
     val cs = CharsetUtils.getCharset(encString)

Review comment:
       I'd like to get rid of all the built-in charsets, and implement them all as CharsetCompiler (with meta-inf, etc.) so there is only one way a charset is found/resolved, etc. 
   
   They would still all be part of daffodil-runtime1, just they would be detected and loaded using the dynamic load SPI mechanism, just like any user-supplied charset. 
   
   It is quite confusing to have two such mechanisms, when there is no good reason not to just have one. 
   
   So CharsetUtils.getCharset will go away, or perhaps become the code that does what you are doing here. 
   DaffodilCharsetProvider would also go away, or morph similarly. 

##########
File path: daffodil-test/src/test/scala/org/apache/daffodil/charsets/CustomJavaCharset.scala
##########
@@ -0,0 +1,50 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.charsets
+
+import org.apache.daffodil.processors.charset.BitsCharsetJava
+import org.apache.daffodil.processors.charset.BitsCharsetDecoderByteSize
+import org.apache.daffodil.io.InputSourceDataInputStream
+import org.apache.daffodil.io.FormatInfo
+
+final class CustomJavaCharset(
+  name2: String)
+extends  CharsetTransformer(
+  name2){
+
+def bitsCharset = BitsCharsetISO885913

Review comment:
       So this is named CustomJavaCharset fot the test, but the bitsCharset is wired to BitsCharsetISO885913 ?
   
   I think CharsetTransformers should go away entirely, but until then I would named this BitsCharsetISO885913Transformer, and why is name2 being ignored? 
   
   If you ignore an arg like this, probably a comment in the code is required. 

##########
File path: daffodil-test/src/test/scala/org/apache/daffodil/charsets/CustomJavaCharset.scala
##########
@@ -0,0 +1,50 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.charsets
+
+import org.apache.daffodil.processors.charset.BitsCharsetJava
+import org.apache.daffodil.processors.charset.BitsCharsetDecoderByteSize
+import org.apache.daffodil.io.InputSourceDataInputStream
+import org.apache.daffodil.io.FormatInfo
+
+final class CustomJavaCharset(
+  name2: String)

Review comment:
       Strange indentation. 
   
   When there are only a small number of args, we tend not to do the thing where args each get a line, as it reduces code density for no adantage. At 3+ args, sure, because it will line wrap anyway. But just 1 or 2 args keep args on one line. 

##########
File path: daffodil-runtime1/src/main/scala/org/apache/daffodil/charsets/CharsetCompilerRegistry.scala
##########
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.daffodil.charsets
+
+import org.apache.daffodil.exceptions.ThrowsSDE
+import org.apache.daffodil.util.SimpleNamedServiceLoader
+import org.apache.daffodil.processors.charset.CharsetUtils
+
+
+/**
+ * Transformers have factories. This lets you find the transformer factory
+ * by the name obtained from dfdlx:layerTransform.
+ */
+object CharsetCompilerRegistry {
+
+  private lazy val charsetCompilerMap: Map[String, CharsetCompiler] =
+    SimpleNamedServiceLoader.loadClass[CharsetCompiler](classOf[CharsetCompiler])
+
+  /**
+   * Given name, finds the factory for the transformer. SDE otherwise.
+   *
+   * The state is passed in order to provide diagnostic context if not found.
+   */
+  def find(name: String, context: ThrowsSDE): CharsetCompiler = {
+    val maybeCompiler: Option[CharsetCompiler] = charsetCompilerMap.get(name)
+    if (maybeCompiler.isEmpty) {
+      val choices = charsetCompilerMap.keySet.mkString(", ")
+      context.SDE("The charset '%s' was not found. Available choices are: %s", name, choices + ", " + CharsetUtils.supportedEncodingsString)

Review comment:
       Messages should not use the term "charset", but either "encoding" or "charset encoding". The XML term for 'charset' is 'encoding' and DFDL schema authors know the concept by that name. 

##########
File path: daffodil-test/src/test/scala/org/apache/daffodil/charsets/CustomJavaCharset.scala
##########
@@ -0,0 +1,50 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.charsets
+
+import org.apache.daffodil.processors.charset.BitsCharsetJava
+import org.apache.daffodil.processors.charset.BitsCharsetDecoderByteSize
+import org.apache.daffodil.io.InputSourceDataInputStream
+import org.apache.daffodil.io.FormatInfo
+
+final class CustomJavaCharset(
+  name2: String)
+extends  CharsetTransformer(
+  name2){
+
+def bitsCharset = BitsCharsetISO885913
+
+}
+
+object BitsCharsetISO885913 extends {
+  override val name = "ISO-8859-13"
+} with BitsCharsetJava {
+
+  override def newDecoder() = new BitsCharsetDecoderISO885913()
+
+}
+
+class BitsCharsetDecoderISO885913
+  extends BitsCharsetDecoderByteSize {
+
+  protected override def decodeOneChar(dis: InputSourceDataInputStream, finfo: FormatInfo): Char = {
+    val byte = getByte(dis, 0)
+    byte.toChar
+  }
+}

Review comment:
       Final line ending




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@daffodil.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org