You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@daffodil.apache.org by GitBox <gi...@apache.org> on 2021/01/11 13:13:52 UTC

[GitHub] [incubator-daffodil] stevedlawrence commented on a change in pull request #463: Embedded Schematron

stevedlawrence commented on a change in pull request #463:
URL: https://github.com/apache/incubator-daffodil/pull/463#discussion_r555024283



##########
File path: LICENSE
##########
@@ -385,3 +385,28 @@ subcomponents is subject to the terms and conditions of the following licenses.
   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   SOFTWARE.
+
+  This product bundles content from the Schematron converters, including
+  the following files:
+    - daffodil-schematron/src/main/resources/iso-schematron-xslt2/ExtractSchFromXSD-2.xsl
+  The content is available under the MIT License:
+
+  Copyright (c) 2002-2010 Rick Jelliffe and Topologi Pty. Ltd.
+
+  Permission is hereby granted, free of charge, to any person obtaining a copy
+  of this software and associated documentation files (the "Software"), to deal
+  in the Software without restriction, including without limitation the rights
+  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+  copies of the Software, and to permit persons to whom the Software is
+  furnished to do so, subject to the following conditions:
+
+  The above copyright notice and this permission notice shall be included in
+  all copies or substantial portions of the Software.
+
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+  THE SOFTWARE.

Review comment:
       Very minor, but we've been indenting the license 2 more spaces than the "This product bundels..." stuff so it's more clear where the license content is. Same with where the other license files.

##########
File path: daffodil-schematron/src/main/scala/org/apache/daffodil/validation/schematron/ClassPathUriResolver.scala
##########
@@ -31,9 +31,15 @@ final class ClassPathUriResolver(rulesDir: String, fallback: Option[URIResolver]
     Option(getClass.getClassLoader.getResourceAsStream(path)) match {
       case Some(is) => new StreamSource(is)
       case None =>
-        fallback.map(_.resolve(href, base)).getOrElse(
-          throw ValidatorInitializationException(s"schematron resource not found at $path")
-        )
+        // fallback #1;; try the raw classpath without the prefix
+        Option(getClass.getClassLoader.getResourceAsStream(href)) match {

Review comment:
       Just curious, what's the case were we need to resolve without the template dir? Is this because a DFDL schema might xs:include/xs:import other schemas that might have embedded schematron annotations? 

##########
File path: daffodil-cli/bin.LICENSE
##########
@@ -876,3 +876,28 @@ subcomponents is subject to the terms and conditions of the following licenses.
   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   SOFTWARE.
+
+  This product bundles content from the Schematron converters, including
+  the following files:
+    - daffodil-schematron/src/main/resources/iso-schematron-xslt2/ExtractSchFromXSD-2.xsl

Review comment:
       For the CLI license, the path to this file is actually slightly different and inside a jar. So this should be something like this:
   
   ```
   iso-schematron-xslt2/ExtractSchFromXSD-2.xsl in lib/org.apache.daffodil.daffodil-schematron-<VERSION>.jar
   ```
   
   See the W3C license part near the top of the license file for an example

##########
File path: daffodil-schematron/src/test/scala/org/apache/daffodil/validation/schematron/EmbeddedTesting.scala
##########
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.validation.schematron
+
+import org.apache.daffodil.sapi.Daffodil
+import org.apache.daffodil.sapi.DataProcessor
+import org.apache.daffodil.sapi.Diagnostic
+import org.apache.daffodil.sapi.ParseResult
+import org.apache.daffodil.sapi.infoset.XMLTextInfosetOutputter
+import org.apache.daffodil.sapi.io.InputSourceDataInputStream
+import org.apache.daffodil.util.Misc
+import org.apache.daffodil.validation.schematron.SchSource.Xsd
+import org.junit.Assert.assertFalse
+import org.junit.Assert.assertTrue
+
+import java.io.ByteArrayInputStream
+import java.io.ByteArrayOutputStream
+import java.io.File
+
+trait EmbeddedTesting {
+  def withJpgSchema(f: Validation => Unit): Unit = withSchema("xsd/jpeg.dfdl.xsd")(f)
+  def withBmpSchema(f: Validation => Unit): Unit = withSchema("xsd/bmp.dfdl.xsd")(f)
+  def withGifSchema(f: Validation => Unit): Unit = withSchema("xsd/gif.dfdl.xsd")(f)

Review comment:
       We really shouldn't rely on external schemas for testing. I'd prefer we just remove this and ensure our included test files provide sufficient test coverage. Though I assume this means embedded schematron does work with work with those schemas, which is good to hear.

##########
File path: daffodil-schematron/src/main/resources/META-INF/LICENSE
##########
@@ -237,3 +237,28 @@ subcomponents is subject to the terms and conditions of the following licenses.
   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   SOFTWARE.
+
+  This product bundles content from the Schematron converters, including
+  the following files:
+    - daffodil-schematron/src/main/resources/iso-schematron-xslt2/ExtractSchFromXSD-2.xsl

Review comment:
       Like the above comment, this file has a different path since it's inside the daffodil-schematron jar. Unlike the CLI license, you don't need to include the jar since this LICENSE file is already in the jar. So this should just be:
   ```
   iso-schematron-xslt2/ExtractSchFromXSD-2.xsl
   ```
   

##########
File path: daffodil-schematron/src/test/resources/xsd/always-fails-1.dfdl.xsd
##########
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="utf-8" ?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
+           xmlns:dfdl="http://www.ogf.org/dfdl/dfdl-1.0/"
+           xmlns:sch="http://purl.oclc.org/dsdl/schematron">
+
+    <xs:include schemaLocation="org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd"/>
+    <xs:annotation>
+        <xs:appinfo source="http://www.ogf.org/dfdl/">
+            <dfdl:format ref="GeneralFormat"/>
+        </xs:appinfo>
+    </xs:annotation>
+
+    <xs:element name="always-fails" type="xs:string" dfdl:lengthKind="delimited" dfdl:terminator="">
+        <xs:annotation>
+            <xs:appinfo>

Review comment:
       Interesting, so embdded schematron rules don't require a special appinfo like DFDL does? I guess the XSLT that extracts these just looks for any any elements that have the schematron namespace?

##########
File path: daffodil-schematron/src/test/scala/org/apache/daffodil/validation/schematron/TestEmbeddedSchematron.scala
##########
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.validation.schematron
+
+import org.junit.Assert.assertFalse
+import org.junit.Assert.assertTrue
+import org.junit.Test
+
+import java.util.UUID
+
+class TestEmbeddedSchematron extends EmbeddedTesting {
+  @Test def variation1(): Unit = withSchema("xsd/embedded-1.dfdl.xsd") { f =>
+    assertTrue(f.parse(UUID.randomUUID.toString).validated)
+    assertFalse(f.parse(UUID.randomUUID.toString.drop(1)).validated)
+  }
+
+  @Test def variation2(): Unit = withSchema("xsd/embedded-2.dfdl.xsd") { f =>
+    assertTrue(f.parse(UUID.randomUUID.toString).validated)
+    assertFalse(f.parse(UUID.randomUUID.toString.drop(1)).validated)
+  }
+
+  @Test def variation3(): Unit = withSchema("xsd/embedded-3.dfdl.xsd") { f =>
+    f.parse(UUID.randomUUID.toString).diagnostics.foreach(println)
+    assertTrue(f.parse(UUID.randomUUID.toString).validated)
+    f.parse(UUID.randomUUID.toString).diagnostics.foreach(println)
+    assertFalse(f.parse(UUID.randomUUID.toString.drop(1)).validated)
+  }
+
+  @Test def testNeverFails(): Unit = withSchema("xsd/never-fails-1.dfdl.xsd") { f =>
+    val good = UUID.randomUUID.toString
+    assertTrue(f.parse(good).validated)
+  }
+
+  @Test def testAlwaysFails(): Unit = withSchema("xsd/always-fails-1.dfdl.xsd") { f =>
+    val good = UUID.randomUUID.toString
+    assertFalse(f.parse(good).validated)
+  }
+
+  @Test def testExtends(): Unit = withSchema("xsd/extends-1.dfdl.xsd") { f =>
+    assertTrue(f.parse("bob;l;smith").validated)
+    assertTrue(f.parse("bob;;smith").validated)
+    assertFalse(f.parse(";;smith").validated)
+    assertFalse(f.parse("bob;l;").validated)
+    assertFalse(f.parse(";l;").validated)
+  }
+
+  @Test def testNoNs1(): Unit = withSchema("xsd/without-ns-1.dfdl.xsd") { f =>
+    assertTrue(f.parse("0;1", Always).validated)
+    assertFalse(f.parse("2;1").validated)
+    assertFalse(f.parse("0;0").validated)
+  }
+
+  @Test def testWithNs1(): Unit = withSchema("xsd/with-ns-1.dfdl.xsd") { f =>
+    assertTrue(f.parse("0;1", Always).validated)
+    assertFalse(f.parse("2;1").validated)
+    assertFalse(f.parse("0;0").validated)
+  }
+}

Review comment:
       I would also like to see testing of the CLI for this feature. First so that it ensures the CLI works and can find things the same way the API can, but also because it acts as a good example of expected usage for the CLI. Could you add some tests in daffodil-cli to test this functionality?

##########
File path: daffodil-schematron/src/test/scala/org/apache/daffodil/validation/schematron/EmbeddedTesting.scala
##########
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.validation.schematron
+
+import org.apache.daffodil.sapi.Daffodil
+import org.apache.daffodil.sapi.DataProcessor
+import org.apache.daffodil.sapi.Diagnostic
+import org.apache.daffodil.sapi.ParseResult
+import org.apache.daffodil.sapi.infoset.XMLTextInfosetOutputter
+import org.apache.daffodil.sapi.io.InputSourceDataInputStream
+import org.apache.daffodil.util.Misc
+import org.apache.daffodil.validation.schematron.SchSource.Xsd
+import org.junit.Assert.assertFalse
+import org.junit.Assert.assertTrue
+
+import java.io.ByteArrayInputStream
+import java.io.ByteArrayOutputStream
+import java.io.File
+
+trait EmbeddedTesting {
+  def withJpgSchema(f: Validation => Unit): Unit = withSchema("xsd/jpeg.dfdl.xsd")(f)
+  def withBmpSchema(f: Validation => Unit): Unit = withSchema("xsd/bmp.dfdl.xsd")(f)
+  def withGifSchema(f: Validation => Unit): Unit = withSchema("xsd/gif.dfdl.xsd")(f)
+
+
+  case class PR(r: ParseResult) {
+    def validated: Boolean = !r.isValidationError()
+    def diagnostics: Seq[Diagnostic] = r.getDiagnostics
+  }
+
+  sealed trait PrintInfosetMode
+  case object Quiet extends PrintInfosetMode
+  case object AnyError extends PrintInfosetMode
+  case object ValError extends PrintInfosetMode
+  case object ProcError extends PrintInfosetMode
+  case object Always extends PrintInfosetMode
+
+  case class Validation(dp: DataProcessor) {
+    def parse(str: String, verbose: PrintInfosetMode = Quiet): PR = withBytes(str.getBytes, verbose)
+
+    def withBytes(bytes: Array[Byte], verbose: PrintInfosetMode = Quiet): PR = {
+      val bos = new ByteArrayOutputStream()
+      val r1 = dp.parse(
+        new InputSourceDataInputStream(new ByteArrayInputStream(bytes)),
+        new XMLTextInfosetOutputter(bos, true))
+
+//      println(bos.toString)

Review comment:
       Remove println




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org