You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by kw...@apache.org on 2022/02/21 14:18:03 UTC
[jackrabbit-filevault] branch master updated: JCRVLT-357 add method to DocViewParser for converting file name to root (#211)

This is an automated email from the ASF dual-hosted git repository.

kwin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/jackrabbit-filevault.git


The following commit(s) were added to refs/heads/master by this push:
     new 4f7683e  JCRVLT-357 add method to DocViewParser for converting file name to root (#211)
4f7683e is described below

commit 4f7683ec0dce669f3c84d6f95ede85a97503265f
Author: Konrad Windszus <kw...@apache.org>
AuthorDate: Mon Feb 21 15:17:10 2022 +0100

    JCRVLT-357 add method to DocViewParser for converting file name to root (#211)
    
    node path
---
 .../jackrabbit/vault/fs/io/DocViewParser.java      | 73 +++++++++++++++++++++-
 .../jackrabbit/vault/fs/io/DocViewParserTest.java  | 66 +++++++++++++++++++
 .../org/apache/jackrabbit/vault/fs/io/docview.xml  | 30 +++++++++
 .../apache/jackrabbit/vault/fs/io/non-docview.xml  | 18 ++++++
 .../spi/impl/DocumentViewParserValidator.java      | 56 +----------------
 .../spi/impl/DocumentViewParserValidatorTest.java  | 41 ------------
 6 files changed, 187 insertions(+), 97 deletions(-)

diff --git a/vault-core/src/main/java/org/apache/jackrabbit/vault/fs/io/DocViewParser.java b/vault-core/src/main/java/org/apache/jackrabbit/vault/fs/io/DocViewParser.java
index a9438c0..570438f 100644
--- a/vault-core/src/main/java/org/apache/jackrabbit/vault/fs/io/DocViewParser.java
+++ b/vault-core/src/main/java/org/apache/jackrabbit/vault/fs/io/DocViewParser.java
@@ -16,10 +16,14 @@
  */
 package org.apache.jackrabbit.vault.fs.io;
 
+import java.io.BufferedInputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.Reader;
 import java.nio.charset.StandardCharsets;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 import java.util.Objects;
 
 import javax.jcr.Session;
@@ -28,20 +32,26 @@ import javax.xml.parsers.ParserConfigurationException;
 import javax.xml.parsers.SAXParser;
 import javax.xml.parsers.SAXParserFactory;
 
+import org.apache.commons.io.FilenameUtils;
 import org.apache.jackrabbit.spi.commons.namespace.NamespaceResolver;
 import org.apache.jackrabbit.spi.commons.namespace.SessionNamespaceResolver;
 import org.apache.jackrabbit.vault.fs.impl.io.DocViewSAXHandler;
+import org.apache.jackrabbit.vault.util.Constants;
+import org.apache.jackrabbit.vault.util.PlatformNameFormat;
 import org.jetbrains.annotations.NotNull;
 import org.jetbrains.annotations.Nullable;
+import org.osgi.annotation.versioning.ProviderType;
 import org.xml.sax.InputSource;
 import org.xml.sax.Locator;
 import org.xml.sax.SAXException;
 
 /**
- * This is a thread-safe SAX parser which deals with docview files and passes them to a given {@link DocViewParserHandler}.
+ * This is a thread-safe SAX parser which deals with <a href="https://jackrabbit.apache.org/filevault/docview.html">FileVault Document View XML files</a>
+ * and passes them to a given {@link DocViewParserHandler}.
  * 
  */
-public class DocViewParser {
+@ProviderType
+public final class DocViewParser {
 
     private final @Nullable NamespaceResolver resolver;
 
@@ -126,6 +136,7 @@ public class DocViewParser {
 
     /**
      *
+     * Checks whether the given {@link InputSource} complies with the Document View XML format.
      * @param source the source to analyze
      * @return {@code true} in case the given source is Document View XML format
      * @throws IOException if an I/O error occurs
@@ -139,12 +150,13 @@ public class DocViewParser {
     }
 
     /**
+     * Checks whether the content of the given {@link Reader} complies with the Document View XML format.
      * Don't forget to reset the reader or use a new reader before parsing the xml.
      * @param reader the reader from which to read the XML
      * @return {@code true} in case the given source is Document View XML format
      * @throws IOException
      */
-    public static boolean isDocView(Reader reader) throws IOException {
+    private static boolean isDocView(Reader reader) throws IOException {
         // read a couple of chars...1024 should be enough
         char[] buffer = new char[1024];
         int pos = 0;
@@ -160,6 +172,61 @@ public class DocViewParser {
         return str.contains("<jcr:root") && str.contains("\"http://www.jcp.org/jcr/1.0\"");
     }
 
+
+    /**
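+     * Converts the given file path to the absolute repository path of its root node, provided that the file is either a {@code .content.xml} or an {@code .xml} file whose content (read from the given {@link InputStream}) complies with the Document View XML format.
+     * @param input the content of the file; only read (and reset afterwards) for {@code .xml} files other than {@code .content.xml}. Pass a {@link BufferedInputStream} if the content must be readable again afterwards: any other stream is wrapped into a new {@link BufferedInputStream} internally and only that wrapper is reset
+     * @param filePath the file path of the file containing the potential docview xml, must be relative to the jcr_root directory (absolute paths are rejected with an {@link IllegalArgumentException})
+     * @return either the absolute repository path of the root node of the given docview xml or {@code null} if the file is not a docview xml
+     * @throws IOException if reading from or resetting the input fails */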
+    public static @Nullable String getDocumentViewXmlRootNodePath(InputStream input, Path filePath) throws IOException {
+        if (filePath.isAbsolute()) {
+            throw new IllegalArgumentException("The filePath parameter must be given as relative path!");
+        }
+        
+        if (!(input instanceof BufferedInputStream)) {
+            input = new BufferedInputStream(input, 1024);
+        }
+        Path name = filePath.getFileName();
+        Path rootPath = null;
+        int nameCount = filePath.getNameCount();
+        if (name.equals(Paths.get(Constants.DOT_CONTENT_XML))) {
+            // get parent path
+            if (nameCount > 1) {
+                rootPath = filePath.getParent();
+            } else {
+                rootPath = Paths.get("");
+            } 
+            // correct suffix matching
+        } else if (name.toString().endsWith(".xml")) {
+
+            // we need to rely on a buffered input stream to be able to reset it later
+            input.mark(1024);
+            // analyze content
+            // this closes the input source internally, therefore protect against closing
+            // make sure to initialize the SLF4J logger appropriately (for the XmlAnalyzer)
+            try {
+                if (DocViewParser.isDocView(new InputStreamReader(input, StandardCharsets.UTF_8))) {
+                    //  remove .xml extension
+                    String fileName = filePath.getFileName().toString();
+                    fileName = fileName.substring(0, fileName.length() - ".xml".length());
+                    if (nameCount > 1) {
+                        rootPath = filePath.getParent().resolve(fileName);
+                    } else {
+                        rootPath = Paths.get(fileName);
+                    }
+                }
+            } finally {
+                input.reset();
+            }
+        }
+        if (rootPath == null) {
+            return null;
+        }
+        String platformPath = FilenameUtils.separatorsToUnix(rootPath.toString());
+        return "/" + PlatformNameFormat.getRepositoryPath(platformPath, true);
+    }
+
     private SAXParser createSaxParser() throws ParserConfigurationException, SAXException {
         SAXParserFactory factory = SAXParserFactory.newInstance();
         factory.setNamespaceAware(true);
diff --git a/vault-core/src/test/java/org/apache/jackrabbit/vault/fs/io/DocViewParserTest.java b/vault-core/src/test/java/org/apache/jackrabbit/vault/fs/io/DocViewParserTest.java
new file mode 100644
index 0000000..9d71587
--- /dev/null
+++ b/vault-core/src/test/java/org/apache/jackrabbit/vault/fs/io/DocViewParserTest.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.vault.fs.io;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class DocViewParserTest {
+
+    @Test
+    public void testGetDocumentViewXmlRootPathFromContentXml() throws IOException {
+        Path filePath = Paths.get("test", "parent", ".content.xml");
+        Assert.assertEquals("/test/parent", DocViewParser.getDocumentViewXmlRootNodePath(null, filePath));
+    }
+
+    @Test
+    public void testGetDocumentViewXmlRootPathFromContentXmlBelowDotDir() throws IOException {
+        // http://jackrabbit.apache.org/filevault/vaultfs.html#Extended_File_aggregates
+        Path filePath = Paths.get("test", "parent.dir", ".content.xml");
+        Assert.assertEquals("/test/parent", DocViewParser.getDocumentViewXmlRootNodePath(null, filePath));
+    }
+
+    @Test
+    public void testGetDocumentViewXmlRootPathFromEscapedFilename() throws IOException {
+        // the platform-escaped file name "_cq_test%3aimage.xml" must be unescaped to the repository name "cq:test:image"
+        Path filePath = Paths.get("test", "parent", "_cq_test%3aimage.xml");
+        try (InputStream inputStream = getClass().getResourceAsStream("docview.xml")) {
+            Assert.assertEquals("/test/parent/cq:test:image", DocViewParser.getDocumentViewXmlRootNodePath(inputStream, filePath));
+        }
+    }
+
+    @Test
+    public void testGetDocumentViewXmlRootPathFromNonXmlFile() throws IOException {
+        // files without the ".xml" extension never map to a docview root node path (the input is not read, hence null is passed)
+        Path filePath = Paths.get("test", "parent", "test.jpg");
+        Assert.assertNull(DocViewParser.getDocumentViewXmlRootNodePath(null, filePath));
+    }
+
+    @Test
+    public void testGetDocumentViewXmlRootPathFromNonDocviewXmlFile() throws IOException {
+        // an ".xml" file whose content is not in Document View XML format must not be mapped to a node path
+        Path filePath = Paths.get("test", "parent", "test.xml");
+        try (InputStream inputStream = getClass().getResourceAsStream("non-docview.xml")) {
+            Assert.assertNull(DocViewParser.getDocumentViewXmlRootNodePath(inputStream, filePath));
+        }
+    }
+}
diff --git a/vault-core/src/test/resources/org/apache/jackrabbit/vault/fs/io/docview.xml b/vault-core/src/test/resources/org/apache/jackrabbit/vault/fs/io/docview.xml
new file mode 100644
index 0000000..ab1236a
--- /dev/null
+++ b/vault-core/src/test/resources/org/apache/jackrabbit/vault/fs/io/docview.xml
@@ -0,0 +1,30 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+  -->
+<jcr:root xmlns:jcr="http://www.jcp.org/jcr/1.0"
+    jcr:primaryType="nt:unstructured"
+    jcr:title="Test node">
+    <!-- My Comment -->
+    <testChild
+        jcr:primaryType="nt:unstructured"
+        foo="bar"
+        TestProperty="test"
+        testProperty="test">
+        <!-- MyComment2 -->
+        <grandchild/>
+    </testChild>
+</jcr:root>
diff --git a/vault-core/src/test/resources/org/apache/jackrabbit/vault/fs/io/non-docview.xml b/vault-core/src/test/resources/org/apache/jackrabbit/vault/fs/io/non-docview.xml
new file mode 100644
index 0000000..b4c5a55
--- /dev/null
+++ b/vault-core/src/test/resources/org/apache/jackrabbit/vault/fs/io/non-docview.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+  -->
+<some-element></some-element>
\ No newline at end of file
diff --git a/vault-validation/src/main/java/org/apache/jackrabbit/vault/validation/spi/impl/DocumentViewParserValidator.java b/vault-validation/src/main/java/org/apache/jackrabbit/vault/validation/spi/impl/DocumentViewParserValidator.java
index 7b66d33..c915041 100644
--- a/vault-validation/src/main/java/org/apache/jackrabbit/vault/validation/spi/impl/DocumentViewParserValidator.java
+++ b/vault-validation/src/main/java/org/apache/jackrabbit/vault/validation/spi/impl/DocumentViewParserValidator.java
@@ -16,13 +16,9 @@
  */
 package org.apache.jackrabbit.vault.validation.spi.impl;
 
-import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.nio.charset.StandardCharsets;
 import java.nio.file.Path;
-import java.nio.file.Paths;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.LinkedList;
@@ -35,7 +31,6 @@ import org.apache.commons.io.input.CloseShieldInputStream;
 import org.apache.jackrabbit.spi.commons.namespace.NamespaceResolver;
 import org.apache.jackrabbit.vault.fs.io.DocViewParser;
 import org.apache.jackrabbit.vault.fs.io.DocViewParser.XmlParseException;
-import org.apache.jackrabbit.vault.util.Constants;
 import org.apache.jackrabbit.vault.validation.ValidationExecutor;
 import org.apache.jackrabbit.vault.validation.ValidationViolation;
 import org.apache.jackrabbit.vault.validation.impl.util.EnhancedBufferedInputStream;
@@ -100,9 +95,9 @@ public class DocumentViewParserValidator implements GenericJcrDataValidator {
         // wrap input stream as buffered input stream (to be able to reset it and for performance reasons)
         final EnhancedBufferedInputStream bufferedInput = new EnhancedBufferedInputStream(input);
 
-        Path documentViewXmlRootPath = getDocumentViewXmlRootPath(bufferedInput, filePath);
-        if (documentViewXmlRootPath != null) {
-            messages.addAll(validateDocumentViewXml(bufferedInput, filePath, basePath, ValidationExecutor.filePathToNodePath(documentViewXmlRootPath),
+        String documentViewXmlRootNodePath = DocViewParser.getDocumentViewXmlRootNodePath(bufferedInput, filePath);
+        if (documentViewXmlRootNodePath != null) {
+            messages.addAll(validateDocumentViewXml(bufferedInput, filePath, basePath, documentViewXmlRootNodePath,
                             nodePathsAndLineNumbers));
             
         } else {
@@ -113,51 +108,6 @@ public class DocumentViewParserValidator implements GenericJcrDataValidator {
        return messages;
     }
 
-    /** @param input the given input stream must be reset later on
-     * @param path
-     * @return either the path of the root node of the given docview xml or {@code null} if no docview xml given
-     * @throws IOException */
-    static Path getDocumentViewXmlRootPath(BufferedInputStream input, Path path) throws IOException {
-        Path name = path.getFileName();
-        Path rootPath = null;
-
-        int nameCount = path.getNameCount();
-        if (name.equals(Paths.get(Constants.DOT_CONTENT_XML))) {
-            if (nameCount > 1) {
-                rootPath = path.subpath(0, nameCount - 1);
-                // fix root mapping for http://jackrabbit.apache.org/filevault/vaultfs.html#Extended_File_aggregates
-                if (rootPath.toString().endsWith(EXTENDED_FILE_AGGREGATE_FOLDER_SUFFIX)) {
-                    rootPath = Paths.get(rootPath.toString().substring(0, rootPath.toString().length() - EXTENDED_FILE_AGGREGATE_FOLDER_SUFFIX.length()));
-                }
-            } else {
-                rootPath = Paths.get("");
-            }
-            // correct suffix matching
-        } else if (name.toString().endsWith(".xml")) {
-
-            // we need to rely on a buffered input stream to be able to reset it later
-            input.mark(1024);
-            // analyze content
-            // this closes the input source internally, therefore protect against closing
-            // make sure to initialize the SLF4J logger appropriately (for the XmlAnalyzer)
-            try {
-                if (DocViewParser.isDocView(new InputStreamReader(input, StandardCharsets.UTF_8))) {
-                    //  remove .xml extension
-                    String fileName = path.getFileName().toString();
-                    fileName = fileName.substring(0, fileName.length() - ".xml".length());
-                    if (nameCount > 1) {
-                        rootPath = path.subpath(0, nameCount - 1).resolve(fileName);
-                    } else {
-                        rootPath = Paths.get(fileName);
-                    }
-                }
-            } finally {
-                input.reset();
-            }
-        }
-        return rootPath;
-    }
-
     protected Collection<ValidationMessage> validateDocumentViewXml(InputStream input, @NotNull Path filePath, @NotNull Path basePath, String rootNodePath,
             Map<String, Integer> nodePathsAndLineNumbers) throws IOException {
         List<ValidationMessage> enrichedMessages = new LinkedList<>();
diff --git a/vault-validation/src/test/java/org/apache/jackrabbit/vault/validation/spi/impl/DocumentViewParserValidatorTest.java b/vault-validation/src/test/java/org/apache/jackrabbit/vault/validation/spi/impl/DocumentViewParserValidatorTest.java
deleted file mode 100644
index 6dbdb9f..0000000
--- a/vault-validation/src/test/java/org/apache/jackrabbit/vault/validation/spi/impl/DocumentViewParserValidatorTest.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.jackrabbit.vault.validation.spi.impl;
-
-import java.io.IOException;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-
-import org.junit.Assert;
-import org.junit.Test;
-
-
-public class DocumentViewParserValidatorTest {
-
-    @Test
-    public void testGetDocumentViewXmlRootPathFromContentXml() throws IOException {
-        Path filePath = Paths.get("test", "parent", ".content.xml");
-        Assert.assertEquals(Paths.get("test", "parent"), DocumentViewParserValidator.getDocumentViewXmlRootPath(null, filePath));
-    }
-
-    @Test
-    public void testGetDocumentViewXmlRootPathFromContentXmlBelowDotDir() throws IOException {
-        // http://jackrabbit.apache.org/filevault/vaultfs.html#Extended_File_aggregates
-        Path filePath = Paths.get("test", "parent.dir", ".content.xml");
-        Assert.assertEquals(Paths.get("test", "parent"), DocumentViewParserValidator.getDocumentViewXmlRootPath(null, filePath));
-    }
-}