You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by kw...@apache.org on 2022/02/21 14:18:03 UTC
[jackrabbit-filevault] branch master updated: JCRVLT-357 add method to DocViewParser for converting file name to root (#211)
This is an automated email from the ASF dual-hosted git repository.
kwin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/jackrabbit-filevault.git
The following commit(s) were added to refs/heads/master by this push:
new 4f7683e JCRVLT-357 add method to DocViewParser for converting file name to root (#211)
4f7683e is described below
commit 4f7683ec0dce669f3c84d6f95ede85a97503265f
Author: Konrad Windszus <kw...@apache.org>
AuthorDate: Mon Feb 21 15:17:10 2022 +0100
JCRVLT-357 add method to DocViewParser for converting file name to root (#211)
node path
---
.../jackrabbit/vault/fs/io/DocViewParser.java | 73 +++++++++++++++++++++-
.../jackrabbit/vault/fs/io/DocViewParserTest.java | 66 +++++++++++++++++++
.../org/apache/jackrabbit/vault/fs/io/docview.xml | 30 +++++++++
.../apache/jackrabbit/vault/fs/io/non-docview.xml | 18 ++++++
.../spi/impl/DocumentViewParserValidator.java | 56 +----------------
.../spi/impl/DocumentViewParserValidatorTest.java | 41 ------------
6 files changed, 187 insertions(+), 97 deletions(-)
diff --git a/vault-core/src/main/java/org/apache/jackrabbit/vault/fs/io/DocViewParser.java b/vault-core/src/main/java/org/apache/jackrabbit/vault/fs/io/DocViewParser.java
index a9438c0..570438f 100644
--- a/vault-core/src/main/java/org/apache/jackrabbit/vault/fs/io/DocViewParser.java
+++ b/vault-core/src/main/java/org/apache/jackrabbit/vault/fs/io/DocViewParser.java
@@ -16,10 +16,14 @@
*/
package org.apache.jackrabbit.vault.fs.io;
+import java.io.BufferedInputStream;
import java.io.IOException;
+import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.Objects;
import javax.jcr.Session;
@@ -28,20 +32,26 @@ import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
+import org.apache.commons.io.FilenameUtils;
import org.apache.jackrabbit.spi.commons.namespace.NamespaceResolver;
import org.apache.jackrabbit.spi.commons.namespace.SessionNamespaceResolver;
import org.apache.jackrabbit.vault.fs.impl.io.DocViewSAXHandler;
+import org.apache.jackrabbit.vault.util.Constants;
+import org.apache.jackrabbit.vault.util.PlatformNameFormat;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
+import org.osgi.annotation.versioning.ProviderType;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
/**
- * This is a thread-safe SAX parser which deals with docview files and passes them to a given {@link DocViewParserHandler}.
+ * This is a thread-safe SAX parser which deals with <a href="https://jackrabbit.apache.org/filevault/docview.html">FileVault Document View XML files</a>
+ * and passes them to a given {@link DocViewParserHandler}.
*
*/
-public class DocViewParser {
+@ProviderType
+public final class DocViewParser {
private final @Nullable NamespaceResolver resolver;
@@ -126,6 +136,7 @@ public class DocViewParser {
/**
*
+ * Checks whether the given {@link InputSource} complies with the Document View XML format.
* @param source the source to analyze
* @return {@code true} in case the given source is Document View XML format
* @throws IOException if an I/O error occurs
@@ -139,12 +150,13 @@ public class DocViewParser {
}
/**
+ * Checks whether the content provided by the given {@link Reader} complies with the Document View XML format.
* Don't forget to reset the reader or use a new reader before parsing the xml.
* @param reader the reader from which to read the XML
* @return {@code true} in case the given source is Document View XML format
* @throws IOException
*/
- public static boolean isDocView(Reader reader) throws IOException {
+ private static boolean isDocView(Reader reader) throws IOException {
// read a couple of chars...1024 should be enough
char[] buffer = new char[1024];
int pos = 0;
@@ -160,6 +172,61 @@ public class DocViewParser {
return str.contains("<jcr:root") && str.contains("\"http://www.jcp.org/jcr/1.0\"");
}
+
+ /**
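+ * Converts the given file path to the absolute repository path of its root node, provided that the file is either named {@code .content.xml} or is an {@code .xml} file whose content complies with the Document View XML format.
+ * @param input the stream providing the file's content; it is only reset after this method returns if it is a {@link BufferedInputStream}, any other stream is wrapped internally and may be left partially consumed
+ * @param filePath the file path of the file containing the potential docview xml, must be relative to the jcr_root directory (an absolute path is rejected with an {@link IllegalArgumentException})
+ * @return either the absolute repository path of the root node of the given docview xml or {@code null} if no docview xml given
+ * @throws IOException if an I/O error occurs while reading from or resetting the given input */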
+ public static @Nullable String getDocumentViewXmlRootNodePath(InputStream input, Path filePath) throws IOException {
+ if (filePath.isAbsolute()) {
+ throw new IllegalArgumentException("The filePath parameter must be given as relative path!");
+ }
+
+ if (!(input instanceof BufferedInputStream)) {
+ input = new BufferedInputStream(input, 1024);
+ }
+ Path name = filePath.getFileName();
+ Path rootPath = null;
+ int nameCount = filePath.getNameCount();
+ if (name.equals(Paths.get(Constants.DOT_CONTENT_XML))) {
+ // get parent path
+ if (nameCount > 1) {
+ rootPath = filePath.getParent();
+ } else {
+ rootPath = Paths.get("");
+ }
+ // correct suffix matching
+ } else if (name.toString().endsWith(".xml")) {
+
+ // we need to rely on a buffered input stream to be able to reset it later
+ input.mark(1024);
+ // analyze content
+ // do not close the reader: closing it would also close the input, which still has to be reset in the finally block
+ // isDocView only looks at the beginning of the content (it reads into a 1024 char buffer)
+ try {
+ if (DocViewParser.isDocView(new InputStreamReader(input, StandardCharsets.UTF_8))) {
+ // remove .xml extension
+ String fileName = filePath.getFileName().toString();
+ fileName = fileName.substring(0, fileName.length() - ".xml".length());
+ if (nameCount > 1) {
+ rootPath = filePath.getParent().resolve(fileName);
+ } else {
+ rootPath = Paths.get(fileName);
+ }
+ }
+ } finally {
+ input.reset();
+ }
+ }
+ if (rootPath == null) {
+ return null;
+ }
+ String platformPath = FilenameUtils.separatorsToUnix(rootPath.toString());
+ return "/" + PlatformNameFormat.getRepositoryPath(platformPath, true);
+ }
+
private SAXParser createSaxParser() throws ParserConfigurationException, SAXException {
SAXParserFactory factory = SAXParserFactory.newInstance();
factory.setNamespaceAware(true);
diff --git a/vault-core/src/test/java/org/apache/jackrabbit/vault/fs/io/DocViewParserTest.java b/vault-core/src/test/java/org/apache/jackrabbit/vault/fs/io/DocViewParserTest.java
new file mode 100644
index 0000000..9d71587
--- /dev/null
+++ b/vault-core/src/test/java/org/apache/jackrabbit/vault/fs/io/DocViewParserTest.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.vault.fs.io;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class DocViewParserTest {
+
+ @Test
+ public void testGetDocumentViewXmlRootPathFromContentXml() throws IOException {
+ Path filePath = Paths.get("test", "parent", ".content.xml");
+ Assert.assertEquals("/test/parent", DocViewParser.getDocumentViewXmlRootNodePath(null, filePath));
+ }
+
+ @Test
+ public void testGetDocumentViewXmlRootPathFromContentXmlBelowDotDir() throws IOException {
+ // http://jackrabbit.apache.org/filevault/vaultfs.html#Extended_File_aggregates
+ Path filePath = Paths.get("test", "parent.dir", ".content.xml");
+ Assert.assertEquals("/test/parent", DocViewParser.getDocumentViewXmlRootNodePath(null, filePath));
+ }
+
+ @Test
+ public void testGetDocumentViewXmlRootPathFromEscapedFilename() throws IOException {
+ // the escaped file name "_cq_test%3aimage" must be unescaped to the node name "cq:test:image"
+ Path filePath = Paths.get("test", "parent", "_cq_test%3aimage.xml");
+ try (InputStream inputStream = getClass().getResourceAsStream("docview.xml")) {
+ Assert.assertEquals("/test/parent/cq:test:image", DocViewParser.getDocumentViewXmlRootNodePath(inputStream, filePath));
+ }
+ }
+
+ @Test
+ public void testGetDocumentViewXmlRootPathFromNonXmlFile() throws IOException {
+ // files without the ".xml" extension never have a root node path (their content is not read, hence the null input)
+ Path filePath = Paths.get("test", "parent", "test.jpg");
+ Assert.assertNull(DocViewParser.getDocumentViewXmlRootNodePath(null, filePath));
+ }
+
+ @Test
+ public void testGetDocumentViewXmlRootPathFromNonDocviewXmlFile() throws IOException {
+ // ".xml" files whose content is not in Document View XML format have no root node path
+ Path filePath = Paths.get("test", "parent", "test.xml");
+ try (InputStream inputStream = getClass().getResourceAsStream("non-docview.xml")) {
+ Assert.assertNull(DocViewParser.getDocumentViewXmlRootNodePath(inputStream, filePath));
+ }
+ }
+}
diff --git a/vault-core/src/test/resources/org/apache/jackrabbit/vault/fs/io/docview.xml b/vault-core/src/test/resources/org/apache/jackrabbit/vault/fs/io/docview.xml
new file mode 100644
index 0000000..ab1236a
--- /dev/null
+++ b/vault-core/src/test/resources/org/apache/jackrabbit/vault/fs/io/docview.xml
@@ -0,0 +1,30 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<jcr:root xmlns:jcr="http://www.jcp.org/jcr/1.0"
+ jcr:primaryType="nt:unstructured"
+ jcr:title="Test node">
+ <!-- My Comment -->
+ <testChild
+ jcr:primaryType="nt:unstructured"
+ foo="bar"
+ TestProperty="test"
+ testProperty="test">
+ <!-- MyComment2 -->
+ <grandchild/>
+ </testChild>
+</jcr:root>
diff --git a/vault-core/src/test/resources/org/apache/jackrabbit/vault/fs/io/non-docview.xml b/vault-core/src/test/resources/org/apache/jackrabbit/vault/fs/io/non-docview.xml
new file mode 100644
index 0000000..b4c5a55
--- /dev/null
+++ b/vault-core/src/test/resources/org/apache/jackrabbit/vault/fs/io/non-docview.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<some-element></some-element>
\ No newline at end of file
diff --git a/vault-validation/src/main/java/org/apache/jackrabbit/vault/validation/spi/impl/DocumentViewParserValidator.java b/vault-validation/src/main/java/org/apache/jackrabbit/vault/validation/spi/impl/DocumentViewParserValidator.java
index 7b66d33..c915041 100644
--- a/vault-validation/src/main/java/org/apache/jackrabbit/vault/validation/spi/impl/DocumentViewParserValidator.java
+++ b/vault-validation/src/main/java/org/apache/jackrabbit/vault/validation/spi/impl/DocumentViewParserValidator.java
@@ -16,13 +16,9 @@
*/
package org.apache.jackrabbit.vault.validation.spi.impl;
-import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
-import java.nio.file.Paths;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList;
@@ -35,7 +31,6 @@ import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.jackrabbit.spi.commons.namespace.NamespaceResolver;
import org.apache.jackrabbit.vault.fs.io.DocViewParser;
import org.apache.jackrabbit.vault.fs.io.DocViewParser.XmlParseException;
-import org.apache.jackrabbit.vault.util.Constants;
import org.apache.jackrabbit.vault.validation.ValidationExecutor;
import org.apache.jackrabbit.vault.validation.ValidationViolation;
import org.apache.jackrabbit.vault.validation.impl.util.EnhancedBufferedInputStream;
@@ -100,9 +95,9 @@ public class DocumentViewParserValidator implements GenericJcrDataValidator {
// wrap input stream as buffered input stream (to be able to reset it and for performance reasons)
final EnhancedBufferedInputStream bufferedInput = new EnhancedBufferedInputStream(input);
- Path documentViewXmlRootPath = getDocumentViewXmlRootPath(bufferedInput, filePath);
- if (documentViewXmlRootPath != null) {
- messages.addAll(validateDocumentViewXml(bufferedInput, filePath, basePath, ValidationExecutor.filePathToNodePath(documentViewXmlRootPath),
+ String documentViewXmlRootNodePath = DocViewParser.getDocumentViewXmlRootNodePath(bufferedInput, filePath);
+ if (documentViewXmlRootNodePath != null) {
+ messages.addAll(validateDocumentViewXml(bufferedInput, filePath, basePath, documentViewXmlRootNodePath,
nodePathsAndLineNumbers));
} else {
@@ -113,51 +108,6 @@ public class DocumentViewParserValidator implements GenericJcrDataValidator {
return messages;
}
- /** @param input the given input stream must be reset later on
- * @param path
- * @return either the path of the root node of the given docview xml or {@code null} if no docview xml given
- * @throws IOException */
- static Path getDocumentViewXmlRootPath(BufferedInputStream input, Path path) throws IOException {
- Path name = path.getFileName();
- Path rootPath = null;
-
- int nameCount = path.getNameCount();
- if (name.equals(Paths.get(Constants.DOT_CONTENT_XML))) {
- if (nameCount > 1) {
- rootPath = path.subpath(0, nameCount - 1);
- // fix root mapping for http://jackrabbit.apache.org/filevault/vaultfs.html#Extended_File_aggregates
- if (rootPath.toString().endsWith(EXTENDED_FILE_AGGREGATE_FOLDER_SUFFIX)) {
- rootPath = Paths.get(rootPath.toString().substring(0, rootPath.toString().length() - EXTENDED_FILE_AGGREGATE_FOLDER_SUFFIX.length()));
- }
- } else {
- rootPath = Paths.get("");
- }
- // correct suffix matching
- } else if (name.toString().endsWith(".xml")) {
-
- // we need to rely on a buffered input stream to be able to reset it later
- input.mark(1024);
- // analyze content
- // this closes the input source internally, therefore protect against closing
- // make sure to initialize the SLF4J logger appropriately (for the XmlAnalyzer)
- try {
- if (DocViewParser.isDocView(new InputStreamReader(input, StandardCharsets.UTF_8))) {
- // remove .xml extension
- String fileName = path.getFileName().toString();
- fileName = fileName.substring(0, fileName.length() - ".xml".length());
- if (nameCount > 1) {
- rootPath = path.subpath(0, nameCount - 1).resolve(fileName);
- } else {
- rootPath = Paths.get(fileName);
- }
- }
- } finally {
- input.reset();
- }
- }
- return rootPath;
- }
-
protected Collection<ValidationMessage> validateDocumentViewXml(InputStream input, @NotNull Path filePath, @NotNull Path basePath, String rootNodePath,
Map<String, Integer> nodePathsAndLineNumbers) throws IOException {
List<ValidationMessage> enrichedMessages = new LinkedList<>();
diff --git a/vault-validation/src/test/java/org/apache/jackrabbit/vault/validation/spi/impl/DocumentViewParserValidatorTest.java b/vault-validation/src/test/java/org/apache/jackrabbit/vault/validation/spi/impl/DocumentViewParserValidatorTest.java
deleted file mode 100644
index 6dbdb9f..0000000
--- a/vault-validation/src/test/java/org/apache/jackrabbit/vault/validation/spi/impl/DocumentViewParserValidatorTest.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.jackrabbit.vault.validation.spi.impl;
-
-import java.io.IOException;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-
-import org.junit.Assert;
-import org.junit.Test;
-
-
-public class DocumentViewParserValidatorTest {
-
- @Test
- public void testGetDocumentViewXmlRootPathFromContentXml() throws IOException {
- Path filePath = Paths.get("test", "parent", ".content.xml");
- Assert.assertEquals(Paths.get("test", "parent"), DocumentViewParserValidator.getDocumentViewXmlRootPath(null, filePath));
- }
-
- @Test
- public void testGetDocumentViewXmlRootPathFromContentXmlBelowDotDir() throws IOException {
- // http://jackrabbit.apache.org/filevault/vaultfs.html#Extended_File_aggregates
- Path filePath = Paths.get("test", "parent.dir", ".content.xml");
- Assert.assertEquals(Paths.get("test", "parent"), DocumentViewParserValidator.getDocumentViewXmlRootPath(null, filePath));
- }
-}