You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sling.apache.org by ro...@apache.org on 2017/11/07 09:46:57 UTC

[sling-org-apache-sling-jcr-contentloader] 31/36: SLING-857 Apply patch by Vidar Ramdal (Thanks alot) to support XSLT for XML imports

This is an automated email from the ASF dual-hosted git repository.

rombert pushed a commit to annotated tag org.apache.sling.jcr.contentloader-2.0.4-incubator
in repository https://gitbox.apache.org/repos/asf/sling-org-apache-sling-jcr-contentloader.git

commit 3e3e882b821fef938030ea62fe264a65990ac6be
Author: Felix Meschberger <fm...@apache.org>
AuthorDate: Fri Feb 27 20:04:57 2009 +0000

    SLING-857 Apply patch by Vidar Ramdal (Thanks alot) to support XSLT
      for XML imports
    
    git-svn-id: https://svn.apache.org/repos/asf/incubator/sling/trunk/bundles/jcr/contentloader@748677 13f79535-47bb-0310-9956-ffa450edef68
---
 .../jcr/contentloader/internal/ContentReader.java  |  10 +-
 .../sling/jcr/contentloader/internal/Loader.java   |  23 +--
 .../contentloader/internal/readers/JsonReader.java |  20 ++-
 .../contentloader/internal/readers/XmlReader.java  | 185 +++++++++++++++++++--
 .../contentloader/internal/readers/ZipReader.java  |  55 +++---
 5 files changed, 232 insertions(+), 61 deletions(-)

diff --git a/src/main/java/org/apache/sling/jcr/contentloader/internal/ContentReader.java b/src/main/java/org/apache/sling/jcr/contentloader/internal/ContentReader.java
index 2d5aaf1..fef05d9 100644
--- a/src/main/java/org/apache/sling/jcr/contentloader/internal/ContentReader.java
+++ b/src/main/java/org/apache/sling/jcr/contentloader/internal/ContentReader.java
@@ -19,7 +19,7 @@
 package org.apache.sling.jcr.contentloader.internal;
 
 import java.io.IOException;
-import java.io.InputStream;
+import java.net.URL;
 
 import javax.jcr.RepositoryException;
 
@@ -30,13 +30,11 @@ import javax.jcr.RepositoryException;
 public interface ContentReader {
 
     /**
-     * Read the content from the input stream and create the
+     * Read the content from the URL and create the
      * content throught the provided content creator.
-     * The content reader should not close the input stream, this is
-     * done by the calling component!
-     * @param ins The input stream.
+     * @param url The URL to read the content from.
      * @throws IOException
      */
-    void parse(InputStream ins, ContentCreator creator) throws IOException, RepositoryException;
+    void parse(URL url, ContentCreator creator) throws IOException, RepositoryException;
 
 }
diff --git a/src/main/java/org/apache/sling/jcr/contentloader/internal/Loader.java b/src/main/java/org/apache/sling/jcr/contentloader/internal/Loader.java
index 8db1b94..ef134f6 100644
--- a/src/main/java/org/apache/sling/jcr/contentloader/internal/Loader.java
+++ b/src/main/java/org/apache/sling/jcr/contentloader/internal/Loader.java
@@ -482,8 +482,7 @@ public class Loader {
             }
 
             this.contentCreator.prepareParsing(parent, toPlainName(name));
-            ins = resourceUrl.openStream();
-            nodeReader.parse(ins, this.contentCreator);
+            nodeReader.parse(resourceUrl, this.contentCreator);
 
             return this.contentCreator.getRootNode();
         } catch (RepositoryException re) {
@@ -690,16 +689,16 @@ public class Loader {
             // the xml might not be System or Document View export, fall back
             // to old-style XML reading
             log.info(
-                "importSystemView: XML {} does not seem to be system view export, trying old style",
-                nodeXML);
+                "importSystemView: XML {} does not seem to be system view export, trying old style; cause: {}",
+                nodeXML, isde.toString());
             return null;
 
         } catch (RepositoryException re) {
 
             // any other repository related issue...
             log.info(
-                "importSystemView: Repository issue loading XML {}, trying old style",
-                nodeXML);
+                "importSystemView: Repository issue loading XML {}, trying old style; cause: {}",
+                nodeXML, re.toString());
             return null;
 
         } finally {
@@ -760,25 +759,15 @@ public class Loader {
             return null;
         }
 
-        InputStream ins = null;
         try {
-
-            ins = descriptor.rootNodeDescriptor.openStream();
             this.contentCreator.prepareParsing(session.getRootNode(), null);
-            descriptor.nodeReader.parse(ins, this.contentCreator);
+            descriptor.nodeReader.parse(descriptor.rootNodeDescriptor, this.contentCreator);
 
             return descriptor.rootNodeDescriptor;
         } catch (RepositoryException re) {
             throw re;
         } catch (Throwable t) {
             throw new RepositoryException(t.getMessage(), t);
-        } finally {
-            if (ins != null) {
-                try {
-                    ins.close();
-                } catch (IOException ignore) {
-                }
-            }
         }
 
     }
diff --git a/src/main/java/org/apache/sling/jcr/contentloader/internal/readers/JsonReader.java b/src/main/java/org/apache/sling/jcr/contentloader/internal/readers/JsonReader.java
index 19d7277..8a4c441 100644
--- a/src/main/java/org/apache/sling/jcr/contentloader/internal/readers/JsonReader.java
+++ b/src/main/java/org/apache/sling/jcr/contentloader/internal/readers/JsonReader.java
@@ -68,10 +68,25 @@ public class JsonReader implements ContentReader {
     };
 
     /**
-     * @see org.apache.sling.jcr.contentloader.internal.ContentReader#parse(java.io.InputStream, org.apache.sling.jcr.contentloader.internal.ContentCreator)
+     * @see org.apache.sling.jcr.contentloader.internal.ContentReader#parse(java.net.URL, org.apache.sling.jcr.contentloader.internal.ContentCreator)
      */
-    public void parse(InputStream ins, ContentCreator contentCreator)
+    public void parse(java.net.URL url, ContentCreator contentCreator)
     throws IOException, RepositoryException {
+        InputStream ins = null;
+        try {
+            ins = url.openStream();
+            parse(ins, contentCreator);
+        } finally {
+            if (ins != null) {
+                try {
+                    ins.close();
+                } catch (IOException ignore) {
+                }
+            }
+        }
+    }
+
+    public void parse(InputStream ins, ContentCreator contentCreator) throws IOException, RepositoryException {
         try {
             String jsonString = toString(ins).trim();
             if (!jsonString.startsWith("{")) {
@@ -80,7 +95,6 @@ public class JsonReader implements ContentReader {
 
             JSONObject json = new JSONObject(jsonString);
             this.createNode(null, json, contentCreator);
-
         } catch (JSONException je) {
             throw (IOException) new IOException(je.getMessage()).initCause(je);
         }
diff --git a/src/main/java/org/apache/sling/jcr/contentloader/internal/readers/XmlReader.java b/src/main/java/org/apache/sling/jcr/contentloader/internal/readers/XmlReader.java
index af3fa6a..6b6fcaf 100644
--- a/src/main/java/org/apache/sling/jcr/contentloader/internal/readers/XmlReader.java
+++ b/src/main/java/org/apache/sling/jcr/contentloader/internal/readers/XmlReader.java
@@ -18,13 +18,29 @@
  */
 package org.apache.sling.jcr.contentloader.internal.readers;
 
+import java.io.BufferedInputStream;
+import java.io.Closeable;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.PipedInputStream;
+import java.io.PipedOutputStream;
+import java.net.URL;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import javax.jcr.PropertyType;
 import javax.jcr.RepositoryException;
+import javax.xml.transform.Source;
+import javax.xml.transform.Templates;
+import javax.xml.transform.TransformerConfigurationException;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.stream.StreamResult;
+import javax.xml.transform.stream.StreamSource;
 
 import org.apache.sling.jcr.contentloader.internal.ContentCreator;
 import org.apache.sling.jcr.contentloader.internal.ContentReader;
@@ -73,6 +89,7 @@ public class XmlReader implements ContentReader {
      * --> </properties> </node>
      */
 
+    /** Name of the XML element describing a node. */
     private static final String ELEM_NODE = "node";
 
     private static final String ELEM_PRIMARY_NODE_TYPE = "primaryNodeType";
@@ -89,6 +106,10 @@ public class XmlReader implements ContentReader {
 
     private static final String ELEM_TYPE = "type";
 
+    private static final String XML_STYLESHEET_PROCESSING_INSTRUCTION = "xml-stylesheet";
+
+    private static final String HREF_ATTRIBUTE = "href";
+
     public static final ImportProvider PROVIDER = new ImportProvider() {
         private XmlReader xmlReader;
 
@@ -103,7 +124,6 @@ public class XmlReader implements ContentReader {
             return xmlReader;
         }
     };
-
     private KXmlParser xmlParser;
 
     XmlReader() {
@@ -114,24 +134,31 @@ public class XmlReader implements ContentReader {
 
 
     /**
-     * @see org.apache.sling.jcr.contentloader.internal.ContentReader#parse(java.io.InputStream, org.apache.sling.jcr.contentloader.internal.ContentCreator)
+     * @see org.apache.sling.jcr.contentloader.internal.ContentReader#parse(java.net.URL, org.apache.sling.jcr.contentloader.internal.ContentCreator)
      */
-    public synchronized void parse(InputStream ins, ContentCreator creator)
-    throws IOException, RepositoryException {
+    public synchronized void parse(java.net.URL url, ContentCreator creator)
+            throws IOException, RepositoryException {
+        BufferedInputStream bufferedInput = null;
         try {
-            this.parseInternal(ins, creator);
+            // We need to buffer input, so that we can reset the stream if we encounter an XSL stylesheet reference
+            bufferedInput = new BufferedInputStream(url.openStream());
+            parseInternal(bufferedInput, creator, url);
         } catch (XmlPullParserException xppe) {
             throw (IOException) new IOException(xppe.getMessage()).initCause(xppe);
+        } finally {
+            closeStream(bufferedInput);
         }
     }
 
-    private void parseInternal(InputStream ins, ContentCreator creator)
-    throws IOException, XmlPullParserException, RepositoryException {
+    private void parseInternal(InputStream bufferedInput, ContentCreator creator, java.net.URL xmlLocation) throws XmlPullParserException, IOException, RepositoryException {
         final StringBuffer contentBuffer = new StringBuffer();
-
+        // Mark the beginning of the stream. We assume that if there's an XSL processing instruction,
+        // it will occur in the first gulp - which makes sense, as processing instructions must be
+        // specified before the root element of an XML file.
+        bufferedInput.mark(bufferedInput.available());
         // set the parser input, use null encoding to force detection with
         // <?xml?>
-        this.xmlParser.setInput(ins, null);
+        this.xmlParser.setInput(bufferedInput, null);
 
         NodeDescription.SHARED.clear();
         PropertyDescription.SHARED.clear();
@@ -140,8 +167,26 @@ public class XmlReader implements ContentReader {
         PropertyDescription currentProperty = null;
         String currentElement;
 
+
         int eventType = this.xmlParser.getEventType();
         while (eventType != XmlPullParser.END_DOCUMENT) {
+            if (eventType == XmlPullParser.PROCESSING_INSTRUCTION) {
+                ProcessingInstruction pi = new ProcessingInstruction(this.xmlParser.getText());
+                // Look for a reference to an XSL stylesheet
+                if (pi.getName().equals(XML_STYLESHEET_PROCESSING_INSTRUCTION)) {
+                    // Rewind the input stream to the beginning, so that it can be transformed with XSL
+                    bufferedInput.reset();
+                    // Pipe the XML input through the XSL transformer
+                    XslTransformerStream transformerStream = new XslTransformerStream(bufferedInput, pi.getAttribute(HREF_ATTRIBUTE), xmlLocation);
+                    // Start the transformer thread
+                    transformerStream.startTransform();
+                    // Re-run the XML parser, now with the transformed XML
+                    parseInternal(transformerStream, creator, xmlLocation);
+                    transformerStream.close();
+                    return;
+
+                }
+            }
             if (eventType == XmlPullParser.START_TAG) {
 
                 currentElement = this.xmlParser.getName();
@@ -196,11 +241,90 @@ public class XmlReader implements ContentReader {
                     currentNode.addMixinType(content);
                 }
 
-            } else if (eventType == XmlPullParser.TEXT) {
+            } else if (eventType == XmlPullParser.TEXT || eventType == XmlPullParser.CDSECT) {
                 contentBuffer.append(this.xmlParser.getText());
             }
 
-            eventType = this.xmlParser.next();
+            eventType = this.xmlParser.nextToken();
+        }
+    }
+
+    /**
+     * Takes an XML input stream and pipes it through an XSL transformer; the transformed XML is read from this stream.
+     * Callers must call {@link #startTransform} before reading from the stream, or the reader will wait indefinitely for input.
+     */
+    private static class XslTransformerStream extends PipedInputStream {
+        private InputStream inputXml;
+        private String xslHref;
+        private Thread transformerThread;
+        private PipedOutputStream pipedOut;
+        private URL xmlLocation;
+
+        /**
+         * Instantiate the XslTransformerStream and connect a PipedOutputStream to it.
+         * @param inputXml XML to be transformed.
+         * @param xslHref Location of the XSL stylesheet; resolved against <code>xmlLocation</code> in {@link #startTransform}
+         * @param xmlLocation URL of the XML document, used as the base URL for resolving <code>xslHref</code>
+         * @throws IOException if the PipedOutputStream cannot be connected to this stream
+         */
+        public XslTransformerStream(InputStream inputXml, String xslHref, URL xmlLocation) throws IOException {
+            super();
+            this.inputXml = inputXml;
+            this.xslHref = xslHref;
+            this.transformerThread = null;
+            this.pipedOut = new PipedOutputStream(this);
+            this.xmlLocation = xmlLocation;
+        }
+
+        /**
+         * Starts the XSL transformer in a new thread, so that it can pipe its output to our <code>PipedInputStream</code>.
+         * @throws IOException if <code>xslHref</code> cannot be resolved against <code>xmlLocation</code> to form a valid URL
+         */
+        public void startTransform() throws IOException {
+            final URL xslResource = new java.net.URL(xmlLocation, this.xslHref); // resolve the href relative to the XML document
+
+/*
+            if (xslResource == null) {
+                throw new IOException("Could not find " + xslHref);
+            }
+*/
+
+            transformerThread = new Thread(
+                    new Runnable() {
+                        public void run() {
+                            try {
+                                Source xml = new StreamSource(inputXml);
+                                Source xsl = new StreamSource(xslResource.toExternalForm());
+                                final StreamResult streamResult;
+                                final Templates templates = TransformerFactory.newInstance().newTemplates(xsl);
+                                streamResult = new StreamResult(pipedOut); // transformer output becomes this stream's input
+                                templates.newTransformer().transform(xml, streamResult);
+                            } catch (TransformerConfigurationException e) {
+                                // NOTE(review): thrown on the transformer thread, not visible to the thread reading the pipe - confirm intended
+                                throw new RuntimeException("Error initializing XSL transformer", e);
+                            } catch (TransformerException e) {
+                                throw new RuntimeException("Error transforming", e);
+                            } finally {
+                                closeStream(pipedOut); // always close the write end so the reader is not left waiting
+                            }
+                        }
+                    }
+                    , "XslTransformerThread");
+            transformerThread.start();
+        }
+
+
+    }
+
+    /**
+     * Closes the given stream if it is not <code>null</code>, silently ignoring any IOException raised while closing.
+     * @param closeable Stream to close; may be <code>null</code>, in which case nothing happens
+     */
+    private static void closeStream(Closeable closeable) {
+        if (closeable != null) {
+            try {
+                closeable.close();
+            } catch (IOException ignore) {
+            }
+        }
+    }
 
@@ -213,7 +337,7 @@ public class XmlReader implements ContentReader {
         public List<String> mixinTypes;
 
         public static NodeDescription create(NodeDescription desc, ContentCreator creator)
-        throws RepositoryException {
+                throws RepositoryException {
             if ( desc != null ) {
                 creator.createNode(desc.name, desc.primaryNodeType, desc.getMixinTypes());
                 desc.clear();
@@ -250,7 +374,7 @@ public class XmlReader implements ContentReader {
         public static PropertyDescription SHARED = new PropertyDescription();
 
         public static PropertyDescription create(PropertyDescription desc, ContentCreator creator)
-        throws RepositoryException {
+                throws RepositoryException {
             int type = (desc.type == null ? PropertyType.STRING : PropertyType.valueFromName(desc.type));
             if ( desc.isMultiValue ) {
                 creator.createProperty(desc.name, type, desc.getPropertyValues());
@@ -293,4 +417,39 @@ public class XmlReader implements ContentReader {
             this.isMultiValue = false;
         }
     }
+
+    /**
+     * Represents an XML processing instruction, parsed from its text content.<br />
+     * A processing instruction like <code>&lt;?xml-stylesheet href="stylesheet.xsl" type="text/xsl"?&gt;</code>
+     * will have <code>name</code> == <code>"xml-stylesheet"</code> and two attributes: <code>href</code> and <code>type</code>.
+     */
+    private static class ProcessingInstruction {
+
+        private Map<String, String> attributes = new HashMap<String, String>();
+        private static final Pattern ATTRIBUTE_PATTERN = Pattern.compile("\\s(.[^=\\s]*)\\s?=\\s?\"(.[^\"]*)\""); // name="value" pairs; double-quoted values only
+        private static final Pattern NAME_PATTERN = Pattern.compile("^(.[^\\s\\?>]*)"); // leading token up to the first whitespace, '?' or '>'
+        private String name;
+
+        public ProcessingInstruction(String text) throws IOException {
+            final Matcher nameMatcher = NAME_PATTERN.matcher(text);
+            if (!nameMatcher.find()) {
+                throw new IOException("Malformed processing instruction: " + text);
+            }
+
+            this.name = nameMatcher.group(1);
+            final Matcher attributeMatcher = ATTRIBUTE_PATTERN.matcher(text);
+            while (attributeMatcher.find()) {
+                attributes.put(attributeMatcher.group(1), attributeMatcher.group(2)); // a repeated attribute name keeps the last value
+            }
+        }
+
+        public String getName() {
+            return name;
+        }
+
+        public String getAttribute(String key) {
+            return this.attributes.get(key); // null when the attribute is absent
+        }
+
+    }
 }
diff --git a/src/main/java/org/apache/sling/jcr/contentloader/internal/readers/ZipReader.java b/src/main/java/org/apache/sling/jcr/contentloader/internal/readers/ZipReader.java
index 4308f4d..e4debee 100644
--- a/src/main/java/org/apache/sling/jcr/contentloader/internal/readers/ZipReader.java
+++ b/src/main/java/org/apache/sling/jcr/contentloader/internal/readers/ZipReader.java
@@ -70,34 +70,45 @@ public class ZipReader implements ContentReader {
     }
 
     /**
-     * @see org.apache.sling.jcr.contentloader.internal.ContentReader#parse(java.io.InputStream, org.apache.sling.jcr.contentloader.internal.ContentCreator)
+     * @see org.apache.sling.jcr.contentloader.internal.ContentReader#parse(java.net.URL, org.apache.sling.jcr.contentloader.internal.ContentCreator)
      */
-    public void parse(InputStream ins, ContentCreator creator)
+    public void parse(java.net.URL url, ContentCreator creator)
     throws IOException, RepositoryException {
-        creator.createNode(null, NT_FOLDER, null);
-        final ZipInputStream zis = new ZipInputStream(ins);
-        ZipEntry entry;
-        do {
-            entry = zis.getNextEntry();
-            if ( entry != null ) {
-                if ( !entry.isDirectory() ) {
-                    String name = entry.getName();
-                    int pos = name.lastIndexOf('/');
-                    if ( pos != -1 ) {
-                        creator.switchCurrentNode(name.substring(0, pos), NT_FOLDER);
-                    }
-                    creator.createFileAndResourceNode(name, new CloseShieldInputStream(zis), null, entry.getTime());
-                    creator.finishNode();
-                    creator.finishNode();
-                    if ( pos != -1 ) {
+        InputStream ins = null;
+        try {
+            ins = url.openStream();
+            creator.createNode(null, NT_FOLDER, null);
+            final ZipInputStream zis = new ZipInputStream(ins);
+            ZipEntry entry;
+            do {
+                entry = zis.getNextEntry();
+                if ( entry != null ) {
+                    if ( !entry.isDirectory() ) {
+                        String name = entry.getName();
+                        int pos = name.lastIndexOf('/');
+                        if ( pos != -1 ) {
+                            creator.switchCurrentNode(name.substring(0, pos), NT_FOLDER);
+                        }
+                        creator.createFileAndResourceNode(name, new CloseShieldInputStream(zis), null, entry.getTime());
+                        creator.finishNode();
                         creator.finishNode();
+                        if ( pos != -1 ) {
+                            creator.finishNode();
+                        }
                     }
+                    zis.closeEntry();
                 }
-                zis.closeEntry();
-            }
 
-        } while ( entry != null );
-        creator.finishNode();
+            } while ( entry != null );
+            creator.finishNode();
+        } finally {
+            if (ins != null) {
+                try {
+                    ins.close();
+                } catch (IOException ignore) {
+                }
+            }
+        }
     }
 
 }

-- 
To stop receiving notification emails like this one, please contact
"commits@sling.apache.org" <co...@sling.apache.org>.