You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@lucene.apache.org by GitBox <gi...@apache.org> on 2021/02/05 22:54:17 UTC

[GitHub] [lucene-solr] muse-dev[bot] commented on a change in pull request #2306: SOLR-15121: Move XSLT (tr param) to scripting contrib

muse-dev[bot] commented on a change in pull request #2306:
URL: https://github.com/apache/lucene-solr/pull/2306#discussion_r571294563



##########
File path: solr/contrib/scripting/src/java/org/apache/solr/scripting/util/xslt/XSLTLoader.java
##########
@@ -0,0 +1,534 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.scripting.util.xslt;
+
+import javax.xml.parsers.SAXParserFactory;
+import javax.xml.stream.FactoryConfigurationError;
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamConstants;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.XMLStreamReader;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.dom.DOMResult;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.sax.SAXSource;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.lang.invoke.MethodHandles;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import com.google.common.collect.Lists;
+import org.apache.commons.io.IOUtils;
+import org.apache.solr.common.EmptyEntityResolver;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrException.ErrorCode;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.params.ShardParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.params.UpdateParams;
+import org.apache.solr.common.util.ContentStream;
+import org.apache.solr.common.util.ContentStreamBase;
+import org.apache.solr.common.util.StrUtils;
+import org.apache.solr.common.util.XMLErrorLogger;
+import org.apache.solr.core.SolrConfig;
+import org.apache.solr.handler.RequestHandlerUtils;
+import org.apache.solr.handler.UpdateRequestHandler;
+import org.apache.solr.handler.loader.ContentStreamLoader;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.update.AddUpdateCommand;
+import org.apache.solr.update.CommitUpdateCommand;
+import org.apache.solr.update.DeleteUpdateCommand;
+import org.apache.solr.update.RollbackUpdateCommand;
+import org.apache.solr.update.processor.UpdateRequestProcessor;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.InputSource;
+import org.xml.sax.XMLReader;
+
+import static org.apache.solr.common.params.CommonParams.ID;
+import static org.apache.solr.common.params.CommonParams.NAME;
+
+
+public class XSLTLoader extends ContentStreamLoader {
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+  private static final AtomicBoolean WARNED_ABOUT_INDEX_TIME_BOOSTS = new AtomicBoolean();
+  static final XMLErrorLogger xmllog = new XMLErrorLogger(log);
+  
+  public static final String CONTEXT_TRANSFORMER_KEY = "xsltupdater.transformer";
+
+  private static final String XSLT_CACHE_PARAM = "xsltCacheLifetimeSeconds"; 
+
+  public static final int XSLT_CACHE_DEFAULT = 60;
+  
+  int xsltCacheLifetimeSeconds;
+  XMLInputFactory inputFactory;
+  SAXParserFactory saxFactory;
+
+  @Override
+  public XSLTLoader init(SolrParams args) {
+    // Init StAX parser:
+    inputFactory = XMLInputFactory.newInstance();
+    EmptyEntityResolver.configureXMLInputFactory(inputFactory);
+    inputFactory.setXMLReporter(xmllog);
+    try {
+      // The java 1.6 bundled stax parser (sjsxp) does not currently have a thread-safe
+      // XMLInputFactory, as that implementation tries to cache and reuse the
+      // XMLStreamReader.  Setting the parser-specific "reuse-instance" property to false
+      // prevents this.
+      // All other known open-source stax parsers (and the bea ref impl)
+      // have thread-safe factories.
+      inputFactory.setProperty("reuse-instance", Boolean.FALSE);
+    } catch (IllegalArgumentException ex) {
+      // Other implementations will likely throw this exception since "reuse-instance"
+      // isimplementation specific.
+      log.debug("Unable to set the 'reuse-instance' property for the input chain: {}", inputFactory);
+    }
+    
+    // Init SAX parser (for XSL):
+    saxFactory = SAXParserFactory.newInstance();
+    saxFactory.setNamespaceAware(true); // XSL needs this!
+    EmptyEntityResolver.configureSAXParserFactory(saxFactory);
+    
+    xsltCacheLifetimeSeconds = XSLT_CACHE_DEFAULT;
+    if(args != null) {
+      xsltCacheLifetimeSeconds = args.getInt(XSLT_CACHE_PARAM,XSLT_CACHE_DEFAULT);
+      log.debug("xsltCacheLifetimeSeconds={}", xsltCacheLifetimeSeconds);
+    }
+    return this;
+  }
+
+  @Override
+  public String getDefaultWT() {
+    return "xml";
+  }
+
+  @Override
+  public void load(SolrQueryRequest req, SolrQueryResponse rsp, ContentStream stream, UpdateRequestProcessor processor) throws Exception {
+    final String charset = ContentStreamBase.getCharsetFromContentType(stream.getContentType());
+    
+    InputStream is = null;
+    XMLStreamReader parser = null;
+
+    String tr = req.getParams().get(XSLTParams.TR,null);
+    if(tr!=null) {
+      if (req.getCore().getCoreDescriptor().isConfigSetTrusted() == false) {
+          throw new SolrException(ErrorCode.UNAUTHORIZED, "The configset for this collection was uploaded without any authentication in place,"
+                  + " and this operation is not available for collections with untrusted configsets. To use this feature, re-upload the configset"
+                  + " after enabling authentication and authorization.");
+      }
+
+      final Transformer t = getTransformer(tr,req);
+      final DOMResult result = new DOMResult();
+      
+      // first step: read XML and build DOM using Transformer (this is no overhead, as XSL always produces
+      // an internal result DOM tree, we just access it directly as input for StAX):
+      try {
+        is = stream.getStream();
+        final InputSource isrc = new InputSource(is);
+        isrc.setEncoding(charset);
+        final XMLReader xmlr = saxFactory.newSAXParser().getXMLReader();
+        xmlr.setErrorHandler(xmllog);
+        xmlr.setEntityResolver(EmptyEntityResolver.SAX_INSTANCE);
+        final SAXSource source = new SAXSource(xmlr, isrc);
+        t.transform(source, result);
+      } catch(TransformerException te) {
+        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, te.getMessage(), te);
+      } finally {
+        IOUtils.closeQuietly(is);
+      }
+      // second step: feed the intermediate DOM tree into StAX parser:
+      try {
+        parser = inputFactory.createXMLStreamReader(new DOMSource(result.getNode()));

Review comment:
       *XXE_XMLSTREAMREADER:*  The XML parsing is vulnerable to XML External Entity attacks [(details)](https://find-sec-bugs.github.io/bugs.htm#XXE_XMLSTREAMREADER)




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@lucene.apache.org
For additional commands, e-mail: issues-help@lucene.apache.org