You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by ds...@apache.org on 2022/09/07 21:43:54 UTC

[solr] branch branch_9x updated: SOLR-16296: Use SafeXMLParsing in XmlConfigFile and QueryElevationComponent (#962)

This is an automated email from the ASF dual-hosted git repository.

dsmiley pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new 2e55183c86f SOLR-16296: Use SafeXMLParsing in XmlConfigFile and QueryElevationComponent (#962)
2e55183c86f is described below

commit 2e55183c86ff6b3f562f887be64bc826708dbbd5
Author: Haythem <ha...@yahoo.fr>
AuthorDate: Wed Sep 7 18:27:32 2022 +0200

    SOLR-16296: Use SafeXMLParsing in XmlConfigFile and QueryElevationComponent (#962)
    
    Really a refactoring; no change in behavior or actual safety.
    
    Co-authored-by: David Smiley <ds...@apache.org>
---
 solr/CHANGES.txt                                   |  2 +
 .../java/org/apache/solr/core/XmlConfigFile.java   | 66 +++++-----------------
 .../handler/component/QueryElevationComponent.java | 24 ++++----
 .../java/org/apache/solr/util/SafeXMLParsing.java  | 35 +++++++++---
 4 files changed, 59 insertions(+), 68 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 00ae9c8cd1a..95fa6de2506 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -153,6 +153,8 @@ Other Changes
 
 * SOLR-16324: Upgrade commons-configuration2 to 2.8.0 and commons-text to 1.8 (Kevin Risden)
 
+* SOLR-16296: XmlConfigFile and QueryElevationComponent now use SafeXMLParsing. (Haythem Khiri)
+
 Build
 ---------------------
 * SOLR-16204: Change Lucene dependency to Lucene 9.1.0 (Elia Porciani via Alessandro Benedetti)
diff --git a/solr/core/src/java/org/apache/solr/core/XmlConfigFile.java b/solr/core/src/java/org/apache/solr/core/XmlConfigFile.java
index 1e88fa9bf38..080e2ee0bd6 100644
--- a/solr/core/src/java/org/apache/solr/core/XmlConfigFile.java
+++ b/solr/core/src/java/org/apache/solr/core/XmlConfigFile.java
@@ -22,6 +22,7 @@ import java.lang.invoke.MethodHandles;
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Map;
+import java.util.Objects;
 import java.util.Properties;
 import java.util.Set;
 import java.util.SortedMap;
@@ -30,18 +31,14 @@ import java.util.TreeMap;
 import java.util.TreeSet;
 import java.util.function.Function;
 import javax.xml.namespace.QName;
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.parsers.ParserConfigurationException;
 import javax.xml.xpath.XPath;
 import javax.xml.xpath.XPathConstants;
 import javax.xml.xpath.XPathExpressionException;
 import javax.xml.xpath.XPathFactory;
-import org.apache.commons.io.IOUtils;
 import org.apache.solr.cloud.ZkSolrResourceLoader;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.util.DOMUtil;
-import org.apache.solr.common.util.XMLErrorLogger;
+import org.apache.solr.util.SafeXMLParsing;
 import org.apache.solr.util.SystemIdResolver;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -59,37 +56,23 @@ import org.xml.sax.SAXException;
  */
 public class XmlConfigFile { // formerly simply "Config"
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-  private static final XMLErrorLogger xmllog = new XMLErrorLogger(log);
 
   static final XPathFactory xpathFactory = XPathFactory.newInstance();
 
   private final Document doc;
-  private final Document origDoc; // with unsubstituted properties
   private final String prefix;
   private final String name;
   private final SolrResourceLoader loader;
   private final Properties substituteProperties;
   private int zkVersion = -1;
 
-  /** Builds a config from a resource name with no xpath prefix. Does no property substitution. */
-  public XmlConfigFile(SolrResourceLoader loader, String name)
-      throws ParserConfigurationException, IOException, SAXException {
-    this(loader, name, null, null);
-  }
-
-  /** Builds a config. Does no property substitution. */
-  public XmlConfigFile(SolrResourceLoader loader, String name, InputSource is, String prefix)
-      throws ParserConfigurationException, IOException, SAXException {
-    this(loader, name, is, prefix, null);
-  }
-
   public XmlConfigFile(
       SolrResourceLoader loader,
       String name,
       InputSource is,
       String prefix,
       Properties substituteProps)
-      throws ParserConfigurationException, IOException, SAXException {
+      throws IOException {
     this(
         loader,
         s -> {
@@ -106,7 +89,7 @@ public class XmlConfigFile { // formerly simply "Config"
   }
 
   /**
-   * Builds a config:
+   * Builds a config.
    *
    * <p>Note that the 'name' parameter is used to obtain a valid input stream if no valid one is
    * provided through 'is'. If no valid stream is provided, a valid SolrResourceLoader instance
@@ -131,16 +114,13 @@ public class XmlConfigFile { // formerly simply "Config"
       String prefix,
       Properties substituteProps)
       throws IOException {
-    if (null == loader) throw new NullPointerException("loader");
-    this.loader = loader;
-
+    this.loader = Objects.requireNonNull(loader);
     this.substituteProperties = substituteProps;
     this.name = name;
     this.prefix = (prefix != null && !prefix.endsWith("/")) ? prefix + '/' : prefix;
-    try {
-      javax.xml.parsers.DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
 
-      if (is == null) {
+    try {
+      if (is == null && fileSupplier != null) {
         InputStream in = fileSupplier.apply(name);
         if (in instanceof ZkSolrResourceLoader.ZkByteArrayInputStream) {
           zkVersion = ((ZkSolrResourceLoader.ZkByteArrayInputStream) in).getStat().getVersion();
@@ -150,34 +130,18 @@ public class XmlConfigFile { // formerly simply "Config"
         is.setSystemId(SystemIdResolver.createSystemIdFromResourceName(name));
       }
 
-      // only enable xinclude, if a SystemId is available
-      if (is.getSystemId() != null) {
-        try {
-          dbf.setXIncludeAware(true);
-          dbf.setNamespaceAware(true);
-        } catch (UnsupportedOperationException e) {
-          log.warn("{} XML parser doesn't support XInclude option", name);
-        }
-      }
-
-      final DocumentBuilder db = dbf.newDocumentBuilder();
-      db.setEntityResolver(new SystemIdResolver(loader));
-      db.setErrorHandler(xmllog);
-      try {
-        doc = db.parse(is);
-        origDoc = doc;
-      } finally {
-        // some XML parsers are broken and don't close the byte stream (but they should according to
-        // spec)
-        IOUtils.closeQuietly(is.getByteStream());
-      }
-      if (substituteProps != null) {
-        DOMUtil.substituteProperties(doc, getSubstituteProperties());
+      if (is != null) {
+        doc = SafeXMLParsing.parseConfigXML(log, loader, is);
+      } else {
+        doc = SafeXMLParsing.parseConfigXML(log, loader, name);
       }
-    } catch (ParserConfigurationException | SAXException e) {
+    } catch (SAXException e) {
       SolrException.log(log, "Exception during parsing file: " + name, e);
       throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
     }
+    if (substituteProps != null) {
+      DOMUtil.substituteProperties(doc, getSubstituteProperties());
+    }
   }
 
   /*
diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java
index 6caf39f2bd8..711dd4d0cb8 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java
@@ -44,6 +44,7 @@ import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.NoSuchElementException;
+import java.util.Objects;
 import java.util.Queue;
 import java.util.Set;
 import java.util.SortedSet;
@@ -79,7 +80,6 @@ import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.common.util.StrUtils;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.core.SolrResourceNotFoundException;
-import org.apache.solr.core.XmlConfigFile;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.response.transform.ElevatedMarkerFactory;
 import org.apache.solr.response.transform.ExcludedMarkerFactory;
@@ -90,11 +90,14 @@ import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.search.SortSpec;
 import org.apache.solr.search.grouping.GroupingSpecification;
 import org.apache.solr.util.RefCounted;
+import org.apache.solr.util.SafeXMLParsing;
 import org.apache.solr.util.plugin.SolrCoreAware;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.w3c.dom.Document;
 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
 
 /**
  * A component to elevate some documents to the top of the result set.
@@ -376,10 +379,10 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
    *
    * @return The loaded {@link ElevationProvider}; not null.
    */
-  private ElevationProvider loadElevationProvider(SolrCore core) throws Exception {
-    XmlConfigFile cfg;
+  private ElevationProvider loadElevationProvider(SolrCore core) throws IOException, SAXException {
+    Document xmlDocument;
     try {
-      cfg = new XmlConfigFile(core.getResourceLoader(), configFileName);
+      xmlDocument = SafeXMLParsing.parseConfigXML(log, core.getResourceLoader(), configFileName);
     } catch (SolrResourceNotFoundException e) {
       String msg = "Missing config file \"" + configFileName + "\"";
       if (Files.exists(Path.of(core.getDataDir(), configFileName))) {
@@ -402,9 +405,7 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
       }
       throw e;
     }
-    ElevationProvider elevationProvider = loadElevationProvider(cfg);
-    assert elevationProvider != null;
-    return elevationProvider;
+    return Objects.requireNonNull(loadElevationProvider(xmlDocument));
   }
 
   /**
@@ -413,17 +414,20 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
    * @throws RuntimeException If the config does not provide an XML content of the expected format
    *     (either {@link RuntimeException} or {@link org.apache.solr.common.SolrException}).
    */
-  protected ElevationProvider loadElevationProvider(XmlConfigFile config) {
+  protected ElevationProvider loadElevationProvider(Document doc) {
+    if (!doc.getDocumentElement().getNodeName().equals("elevate")) {
+      throw new SolrException(
+          SolrException.ErrorCode.BAD_REQUEST, "Root element must be <elevate>");
+    }
     Map<ElevatingQuery, ElevationBuilder> elevationBuilderMap = new LinkedHashMap<>();
+    NodeList nodes = doc.getDocumentElement().getElementsByTagName("query");
     XPath xpath = XPathFactory.newInstance().newXPath();
-    NodeList nodes = (NodeList) config.evaluate("elevate/query", XPathConstants.NODESET);
     for (int i = 0; i < nodes.getLength(); i++) {
       Node node = nodes.item(i);
       String queryString = DOMUtil.getAttr(node, "text", "missing query 'text'");
       String matchString = DOMUtil.getAttr(node, "match");
       ElevatingQuery elevatingQuery =
           new ElevatingQuery(queryString, isSubsetMatchPolicy(matchString));
-
       NodeList children;
       try {
         children = (NodeList) xpath.evaluate("doc", node, XPathConstants.NODESET);
diff --git a/solr/core/src/java/org/apache/solr/util/SafeXMLParsing.java b/solr/core/src/java/org/apache/solr/util/SafeXMLParsing.java
index d9bce120a06..783fa038ee8 100644
--- a/solr/core/src/java/org/apache/solr/util/SafeXMLParsing.java
+++ b/solr/core/src/java/org/apache/solr/util/SafeXMLParsing.java
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.Reader;
 import java.io.StringReader;
+import java.util.Objects;
 import javax.xml.XMLConstants;
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
@@ -51,36 +52,56 @@ public final class SafeXMLParsing {
   private SafeXMLParsing() {}
 
   /**
-   * Parses a config file from ResourceLoader. Xinclude and external entities are enabled, but
-   * cannot escape the resource loader.
+   * Parses a config file from a Solr config based on ResourceLoader. Xinclude and external entities
+   * are enabled, but cannot escape the resource loader.
    */
   public static Document parseConfigXML(Logger log, ResourceLoader loader, String file)
       throws SAXException, IOException {
     try (InputStream in = loader.openResource(file)) {
+      DocumentBuilder db = configDocumentBuilder(loader, log);
+      return db.parse(in, SystemIdResolver.createSystemIdFromResourceName(file));
+    }
+  }
+
+  /**
+   * Parses a config file from a Solr config based on InputSource. Xinclude and external entities
+   * are enabled, but cannot escape the resource loader.
+   */
+  public static Document parseConfigXML(Logger log, ResourceLoader loader, InputSource is)
+      throws SAXException, IOException {
+    return configDocumentBuilder(loader, log).parse(is);
+  }
+
+  private static DocumentBuilder configDocumentBuilder(ResourceLoader loader, Logger log) {
+    try {
+      Objects.requireNonNull(loader);
       final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
-      dbf.setValidating(false);
       dbf.setNamespaceAware(true);
       trySetDOMFeature(dbf, XMLConstants.FEATURE_SECURE_PROCESSING, true);
+      // only enable xinclude, if systemId is available. this assumes it is the case as loader
+      // provides one.
       try {
         dbf.setXIncludeAware(true);
       } catch (UnsupportedOperationException e) {
         throw new SolrException(
             SolrException.ErrorCode.BAD_REQUEST, "XML parser doesn't support XInclude option", e);
       }
-
       final DocumentBuilder db = dbf.newDocumentBuilder();
       db.setEntityResolver(new SystemIdResolver(loader));
       db.setErrorHandler(new XMLErrorLogger(log));
-      return db.parse(in, SystemIdResolver.createSystemIdFromResourceName(file));
+      return db;
     } catch (ParserConfigurationException pce) {
       throw new SolrException(
           SolrException.ErrorCode.BAD_REQUEST, "XML parser cannot be configured", pce);
+    } catch (UnsupportedOperationException e) {
+      throw new SolrException(
+          SolrException.ErrorCode.BAD_REQUEST, "XML parser doesn't support XInclude option", e);
     }
   }
 
   /**
-   * Parses the given InputStream as XML, disabling any external entities with secure processing
-   * enabled. The given InputStream is not closed.
+   * Parses non-Solr configs as XML based on the given InputStream, disabling any external entities
+   * with secure processing enabled. The given InputStream is not closed.
    */
   public static Document parseUntrustedXML(Logger log, InputStream in)
       throws SAXException, IOException {