You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@lucene.apache.org by GitBox <gi...@apache.org> on 2021/02/06 19:09:50 UTC

[GitHub] [lucene-solr] TomMD commented on a change in pull request #2306: SOLR-15121: Move XSLT (tr param) to scripting contrib

TomMD commented on a change in pull request #2306:
URL: https://github.com/apache/lucene-solr/pull/2306#discussion_r571474014



##########
File path: solr/contrib/scripting/src/java/org/apache/solr/scripting/xslt/XSLTLoader.java
##########
@@ -0,0 +1,534 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.scripting.xslt;
+
+import javax.xml.parsers.SAXParserFactory;
+import javax.xml.stream.FactoryConfigurationError;
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamConstants;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.XMLStreamReader;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.dom.DOMResult;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.sax.SAXSource;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.lang.invoke.MethodHandles;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import com.google.common.collect.Lists;
+import org.apache.commons.io.IOUtils;
+import org.apache.solr.common.EmptyEntityResolver;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrException.ErrorCode;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.params.ShardParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.params.UpdateParams;
+import org.apache.solr.common.util.ContentStream;
+import org.apache.solr.common.util.ContentStreamBase;
+import org.apache.solr.common.util.StrUtils;
+import org.apache.solr.common.util.XMLErrorLogger;
+import org.apache.solr.core.SolrConfig;
+import org.apache.solr.handler.RequestHandlerUtils;
+import org.apache.solr.handler.UpdateRequestHandler;
+import org.apache.solr.handler.loader.ContentStreamLoader;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.update.AddUpdateCommand;
+import org.apache.solr.update.CommitUpdateCommand;
+import org.apache.solr.update.DeleteUpdateCommand;
+import org.apache.solr.update.RollbackUpdateCommand;
+import org.apache.solr.update.processor.UpdateRequestProcessor;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.InputSource;
+import org.xml.sax.XMLReader;
+
+import static org.apache.solr.common.params.CommonParams.ID;
+import static org.apache.solr.common.params.CommonParams.NAME;
+
+
+public class XSLTLoader extends ContentStreamLoader {
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+  private static final AtomicBoolean WARNED_ABOUT_INDEX_TIME_BOOSTS = new AtomicBoolean();
+  static final XMLErrorLogger xmllog = new XMLErrorLogger(log);
+  
+  public static final String CONTEXT_TRANSFORMER_KEY = "xsltupdater.transformer";
+
+  private static final String XSLT_CACHE_PARAM = "xsltCacheLifetimeSeconds"; 
+
+  public static final int XSLT_CACHE_DEFAULT = 60;
+  
+  int xsltCacheLifetimeSeconds;
+  XMLInputFactory inputFactory;
+  SAXParserFactory saxFactory;
+
+  /**
+   * Initializes the XML parser factories used by this loader and reads the XSLT cache lifetime.
+   *
+   * <p>Creates a StAX {@link XMLInputFactory} and a namespace-aware {@link SAXParserFactory},
+   * passing both through {@link EmptyEntityResolver}'s configuration helpers, and stores them
+   * in the corresponding fields.
+   *
+   * @param args loader parameters; may be {@code null}, in which case
+   *             {@link #XSLT_CACHE_DEFAULT} is kept as the cache lifetime
+   * @return this loader instance
+   */
+  @Override
+  public XSLTLoader init(SolrParams args) {
+    // Init StAX parser:
+    inputFactory = XMLInputFactory.newInstance();
+    EmptyEntityResolver.configureXMLInputFactory(inputFactory);
+    inputFactory.setXMLReporter(xmllog);
+    try {
+      // The java 1.6 bundled stax parser (sjsxp) does not currently have a thread-safe
+      // XMLInputFactory, as that implementation tries to cache and reuse the
+      // XMLStreamReader.  Setting the parser-specific "reuse-instance" property to false
+      // prevents this.
+      // All other known open-source stax parsers (and the bea ref impl)
+      // have thread-safe factories.
+      inputFactory.setProperty("reuse-instance", Boolean.FALSE);
+    } catch (IllegalArgumentException ex) {
+      // Other implementations will likely throw this exception since "reuse-instance"
+      // is implementation specific. The failure is deliberately not propagated; it is
+      // only recorded at debug level.
+      log.debug("Unable to set the 'reuse-instance' property for the input chain: {}", inputFactory);
+    }
+    
+    // Init SAX parser (for XSL):
+    saxFactory = SAXParserFactory.newInstance();
+    saxFactory.setNamespaceAware(true); // XSL needs this!
+    EmptyEntityResolver.configureSAXParserFactory(saxFactory);
+    
+    // Start from the default so a null args still leaves a usable cache lifetime.
+    xsltCacheLifetimeSeconds = XSLT_CACHE_DEFAULT;
+    if(args != null) {
+      xsltCacheLifetimeSeconds = args.getInt(XSLT_CACHE_PARAM,XSLT_CACHE_DEFAULT);
+      log.debug("xsltCacheLifetimeSeconds={}", xsltCacheLifetimeSeconds);
+    }
+    return this;
+  }
+
+  /**
+   * Returns the default "wt" value for this loader.
+   *
+   * @return the constant string {@code "xml"}
+   */
+  @Override
+  public String getDefaultWT() {
+    return "xml";
+  }
+
+  @Override
+  public void load(SolrQueryRequest req, SolrQueryResponse rsp, ContentStream stream, UpdateRequestProcessor processor) throws Exception {
+    final String charset = ContentStreamBase.getCharsetFromContentType(stream.getContentType());
+    
+    InputStream is = null;
+    XMLStreamReader parser = null;
+
+    String tr = req.getParams().get(XSLTParams.TR,null);
+    if(tr!=null) {
+      if (req.getCore().getCoreDescriptor().isConfigSetTrusted() == false) {
+          throw new SolrException(ErrorCode.UNAUTHORIZED, "The configset for this collection was uploaded without any authentication in place,"
+                  + " and this operation is not available for collections with untrusted configsets. To use this feature, re-upload the configset"
+                  + " after enabling authentication and authorization.");
+      }
+
+      final Transformer t = getTransformer(tr,req);
+      final DOMResult result = new DOMResult();
+      
+      // first step: read XML and build DOM using Transformer (this is no overhead, as XSL always produces
+      // an internal result DOM tree, we just access it directly as input for StAX):
+      try {
+        is = stream.getStream();
+        final InputSource isrc = new InputSource(is);
+        isrc.setEncoding(charset);
+        final XMLReader xmlr = saxFactory.newSAXParser().getXMLReader();
+        xmlr.setErrorHandler(xmllog);
+        xmlr.setEntityResolver(EmptyEntityResolver.SAX_INSTANCE);
+        final SAXSource source = new SAXSource(xmlr, isrc);
+        t.transform(source, result);
+      } catch(TransformerException te) {
+        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, te.getMessage(), te);
+      } finally {
+        IOUtils.closeQuietly(is);
+      }
+      // second step: feed the intermediate DOM tree into StAX parser:
+      try {
+        parser = inputFactory.createXMLStreamReader(new DOMSource(result.getNode()));

Review comment:
       Oof, yeah @uschindler we feel your pain and are thinking about how to solve the problem.
   
   The bot is a bit like a dog with a bone. Once it finds an issue, you can be sure any commit that "moves" the issue in a way that changes the identifier (e.g., changing the function name) will make it appear again as a "new" issue.  We are thinking about how to make better stable names so the above dismissal would also cover this case.
   
   A new `ignore bug` command is available: if you comment with only `ignore bug`, then muse will consider the issue resolved for the purposes of the status bar below, and we'll use this data in the ML.  At this time I don't think GitHub has an API to automatically resolve the comment, but that is on our minds too.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@lucene.apache.org
For additional commands, e-mail: issues-help@lucene.apache.org