You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ge...@apache.org on 2020/05/22 14:08:38 UTC

[lucene-solr] branch master updated: SOLR-14443: Make SolrLogPostTool resilient to odd requests (#1525)

This is an automated email from the ASF dual-hosted git repository.

gerlowskija pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new 78f4a5b  SOLR-14443: Make SolrLogPostTool resilient to odd requests (#1525)
78f4a5b is described below

commit 78f4a5b8ff854861ac6ad17c27016e222463e54c
Author: Jason Gerlowski <ge...@apache.org>
AuthorDate: Fri May 22 10:08:26 2020 -0400

    SOLR-14443: Make SolrLogPostTool resilient to odd requests (#1525)
---
 .../java/org/apache/solr/util/SolrLogPostTool.java | 182 +++++++++++----------
 .../org/apache/solr/util/SolrLogPostToolTest.java  |  18 ++
 2 files changed, 114 insertions(+), 86 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/util/SolrLogPostTool.java b/solr/core/src/java/org/apache/solr/util/SolrLogPostTool.java
index a1b67dd..4e20f2c 100644
--- a/solr/core/src/java/org/apache/solr/util/SolrLogPostTool.java
+++ b/solr/core/src/java/org/apache/solr/util/SolrLogPostTool.java
@@ -27,6 +27,8 @@ import java.util.TreeMap;
 import java.util.UUID;
 import java.util.regex.Pattern;
 import java.util.regex.Matcher;
+
+import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.impl.HttpSolrClient;
 import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.request.UpdateRequest;
@@ -96,13 +98,11 @@ public class SolrLogPostTool {
 
             rec++;
             UUID id = UUID.randomUUID();
-            doc.addField("id", id.toString());
-            doc.addField("file_s", fileName);
+            doc.setField("id", id.toString());
+            doc.setField("file_s", fileName);
             request.add(doc);
             if (rec == 300) {
-              CLIO.out("Sending batch of 300 log records...");
-              request.process(client);
-              CLIO.out("Batch sent");
+              sendBatch(client, request, false /* normal batch */);
               request = new UpdateRequest();
               rec = 0;
             }
@@ -113,17 +113,35 @@ public class SolrLogPostTool {
       }
 
       if (rec > 0) {
-        //Process last batch
-        CLIO.out("Sending last batch ...");
-        request.process(client);
-        client.commit();
-        CLIO.out("Committed");
+        sendBatch(client, request, true /* last batch */);
       }
     } finally {
       client.close();
     }
   }
 
+  private static void sendBatch(SolrClient client, UpdateRequest request, boolean lastRequest) throws SolrServerException, IOException {
+    final String beginMessage = lastRequest ? "Sending last batch ..." : "Sending batch of 300 log records...";
+    CLIO.out(beginMessage);
+    try {
+      request.process(client);
+      CLIO.out("Batch sent");
+    } catch (Exception e) {
+      CLIO.err("Batch sending failed: " + e.getMessage());
+      e.printStackTrace(CLIO.getErrStream());
+    }
+
+    if (lastRequest) {
+      try {
+        client.commit();
+        CLIO.out("Committed");
+      } catch (Exception e) {
+        CLIO.err("Unable to commit documents: " + e.getMessage());
+        e.printStackTrace(CLIO.getErrStream());
+      }
+    }
+  }
+
   static void gatherFiles(File rootFile, List<File> files) {
 
     if(rootFile.isFile()) {
@@ -228,50 +246,48 @@ public class SolrLogPostTool {
       return null;
     }
 
+    private void setFieldIfUnset(SolrInputDocument doc, String fieldName, String fieldValue) {
+      if (doc.containsKey(fieldName)) return;
+
+      doc.setField(fieldName, fieldValue);
+    }
+
     private SolrInputDocument parseError(String line, String trace) throws IOException {
       SolrInputDocument doc = new SolrInputDocument();
-      doc.addField("date_dt", parseDate(line));
-      doc.addField("type_s", "error");
-      doc.addField("line_t", line);
+      doc.setField("date_dt", parseDate(line));
+      doc.setField("type_s", "error");
+      doc.setField("line_t", line);
 
       //Don't include traces that have only the %html header.
       if(trace != null && trace.length() > 6) {
-        doc.addField("stack_t", trace);
+        doc.setField("stack_t", trace);
       }
 
       if(this.cause != null) {
-        doc.addField("root_cause_t", cause.replace("Caused by:", "").trim());
+        doc.setField("root_cause_t", cause.replace("Caused by:", "").trim());
       }
 
-      doc.addField("collection_s", parseCollection(line));
-      doc.addField("core_s", parseCore(line));
-      doc.addField("shard_s", parseShard(line));
-      doc.addField("replica_s", parseReplica(line));
+      doc.setField("collection_s", parseCollection(line));
+      doc.setField("core_s", parseCore(line));
+      doc.setField("shard_s", parseShard(line));
+      doc.setField("replica_s", parseReplica(line));
 
       return doc;
     }
 
     private SolrInputDocument parseCommit(String line) throws IOException {
       SolrInputDocument doc = new SolrInputDocument();
-      doc.addField("date_dt", parseDate(line));
-      doc.addField("type_s", "commit");
-      doc.addField("line_t", line);
-      if(line.contains("softCommit=true")) {
-        doc.addField("soft_commit_s", "true");
-      } else {
-        doc.addField("soft_commit_s", "false");
-      }
+      doc.setField("date_dt", parseDate(line));
+      doc.setField("type_s", "commit");
+      doc.setField("line_t", line);
+      doc.setField("soft_commit_s", Boolean.toString(line.contains("softCommit=true")));
 
-      if(line.contains("openSearcher=true")) {
-        doc.addField("open_searcher_s", "true");
-      } else {
-        doc.addField("open_searcher_s", "false");
-      }
+      doc.setField("open_searcher_s", Boolean.toString(line.contains("openSearcher=true")));
 
-      doc.addField("collection_s", parseCollection(line));
-      doc.addField("core_s", parseCore(line));
-      doc.addField("shard_s", parseShard(line));
-      doc.addField("replica_s", parseReplica(line));
+      doc.setField("collection_s", parseCollection(line));
+      doc.setField("core_s", parseCore(line));
+      doc.setField("shard_s", parseShard(line));
+      doc.setField("replica_s", parseReplica(line));
 
       return doc;
     }
@@ -279,36 +295,36 @@ public class SolrLogPostTool {
     private SolrInputDocument parseQueryRecord(String line) {
 
       SolrInputDocument doc = new SolrInputDocument();
-      doc.addField("date_dt", parseDate(line));
-      doc.addField("qtime_i", parseQTime(line));
-      doc.addField("status_s", parseStatus(line));
+      doc.setField("date_dt", parseDate(line));
+      doc.setField("qtime_i", parseQTime(line));
+      doc.setField("status_s", parseStatus(line));
 
       String path = parsePath(line);
-      doc.addField("path_s", path);
+      doc.setField("path_s", path);
 
       if(line.contains("hits=")) {
-        doc.addField("hits_l", parseHits(line));
+        doc.setField("hits_l", parseHits(line));
       }
 
       String params = parseParams(line);
-      doc.addField("params_t", params);
+      doc.setField("params_t", params);
       addParams(doc, params);
 
-      doc.addField("collection_s", parseCollection(line));
-      doc.addField("core_s", parseCore(line));
-      doc.addField("node_s", parseNode(line));
-      doc.addField("shard_s", parseShard(line));
-      doc.addField("replica_s", parseReplica(line));
+      doc.setField("collection_s", parseCollection(line));
+      doc.setField("core_s", parseCore(line));
+      doc.setField("node_s", parseNode(line));
+      doc.setField("shard_s", parseShard(line));
+      doc.setField("replica_s", parseReplica(line));
 
 
       if(path != null && path.contains("/admin")) {
-        doc.addField("type_s", "admin");
+        doc.setField("type_s", "admin");
       } else if(path != null && params.contains("/replication")) {
-        doc.addField("type_s", "replication");
+        doc.setField("type_s", "replication");
       } else if (path != null && path.contains("/get")) {
-        doc.addField("type_s", "get");
+        doc.setField("type_s", "get");
       } else {
-        doc.addField("type_s", "query");
+        doc.setField("type_s", "query");
       }
 
       return doc;
@@ -318,10 +334,10 @@ public class SolrLogPostTool {
     private SolrInputDocument parseNewSearch(String line) {
 
       SolrInputDocument doc = new SolrInputDocument();
-      doc.addField("date_dt", parseDate(line));
-      doc.addField("core_s", parseNewSearcherCore(line));
-      doc.addField("type_s", "newSearcher");
-      doc.addField("line_t", line);
+      doc.setField("date_dt", parseDate(line));
+      doc.setField("core_s", parseNewSearcherCore(line));
+      doc.setField("type_s", "newSearcher");
+      doc.setField("line_t", line);
 
       return doc;
     }
@@ -338,21 +354,21 @@ public class SolrLogPostTool {
 
     private SolrInputDocument parseUpdate(String line) {
       SolrInputDocument doc = new SolrInputDocument();
-      doc.addField("date_dt", parseDate(line));
+      doc.setField("date_dt", parseDate(line));
 
       if(line.contains("deleteByQuery=")) {
-        doc.addField("type_s", "deleteByQuery");
+        doc.setField("type_s", "deleteByQuery");
       } else if(line.contains("delete=")) {
-        doc.addField("type_s", "delete");
+        doc.setField("type_s", "delete");
       } else {
-        doc.addField("type_s", "update");
+        doc.setField("type_s", "update");
       }
 
-      doc.addField("collection_s", parseCollection(line));
-      doc.addField("core_s", parseCore(line));
-      doc.addField("shard_s", parseShard(line));
-      doc.addField("replica_s", parseReplica(line));
-      doc.addField("line_t", line);
+      doc.setField("collection_s", parseCollection(line));
+      doc.setField("core_s", parseCore(line));
+      doc.setField("shard_s", parseShard(line));
+      doc.setField("replica_s", parseReplica(line));
+      doc.setField("line_t", line);
 
       return doc;
     }
@@ -474,47 +490,51 @@ public class SolrLogPostTool {
       return builder.toString();
     }
 
+    private void addOrReplaceFieldValue(SolrInputDocument doc, String fieldName, String fieldValue) {
+      doc.setField(fieldName, fieldValue);
+    }
+
     private void addParams(SolrInputDocument doc,  String params) {
       String[] pairs = params.split("&");
       for(String pair : pairs) {
         String[] parts = pair.split("=");
         if(parts.length == 2 && parts[0].equals("q")) {
           String dq = URLDecoder.decode(parts[1], Charset.defaultCharset());
-          doc.addField("q_s", dq);
-          doc.addField("q_t", dq);
+          setFieldIfUnset(doc, "q_s", dq);
+          setFieldIfUnset(doc, "q_t", dq);
         }
 
         if(parts[0].equals("rows")) {
           String dr = URLDecoder.decode(parts[1], Charset.defaultCharset());
-          doc.addField("rows_i", dr);
+          setFieldIfUnset(doc, "rows_i", dr);
         }
 
         if(parts[0].equals("distrib")) {
           String dr = URLDecoder.decode(parts[1], Charset.defaultCharset());
-          doc.addField("distrib_s", dr);
+          setFieldIfUnset(doc, "distrib_s", dr);
         }
 
         if(parts[0].equals("shards")) {
-          doc.addField("shards_s", "true");
+          setFieldIfUnset(doc, "shards_s", "true");
         }
 
         if(parts[0].equals("ids") && !isRTGRequest(doc)) {
-          doc.addField("ids_s", "true");
+          setFieldIfUnset(doc, "ids_s", "true");
         }
 
         if(parts[0].equals("isShard")) {
           String dr = URLDecoder.decode(parts[1], Charset.defaultCharset());
-          doc.addField("isShard_s", dr);
+          setFieldIfUnset(doc, "isShard_s", dr);
         }
 
         if(parts[0].equals("wt")) {
           String dr = URLDecoder.decode(parts[1], Charset.defaultCharset());
-          doc.addField("wt_s", dr);
+          setFieldIfUnset(doc, "wt_s", dr);
         }
 
         if(parts[0].equals("facet")) {
           String dr = URLDecoder.decode(parts[1], Charset.defaultCharset());
-          doc.addField("facet_s", dr);
+          setFieldIfUnset(doc, "facet_s", dr);
         }
 
         if(parts[0].equals("shards.purpose")) {
@@ -533,19 +553,9 @@ public class SolrLogPostTool {
       //Special params used to determine what stage a query is.
       //So we populate with defaults.
       //The absence of the distrib params means its a distributed query.
-
-
-      if(doc.getField("distrib_s") == null) {
-        doc.addField("distrib_s", "true");
-      }
-
-      if(doc.getField("shards_s") == null) {
-        doc.addField("shards_s", "false");
-      }
-
-      if(doc.getField("ids_s") == null) {
-        doc.addField("ids_s", "false");
-      }
+      setFieldIfUnset(doc, "distrib_s", "true");
+      setFieldIfUnset(doc, "shards_s", "false");
+      setFieldIfUnset(doc, "ids_s", "false");
     }
 
     private boolean isRTGRequest(SolrInputDocument doc) {
diff --git a/solr/core/src/test/org/apache/solr/util/SolrLogPostToolTest.java b/solr/core/src/test/org/apache/solr/util/SolrLogPostToolTest.java
index 10f7b8c..c65f5f1 100644
--- a/solr/core/src/test/org/apache/solr/util/SolrLogPostToolTest.java
+++ b/solr/core/src/test/org/apache/solr/util/SolrLogPostToolTest.java
@@ -75,6 +75,24 @@ public class SolrLogPostToolTest extends SolrTestCaseJ4 {
     assertEquals("REFINE_FACETS", purposes[1].toString());
   }
 
+  // Requests which have multiple copies of the same param should be parsed so that the first param value only is
+  // indexed, since the log schema expects many of these to be single-valued fields and will throw errors if multiple
+  // values are received.
+  @Test
+  public void testRecordsFirstInstanceOfSingleValuedParams() throws Exception {
+    final String record = "2019-12-09 15:05:01.931 INFO  (qtp2103763750-21) [c:logs4 s:shard1 r:core_node2 x:logs4_shard1_replica_n1] o.a.s.c.S.Request [logs4_shard1_replica_n1]  webapp=/solr path=/select params={q=*:*&q=inStock:true&_=1575835181759&shards.purpose=36&isShard=true&wt=javabin&wt=xml&distrib=false} hits=234868 status=0 QTime=8\n";
+
+    List<SolrInputDocument> docs = readDocs(record);
+    assertEquals(docs.size(), 1);
+    SolrInputDocument doc = docs.get(0);
+
+    assertEquals(doc.getFieldValues("q_s").size(), 1);
+    assertEquals(doc.getFieldValue("q_s"), "*:*");
+
+    assertEquals(doc.getFieldValues("wt_s").size(), 1);
+    assertEquals(doc.getFieldValue("wt_s"), "javabin");
+  }
+
   @Test
   public void testRTGRecord() throws Exception {
     final String record = "2020-03-19 20:00:30.845 INFO  (qtp1635378213-20354) [c:logs4 s:shard8 r:core_node63 x:logs4_shard8_replica_n60] o.a.s.c.S.Request [logs4_shard8_replica_n60]  webapp=/solr path=/get params={qt=/get&_stateVer_=logs4:104&ids=id1&ids=id2&ids=id3&wt=javabin&version=2} status=0 QTime=61";