You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ge...@apache.org on 2020/05/22 14:08:38 UTC
[lucene-solr] branch master updated: SOLR-14443: Make
SolrLogPostTool resilient to odd requests (#1525)
This is an automated email from the ASF dual-hosted git repository.
gerlowskija pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/master by this push:
new 78f4a5b SOLR-14443: Make SolrLogPostTool resilient to odd requests (#1525)
78f4a5b is described below
commit 78f4a5b8ff854861ac6ad17c27016e222463e54c
Author: Jason Gerlowski <ge...@apache.org>
AuthorDate: Fri May 22 10:08:26 2020 -0400
SOLR-14443: Make SolrLogPostTool resilient to odd requests (#1525)
---
.../java/org/apache/solr/util/SolrLogPostTool.java | 182 +++++++++++----------
.../org/apache/solr/util/SolrLogPostToolTest.java | 18 ++
2 files changed, 114 insertions(+), 86 deletions(-)
diff --git a/solr/core/src/java/org/apache/solr/util/SolrLogPostTool.java b/solr/core/src/java/org/apache/solr/util/SolrLogPostTool.java
index a1b67dd..4e20f2c 100644
--- a/solr/core/src/java/org/apache/solr/util/SolrLogPostTool.java
+++ b/solr/core/src/java/org/apache/solr/util/SolrLogPostTool.java
@@ -27,6 +27,8 @@ import java.util.TreeMap;
import java.util.UUID;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
+
+import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.request.UpdateRequest;
@@ -96,13 +98,11 @@ public class SolrLogPostTool {
rec++;
UUID id = UUID.randomUUID();
- doc.addField("id", id.toString());
- doc.addField("file_s", fileName);
+ doc.setField("id", id.toString());
+ doc.setField("file_s", fileName);
request.add(doc);
if (rec == 300) {
- CLIO.out("Sending batch of 300 log records...");
- request.process(client);
- CLIO.out("Batch sent");
+ sendBatch(client, request, false /* normal batch */);
request = new UpdateRequest();
rec = 0;
}
@@ -113,17 +113,35 @@ public class SolrLogPostTool {
}
if (rec > 0) {
- //Process last batch
- CLIO.out("Sending last batch ...");
- request.process(client);
- client.commit();
- CLIO.out("Committed");
+ sendBatch(client, request, true /* last batch */);
}
} finally {
client.close();
}
}
+ private static void sendBatch(SolrClient client, UpdateRequest request, boolean lastRequest) throws SolrServerException, IOException {
+ final String beginMessage = lastRequest ? "Sending last batch ..." : "Sending batch of 300 log records...";
+ CLIO.out(beginMessage);
+ try {
+ request.process(client);
+ CLIO.out("Batch sent");
+ } catch (Exception e) {
+ CLIO.err("Batch sending failed: " + e.getMessage());
+ e.printStackTrace(CLIO.getErrStream());
+ }
+
+ if (lastRequest) {
+ try {
+ client.commit();
+ CLIO.out("Committed");
+ } catch (Exception e) {
+ CLIO.err("Unable to commit documents: " + e.getMessage());
+ e.printStackTrace(CLIO.getErrStream());
+ }
+ }
+ }
+
static void gatherFiles(File rootFile, List<File> files) {
if(rootFile.isFile()) {
@@ -228,50 +246,48 @@ public class SolrLogPostTool {
return null;
}
+ private void setFieldIfUnset(SolrInputDocument doc, String fieldName, String fieldValue) {
+ if (doc.containsKey(fieldName)) return;
+
+ doc.setField(fieldName, fieldValue);
+ }
+
private SolrInputDocument parseError(String line, String trace) throws IOException {
SolrInputDocument doc = new SolrInputDocument();
- doc.addField("date_dt", parseDate(line));
- doc.addField("type_s", "error");
- doc.addField("line_t", line);
+ doc.setField("date_dt", parseDate(line));
+ doc.setField("type_s", "error");
+ doc.setField("line_t", line);
//Don't include traces that have only the %html header.
if(trace != null && trace.length() > 6) {
- doc.addField("stack_t", trace);
+ doc.setField("stack_t", trace);
}
if(this.cause != null) {
- doc.addField("root_cause_t", cause.replace("Caused by:", "").trim());
+ doc.setField("root_cause_t", cause.replace("Caused by:", "").trim());
}
- doc.addField("collection_s", parseCollection(line));
- doc.addField("core_s", parseCore(line));
- doc.addField("shard_s", parseShard(line));
- doc.addField("replica_s", parseReplica(line));
+ doc.setField("collection_s", parseCollection(line));
+ doc.setField("core_s", parseCore(line));
+ doc.setField("shard_s", parseShard(line));
+ doc.setField("replica_s", parseReplica(line));
return doc;
}
private SolrInputDocument parseCommit(String line) throws IOException {
SolrInputDocument doc = new SolrInputDocument();
- doc.addField("date_dt", parseDate(line));
- doc.addField("type_s", "commit");
- doc.addField("line_t", line);
- if(line.contains("softCommit=true")) {
- doc.addField("soft_commit_s", "true");
- } else {
- doc.addField("soft_commit_s", "false");
- }
+ doc.setField("date_dt", parseDate(line));
+ doc.setField("type_s", "commit");
+ doc.setField("line_t", line);
+ doc.setField("soft_commit_s", Boolean.toString(line.contains("softCommit=true")));
- if(line.contains("openSearcher=true")) {
- doc.addField("open_searcher_s", "true");
- } else {
- doc.addField("open_searcher_s", "false");
- }
+ doc.setField("open_searcher_s", Boolean.toString(line.contains("openSearcher=true")));
- doc.addField("collection_s", parseCollection(line));
- doc.addField("core_s", parseCore(line));
- doc.addField("shard_s", parseShard(line));
- doc.addField("replica_s", parseReplica(line));
+ doc.setField("collection_s", parseCollection(line));
+ doc.setField("core_s", parseCore(line));
+ doc.setField("shard_s", parseShard(line));
+ doc.setField("replica_s", parseReplica(line));
return doc;
}
@@ -279,36 +295,36 @@ public class SolrLogPostTool {
private SolrInputDocument parseQueryRecord(String line) {
SolrInputDocument doc = new SolrInputDocument();
- doc.addField("date_dt", parseDate(line));
- doc.addField("qtime_i", parseQTime(line));
- doc.addField("status_s", parseStatus(line));
+ doc.setField("date_dt", parseDate(line));
+ doc.setField("qtime_i", parseQTime(line));
+ doc.setField("status_s", parseStatus(line));
String path = parsePath(line);
- doc.addField("path_s", path);
+ doc.setField("path_s", path);
if(line.contains("hits=")) {
- doc.addField("hits_l", parseHits(line));
+ doc.setField("hits_l", parseHits(line));
}
String params = parseParams(line);
- doc.addField("params_t", params);
+ doc.setField("params_t", params);
addParams(doc, params);
- doc.addField("collection_s", parseCollection(line));
- doc.addField("core_s", parseCore(line));
- doc.addField("node_s", parseNode(line));
- doc.addField("shard_s", parseShard(line));
- doc.addField("replica_s", parseReplica(line));
+ doc.setField("collection_s", parseCollection(line));
+ doc.setField("core_s", parseCore(line));
+ doc.setField("node_s", parseNode(line));
+ doc.setField("shard_s", parseShard(line));
+ doc.setField("replica_s", parseReplica(line));
if(path != null && path.contains("/admin")) {
- doc.addField("type_s", "admin");
+ doc.setField("type_s", "admin");
} else if(path != null && params.contains("/replication")) {
- doc.addField("type_s", "replication");
+ doc.setField("type_s", "replication");
} else if (path != null && path.contains("/get")) {
- doc.addField("type_s", "get");
+ doc.setField("type_s", "get");
} else {
- doc.addField("type_s", "query");
+ doc.setField("type_s", "query");
}
return doc;
@@ -318,10 +334,10 @@ public class SolrLogPostTool {
private SolrInputDocument parseNewSearch(String line) {
SolrInputDocument doc = new SolrInputDocument();
- doc.addField("date_dt", parseDate(line));
- doc.addField("core_s", parseNewSearcherCore(line));
- doc.addField("type_s", "newSearcher");
- doc.addField("line_t", line);
+ doc.setField("date_dt", parseDate(line));
+ doc.setField("core_s", parseNewSearcherCore(line));
+ doc.setField("type_s", "newSearcher");
+ doc.setField("line_t", line);
return doc;
}
@@ -338,21 +354,21 @@ public class SolrLogPostTool {
private SolrInputDocument parseUpdate(String line) {
SolrInputDocument doc = new SolrInputDocument();
- doc.addField("date_dt", parseDate(line));
+ doc.setField("date_dt", parseDate(line));
if(line.contains("deleteByQuery=")) {
- doc.addField("type_s", "deleteByQuery");
+ doc.setField("type_s", "deleteByQuery");
} else if(line.contains("delete=")) {
- doc.addField("type_s", "delete");
+ doc.setField("type_s", "delete");
} else {
- doc.addField("type_s", "update");
+ doc.setField("type_s", "update");
}
- doc.addField("collection_s", parseCollection(line));
- doc.addField("core_s", parseCore(line));
- doc.addField("shard_s", parseShard(line));
- doc.addField("replica_s", parseReplica(line));
- doc.addField("line_t", line);
+ doc.setField("collection_s", parseCollection(line));
+ doc.setField("core_s", parseCore(line));
+ doc.setField("shard_s", parseShard(line));
+ doc.setField("replica_s", parseReplica(line));
+ doc.setField("line_t", line);
return doc;
}
@@ -474,47 +490,51 @@ public class SolrLogPostTool {
return builder.toString();
}
+ private void addOrReplaceFieldValue(SolrInputDocument doc, String fieldName, String fieldValue) {
+ doc.setField(fieldName, fieldValue);
+ }
+
private void addParams(SolrInputDocument doc, String params) {
String[] pairs = params.split("&");
for(String pair : pairs) {
String[] parts = pair.split("=");
if(parts.length == 2 && parts[0].equals("q")) {
String dq = URLDecoder.decode(parts[1], Charset.defaultCharset());
- doc.addField("q_s", dq);
- doc.addField("q_t", dq);
+ setFieldIfUnset(doc, "q_s", dq);
+ setFieldIfUnset(doc, "q_t", dq);
}
if(parts[0].equals("rows")) {
String dr = URLDecoder.decode(parts[1], Charset.defaultCharset());
- doc.addField("rows_i", dr);
+ setFieldIfUnset(doc, "rows_i", dr);
}
if(parts[0].equals("distrib")) {
String dr = URLDecoder.decode(parts[1], Charset.defaultCharset());
- doc.addField("distrib_s", dr);
+ setFieldIfUnset(doc, "distrib_s", dr);
}
if(parts[0].equals("shards")) {
- doc.addField("shards_s", "true");
+ setFieldIfUnset(doc, "shards_s", "true");
}
if(parts[0].equals("ids") && !isRTGRequest(doc)) {
- doc.addField("ids_s", "true");
+ setFieldIfUnset(doc, "ids_s", "true");
}
if(parts[0].equals("isShard")) {
String dr = URLDecoder.decode(parts[1], Charset.defaultCharset());
- doc.addField("isShard_s", dr);
+ setFieldIfUnset(doc, "isShard_s", dr);
}
if(parts[0].equals("wt")) {
String dr = URLDecoder.decode(parts[1], Charset.defaultCharset());
- doc.addField("wt_s", dr);
+ setFieldIfUnset(doc, "wt_s", dr);
}
if(parts[0].equals("facet")) {
String dr = URLDecoder.decode(parts[1], Charset.defaultCharset());
- doc.addField("facet_s", dr);
+ setFieldIfUnset(doc, "facet_s", dr);
}
if(parts[0].equals("shards.purpose")) {
@@ -533,19 +553,9 @@ public class SolrLogPostTool {
//Special params used to determine what stage a query is.
//So we populate with defaults.
//The absence of the distrib params means it's a distributed query.
-
-
- if(doc.getField("distrib_s") == null) {
- doc.addField("distrib_s", "true");
- }
-
- if(doc.getField("shards_s") == null) {
- doc.addField("shards_s", "false");
- }
-
- if(doc.getField("ids_s") == null) {
- doc.addField("ids_s", "false");
- }
+ setFieldIfUnset(doc, "distrib_s", "true");
+ setFieldIfUnset(doc, "shards_s", "false");
+ setFieldIfUnset(doc, "ids_s", "false");
}
private boolean isRTGRequest(SolrInputDocument doc) {
diff --git a/solr/core/src/test/org/apache/solr/util/SolrLogPostToolTest.java b/solr/core/src/test/org/apache/solr/util/SolrLogPostToolTest.java
index 10f7b8c..c65f5f1 100644
--- a/solr/core/src/test/org/apache/solr/util/SolrLogPostToolTest.java
+++ b/solr/core/src/test/org/apache/solr/util/SolrLogPostToolTest.java
@@ -75,6 +75,24 @@ public class SolrLogPostToolTest extends SolrTestCaseJ4 {
assertEquals("REFINE_FACETS", purposes[1].toString());
}
+ // Requests which have multiple copies of the same param should be parsed so that only the first param value is
+ // indexed, since the log schema expects many of these to be single-valued fields and will throw errors if multiple
+ // values are received.
+ @Test
+ public void testRecordsFirstInstanceOfSingleValuedParams() throws Exception {
+ final String record = "2019-12-09 15:05:01.931 INFO (qtp2103763750-21) [c:logs4 s:shard1 r:core_node2 x:logs4_shard1_replica_n1] o.a.s.c.S.Request [logs4_shard1_replica_n1] webapp=/solr path=/select params={q=*:*&q=inStock:true&_=1575835181759&shards.purpose=36&isShard=true&wt=javabin&wt=xml&distrib=false} hits=234868 status=0 QTime=8\n";
+
+ List<SolrInputDocument> docs = readDocs(record);
+ assertEquals(docs.size(), 1);
+ SolrInputDocument doc = docs.get(0);
+
+ assertEquals(doc.getFieldValues("q_s").size(), 1);
+ assertEquals(doc.getFieldValue("q_s"), "*:*");
+
+ assertEquals(doc.getFieldValues("wt_s").size(), 1);
+ assertEquals(doc.getFieldValue("wt_s"), "javabin");
+ }
+
@Test
public void testRTGRecord() throws Exception {
final String record = "2020-03-19 20:00:30.845 INFO (qtp1635378213-20354) [c:logs4 s:shard8 r:core_node63 x:logs4_shard8_replica_n60] o.a.s.c.S.Request [logs4_shard8_replica_n60] webapp=/solr path=/get params={qt=/get&_stateVer_=logs4:104&ids=id1&ids=id2&ids=id3&wt=javabin&version=2} status=0 QTime=61";