You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by ep...@apache.org on 2024/02/24 16:24:30 UTC
(solr) branch main updated: Remove the deprecated bin/post and bin/postlogs (#2282)
This is an automated email from the ASF dual-hosted git repository.
epugh pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new 437144d2ad9 Remove the deprecated bin/post and bin/postlogs (#2282)
437144d2ad9 is described below
commit 437144d2ad92dd3939f9853f55b1667971cb7843
Author: Eric Pugh <ep...@opensourceconnections.com>
AuthorDate: Sat Feb 24 11:24:25 2024 -0500
Remove the deprecated bin/post and bin/postlogs (#2282)
We have richer, more capable equivalents in the bin/solr CLI tool.
---
solr/.gitattributes | 1 -
solr/bin/post | 240 ----
solr/bin/postlogs | 37 -
.../java/org/apache/solr/cli/SimplePostTool.java | 1427 --------------------
.../org/apache/solr/cli/SimplePostToolTest.java | 286 ----
solr/packaging/build.gradle | 2 -
solr/packaging/test/test_postlogs.bats | 12 -
.../modules/indexing-guide/pages/post-tool.adoc | 3 -
8 files changed, 2008 deletions(-)
diff --git a/solr/.gitattributes b/solr/.gitattributes
index 5add7a2e5b5..b64fca4aaf9 100644
--- a/solr/.gitattributes
+++ b/solr/.gitattributes
@@ -19,7 +19,6 @@
# -nix specific
bin/solr text eol=lf
bin/init.d/solr text eol=lf
-bin/post text eol=lf
*.bash text eol=lf
*.sh text eol=lf
diff --git a/solr/bin/post b/solr/bin/post
deleted file mode 100755
index e7ef443afd2..00000000000
--- a/solr/bin/post
+++ /dev/null
@@ -1,240 +0,0 @@
-#!/usr/bin/env bash
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# ====== Common code copied/adapted from bin/solr (TODO: centralize/share this kind of thing across bin/solr, etc)
-
-THIS_SCRIPT="$0"
-
-# Resolve symlinks to this script
-while [ -h "$THIS_SCRIPT" ] ; do
- ls=`ls -ld "$THIS_SCRIPT"`
- # Drop everything prior to ->
- link=`expr "$ls" : '.*-> \(.*\)$'`
- if expr "$link" : '/.*' > /dev/null; then
- THIS_SCRIPT="$link"
- else
- THIS_SCRIPT=`dirname "$THIS_SCRIPT"`/"$link"
- fi
-done
-
-SOLR_TIP=`dirname "$THIS_SCRIPT"`/..
-SOLR_TIP=`cd "$SOLR_TIP"; pwd`
-
-if [ -n "$SOLR_JAVA_HOME" ]; then
- JAVA="$SOLR_JAVA_HOME/bin/java"
-elif [ -n "$JAVA_HOME" ]; then
- for java in "$JAVA_HOME"/bin/amd64/java "$JAVA_HOME"/bin/java; do
- if [ -x "$java" ]; then
- JAVA="$java"
- break
- fi
- done
-else
- JAVA=java
-fi
-
-# test that Java exists and is executable on this server
-"$JAVA" -version >/dev/null 2>&1 || { echo >&2 "Java is required to run this tool! Please install Java 8 or greater before running this script."; exit 1; }
-
-echo "The bin/post script is deprecated in favour of the bin/solr post command. Please update your scripts."
-
-# ===== post specific code
-
-TOOL_JAR=("$SOLR_TIP/server/solr-webapp/webapp/WEB-INF/lib"/solr-core-*.jar)
-
-function print_usage() {
- echo ""
- echo 'Usage: post -c <collection> [OPTIONS] <files|directories|urls|-d ["...",...]>'
- echo " or post -help"
- echo ""
- echo " collection name defaults to DEFAULT_SOLR_COLLECTION if not specified"
- echo ""
- echo "OPTIONS"
- echo "======="
- echo " Solr options:"
- echo " -url <base Solr update URL> (overrides collection, host, and port)"
- echo " -host <host> (default: localhost)"
- echo " -p or -port <port> (default: 8983)"
- echo " -commit yes|no (default: yes)"
- echo " -u or -user <user:pass> (sets BasicAuth credentials)"
- # optimize intentionally omitted, but can be used as '-optimize yes' (default: no)
- echo ""
- echo " Web crawl options:"
- echo " -recursive <depth> (default: 1)"
- echo " -delay <seconds> (default: 10)"
- echo ""
- echo " Directory crawl options:"
- echo " -delay <seconds> (default: 0)"
- echo ""
- echo " stdin/args options:"
- echo " -type <content/type> (default: application/xml)"
- echo ""
- echo " Other options:"
- echo " -filetypes <type>[,<type>,...] (default: xml,json,jsonl,csv,pdf,doc,docx,ppt,pptx,xls,xlsx,odt,odp,ods,ott,otp,ots,rtf,htm,html,txt,log)"
- echo " -params \"<key>=<value>[&<key>=<value>...]\" (values must be URL-encoded; these pass through to Solr update request)"
- echo " -out yes|no (default: no; yes outputs Solr response to console)"
- echo " -format solr (sends application/json content as Solr commands to /update instead of /update/json/docs)"
- echo ""
- echo ""
- echo "Examples:"
- echo ""
- echo "* JSON file: $THIS_SCRIPT -c wizbang events.json"
- echo "* XML files: $THIS_SCRIPT -c records article*.xml"
- echo "* CSV file: $THIS_SCRIPT -c signals LATEST-signals.csv"
- echo "* Directory of files: $THIS_SCRIPT -c myfiles ~/Documents"
- echo "* Web crawl: $THIS_SCRIPT -c gettingstarted https://solr.apache.org/ -recursive 1 -delay 1"
- echo "* Standard input (stdin): echo '{"commit": {}}' | $THIS_SCRIPT -c my_collection -type application/json -out yes -d"
- echo "* Data as string: $THIS_SCRIPT -c signals -type text/csv -out yes -d $'id,value\n1,0.47'"
- echo ""
-} # end print_usage
-
-if [[ $# -eq 1 && ("$1" == "-help" || "$1" == "-h" || "$1" == "-usage") ]]; then
- print_usage
- exit
-fi
-
-
-COLLECTION="$DEFAULT_SOLR_COLLECTION"
-PROPS=('-Dauto=yes')
-RECURSIVE=""
-FILES=()
-URLS=()
-ARGS=()
-
-while [ $# -gt 0 ]; do
- # TODO: natively handle the optional parameters to SPT
- # but for now they can be specified as bin/post -c collection-name delay=5 https://lucidworks.com
-
- if [[ -d "$1" ]]; then
- # Directory
-# echo "$1: DIRECTORY"
- RECURSIVE=yes
- FILES+=("$1")
- elif [[ -f "$1" ]]; then
- # File
-# echo "$1: FILE"
- FILES+=("$1")
- elif [[ "$1" == http* ]]; then
- # URL
-# echo "$1: URL"
- URLS+=("$1")
- else
- if [[ "$1" == -* ]]; then
- if [[ "$1" == "-c" ]]; then
- # Special case, pull out collection name
- shift
- COLLECTION="$1"
- elif [[ "$1" == "-p" ]]; then
- # -p alias for -port for convenience and compatibility with `bin/solr start`
- shift
- PROPS+=("-Dport=$1")
- elif [[ ("$1" == "-d" || "$1" == "--data" || "$1" == "-") ]]; then
- if [[ ! -t 0 ]]; then
- MODE="stdin"
- else
- # when no stdin exists and -d specified, the rest of the arguments
- # are assumed to be strings to post as-is
- MODE="args"
- shift
- if [[ $# -gt 0 ]]; then
- ARGS=("$@")
- shift $#
- else
- # SPT needs a valid args string, useful for 'bin/post -c foo -d' to force a commit
- ARGS+=("<add/>")
- fi
- fi
- elif [[ ("$1" == "-u" || "$1" == "-user") ]]; then
- shift
- PROPS+=("-Dbasicauth=$1")
- else
- if [[ "$1" == -D* ]] ; then
- PROPS+=("$1")
- if [[ "${1:2:4}" == "url=" ]]; then
- SOLR_URL=${1:6}
- fi
- else
- key="${1:1}"
- shift
- # echo "$1: PROP"
- PROPS+=("-D$key=$1")
- if [[ "$key" == "url" ]]; then
- SOLR_URL=$1
- fi
- fi
- fi
- else
- echo -e "\nUnrecognized argument: $1\n"
- echo -e "If this was intended to be a data file, it does not exist relative to $PWD\n"
- exit 1
- fi
- fi
- shift
-done
-
-# Check for errors
-if [[ $COLLECTION == "" && $SOLR_URL == "" ]]; then
- echo -e "\nCollection or URL must be specified. Use -c <collection name> or set DEFAULT_SOLR_COLLECTION in your environment, or use -url instead.\n"
- echo -e "See '$THIS_SCRIPT -h' for usage instructions.\n"
- exit 1
-fi
-
-# Unsupported: bin/post -c foo
-if [[ ${#FILES[@]} == 0 && ${#URLS[@]} == 0 && $MODE != "stdin" && $MODE != "args" ]]; then
- echo -e "\nNo files, directories, URLs, -d strings, or stdin were specified.\n"
- echo -e "See '$THIS_SCRIPT -h' for usage instructions.\n"
- exit 1
-fi
-
-# SPT does not support mixing different data mode types, just files, just URLs, just stdin, or just argument strings.
-# The following are unsupported constructs:
-# bin/post -c foo existing_file.csv http://example.com
-# echo '<xml.../>' | bin/post -c foo existing_file.csv
-# bin/post -c foo existing_file.csv -d 'anything'
-if [[ (${#FILES[@]} != 0 && ${#URLS[@]} != 0 && $MODE != "stdin" && $MODE != "args")
- || ((${#FILES[@]} != 0 || ${#URLS[@]} != 0) && ($MODE == "stdin" || $MODE == "args")) ]]; then
- echo -e "\nCombining files/directories, URLs, stdin, or args is not supported. Post them separately.\n"
- exit 1
-fi
-
-PARAMS=""
-
-# TODO: let's simplify this
-if [[ $MODE != "stdin" && $MODE != "args" ]]; then
- if [[ $FILES != "" ]]; then
- MODE="files"
- PARAMS=("${FILES[@]}")
- fi
-
- if [[ $URLS != "" ]]; then
- MODE="web"
- PARAMS=("${URLS[@]}")
- fi
-else
- PARAMS=("${ARGS[@]}")
-fi
-
-PROPS+=("-Dc=$COLLECTION" "-Ddata=$MODE")
-if [[ -n "$RECURSIVE" ]]; then
- PROPS+=('-Drecursive=yes')
-fi
-
-echo "$JAVA" -classpath "${TOOL_JAR[0]}" "${PROPS[@]}" org.apache.solr.cli.SimplePostTool "${PARAMS[@]}"
-"$JAVA" -classpath "${TOOL_JAR[0]}" "${PROPS[@]}" org.apache.solr.cli.SimplePostTool "${PARAMS[@]}"
-
-# post smoker:
-# bin/post -c signals -out yes -type application/json -d '[{"id": 2, "val": 0.47}]'
-# bin/post -c signals -out yes -params "wt=json" -d '<add><doc><field name="id">1</field></doc></add>'
diff --git a/solr/bin/postlogs b/solr/bin/postlogs
deleted file mode 100755
index 249f693230d..00000000000
--- a/solr/bin/postlogs
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/usr/bin/env bash
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-############################################################################################
-#
-# A command line tool for indexing Solr logs in the out-of-the-box log format.
-#
-# First build the Solr distribution. Then run postlogs from inside the Solr distribution
-# home directory as described below:
-#
-# parameters:
-#
-# -- baseUrl: Example http://localhost:8983/solr/collection1
-# -- rootDir: All files found at or below the root will be indexed
-#
-# Sample syntax: ./bin/postlogs http://localhost:8983/solr/collection1 /user/foo/logs");
-#
-#
-############################################################################################
-
-echo "This script has been deprecated in favour of 'bin/solr postlogs' command."
-
-SOLR_TIP="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"/..
-java -classpath "$SOLR_TIP/server/lib/ext/*:$SOLR_TIP/server/solr-webapp/webapp/WEB-INF/lib/*" org.apache.solr.cli.SolrLogPostTool $1 $2
diff --git a/solr/core/src/java/org/apache/solr/cli/SimplePostTool.java b/solr/core/src/java/org/apache/solr/cli/SimplePostTool.java
deleted file mode 100644
index 170eb20ff59..00000000000
--- a/solr/core/src/java/org/apache/solr/cli/SimplePostTool.java
+++ /dev/null
@@ -1,1427 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.cli;
-
-import static java.nio.charset.StandardCharsets.US_ASCII;
-import static java.nio.charset.StandardCharsets.UTF_8;
-
-import java.io.BufferedReader;
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileFilter;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.OutputStream;
-import java.net.HttpURLConnection;
-import java.net.MalformedURLException;
-import java.net.ProtocolException;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.net.URL;
-import java.net.URLEncoder;
-import java.nio.ByteBuffer;
-import java.nio.charset.Charset;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.InvalidPathException;
-import java.security.GeneralSecurityException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Base64;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.LinkedHashSet;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Set;
-import java.util.TimeZone;
-import java.util.regex.Pattern;
-import java.util.regex.PatternSyntaxException;
-import java.util.zip.GZIPInputStream;
-import java.util.zip.Inflater;
-import java.util.zip.InflaterInputStream;
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.parsers.ParserConfigurationException;
-import javax.xml.xpath.XPath;
-import javax.xml.xpath.XPathConstants;
-import javax.xml.xpath.XPathExpression;
-import javax.xml.xpath.XPathExpressionException;
-import javax.xml.xpath.XPathFactory;
-import org.apache.solr.client.api.util.SolrVersion;
-import org.apache.solr.common.util.Utils;
-import org.apache.solr.util.RTimer;
-import org.w3c.dom.Document;
-import org.w3c.dom.Node;
-import org.w3c.dom.NodeList;
-import org.xml.sax.SAXException;
-
-/**
- * A simple utility class for posting raw updates to a Solr server. It has a main method, so it can
- * be run on the command line. View this not as a best-practice code example, but as a standalone
- * example built with an explicit purpose of not having external jar dependencies.
- *
- * @deprecated This class is being replaced with bin/solr tool. Please use {@link PostTool} instead.
- */
-@Deprecated(since = "9.2")
-public class SimplePostTool {
- private static final String DEFAULT_POST_HOST = "localhost";
- private static final String DEFAULT_POST_PORT = "8983";
- private static final String VERSION_OF_THIS_TOOL = SolrVersion.LATEST_STRING;
-
- private static final String DEFAULT_COMMIT = "yes";
- private static final String DEFAULT_OPTIMIZE = "no";
- private static final String DEFAULT_OUT = "no";
- private static final String DEFAULT_AUTO = "no";
- private static final String DEFAULT_RECURSIVE = "0";
- private static final int DEFAULT_WEB_DELAY = 10;
- private static final int MAX_WEB_DEPTH = 10;
- public static final String DEFAULT_CONTENT_TYPE = "application/json";
- public static final String DEFAULT_FILE_TYPES =
- "xml,json,jsonl,csv,pdf,doc,docx,ppt,pptx,xls,xlsx,odt,odp,ods,ott,otp,ots,rtf,htm,html,txt,log";
- private static final String BASIC_AUTH = "basicauth";
-
- static final String DATA_MODE_FILES = "files";
- static final String DATA_MODE_ARGS = "args";
- static final String DATA_MODE_STDIN = "stdin";
- static final String DATA_MODE_WEB = "web";
- static final String DEFAULT_DATA_MODE = DATA_MODE_FILES;
-
- static final String FORMAT_SOLR = "solr";
-
- // Input args
- boolean auto = false;
- int recursive = 0;
- int delay = 0;
- String fileTypes;
- URL solrUrl;
- String credentials;
- OutputStream out = null;
- String type;
- String format;
- String mode;
- boolean commit;
- boolean optimize;
- String[] args;
-
- private int currentDepth;
-
- static HashMap<String, String> mimeMap;
- FileFilter fileFilter;
- // Backlog for crawling
- List<LinkedHashSet<URI>> backlog = new ArrayList<>();
- Set<URI> visited = new HashSet<>();
-
- static final Set<String> DATA_MODES = new HashSet<>();
- static final String USAGE_STRING_SHORT =
- "Usage: java [SystemProperties] -jar post.jar [-h|-] [<file|folder|url|arg> [<file|folder|url|arg>...]]";
-
- // Used in tests to avoid doing actual network traffic
- boolean mockMode = false;
- PageFetcher pageFetcher;
-
- static {
- DATA_MODES.add(DATA_MODE_FILES);
- DATA_MODES.add(DATA_MODE_ARGS);
- DATA_MODES.add(DATA_MODE_STDIN);
- DATA_MODES.add(DATA_MODE_WEB);
-
- mimeMap = new HashMap<>();
- mimeMap.put("xml", "application/xml");
- mimeMap.put("csv", "text/csv");
- mimeMap.put("json", "application/json");
- mimeMap.put("jsonl", "application/jsonl");
- mimeMap.put("pdf", "application/pdf");
- mimeMap.put("rtf", "text/rtf");
- mimeMap.put("html", "text/html");
- mimeMap.put("htm", "text/html");
- mimeMap.put("doc", "application/msword");
- mimeMap.put("docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
- mimeMap.put("ppt", "application/vnd.ms-powerpoint");
- mimeMap.put(
- "pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation");
- mimeMap.put("xls", "application/vnd.ms-excel");
- mimeMap.put("xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
- mimeMap.put("odt", "application/vnd.oasis.opendocument.text");
- mimeMap.put("ott", "application/vnd.oasis.opendocument.text");
- mimeMap.put("odp", "application/vnd.oasis.opendocument.presentation");
- mimeMap.put("otp", "application/vnd.oasis.opendocument.presentation");
- mimeMap.put("ods", "application/vnd.oasis.opendocument.spreadsheet");
- mimeMap.put("ots", "application/vnd.oasis.opendocument.spreadsheet");
- mimeMap.put("txt", "text/plain");
- mimeMap.put("log", "text/plain");
- }
-
- /**
- * See usage() for valid command line usage
- *
- * @param args the params on the command line
- */
- public static void main(String[] args) {
- info("SimplePostTool version " + VERSION_OF_THIS_TOOL);
- if (0 < args.length
- && ("-help".equals(args[0]) || "--help".equals(args[0]) || "-h".equals(args[0]))) {
- usage();
- } else {
- final SimplePostTool t = parseArgsAndInit(args);
- t.execute();
- }
- }
-
- /**
- * After initialization, call execute to start the post job. This method delegates to the correct
- * mode method.
- */
- public void execute() {
- final RTimer timer = new RTimer();
- if (DATA_MODE_FILES.equals(mode) && args.length > 0) {
- doFilesMode();
- } else if (DATA_MODE_ARGS.equals(mode) && args.length > 0) {
- doArgsMode();
- } else if (DATA_MODE_WEB.equals(mode) && args.length > 0) {
- doWebMode();
- } else if (DATA_MODE_STDIN.equals(mode)) {
- doStdinMode();
- } else {
- usageShort();
- return;
- }
-
- if (commit) {
- commit();
- }
- if (optimize) {
- optimize();
- }
- displayTiming((long) timer.getTime());
- }
-
- /**
- * Pretty prints the number of milliseconds taken to post the content to Solr
- *
- * @param millis the time in milliseconds
- */
- private void displayTiming(long millis) {
- SimpleDateFormat df = new SimpleDateFormat("H:mm:ss.SSS", Locale.getDefault());
- df.setTimeZone(TimeZone.getTimeZone("UTC"));
- CLIO.out("Time spent: " + df.format(new Date(millis)));
- }
-
- /**
- * Parses incoming arguments and system params and initializes the tool
- *
- * @param args the incoming cmd line args
- * @return an instance of SimplePostTool
- */
- protected static SimplePostTool parseArgsAndInit(String[] args) {
- String urlStr = null;
- try {
- // Parse args
- final String mode = System.getProperty("data", DEFAULT_DATA_MODE);
- if (!DATA_MODES.contains(mode)) {
- fatal("System Property 'data' is not valid for this tool: " + mode);
- }
-
- String params = System.getProperty("params", "");
- String host = System.getProperty("host", DEFAULT_POST_HOST);
- String port = System.getProperty("port", DEFAULT_POST_PORT);
- String core = System.getProperty("c");
-
- urlStr = System.getProperty("url");
-
- if (urlStr == null && core == null) {
- fatal("Specifying either url or core/collection is mandatory.\n" + USAGE_STRING_SHORT);
- }
-
- if (urlStr == null) {
- urlStr = String.format(Locale.ROOT, "http://%s:%s/solr/%s/update", host, port, core);
- }
-
- urlStr = SimplePostTool.appendParam(urlStr, params);
- URL url = new URL(urlStr);
- String user = null;
- String credentials = null;
- if (url.getUserInfo() != null && url.getUserInfo().trim().length() > 0) {
- user = url.getUserInfo().split(":")[0];
- } else if (System.getProperty(BASIC_AUTH) != null) {
- user = System.getProperty(BASIC_AUTH).trim().split(":")[0];
- credentials = System.getProperty(BASIC_AUTH).trim();
- }
- if (user != null) {
- info("Basic Authentication enabled, user=" + user);
- }
-
- boolean auto = isOn(System.getProperty("auto", DEFAULT_AUTO));
- String type = System.getProperty("type");
- String format = System.getProperty("format");
- // Recursive
- int recursive = 0;
- String r = System.getProperty("recursive", DEFAULT_RECURSIVE);
- try {
- recursive = Integer.parseInt(r);
- } catch (Exception e) {
- if (isOn(r)) {
- recursive = DATA_MODE_WEB.equals(mode) ? 1 : 999;
- }
- }
- // Delay
- int delay = DATA_MODE_WEB.equals(mode) ? DEFAULT_WEB_DELAY : 0;
- try {
- delay = Integer.parseInt(System.getProperty("delay", "" + delay));
- } catch (Exception e) {
- }
- OutputStream out = isOn(System.getProperty("out", DEFAULT_OUT)) ? CLIO.getOutStream() : null;
- String fileTypes = System.getProperty("filetypes", DEFAULT_FILE_TYPES);
- boolean commit = isOn(System.getProperty("commit", DEFAULT_COMMIT));
- boolean optimize = isOn(System.getProperty("optimize", DEFAULT_OPTIMIZE));
-
- return new SimplePostTool(
- mode,
- url,
- credentials,
- auto,
- type,
- format,
- recursive,
- delay,
- fileTypes,
- out,
- commit,
- optimize,
- args);
- } catch (MalformedURLException e) {
- fatal("System Property 'url' is not a valid URL: " + urlStr);
- return null;
- }
- }
-
- /**
- * Constructor which takes in all mandatory input for the tool to work. Also see usage() for
- * further explanation of the params.
- *
- * @param mode whether to post files, web pages, params or stdin
- * @param url the Solr base Url to post to, should end with /update
- * @param auto if true, we'll guess type and add resourcename/url
- * @param type content-type of the data you are posting
- * @param recursive number of levels for file/web mode, or 0 if one file only
- * @param delay if recursive then delay will be the wait time between posts
- * @param fileTypes a comma separated list of file-name endings to accept for file/web
- * @param out an OutputStream to write output to, e.g. stdout to print to console
- * @param commit if true, will commit at end of posting
- * @param optimize if true, will optimize at end of posting
- * @param args a String[] of arguments, varies between modes
- */
- public SimplePostTool(
- String mode,
- URL url,
- String credentials,
- boolean auto,
- String type,
- String format,
- int recursive,
- int delay,
- String fileTypes,
- OutputStream out,
- boolean commit,
- boolean optimize,
- String[] args) {
- this.mode = mode;
- this.solrUrl = url;
- this.credentials = credentials;
- this.auto = auto;
- this.type = type;
- this.format = format;
- this.recursive = recursive;
- this.delay = delay;
- this.fileTypes = fileTypes;
- this.fileFilter = getFileFilterFromFileTypes(fileTypes);
- this.out = out;
- this.commit = commit;
- this.optimize = optimize;
- this.args = args;
- pageFetcher = new PageFetcher();
- }
-
- public SimplePostTool() {}
-
- //
- // Do some action depending on which mode we have
- //
- private void doFilesMode() {
- currentDepth = 0;
- // Skip posting files if special param "-" given
- if (!args[0].equals("-")) {
- info(
- "Posting files to [base] url "
- + solrUrl
- + (!auto ? " using content-type " + (type == null ? DEFAULT_CONTENT_TYPE : type) : "")
- + "...");
- if (auto) {
- info("Entering auto mode. File endings considered are " + fileTypes);
- }
- if (recursive > 0) {
- info("Entering recursive mode, max depth=" + recursive + ", delay=" + delay + "s");
- }
- int numFilesPosted = postFiles(args, 0, out, type);
- info(numFilesPosted + " files indexed.");
- }
- }
-
- private void doArgsMode() {
- info("POSTing args to " + solrUrl + "...");
- for (String a : args) {
- postData(stringToStream(a), null, out, type, solrUrl);
- }
- }
-
- private void doWebMode() {
- reset();
- int numPagesPosted = 0;
- try {
- if (type != null) {
- fatal("Specifying content-type with \"-Ddata=web\" is not supported");
- }
- if (args[0].equals("-")) {
- // Skip posting url if special param "-" given
- return;
- }
- // Set Extracting handler as default
- solrUrl = appendUrlPath(solrUrl, "/extract");
-
- info("Posting web pages to Solr url " + solrUrl);
- auto = true;
- info(
- "Entering auto mode. Indexing pages with content-types corresponding to file endings "
- + fileTypes);
- if (recursive > 0) {
- if (recursive > MAX_WEB_DEPTH) {
- recursive = MAX_WEB_DEPTH;
- warn("Too large recursion depth for web mode, limiting to " + MAX_WEB_DEPTH + "...");
- }
- if (delay < DEFAULT_WEB_DELAY) {
- warn(
- "Never crawl an external web site faster than every 10 seconds, your IP will probably be blocked");
- }
- info("Entering recursive mode, depth=" + recursive + ", delay=" + delay + "s");
- }
- numPagesPosted = postWebPages(args, 0, out);
- info(numPagesPosted + " web pages indexed.");
- } catch (MalformedURLException e) {
- fatal("Wrong URL trying to append /extract to " + solrUrl);
- }
- }
-
- private void doStdinMode() {
- info("POSTing stdin to " + solrUrl + "...");
- postData(System.in, null, out, type, solrUrl);
- }
-
- private void reset() {
- backlog = new ArrayList<>();
- visited = new HashSet<>();
- }
-
- //
- // USAGE
- //
- private static void usageShort() {
- CLIO.out(
- USAGE_STRING_SHORT + "\n" + " Please invoke with -h option for extended usage help.");
- }
-
- private static void usage() {
- CLIO.out(
- USAGE_STRING_SHORT
- + "\n\n"
- + "Supported System Properties and their defaults:\n"
- + " -Dc=<core/collection>\n"
- + " -Durl=<base Solr update URL> (overrides -Dc option if specified)\n"
- + " -Ddata=files|web|args|stdin (default="
- + DEFAULT_DATA_MODE
- + ")\n"
- + " -Dtype=<content-type> (default="
- + DEFAULT_CONTENT_TYPE
- + " -Dhost=<host> (default: "
- + ")\n"
- + DEFAULT_POST_HOST
- + ")\n"
- + " -Dport=<port> (default: "
- + DEFAULT_POST_PORT
- + ")\n"
- + " -Dbasicauth=<user:pass> (sets Basic Authentication credentials)\n"
- + " -Dauto=yes|no (default="
- + DEFAULT_AUTO
- + ")\n"
- + " -Drecursive=yes|no|<depth> (default="
- + DEFAULT_RECURSIVE
- + ")\n"
- + " -Ddelay=<seconds> (default=0 for files, 10 for web)\n"
- + " -Dfiletypes=<type>[,<type>,...] (default="
- + DEFAULT_FILE_TYPES
- + ")\n"
- + " -Dparams=\"<key>=<value>[&<key>=<value>...]\" (values must be URL-encoded)\n"
- + " -Dcommit=yes|no (default="
- + DEFAULT_COMMIT
- + ")\n"
- + " -Doptimize=yes|no (default="
- + DEFAULT_OPTIMIZE
- + ")\n"
- + " -Dout=yes|no (default="
- + DEFAULT_OUT
- + ")\n\n"
- + "This is a simple command line tool for POSTing raw data to a Solr port.\n"
- + "NOTE: Specifying the url/core/collection name is mandatory.\n"
- + "Data can be read from files specified as commandline args,\n"
- + "URLs specified as args, as raw commandline arg strings or via STDIN.\n"
- + "Examples:\n"
- + " java -Dc=gettingstarted -jar post.jar *.xml\n"
- + " java -Ddata=args -Dc=gettingstarted -jar post.jar '<delete><id>42</id></delete>'\n"
- + " java -Ddata=stdin -Dc=gettingstarted -jar post.jar < hd.xml\n"
- + " java -Ddata=web -Dc=gettingstarted -jar post.jar http://example.com/\n"
- + " java -Dtype=text/csv -Dc=gettingstarted -jar post.jar *.csv\n"
- + " java -Dtype=application/json -Dc=gettingstarted -jar post.jar *.json\n"
- + " java -Durl=http://localhost:8983/solr/techproducts/update/extract -Dparams=literal.id=pdf1 -jar post.jar solr-word.pdf\n"
- + " java -Dauto -Dc=gettingstarted -jar post.jar *\n"
- + " java -Dauto -Dc=gettingstarted -Drecursive -jar post.jar afolder\n"
- + " java -Dauto -Dc=gettingstarted -Dfiletypes=ppt,html -jar post.jar afolder\n"
- + "The options controlled by System Properties include the Solr\n"
- + "URL to POST to, the Content-Type of the data, whether a commit\n"
- + "or optimize should be executed, and whether the response should\n"
- + "be written to STDOUT. If auto=yes the tool will try to set type\n"
- + "automatically from file name. When posting rich documents the\n"
- + "file name will be propagated as \"resource.name\" and also used\n"
- + "as \"literal.id\". You may override these or any other request parameter\n"
- + "through the -Dparams property. To do a commit only, use \"-\" as argument.\n"
- + "The web mode is a simple crawler following links within domain, default delay=10s.");
- }
-
- private boolean checkIsValidPath(File srcFile) {
- try {
- srcFile.toPath();
- return true;
- } catch (InvalidPathException e) {
- return false;
- }
- }
-
- /**
- * Post all filenames provided in args
- *
- * @param args array of file names
- * @param startIndexInArgs offset to start
- * @param out output stream to post data to
- * @param type default content-type to use when posting (may be overridden in auto mode)
- * @return number of files posted
- */
- public int postFiles(String[] args, int startIndexInArgs, OutputStream out, String type) {
- reset();
- int filesPosted = 0;
- for (int j = startIndexInArgs; j < args.length; j++) {
- File srcFile = new File(args[j]);
- filesPosted = getFilesPosted(out, type, srcFile);
- }
- return filesPosted;
- }
-
- /**
- * Post all filenames provided in args
- *
- * @param files array of Files
- * @param startIndexInArgs offset to start
- * @param out output stream to post data to
- * @param type default content-type to use when posting (may be overridden in auto mode)
- * @return number of files posted
- */
- public int postFiles(File[] files, int startIndexInArgs, OutputStream out, String type) {
- reset();
- int filesPosted = 0;
- for (File srcFile : files) {
- filesPosted = getFilesPosted(out, type, srcFile);
- }
- return filesPosted;
- }
-
- private int getFilesPosted(final OutputStream out, final String type, final File srcFile) {
- int filesPosted = 0;
- boolean isValidPath = checkIsValidPath(srcFile);
- if (isValidPath && srcFile.isDirectory() && srcFile.canRead()) {
- filesPosted += postDirectory(srcFile, out, type);
- } else if (isValidPath && srcFile.isFile() && srcFile.canRead()) {
- filesPosted += postFiles(new File[] {srcFile}, out, type);
- } else {
- filesPosted += handleGlob(srcFile, out, type);
- }
- return filesPosted;
- }
-
- /**
- * Posts a whole directory
- *
- * @return number of files posted total
- */
- private int postDirectory(File dir, OutputStream out, String type) {
- if (dir.isHidden() && !dir.getName().equals(".")) {
- return (0);
- }
- info(
- "Indexing directory "
- + dir.getPath()
- + " ("
- + dir.listFiles(fileFilter).length
- + " files, depth="
- + currentDepth
- + ")");
- int posted = 0;
- posted += postFiles(dir.listFiles(fileFilter), out, type);
- if (recursive > currentDepth) {
- for (File d : dir.listFiles()) {
- if (d.isDirectory()) {
- currentDepth++;
- posted += postDirectory(d, out, type);
- currentDepth--;
- }
- }
- }
- return posted;
- }
-
-  /**
-   * Posts a list of files. Entries that are not regular files, or that are hidden, are skipped.
-   * After each posted file the thread sleeps for {@code delay} seconds.
-   *
-   * @param files the candidate files
-   * @param out output stream to write responses to
-   * @param type default content-type to use when posting
-   * @return number of files posted
-   * @throws RuntimeException wrapping InterruptedException if the delay sleep is interrupted
-   */
-  int postFiles(File[] files, OutputStream out, String type) {
-    int filesPosted = 0;
-    for (File srcFile : files) {
-      if (!srcFile.isFile() || srcFile.isHidden()) {
-        continue;
-      }
-      postFile(srcFile, out, type);
-      try {
-        Thread.sleep(delay * 1000L);
-      } catch (InterruptedException e) {
-        // Restore the interrupt flag so callers up the stack can still observe the interruption.
-        Thread.currentThread().interrupt();
-        throw new RuntimeException(e);
-      }
-      filesPosted++;
-    }
-    return filesPosted;
-  }
-
-  /**
-   * Expands a file glob and posts the matches. Only the last path segment is treated as a glob;
-   * the parent path is used literally (the current directory when there is no parent).
-   *
-   * @param globFile file holding glob path
-   * @param out outputStream to write results to
-   * @param type default content-type to use when posting (may be overridden in auto mode)
-   * @return number of files posted
-   */
-  int handleGlob(File globFile, OutputStream out, String type) {
-    File parent = globFile.getParentFile();
-    File searchDir = (parent != null) ? parent : new File(".");
-    File[] matches = searchDir.listFiles(new GlobFileFilter(globFile.getName(), false));
-    if (matches != null && matches.length > 0) {
-      return postFiles(matches, out, type);
-    }
-    warn("No files or directories matching " + globFile);
-    return 0;
-  }
-
- /**
- * This method takes as input a list of start URL strings for crawling, converts the URL strings
- * to URI strings and adds each one to a backlog and then starts crawling
- *
- * @param args the raw input args from main()
- * @param startIndexInArgs offset for where to start
- * @param out outputStream to write results to
- * @return the number of web pages posted
- */
- public int postWebPages(String[] args, int startIndexInArgs, OutputStream out) {
- reset();
- LinkedHashSet<URI> s = new LinkedHashSet<>();
- for (int j = startIndexInArgs; j < args.length; j++) {
- try {
- URI uri = new URI(normalizeUrlEnding(args[j]));
- s.add(uri);
- } catch (URISyntaxException e) {
- warn("Skipping malformed input URL: " + args[j]);
- }
- }
- // Add URIs to level 0 of the backlog and start recursive crawling
- backlog.add(s);
- return webCrawl(0, out);
- }
-
-  /**
-   * Normalizes a URL string: cuts everything from the first '#', then drops one trailing '?' and
-   * after that one trailing '/'.
-   *
-   * @param link the URL string to normalize
-   * @return the normalized URL string
-   */
-  protected static String normalizeUrlEnding(String link) {
-    int hashPos = link.indexOf('#');
-    String normalized = (hashPos >= 0) ? link.substring(0, hashPos) : link;
-    if (normalized.endsWith("?")) {
-      normalized = normalized.substring(0, normalized.length() - 1);
-    }
-    if (normalized.endsWith("/")) {
-      normalized = normalized.substring(0, normalized.length() - 1);
-    }
-    return normalized;
-  }
-
- /**
- * A very simple crawler: fetches every not-yet-visited URI queued in the backlog for the given
- * level, posts each page that was fetched with HTTP status 200 to Solr, and recurses into the
- * next level when new links were collected. Links are only pulled from a page while recursive
- * is greater than level and the fetch result's content type is "text/html"; they are obtained
- * through pageFetcher.getLinksFromWebPage. After each successfully posted page the thread
- * sleeps for the number of seconds held in the delay variable. Pages are fetched through
- * pageFetcher.readPageFromUrl, so whatever checks that fetcher applies (such as its robots.txt
- * handling) also apply here. Non-200 results, failed posts and I/O or URI syntax errors are
- * logged as warnings and the URI is skipped. This is only meant for test purposes.
- *
- * @param level which level of the backlog to crawl
- * @param out output stream to write to
- * @return number of pages crawled on this level and below
- * @throws RuntimeException wrapping InterruptedException if the delay sleep is interrupted
- */
- protected int webCrawl(int level, OutputStream out) {
- int numPages = 0;
- LinkedHashSet<URI> stack = backlog.get(level);
- int rawStackSize = stack.size();
- stack.removeAll(visited);
- int stackSize = stack.size();
- LinkedHashSet<URI> subStack = new LinkedHashSet<>();
- info(
- "Entering crawl at level "
- + level
- + " ("
- + rawStackSize
- + " links total, "
- + stackSize
- + " new)");
- for (URI uri : stack) {
- try {
- visited.add(uri);
- URL url = uri.toURL();
- PageFetcherResult result = pageFetcher.readPageFromUrl(url);
- if (result.httpStatus == 200) {
- url = (result.redirectUrl != null) ? result.redirectUrl : url;
- URL postUrl =
- new URL(
- appendParam(
- solrUrl.toString(),
- "literal.id="
- + URLEncoder.encode(url.toString(), UTF_8)
- + "&literal.url="
- + URLEncoder.encode(url.toString(), UTF_8)));
- ByteBuffer content = result.content;
- boolean success =
- postData(
- new ByteArrayInputStream(content.array(), content.arrayOffset(), content.limit()),
- null,
- out,
- result.contentType,
- postUrl);
- if (success) {
- info("POSTed web resource " + url + " (depth: " + level + ")");
- Thread.sleep(delay * 1000L);
- numPages++;
- // Pull links from HTML pages only
- if (recursive > level && result.contentType.equals("text/html")) {
- Set<URI> children =
- pageFetcher.getLinksFromWebPage(
- url,
- new ByteArrayInputStream(
- content.array(), content.arrayOffset(), content.limit()),
- result.contentType,
- postUrl);
- subStack.addAll(children);
- }
- } else {
- warn("An error occurred while posting " + uri);
- }
- } else {
- warn("The URL " + uri + " returned a HTTP result status of " + result.httpStatus);
- }
- } catch (IOException | URISyntaxException e) {
- warn("Caught exception when trying to open connection to " + uri + ": " + e.getMessage());
- } catch (InterruptedException e) {
- throw new RuntimeException(e);
- }
- }
- if (!subStack.isEmpty()) {
- backlog.add(subStack);
- numPages += webCrawl(level + 1, out);
- }
- return numPages;
- }
-
-  /**
-   * Resolves a link taken from the href attribute of an HTML anchor against the page it was
-   * found on. Links starting with "http" are used as they are; links starting with "/" are
-   * resolved against the base URL's protocol and authority; other links containing ":" are
-   * rejected; anything else is appended to the base URL's path, after dropping a trailing path
-   * segment that looks like a file name. The result is normalized with normalizeUrlEnding, and
-   * links ending in .jpg, .jpeg, .png or .gif are rejected.
-   *
-   * @param baseUrl the base url from where the link was found
-   * @param link the absolute or relative link
-   * @return the string version of the full URL, or null if the link is empty or skipped
-   */
-  protected String computeFullUrl(URL baseUrl, String link) {
-    if (link == null || link.isEmpty()) {
-      return null;
-    }
-    String resolved = link;
-    if (!resolved.startsWith("http")) {
-      if (resolved.startsWith("/")) {
-        resolved = baseUrl.getProtocol() + "://" + baseUrl.getAuthority() + resolved;
-      } else if (resolved.contains(":")) {
-        return null; // Skip non-relative URLs
-      } else {
-        String basePath = baseUrl.getPath();
-        if (!basePath.endsWith("/")) {
-          int lastSlash = basePath.lastIndexOf('/');
-          String lastSegment = basePath.substring(lastSlash + 1);
-          if (lastSegment.contains(".") || lastSegment.contains("?")) {
-            basePath = basePath.substring(0, lastSlash);
-          }
-        }
-        resolved =
-            baseUrl.getProtocol() + "://" + baseUrl.getAuthority() + basePath + "/" + resolved;
-      }
-    }
-    resolved = normalizeUrlEnding(resolved);
-    // Simple brute force skip images
-    String lowerCased = resolved.toLowerCase(Locale.ROOT);
-    for (String imageSuffix : new String[] {".jpg", ".jpeg", ".png", ".gif"}) {
-      if (lowerCased.endsWith(imageSuffix)) {
-        return null; // Skip images
-      }
-    }
-    return resolved;
-  }
-
-  /**
-   * Reverse lookup in the mime-type map: the type is supported when some file ending that maps
-   * to it is contained in the fileTypes option.
-   *
-   * @param type what content-type to lookup
-   * @return true if this is a supported content type
-   */
-  protected boolean typeSupported(String type) {
-    for (Map.Entry<String, String> mapping : mimeMap.entrySet()) {
-      boolean sameType = mapping.getValue().equals(type);
-      if (sameType && fileTypes.contains(mapping.getKey())) {
-        return true;
-      }
-    }
-    return false;
-  }
-
-  /**
-   * Tests if a string is exactly one of "true", "on", "yes" or "1" (case-sensitive).
-   *
-   * @param property the string to test; null is treated as "off"
-   * @return true if "on"
-   */
-  protected static boolean isOn(String property) {
-    // Compare against each accepted value. The former substring test on "true,on,yes,1" also
-    // accepted the empty string and fragments such as "e", "ru" or "n,yes".
-    return "true".equals(property)
-        || "on".equals(property)
-        || "yes".equals(property)
-        || "1".equals(property);
-  }
-
- static void warn(String msg) {
- CLIO.err("SimplePostTool: WARNING: " + msg);
- }
-
- static void info(String msg) {
- CLIO.out(msg);
- }
-
- static void fatal(String msg) {
- CLIO.err("SimplePostTool: FATAL: " + msg);
- System.exit(2);
- }
-
-  /** Does a simple commit operation: a GET on solrUrl with commit=true appended. */
-  public void commit() {
-    info("COMMITting Solr index changes to " + solrUrl + "...");
-    final String commitUrl = appendParam(solrUrl.toString(), "commit=true");
-    doGet(commitUrl);
-  }
-
-  /** Does a simple optimize operation: a GET on solrUrl with optimize=true appended. */
-  public void optimize() {
-    info("Performing an OPTIMIZE to " + solrUrl + "...");
-    final String optimizeUrl = appendParam(solrUrl.toString(), "optimize=true");
-    doGet(optimizeUrl);
-  }
-
-  /**
-   * Appends one or more URL query parameters to a URL. The first parameter is joined with "?"
-   * when the URL has no query part yet, all others with "&amp;". Blank segments are ignored and
-   * segments without any "=" are skipped with a warning.
-   *
-   * @param url the original URL
-   * @param param the parameter(s) to append, separated by "&amp;", each on the form key=value
-   * @return the string version of the resulting URL
-   */
-  public static String appendParam(String url, String param) {
-    String[] pa = param.split("&");
-    for (String p : pa) {
-      if (p.trim().length() == 0) {
-        continue;
-      }
-      // Split on the first '=' only and keep trailing empty strings, so values that themselves
-      // contain '=' (e.g. "fq=a=b") and empty values ("q=") are no longer dropped.
-      String[] kv = p.split("=", 2);
-      if (kv.length == 2) {
-        url = url + (url.contains("?") ? "&" : "?") + kv[0] + "=" + kv[1];
-      } else {
-        warn("Skipping param " + p + " which is not on form key=value");
-      }
-    }
-    return url;
-  }
-
- /**
- * Opens the file and posts its contents to Solr, writing the response to output.
- *
- * <p>In auto mode a null type is guessed from the file name via guessType, and the target URL
- * depends on the type: "application/json" goes to solrUrl plus "/json/docs" unless format
- * equals FORMAT_SOLR; "application/xml", "text/csv" and remaining "application/json" go to
- * solrUrl unchanged; every other type goes to solrUrl plus "/extract", with resource.name and
- * literal.id parameters set to the file's absolute path unless the URL already contains them.
- * Outside auto mode solrUrl is used unchanged and a null type becomes DEFAULT_CONTENT_TYPE.
- *
- * <p>An IOException while building the URL, opening or posting the file is logged as a
- * warning; an IOException while closing the file is reported through fatal().
- *
- * @param file the file to post
- * @param output stream the response is written to
- * @param type content-type to use, or null to guess it (auto mode) or use the default
- */
- public void postFile(File file, OutputStream output, String type) {
- InputStream is = null;
- try {
- URL url = solrUrl;
- String suffix = "";
- if (auto) {
- if (type == null) {
- type = guessType(file);
- }
- // TODO: Add a flag that disables /update and sends all to /update/extract, to avoid CSV,
- // JSON, and XML files
- // TODO: from being interpreted as Solr documents internally
- if (type.equals("application/json") && !FORMAT_SOLR.equals(format)) {
- suffix = "/json/docs";
- String urlStr = appendUrlPath(solrUrl, suffix).toString();
- url = new URL(urlStr);
- } else if (type.equals("application/xml")
- || type.equals("text/csv")
- || type.equals("application/json")) {
- // Default handler
- } else {
- // SolrCell
- suffix = "/extract";
- String urlStr = appendUrlPath(solrUrl, suffix).toString();
- if (!urlStr.contains("resource.name")) {
- urlStr =
- appendParam(
- urlStr, "resource.name=" + URLEncoder.encode(file.getAbsolutePath(), UTF_8));
- }
- if (!urlStr.contains("literal.id")) {
- urlStr =
- appendParam(
- urlStr, "literal.id=" + URLEncoder.encode(file.getAbsolutePath(), UTF_8));
- }
- url = new URL(urlStr);
- }
- } else {
- if (type == null) {
- type = DEFAULT_CONTENT_TYPE;
- }
- }
- info(
- "POSTing file "
- + file.getName()
- + (auto ? " (" + type + ")" : "")
- + " to [base]"
- + suffix
- + (mockMode ? " MOCK!" : ""));
- is = new FileInputStream(file);
- postData(is, file.length(), output, type, url);
- } catch (IOException e) {
- warn("Can't open/read file: " + file);
- } finally {
- try {
- if (is != null) {
- is.close();
- }
- } catch (IOException e) {
- fatal("IOException while closing file: " + e);
- }
- }
- }
-
-  /**
-   * Builds a new URL from the given one with {@code append} added to the end of its path. The
-   * query string, if any, is kept after the extended path.
-   *
-   * @param url the URL
-   * @param append the path to append
-   * @return the final URL version
-   */
-  protected static URL appendUrlPath(URL url, String append) throws MalformedURLException {
-    StringBuilder spec = new StringBuilder();
-    spec.append(url.getProtocol())
-        .append("://")
-        .append(url.getAuthority())
-        .append(url.getPath())
-        .append(append);
-    String query = url.getQuery();
-    if (query != null) {
-      spec.append("?").append(query);
-    }
-    return new URL(spec.toString());
-  }
-
-  /**
-   * Guesses the content type of a file from the text after the last '.' in its name (the whole
-   * name when it has no '.'), looked up lower-cased in mimeMap.
-   *
-   * @param file the file
-   * @return the content-type guessed, or "application/octet-stream" if mimeMap has no entry
-   */
-  protected static String guessType(File file) {
-    String fileName = file.getName();
-    int dot = fileName.lastIndexOf('.');
-    String ending = fileName.substring(dot + 1).toLowerCase(Locale.ROOT);
-    String guessed = mimeMap.get(ending);
-    if (guessed == null) {
-      return "application/octet-stream";
-    }
-    return guessed;
-  }
-
-  /**
-   * Performs a simple get on the given URL string. A string that is not a valid URL is reported
-   * with a warning and no request is made.
-   */
-  public void doGet(String url) {
-    final URL target;
-    try {
-      target = new URL(url);
-    } catch (MalformedURLException e) {
-      warn("The specified URL " + url + " is not a valid URL. Please check");
-      return;
-    }
-    doGet(target);
-  }
-
-  /**
-   * Performs a simple get on the given URL: connects with basic auth applied and checks the
-   * response code. Does nothing in mock mode. Failures are logged as warnings, never thrown.
-   */
-  public void doGet(URL url) {
-    if (mockMode) {
-      return;
-    }
-    try {
-      HttpURLConnection connection = (HttpURLConnection) url.openConnection();
-      basicAuth(connection);
-      connection.connect();
-      checkResponseCode(connection);
-    } catch (IOException e) {
-      warn("An error occurred getting data from " + url + ". Please check that Solr is running.");
-    } catch (Exception e) {
-      warn("An error occurred getting data from " + url + ". Message: " + e.getMessage());
-    }
-  }
-
- /**
- * Reads data from the data stream and posts it to the given URL, writing the response to
- * output.
- *
- * <p>In mock mode nothing is sent and true is returned. A null type is replaced by
- * DEFAULT_CONTENT_TYPE. With a non-null length the request uses fixed-length streaming,
- * otherwise chunked streaming. Connection set-up errors, errors while sending the data and a
- * GeneralSecurityException from the response check are reported through fatal(); an error
- * response code or an IOException while reading the response yields false.
- *
- * @param data the stream to read the request body from
- * @param length number of bytes in data, or null if unknown
- * @param output stream the response body is piped to
- * @param type the content-type to send, or null for the default
- * @param url the URL to post to
- * @return true if success
- */
- public boolean postData(
- InputStream data, Long length, OutputStream output, String type, URL url) {
- if (mockMode) {
- return true;
- }
-
- boolean success = true;
- if (type == null) {
- type = DEFAULT_CONTENT_TYPE;
- }
- HttpURLConnection urlConnection = null;
- try {
- try {
- urlConnection = (HttpURLConnection) url.openConnection();
- try {
- urlConnection.setRequestMethod("POST");
- } catch (ProtocolException e) {
- fatal("Shouldn't happen: HttpURLConnection doesn't support POST??" + e);
- }
- urlConnection.setDoOutput(true);
- urlConnection.setDoInput(true);
- urlConnection.setUseCaches(false);
- urlConnection.setAllowUserInteraction(false);
- urlConnection.setRequestProperty("Content-type", type);
- basicAuth(urlConnection);
- if (null != length) {
- urlConnection.setFixedLengthStreamingMode(length);
- } else {
- urlConnection.setChunkedStreamingMode(-1); // use JDK default chunkLen, 4k in Java 8.
- }
- urlConnection.connect();
- } catch (IOException e) {
- fatal("Connection error (is Solr running at " + solrUrl + " ?): " + e);
- success = false;
- } catch (Exception e) {
- fatal("POST failed with error " + e.getMessage());
- }
-
- try (final OutputStream out = urlConnection.getOutputStream()) {
- pipe(data, out);
- } catch (IOException e) {
- fatal("IOException while posting data: " + e);
- }
-
- try {
- success &= checkResponseCode(urlConnection);
- try (final InputStream in = urlConnection.getInputStream()) {
- pipe(in, output);
- }
- } catch (IOException e) {
- warn("IOException while reading response: " + e);
- success = false;
- } catch (GeneralSecurityException e) {
- fatal(
- "Looks like Solr is secured and would not let us in. Try with another user in '-u' parameter");
- }
- } finally {
- if (urlConnection != null) {
- urlConnection.disconnect();
- }
- }
- return success;
- }
-
- private void basicAuth(HttpURLConnection urlc) throws Exception {
- if (urlc.getURL().getUserInfo() != null) {
- String encoding =
- Base64.getEncoder().encodeToString(urlc.getURL().getUserInfo().getBytes(US_ASCII));
- urlc.setRequestProperty("Authorization", "Basic " + encoding);
- } else if (credentials != null) {
- if (!credentials.contains(":")) {
- throw new Exception("credentials '" + credentials + "' must be of format user:pass");
- }
- urlc.setRequestProperty(
- "Authorization",
- "Basic " + Base64.getEncoder().encodeToString(credentials.getBytes(UTF_8)));
- }
- }
-
-  /**
-   * Checks the HTTP status of a request. For a status of 400 or above, a warning with the status
-   * and URL is logged, followed by the body of the error response (if any).
-   *
-   * @param urlc the connection whose response is inspected
-   * @return true if the status is below 400, false otherwise
-   * @throws IOException if the status or error body cannot be read
-   * @throws GeneralSecurityException if the status is 401 or 403
-   */
-  private static boolean checkResponseCode(HttpURLConnection urlc)
-      throws IOException, GeneralSecurityException {
-    final int status = urlc.getResponseCode();
-    if (status < 400) {
-      return true;
-    }
-    warn(
-        "Solr returned an error #"
-            + status
-            + " ("
-            + urlc.getResponseMessage()
-            + ") for url: "
-            + urlc.getURL());
-    Charset charset = StandardCharsets.ISO_8859_1;
-    final String contentType = urlc.getContentType();
-    // code cloned from ContentStreamBase, but post.jar should be standalone!
-    if (contentType != null) {
-      int idx = contentType.toLowerCase(Locale.ROOT).indexOf("charset=");
-      if (idx > 0) {
-        // Keep only the charset token: drop further parameters and optional quotes.
-        String name = contentType.substring(idx + "charset=".length());
-        int end = name.indexOf(';');
-        if (end >= 0) {
-          name = name.substring(0, end);
-        }
-        name = name.trim();
-        if (name.length() > 1 && name.startsWith("\"") && name.endsWith("\"")) {
-          name = name.substring(1, name.length() - 1);
-        }
-        try {
-          charset = Charset.forName(name);
-        } catch (IllegalArgumentException e) {
-          // Malformed or unsupported charset name: decoding the error body is best effort, so
-          // stay with ISO-8859-1 rather than failing with an unchecked exception.
-        }
-      }
-    }
-    // Print the response returned by Solr
-    try (InputStream errStream = urlc.getErrorStream()) {
-      if (errStream != null) {
-        BufferedReader br = new BufferedReader(new InputStreamReader(errStream, charset));
-        final StringBuilder response = new StringBuilder("Response: ");
-        char[] chunk = new char[1024];
-        int n;
-        while ((n = br.read(chunk)) != -1) {
-          response.append(chunk, 0, n);
-        }
-        warn(response.toString().trim());
-      }
-    }
-    if (status == 401) {
-      throw new GeneralSecurityException(
-          "Solr requires authentication (response 401). Please try again with '-u' option");
-    }
-    if (status == 403) {
-      throw new GeneralSecurityException(
-          "You are not authorized to perform this action against Solr. (response 403)");
-    }
-    return false;
-  }
-
-  /**
-   * Wraps the UTF-8 encoded bytes of a string in an input stream.
-   *
-   * @param s the string
-   * @return an in-memory input stream over the string's UTF-8 bytes
-   */
-  public static InputStream stringToStream(String s) {
-    final byte[] utf8Bytes = s.getBytes(StandardCharsets.UTF_8);
-    return new ByteArrayInputStream(utf8Bytes);
-  }
-
- /**
- * Pipes everything from the source to the dest. If dest is null, then everything is read from
- * source and thrown away.
- */
- private static void pipe(InputStream source, OutputStream dest) throws IOException {
- byte[] buf = new byte[1024];
- int read = 0;
- while ((read = source.read(buf)) >= 0) {
- if (null != dest) {
- dest.write(buf, 0, read);
- }
- }
- if (null != dest) {
- dest.flush();
- }
- }
-
-  /**
-   * Builds a file filter from a comma separated list of file endings. "*" accepts every name;
-   * otherwise a name is accepted when it ends in a '.' followed by one of the listed endings.
-   */
-  public FileFilter getFileFilterFromFileTypes(String fileTypes) {
-    final String regex =
-        fileTypes.equals("*") ? ".*" : "^.*\\.(" + fileTypes.replace(",", "|") + ")$";
-    return new GlobFileFilter(regex, true);
-  }
-
- //
- // Utility methods for XPath handling
- //
-
-  /** Compiles the XPath expression and returns all nodes it matches below the given node. */
-  public static NodeList getNodesFromXP(Node n, String xpath) throws XPathExpressionException {
-    XPathExpression compiled = XPathFactory.newInstance().newXPath().compile(xpath);
-    Object matches = compiled.evaluate(n, XPathConstants.NODESET);
-    return (NodeList) matches;
-  }
-
- /**
- * Gets the string content of the matching an XPath
- *
- * @param n the node (or doc)
- * @param xpath the xpath string
- * @param concatAll if true, text from all matching nodes will be concatenated, else only the
- * first returned
- */
- public static String getXP(Node n, String xpath, boolean concatAll)
- throws XPathExpressionException {
- NodeList nodes = getNodesFromXP(n, xpath);
- StringBuilder sb = new StringBuilder();
- if (nodes.getLength() > 0) {
- for (int i = 0; i < nodes.getLength(); i++) {
- sb.append(nodes.item(i).getNodeValue()).append(' ');
- if (!concatAll) {
- break;
- }
- }
- return sb.toString().trim();
- } else return "";
- }
-
-  /**
-   * Parses the given bytes as an XML document and returns its DOM.
-   *
-   * <p>The parsed content can originate from crawled web pages, so the parser is configured
-   * defensively: secure processing is switched on and external entities and external DTDs are
-   * not loaded.
-   *
-   * @param in the bytes of the XML document
-   * @return the parsed document
-   * @throws SAXException if the input is not well-formed XML
-   * @throws IOException if reading the input fails
-   * @throws ParserConfigurationException if the parser cannot be configured as required
-   */
-  public static Document makeDom(byte[] in)
-      throws SAXException, IOException, ParserConfigurationException {
-    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
-    factory.setFeature(javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);
-    factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
-    factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
-    factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
-    try (InputStream is = new ByteArrayInputStream(in)) {
-      return factory.newDocumentBuilder().parse(is);
-    }
-  }
-
- /** Inner class to filter files based on glob wildcards */
- static class GlobFileFilter implements FileFilter {
- private String _pattern;
- private Pattern p;
-
- public GlobFileFilter(String pattern, boolean isRegex) {
- _pattern = pattern;
- if (!isRegex) {
- _pattern =
- _pattern
- .replace("^", "\\^")
- .replace("$", "\\$")
- .replace(".", "\\.")
- .replace("(", "\\(")
- .replace(")", "\\)")
- .replace("+", "\\+")
- .replace("*", ".*")
- .replace("?", ".");
- _pattern = "^" + _pattern + "$";
- }
-
- try {
- p = Pattern.compile(_pattern, Pattern.CASE_INSENSITIVE);
- } catch (PatternSyntaxException e) {
- fatal("Invalid type list " + pattern + ". " + e.getDescription());
- }
- }
-
- @Override
- public boolean accept(File file) {
- return p.matcher(file.getName()).find();
- }
- }
-
- //
- // Simple crawler class which can fetch a page and check for robots.txt
- //
- class PageFetcher {
- Map<String, List<String>> robotsCache;
- static final String DISALLOW = "Disallow:";
-
- public PageFetcher() {
- robotsCache = new HashMap<>();
- }
-
- public PageFetcherResult readPageFromUrl(URL u) throws URISyntaxException {
- PageFetcherResult res = new PageFetcherResult();
- try {
- if (isDisallowedByRobots(u)) {
- warn("The URL " + u + " is disallowed by robots.txt and will not be crawled.");
- res.httpStatus = 403;
- URI uri = u.toURI();
- visited.add(uri);
- return res;
- }
- res.httpStatus = 404;
- HttpURLConnection conn = (HttpURLConnection) u.openConnection();
- conn.setRequestProperty(
- "User-Agent",
- "SimplePostTool-crawler/" + VERSION_OF_THIS_TOOL + " (https://solr.apache.org/)");
- conn.setRequestProperty("Accept-Encoding", "gzip, deflate");
- conn.connect();
- res.httpStatus = conn.getResponseCode();
- if (!normalizeUrlEnding(conn.getURL().toString())
- .equals(normalizeUrlEnding(u.toString()))) {
- info("The URL " + u + " caused a redirect to " + conn.getURL());
- u = conn.getURL();
- res.redirectUrl = u;
- URI uri = u.toURI();
- visited.add(uri);
- }
- if (res.httpStatus == 200) {
- // Raw content type of form "text/html; encoding=utf-8"
- String rawContentType = conn.getContentType();
- String type = rawContentType.split(";")[0];
- if (typeSupported(type) || "*".equals(fileTypes)) {
- String encoding = conn.getContentEncoding();
- InputStream is;
- if (encoding != null && encoding.equalsIgnoreCase("gzip")) {
- is = new GZIPInputStream(conn.getInputStream());
- } else if (encoding != null && encoding.equalsIgnoreCase("deflate")) {
- is = new InflaterInputStream(conn.getInputStream(), new Inflater(true));
- } else {
- is = conn.getInputStream();
- }
-
- // Read into memory, so that we later can pull links from the page without re-fetching
- res.content = Utils.toByteArray(is);
- is.close();
- } else {
- warn("Skipping URL with unsupported type " + type);
- res.httpStatus = 415;
- }
- }
- } catch (IOException e) {
- warn("IOException when reading page from url " + u + ": " + e.getMessage());
- }
- return res;
- }
-
- public boolean isDisallowedByRobots(URL url) {
- String host = url.getHost();
- String strRobot = url.getProtocol() + "://" + host + "/robots.txt";
- List<String> disallows = robotsCache.get(host);
- if (disallows == null) {
- disallows = new ArrayList<>();
- URL urlRobot;
- try {
- urlRobot = new URL(strRobot);
- disallows = parseRobotsTxt(urlRobot.openStream());
- } catch (MalformedURLException e) {
- return true; // We cannot trust this robots URL, should not happen
- } catch (IOException e) {
- // There is no robots.txt, will cache an empty disallow list
- }
- }
-
- robotsCache.put(host, disallows);
-
- String strURL = url.getFile();
- for (String path : disallows) {
- if (path.equals("/") || strURL.indexOf(path) == 0) return true;
- }
- return false;
- }
-
- /**
- * Very simple robots.txt parser which obeys all Disallow lines regardless of user agent or
- * whether there are valid Allow: lines.
- *
- * @param is Input stream of the robots.txt file
- * @return a list of disallow paths
- * @throws IOException if problems reading the stream
- */
- protected List<String> parseRobotsTxt(InputStream is) throws IOException {
- List<String> disallows = new ArrayList<>();
- BufferedReader r = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));
- String l;
- while ((l = r.readLine()) != null) {
- String[] arr = l.split("#");
- if (arr.length == 0) continue;
- l = arr[0].trim();
- if (l.startsWith(DISALLOW)) {
- l = l.substring(DISALLOW.length()).trim();
- if (l.length() == 0) continue;
- disallows.add(l);
- }
- }
- is.close();
- return disallows;
- }
-
- /**
- * Finds links on a web page, using /extract?extractOnly=true
- *
- * @param u the URL of the web page
- * @param is the input stream of the page
- * @param type the content-type
- * @param postUrl the URL (typically /solr/extract) in order to pull out links
- * @return a set of URIs parsed from the page
- */
- protected Set<URI> getLinksFromWebPage(URL u, InputStream is, String type, URL postUrl) {
- Set<URI> linksFromPage = new HashSet<>();
- URL url = null;
- try {
- ByteArrayOutputStream os = new ByteArrayOutputStream();
- URL extractUrl = new URL(appendParam(postUrl.toString(), "extractOnly=true"));
- extractUrl = new URL(appendParam(extractUrl.toString(), "wt=xml"));
- boolean success = postData(is, null, os, type, extractUrl);
- if (success) {
- Document d = makeDom(os.toByteArray());
- String innerXml = getXP(d, "/response/str/text()[1]", false);
- d = makeDom(innerXml.getBytes(StandardCharsets.UTF_8));
- NodeList links = getNodesFromXP(d, "/html/body//a/@href");
- for (int i = 0; i < links.getLength(); i++) {
- String link = links.item(i).getTextContent();
- link = computeFullUrl(u, link);
- if (link == null) {
- continue;
- }
- URI newUri = new URI(link);
- if (newUri.getAuthority() == null || !newUri.getAuthority().equals(u.getAuthority())) {
- linksFromPage.add(newUri);
- }
- }
- }
- } catch (MalformedURLException e) {
- warn("Malformed URL " + url);
- } catch (IOException e) {
- warn("IOException opening URL " + url + ": " + e.getMessage());
- } catch (Exception e) {
- throw new RuntimeException(e);
- }
-
- return linksFromPage;
- }
- }
-
- /** Utility class to hold the result from a page fetch */
- public static class PageFetcherResult {
- int httpStatus = 200; // HTTP status of the fetch; readPageFromUrl also stores 403, 404 or 415 here
- String contentType = "text/html"; // content type handed to postData by webCrawl
- URL redirectUrl = null; // set by readPageFromUrl when the connection ended up at another URL
- ByteBuffer content; // page body read into memory by readPageFromUrl
- }
-}
diff --git a/solr/core/src/test/org/apache/solr/cli/SimplePostToolTest.java b/solr/core/src/test/org/apache/solr/cli/SimplePostToolTest.java
deleted file mode 100644
index fdd58149a82..00000000000
--- a/solr/core/src/test/org/apache/solr/cli/SimplePostToolTest.java
+++ /dev/null
@@ -1,286 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.cli;
-
-import java.io.ByteArrayInputStream;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.MalformedURLException;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.net.URL;
-import java.nio.ByteBuffer;
-import java.nio.charset.StandardCharsets;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Set;
-import org.apache.solr.SolrTestCaseJ4;
-import org.apache.solr.cli.SimplePostTool.PageFetcher;
-import org.apache.solr.cli.SimplePostTool.PageFetcherResult;
-import org.junit.Before;
-import org.junit.Test;
-
-/**
- * Tests for SimplePostTool's argument parsing, URL helpers, file mode and web crawl mode.
- *
- * <p>NOTE: do *not* use real hostnames, not even "example.com", in this test.
- *
- * <p>A MockPageFetcher is used to prevent real HTTP requests from being executed.
- */
-public class SimplePostToolTest extends SolrTestCaseJ4 {
-
-  SimplePostTool t_file, t_file_auto, t_file_rec, t_web, t_test;
-  PageFetcher pf;
-
-  /**
-   * Builds one tool per mode. The system properties are set cumulatively, so each later tool
-   * also sees the properties set for the earlier ones.
-   */
-  @Before
-  public void initVariousPostTools() throws Exception {
-    String[] args = {"-"};
-
-    // Add a dummy core/collection property so that the SimplePostTool
-    // doesn't fail fast.
-    System.setProperty("c", "testcollection");
-
-    System.setProperty("data", "files");
-    t_file = SimplePostTool.parseArgsAndInit(args);
-
-    System.setProperty("auto", "yes");
-    t_file_auto = SimplePostTool.parseArgsAndInit(args);
-
-    System.setProperty("recursive", "yes");
-    t_file_rec = SimplePostTool.parseArgsAndInit(args);
-
-    System.setProperty("data", "web");
-    t_web = SimplePostTool.parseArgsAndInit(args);
-
-    // Two parameters separated by '&' (the separator must not be mangled into a pilcrow sign)
-    System.setProperty("params", "param1=foo&param2=bar");
-    System.setProperty("url", "http://user:password@localhost:5150/solr/update");
-    t_test = SimplePostTool.parseArgsAndInit(args);
-
-    pf = new MockPageFetcher();
-    for (SimplePostTool mockable : new SimplePostTool[] {t_web, t_file_auto}) {
-      mockable.pageFetcher = pf;
-      mockable.mockMode = true;
-    }
-  }
-
-  @Test
-  public void testParseArgsAndInit() {
-    assertFalse(t_file.auto);
-    assertTrue(t_file_auto.auto);
-    assertEquals(0, t_file_auto.recursive);
-    assertEquals(999, t_file_rec.recursive);
-    assertTrue(t_file.commit);
-    assertFalse(t_file.optimize);
-    assertNull(t_file.out);
-
-    assertEquals(1, t_web.recursive);
-    assertEquals(10, t_web.delay);
-
-    assertEquals(
-        "http://user:password@localhost:5150/solr/update?param1=foo&param2=bar",
-        t_test.solrUrl.toExternalForm());
-  }
-
-  @Test
-  public void testNormalizeUrlEnding() {
-    assertEquals("http://[ff01::114]", SimplePostTool.normalizeUrlEnding("http://[ff01::114]/"));
-    assertEquals(
-        "http://[ff01::114]", SimplePostTool.normalizeUrlEnding("http://[ff01::114]/#foo?bar=baz"));
-    assertEquals(
-        "http://[ff01::114]/index.html",
-        SimplePostTool.normalizeUrlEnding("http://[ff01::114]/index.html#hello"));
-  }
-
-  @Test
-  public void testComputeFullUrl() throws MalformedURLException {
-    assertEquals(
-        "http://[ff01::114]/index.html",
-        t_web.computeFullUrl(new URL("http://[ff01::114]/"), "/index.html"));
-    assertEquals(
-        "http://[ff01::114]/index.html",
-        t_web.computeFullUrl(new URL("http://[ff01::114]/foo/bar/"), "/index.html"));
-    assertEquals(
-        "http://[ff01::114]/fil.html",
-        t_web.computeFullUrl(new URL("http://[ff01::114]/foo.htm?baz#hello"), "fil.html"));
-    // TODO: How to know what is the base if URL path ends with "foo"??
-    // assertEquals("http://[ff01::114]/fil.html", t_web.computeFullUrl(new
-    // URL("http://[ff01::114]/foo?baz#hello"), "fil.html"));
-    assertNull(t_web.computeFullUrl(new URL("http://[ff01::114]/"), "fil.jpg"));
-    assertNull(t_web.computeFullUrl(new URL("http://[ff01::114]/"), "mailto:hello@foo.bar"));
-    assertNull(t_web.computeFullUrl(new URL("http://[ff01::114]/"), "ftp://server/file"));
-  }
-
-  @Test
-  public void testTypeSupported() {
-    assertTrue(t_web.typeSupported("application/pdf"));
-    assertTrue(t_web.typeSupported("application/xml"));
-    assertFalse(t_web.typeSupported("text/foo"));
-
-    t_web.fileTypes = "doc,xls,ppt";
-    t_web.fileFilter = t_web.getFileFilterFromFileTypes(t_web.fileTypes);
-    assertFalse(t_web.typeSupported("application/pdf"));
-    assertTrue(t_web.typeSupported("application/msword"));
-  }
-
-  @Test
-  public void testIsOn() {
-    assertTrue(SimplePostTool.isOn("true"));
-    assertTrue(SimplePostTool.isOn("1"));
-    assertFalse(SimplePostTool.isOn("off"));
-  }
-
-  @Test
-  public void testAppendParam() {
-    assertEquals(
-        "http://[ff01::114]?foo=bar", SimplePostTool.appendParam("http://[ff01::114]", "foo=bar"));
-    assertEquals(
-        "http://[ff01::114]/?a=b&foo=bar",
-        SimplePostTool.appendParam("http://[ff01::114]/?a=b", "foo=bar"));
-  }
-
-  @Test
-  public void testAppendUrlPath() throws MalformedURLException {
-    assertEquals(
-        new URL("http://[ff01::114]/a?foo=bar"),
-        SimplePostTool.appendUrlPath(new URL("http://[ff01::114]?foo=bar"), "/a"));
-  }
-
-  @Test
-  public void testGuessType() {
-    File f = new File("foo.doc");
-    assertEquals("application/msword", SimplePostTool.guessType(f));
-    f = new File("foobar");
-    assertEquals("application/octet-stream", SimplePostTool.guessType(f));
-    f = new File("foo.json");
-    assertEquals("application/json", SimplePostTool.guessType(f));
-  }
-
-  @Test
-  public void testDoFilesMode() {
-    t_file_auto.recursive = 0;
-    File dir = getFile("exampledocs");
-    int num = t_file_auto.postFiles(new File[] {dir}, 0, null, null);
-    assertEquals(2, num);
-  }
-
-  @Test
-  public void testDoWebMode() {
-    // Uses mock pageFetcher
-    t_web.delay = 0;
-    t_web.recursive = 5;
-    int num = t_web.postWebPages(new String[] {"http://[ff01::114]/#removeme"}, 0, null);
-    assertEquals(5, num);
-
-    t_web.recursive = 1;
-    num = t_web.postWebPages(new String[] {"http://[ff01::114]/"}, 0, null);
-    assertEquals(3, num);
-
-    // Without respecting robots.txt
-    t_web.pageFetcher.robotsCache.put("[ff01::114]", Collections.emptyList());
-    t_web.recursive = 5;
-    num = t_web.postWebPages(new String[] {"http://[ff01::114]/#removeme"}, 0, null);
-    assertEquals(6, num);
-  }
-
-  @Test
-  public void testRobotsExclusion() throws MalformedURLException {
-    assertFalse(t_web.pageFetcher.isDisallowedByRobots(new URL("http://[ff01::114]/")));
-    assertTrue(t_web.pageFetcher.isDisallowedByRobots(new URL("http://[ff01::114]/disallowed")));
-    assertEquals(
-        "There should be two entries parsed from robots.txt",
-        2,
-        t_web.pageFetcher.robotsCache.get("[ff01::114]").size());
-  }
-
-  /** Page fetcher serving a small canned site from memory instead of doing HTTP requests. */
-  static class MockPageFetcher extends PageFetcher {
-    HashMap<String, String> htmlMap = new HashMap<>();
-    HashMap<String, Set<URI>> linkMap = new HashMap<>();
-
-    public MockPageFetcher() throws IOException, URISyntaxException {
-      (new SimplePostTool()).super();
-      htmlMap.put(
-          "http://[ff01::114]",
-          "<html><body><a href=\"http://[ff01::114]/page1\">page1</a><a href=\"http://[ff01::114]/page2\">page2</a></body></html>");
-      htmlMap.put(
-          "http://[ff01::114]/index.html",
-          "<html><body><a href=\"http://[ff01::114]/page1\">page1</a><a href=\"http://[ff01::114]/page2\">page2</a></body></html>");
-      htmlMap.put(
-          "http://[ff01::114]/page1",
-          "<html><body><a href=\"http://[ff01::114]/page1/foo\"></body></html>");
-      htmlMap.put(
-          "http://[ff01::114]/page1/foo",
-          "<html><body><a href=\"http://[ff01::114]/page1/foo/bar\"></body></html>");
-      htmlMap.put(
-          "http://[ff01::114]/page1/foo/bar",
-          "<html><body><a href=\"http://[ff01::114]/page1\"></body></html>");
-      htmlMap.put(
-          "http://[ff01::114]/page2",
-          "<html><body><a href=\"http://[ff01::114]/\"><a href=\"http://[ff01::114]/disallowed\"/></body></html>");
-      htmlMap.put(
-          "http://[ff01::114]/disallowed",
-          "<html><body><a href=\"http://[ff01::114]/\"></body></html>");
-
-      Set<URI> s = new HashSet<>();
-      s.add(new URI("http://[ff01::114]/page1"));
-      s.add(new URI("http://[ff01::114]/page2"));
-      linkMap.put("http://[ff01::114]", s);
-      linkMap.put("http://[ff01::114]/index.html", s);
-      s = new HashSet<>();
-      s.add(new URI("http://[ff01::114]/page1/foo"));
-      linkMap.put("http://[ff01::114]/page1", s);
-      s = new HashSet<>();
-      s.add(new URI("http://[ff01::114]/page1/foo/bar"));
-      linkMap.put("http://[ff01::114]/page1/foo", s);
-      s = new HashSet<>();
-      s.add(new URI("http://[ff01::114]/disallowed"));
-      linkMap.put("http://[ff01::114]/page2", s);
-
-      // Simulate a robots.txt file with comments and a few disallows
-      StringBuilder sb = new StringBuilder();
-      sb.append(
-          "# Comments appear after the \"#\" symbol at the start of a line, or after a directive\n");
-      sb.append("User-agent: * # match all bots\n");
-      sb.append("Disallow: # This is void\n");
-      sb.append("Disallow: /disallow # Disallow this path\n");
-      sb.append("Disallow: /nonexistentpath # Disallow this path\n");
-      this.robotsCache.put(
-          "[ff01::114]",
-          super.parseRobotsTxt(
-              new ByteArrayInputStream(sb.toString().getBytes(StandardCharsets.UTF_8))));
-    }
-
-    @Override
-    public PageFetcherResult readPageFromUrl(URL u) {
-      PageFetcherResult res = new PageFetcherResult();
-      if (isDisallowedByRobots(u)) {
-        res.httpStatus = 403;
-        return res;
-      }
-      res.httpStatus = 200;
-      res.contentType = "text/html";
-      res.content = ByteBuffer.wrap(htmlMap.get(u.toString()).getBytes(StandardCharsets.UTF_8));
-      return res;
-    }
-
-    @Override
-    public Set<URI> getLinksFromWebPage(URL u, InputStream is, String type, URL postUrl) {
-      Set<URI> s = linkMap.get(SimplePostTool.normalizeUrlEnding(u.toString()));
-      if (s == null) s = new HashSet<>();
-      return s;
-    }
-  }
-}
diff --git a/solr/packaging/build.gradle b/solr/packaging/build.gradle
index b028155985c..32e849c1330 100644
--- a/solr/packaging/build.gradle
+++ b/solr/packaging/build.gradle
@@ -121,8 +121,6 @@ distributions {
// Manually correct posix permissions (matters when packaging on Windows).
filesMatching([
"**/*.sh",
- "**/bin/post",
- "**/bin/postlogs",
"**/bin/solr",
"**/bin/init.d/solr",
]) { copy ->
diff --git a/solr/packaging/test/test_postlogs.bats b/solr/packaging/test/test_postlogs.bats
index 05b5db19b75..77d80f92c31 100644
--- a/solr/packaging/test/test_postlogs.bats
+++ b/solr/packaging/test/test_postlogs.bats
@@ -38,18 +38,6 @@ teardown() {
delete_all_collections
}
-@test "post solr log into solr via script" {
- run solr create -c COLL_NAME
- assert_output --partial "Created collection 'COLL_NAME'"
-
- run postlogs http://localhost:${SOLR_PORT}/solr/COLL_NAME ${SOLR_LOGS_DIR}/solr.log
- assert_output --partial 'Sending last batch'
- assert_output --partial 'Committed'
-
- run curl "http://localhost:${SOLR_PORT}/solr/COLL_NAME/select?q=*:*"
- refute_output --partial '"numFound":0'
-}
-
@test "post solr log into solr via cli" {
run solr create -c COLL_NAME
assert_output --partial "Created collection 'COLL_NAME'"
diff --git a/solr/solr-ref-guide/modules/indexing-guide/pages/post-tool.adoc b/solr/solr-ref-guide/modules/indexing-guide/pages/post-tool.adoc
index 0f96e947b2a..57ad8d83575 100644
--- a/solr/solr-ref-guide/modules/indexing-guide/pages/post-tool.adoc
+++ b/solr/solr-ref-guide/modules/indexing-guide/pages/post-tool.adoc
@@ -20,9 +20,6 @@ Solr includes a simple command line tool for POSTing various types of content to
NOTE: This tool is meant for use by new users exploring Solr's capabilities, and is not intended as a robust solution to be used for indexing documents into production systems.
-NOTE: You may be familiar with SimplePostTool and the `bin/post` Unix shell script. While this is still available, it is deprecated and will be removed in Solr 10.
-
-
To run it, open a window and enter:
[,console]