You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by eh...@apache.org on 2015/01/18 11:12:45 UTC

svn commit: r1652722 - /lucene/dev/branches/lucene_solr_5_0/solr/bin/post

Author: ehatcher
Date: Sun Jan 18 10:12:45 2015
New Revision: 1652722

URL: http://svn.apache.org/r1652722
Log:
SOLR-6900: add support for stdin and string args

Modified:
    lucene/dev/branches/lucene_solr_5_0/solr/bin/post

Modified: lucene/dev/branches/lucene_solr_5_0/solr/bin/post
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_5_0/solr/bin/post?rev=1652722&r1=1652721&r2=1652722&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_5_0/solr/bin/post (original)
+++ lucene/dev/branches/lucene_solr_5_0/solr/bin/post Sun Jan 18 10:12:45 2015
@@ -14,13 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# TODO wishlist:
-# - handle stdin as well, such that `cat foo.csv | bin/post my_collection` works
-# - bin/post collection "file with spaces.csv" does not work, breaks arguments at whitespace apparently.
-# - support arbitrary posting like - java -Ddata=args org.apache.solr.util.SimplePostTool "<delete><id>SP2514N</id></delete>"
-# - convert OPTIONS (key=val pass-through to SPT) to standard 'nix switches
-
-# ====== Common code copied/adapted from bin/solr (TODO: centralize/share this kind of thing)
+# ====== Common code copied/adapted from bin/solr (TODO: centralize/share this kind of thing across bin/solr, etc)
 
 THIS_SCRIPT="$0"
 
@@ -62,7 +56,7 @@ TOOL_JAR=$SOLR_TIP/dist/solr-core-*.jar
 
 function print_usage() {
   echo ""
-  echo "Usage: post -c <collection/core> <files|directories|urls> [OPTIONS]"
+  echo 'Usage: post -c <collection> [OPTIONS] <files|directories|urls|-d ["...",...]>'
   echo "    or post -help"
   echo ""
   echo "   collection name defaults to DEFAULT_SOLR_COLLECTION if not specified"
@@ -74,27 +68,33 @@ function print_usage() {
   echo "    -host <host> (default: localhost)"
   echo "    -port <port> (default: 8983)"
   echo "    -commit yes|no (default: yes)"
+  # optimize intentionally omitted, but can be used as '-optimize yes' (default: no)
   echo ""
   echo "  Web crawl options:"
   echo "    -recursive <depth> (default: 1)"
-  echo "    -delay <seconds> (default=10)"
+  echo "    -delay <seconds> (default: 10)"
   echo ""
   echo "  Directory crawl options:"
-  echo "    -delay <seconds> (default=0)"
+  echo "    -delay <seconds> (default: 0)"
+  echo ""
+  echo "  stdin/args options:"
+  echo "    -type <content/type> (default: application/xml)"
   echo ""
   echo "  Other options:"
   echo "    -filetypes <type>[,<type>,...] (default: xml,json,csv,pdf,doc,docx,ppt,pptx,xls,xlsx,odt,odp,ods,ott,otp,ots,rtf,htm,html,txt,log)"
   echo "    -params \"<key>=<value>[&<key>=<value>...]\" (values must be URL-encoded; these pass through to Solr update request)"
-  echo "    -out yes|no (default=no; yes outputs Solr response to console)"
+  echo "    -out yes|no (default: no; yes outputs Solr response to console)"
   echo ""
   echo ""
   echo "Examples:"
   echo ""
-  echo "JSON file: $THIS_SCRIPT -c wizbang events.json"
-  echo "XML files: $THIS_SCRIPT -c records article*.xml"
-  echo "CSV file: $THIS_SCRIPT -c signals LATEST-signals.csv"
-  echo "Directory of files: $THIS_SCRIPT -c myfiles ~/Documents"
-  echo "Web crawl: $THIS_SCRIPT -c gettingstarted http://lucidworks.com -recursive 2 -delay 1"
+  echo "* JSON file: $THIS_SCRIPT -c wizbang events.json"
+  echo "* XML files: $THIS_SCRIPT -c records article*.xml"
+  echo "* CSV file: $THIS_SCRIPT -c signals LATEST-signals.csv"
+  echo "* Directory of files: $THIS_SCRIPT -c myfiles ~/Documents"
+  echo "* Web crawl: $THIS_SCRIPT -c gettingstarted http://lucidworks.com -recursive 2 -delay 1"
+  echo "* Standard input (stdin): echo '{"commit": {}}' | $THIS_SCRIPT -c my_collection -type application/json -out yes -d"
+  echo "* Data as string: $THIS_SCRIPT -c signals -type text/csv -out yes -d $'id,value\n1,0.47'"
   echo ""
 } # end print_usage
 
@@ -109,6 +109,7 @@ PROPS="-Dauto=yes"
 RECURSIVE=""
 FILES=()
 URLS=()
+ARGS=()
 
 while [ $# -gt 0 ]; do
   # TODO: natively handle the optional parameters to SPT
@@ -117,18 +118,15 @@ while [ $# -gt 0 ]; do
   if [[ -d "$1" ]]; then
     # Directory
 #    echo "$1: DIRECTORY"
-    MODE="files"
     RECURSIVE="-Drecursive=yes"
     FILES+=("$1")
   elif [[ -f "$1" ]]; then
     # File
 #    echo "$1: FILE"
-    MODE="files"
     FILES+=("$1")
   elif [[ "$1" == http* ]]; then
     # URL
 #    echo "$1: URL"
-    MODE="web"
     URLS+=("$1")
   else
     if [[ $1 == -* ]]; then
@@ -136,6 +134,19 @@ while [ $# -gt 0 ]; do
         # Special case, pull out collection name
         shift
         COLLECTION=$1
+      elif [[ ($1 == "-d" || $1 == "--data" || $1 == "-") ]]; then
+        if [[ -s /dev/stdin ]]; then
+          MODE="stdin"
+        else
+          # when no stdin exists and -d is specified, the rest of the arguments
+          # are assumed to be strings to post as-is
+          MODE="args"
+          shift
+          if [[ $# -gt 0 ]]; then
+            ARGS=("$@")
+            shift $#
+          fi
+        fi
       else
         key=${1:1}
         shift
@@ -151,33 +162,54 @@ while [ $# -gt 0 ]; do
 done
 
 # Check for errors
-if [[ ${#FILES[@]} != 0 && ${#URLS[@]} != 0 ]]; then
-  echo -e "\nCombining files (or directories) and URLs is not supported.  Post them separately.\n"
+if [[ $COLLECTION == "" ]]; then
+  echo -e "\nCollection must be specified.  Use -c <collection name> or set DEFAULT_SOLR_COLLECTION in your environment.\n"
   exit 1
 fi
 
-if [[ ${#FILES[@]} == 0 && ${#URLS[@]} == 0 ]]; then
-  echo -e "\nNo files, directories, or URLs were specified.  See '$THIS_SCRIPT -h' for usage instructions.\n"
+# Unsupported: bin/post -c foo
+if [[ ${#FILES[@]} == 0 && ${#URLS[@]} == 0 && $MODE != "stdin" && $MODE != "args" ]]; then
+  echo -e "\nNo files, directories, URLs, -d strings, or stdin were specified.  See '$THIS_SCRIPT -h' for usage instructions.\n"
   exit 1
 fi
 
-if [[ $COLLECTION == "" ]]; then
-  echo -e "\nCollection must be specified.  Use -c <collection name> or set DEFAULT_SOLR_COLLECTION in your environment.\n"
+# SPT does not support mixing different data mode types: it accepts only files, only URLs, only stdin, or only argument strings.
+# The following are unsupported constructs:
+#    bin/post -c foo existing_file.csv http://example.com
+#    echo '<xml.../>' | bin/post -c foo existing_file.csv
+#    bin/post -c foo existing_file.csv -d 'anything'
+if [[ (${#FILES[@]} != 0 && ${#URLS[@]} != 0 && $MODE != "stdin" && $MODE != "args")
+      || ((${#FILES[@]} != 0 || ${#URLS[@]} != 0) && ($MODE == "stdin" || $MODE == "args")) ]]; then
+  echo -e "\nCombining files/directories, URLs, stdin, or args is not supported.  Post them separately.\n"
   exit 1
 fi
 
 PARAMS=""
-if [[ $FILES != "" ]]; then
-  MODE="files"
-  PARAMS=("${FILES[@]}")
-fi
 
-if [[ $URLS != "" ]]; then
-  MODE="web"
-  PARAMS=("${URLS[@]}")
+# TODO: let's simplify this
+if [[ $MODE != "stdin" && $MODE != "args" ]]; then
+  if [[ $FILES != "" ]]; then
+    MODE="files"
+    PARAMS=("${FILES[@]}")
+  fi
+
+  if [[ $URLS != "" ]]; then
+    MODE="web"
+    PARAMS=("${URLS[@]}")
+  fi
+else
+  if [[ ${#ARGS[@]} == 0 ]]; then
+    # SPT needs an args string that is valid to post to Solr; this default makes 'bin/post -c foo -d' usable to force a commit
+    ARGS+=("<add/>")
+  fi
+  PARAMS=("${ARGS[@]}")
 fi
 
 PROPS="$PROPS -Dc=$COLLECTION -Ddata=$MODE $RECURSIVE"
 
-#echo "$JAVA" -classpath $TOOL_JAR $PROPS org.apache.solr.util.SimplePostTool "${PARAMS[@]}"
-"$JAVA" -classpath $TOOL_JAR $PROPS org.apache.solr.util.SimplePostTool "${PARAMS[@]}"
\ No newline at end of file
+echo "$JAVA" -classpath $TOOL_JAR $PROPS org.apache.solr.util.SimplePostTool "${PARAMS[@]}"
+"$JAVA" -classpath $TOOL_JAR $PROPS org.apache.solr.util.SimplePostTool "${PARAMS[@]}"
+
+# post smoker:
+# bin/post -c signals -out yes -type application/json -d '[{"id": 2, "val": 0.47}]'
+# bin/post -c signals -out yes -params "wt=json" -d '<add><doc><field name="id">1</field></doc></add>'