You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by rv...@apache.org on 2015/07/07 14:53:08 UTC
[09/18] jena git commit: Finish up first pass of work on tdbloader2
script refactoring (JENA-977)
Finish up first pass of work on tdbloader2 script refactoring (JENA-977)
- Add options for setting the JVM and sort arguments that do not rely on
environment variables. NB - For backwards compatibility the existing
environment variables are still honoured if the new command line
options are not used.
- Improve some error messages
- Explicitly support -- for separating data files from options for cases
where file names may be confused with options
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/d4a0bc50
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/d4a0bc50
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/d4a0bc50
Branch: refs/heads/master
Commit: d4a0bc50a6d82ab5bbb43ab90e65216e5b165621
Parents: cc4a80a
Author: Rob Vesse <rv...@apache.org>
Authored: Tue Jun 30 15:04:50 2015 +0100
Committer: Rob Vesse <rv...@apache.org>
Committed: Tue Jun 30 15:04:50 2015 +0100
----------------------------------------------------------------------
apache-jena/bin/tdbloader2 | 76 ++++++++++++++++++++++++++++-------
apache-jena/bin/tdbloader2common | 4 ++
apache-jena/bin/tdbloader2data | 40 +++++++++++++++---
apache-jena/bin/tdbloader2index | 72 ++++++++++++++++++++++++++++-----
4 files changed, 162 insertions(+), 30 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/d4a0bc50/apache-jena/bin/tdbloader2
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2 b/apache-jena/bin/tdbloader2
index d0d906c..e598aeb 100755
--- a/apache-jena/bin/tdbloader2
+++ b/apache-jena/bin/tdbloader2
@@ -53,6 +53,20 @@ Advanced additional options are as follows:
--debug
Enable debug mode, adds extra debug output
+ -j <JvmArgs>
+ --jvm-args <JvmArgs>
+ Sets the arguments that should be passed to the JVM for the
+ JVM based portions of the build.
+
+ Generally it is best to not change these unless you have been
+ specifically advised to. The scripts will use appropriate
+ defaults if this is not specified.
+
+ In particular be careful increasing the heap size since many
+ parts of TDB actually use memory mapped files that live
+ outside the heap so if the heap is too large the heap may
+ conflict with the memory mapped files for memory space.
+
-k
--keep-work
Keeps the temporary work files around after they are no longer
@@ -69,6 +83,14 @@ Advanced additional options are as follows:
When no phase is specified it defaults to all
+ -s <SortArgs>
+ --sort-args <SortArgs>
+ Sets the arguments that should be passed to sort for the sort
+ based portions of the build.
+
+ Generally it is best not to change these as the scripts will
+ use appropriate defaults for your system.
+
-t
--trace
Enable trace mode, essentially sets -x within the scripts
@@ -111,15 +133,7 @@ case "$(uname)" in
esac
export JENA_CP
-# echo JENA_CP
-if [ -z "$SORT_ARGS" ]; then
- SORT_ARGS="--buffer-size=50%"
- sort --parallel=3 < /dev/null 2>/dev/null
- if [ $? = 0 ]; then
- SORT_ARGS="$SORT_ARGS --parallel=3"
- fi
-fi
-export SORT_ARGS
+
# Process arguments
LOC=
@@ -127,6 +141,8 @@ PHASE=
KEEP_WORK=0
DEBUG=0
TRACE=0
+JVM_ARGS=
+SORT_ARGS=
while [ $# -gt 0 ]
do
@@ -142,6 +158,12 @@ do
printUsage
exit 0
;;
+ -j|--jvm-args)
+ # JVM Arguments
+ shift
+ JVM_ARGS="$1"
+ shift
+ ;;
-k|--keep-work)
# Keep work files
shift
@@ -164,14 +186,30 @@ do
PHASE="$1"
shift
;;
+ -s|--sort-args)
+ # Sort arguments
+ shift
+ SORT_ARGS=$1
+ shift
+ ;;
-t|--trace)
# Trace mode
shift
TRACE=1
set -x
;;
+ --)
+ # Arguments separator
+ # All further arguments are treated as data files
+ shift
+ break
+ ;;
+ -*)
+ # Looks like an option but not known
+ abort 1 "Unrecognized option $ARG, if this was meant to be a data file separate options from data files with --"
+ ;;
*)
- # Once we see an unrecognized argument treat as start of files to process
+ # Once we see an unrecognized argument that doesn't look like an option treat as start of files to process
break
;;
esac
@@ -180,7 +218,11 @@ done
if [ -z "$PHASE" ]; then
PHASE="all"
fi
+
+# Prepare arguments to pass to children
COMMON_ARGS=
+DATA_ARGS=
+INDEX_ARGS=
if [ $KEEP_WORK = 1 ]; then
COMMON_ARGS="--keep-work"
fi
@@ -190,6 +232,12 @@ fi
if [ $TRACE = 1 ]; then
COMMON_ARGS="$COMMON_ARGS --trace"
fi
+if [ -n "$JVM_ARGS" ]; then
+ COMMON_ARGS="$COMMON_ARGS --jvm-args $JVM_ARGS"
+fi
+if [ -n "$SORT_ARGS" ]; then
+ INDEX_ARGS="--sort-args $SORT_ARGS"
+fi
# ---- Start
info "-- TDB Bulk Loader Start"
@@ -200,14 +248,14 @@ case "$PHASE" in
all)
# All Phases
# Data Phase
- "${TOOL_DIR}/tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@"
+ "${TOOL_DIR}/tdbloader2data" $COMMON_ARGS $DATA_ARGS --loc "$LOC" -- "$@"
RET=$?
if [ $RET -ne 0 ]; then
abort $RET "Failed during data phase"
fi
# Index Phase
- "${TOOL_DIR}/tdbloader2index" $COMMON_ARGS --loc "$LOC"
+ "${TOOL_DIR}/tdbloader2index" $COMMON_ARGS $INDEX_ARGS --loc "$LOC"
RET=$?
if [ $RET -ne 0 ]; then
abort $RET "Failed during data phase"
@@ -216,7 +264,7 @@ case "$PHASE" in
data)
# Data Phase
- "${TOOL_DIR}/tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@"
+ "${TOOL_DIR}/tdbloader2data" $COMMON_ARGS $DATA_ARGS --loc "$LOC" -- "$@"
RET=$?
if [ $RET -ne 0 ]; then
abort $RET "Failed during data phase"
@@ -225,7 +273,7 @@ case "$PHASE" in
index)
# Index Phase
- "${TOOL_DIR}/tdbloader2index" $COMMON_ARGS --loc "$LOC"
+ "${TOOL_DIR}/tdbloader2index" $COMMON_ARGS $INDEX_ARGS --loc "$LOC"
RET=$?
if [ $RET -ne 0 ]; then
abort $RET "Failed during index phase"
http://git-wip-us.apache.org/repos/asf/jena/blob/d4a0bc50/apache-jena/bin/tdbloader2common
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2common b/apache-jena/bin/tdbloader2common
index 2830545..2c116ad 100644
--- a/apache-jena/bin/tdbloader2common
+++ b/apache-jena/bin/tdbloader2common
@@ -80,7 +80,10 @@ function getDriveInfo() {
}
function getFreeMem() {
+ # May be called from a script where exit on error is set
+ # in which case disable for the life of this function
set +e
+
local FREE_MEM=-1
case "$OSTYPE" in
darwin*)
@@ -98,6 +101,7 @@ function getFreeMem() {
fi
;;
esac
+
set -e
echo "$FREE_MEM"
http://git-wip-us.apache.org/repos/asf/jena/blob/d4a0bc50/apache-jena/bin/tdbloader2data
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2data b/apache-jena/bin/tdbloader2data
index 2c48a50..f942e20 100755
--- a/apache-jena/bin/tdbloader2data
+++ b/apache-jena/bin/tdbloader2data
@@ -63,6 +63,20 @@ Advanced additional options are as follows:
--debug
Enable debug mode, adds extra debug output
+ -j <JvmArgs>
+ --jvm-args <JvmArgs>
+ Sets the arguments that should be passed to the JVM for the
+ JVM based portions of the build.
+
+ Generally it is best to not change these unless you have been
+ specifically advised to. The scripts will use appropriate
+ defaults if this is not specified.
+
+ In particular be careful increasing the heap size since many
+ parts of TDB actually use memory mapped files that live
+ outside the heap so if the heap is too large the heap may
+ conflict with the memory mapped files for memory space.
+
-k
--keep-work
Keeps the temporary work files around after they are no longer
@@ -96,6 +110,12 @@ do
printUsage
exit 0
;;
+ -j|--jvm-args)
+ # JVM Arguments
+ shift
+ JVM_ARGS="$1"
+ shift
+ ;;
-k|--keep-work)
# Keep work files
# This option is actually not used by this script but may be passed in
@@ -119,6 +139,16 @@ do
shift
set -x
;;
+ --)
+ # Arguments separator
+ # All further arguments are treated as data files
+ shift
+ break
+ ;;
+ -*)
+ # Unrecognized
+ abort 1 "Unrecognized option $ARG, if this was meant to be a data file separate options from data files with --"
+ ;;
*)
# Any further arguments are treated as data files
break
@@ -128,10 +158,10 @@ done
# Verify arguments
if [ -z "$LOC" ]; then
- abort 1 "No location specified"
+ abort 1 "Required database location not specified"
fi
if [ $# = 0 ]; then
- abort 1 "No data files specified"
+ abort 1 "No data files specified, one/more data files must be specified"
fi
# Make LOC absolute
@@ -145,7 +175,7 @@ fi
# Skip a possible configuration file
if test -n "$(find "$LOC" -maxdepth 1 -type f ! -name 'this.*' -print -quit)"
then
- abort 1 "Location is not empty: $LOC"
+ abort 1 "Database location is not empty: $LOC"
fi
if [ ! -e "$LOC" ] ; then
@@ -158,12 +188,12 @@ if [ ! -e "$LOC" ] ; then
debug "New database directory created: $LOC"
fi
if [ ! -d "$LOC" ]; then
- abort 1 "Location is not a directory: $LOC"
+ abort 1 "Database location is not a directory: $LOC"
fi
FILES="$@"
-## JVM Arguments
+## Prepare JVM Arguments
JVM_ARGS=${JVM_ARGS:--Xmx1200M}
debug "JVM Arguments are $JVM_ARGS"
http://git-wip-us.apache.org/repos/asf/jena/blob/d4a0bc50/apache-jena/bin/tdbloader2index
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2index b/apache-jena/bin/tdbloader2index
index 15a5832..b997b39 100755
--- a/apache-jena/bin/tdbloader2index
+++ b/apache-jena/bin/tdbloader2index
@@ -60,11 +60,33 @@ Advanced additional options are as follows:
--debug
Enable debug mode, adds extra debug output
+ -j <JvmArgs>
+ --jvm-args <JvmArgs>
+ Sets the arguments that should be passed to the JVM for the
+ JVM based portions of the build.
+
+ Generally it is best to not change these unless you have been
+ specifically advised to. The scripts will use appropriate
+ defaults if this is not specified.
+
+ In particular be careful increasing the heap size since many
+ parts of TDB actually use memory mapped files that live
+ outside the heap so if the heap is too large the heap may
+ conflict with the memory mapped files for memory space.
+
-k
--keep-work
Keeps the temporary work files around after they are no longer
needed. May be useful for debugging.
+ -s <SortArgs>
+ --sort-args <SortArgs>
+ Sets the arguments that should be passed to sort for the sort
+ based portions of the build.
+
+ Generally it is best not to change these as the scripts will
+ use appropriate defaults for your system.
+
-t
--trace
Enable trace mode, essentially sets -x within the scripts
@@ -81,6 +103,8 @@ export LC_ALL="C"
LOC=
KEEP_WORK=0
DEBUG=0
+JVM_ARGS=
+SORT_ARGS=
while [ $# -gt 0 ]
do
@@ -95,6 +119,12 @@ do
printUsage
exit 0
;;
+ -j|--jvm-args)
+ # JVM Arguments
+ shift
+ JVM_ARGS="$1"
+ shift
+ ;;
-k|--keep-work)
# Keep work files
shift
@@ -111,21 +141,27 @@ do
LOC=${ARG/-*loc=/}
shift
;;
+ -s|--sort-args)
+ # Sort arguments
+ shift
+ SORT_ARGS=$1
+ shift
+ ;;
-t|--trace)
# Trace mode
shift
set -x
;;
*)
- # Any further arguments are ignored
- break
+ # Additional options are not supported
+ abort 1 "Unrecognized option $ARG"
;;
esac
done
# Verify arguments
if [ -z "$LOC" ]; then
- abort 1 "No location specified"
+ abort 1 "Required database location not specified"
fi
# Make LOC absolute
@@ -137,26 +173,40 @@ fi
# Check location
if [ ! -e "$LOC" ]; then
- abort 1 "Location specified does not exist: $LOC"
+ abort 1 "Database location specified does not exist: $LOC"
fi
if [ ! -d "$LOC" ]; then
- abort 1 "Location is not a directory: $LOC"
+ abort 1 "Database location is not a directory: $LOC"
fi
+# Locate and check data text files
DATA_TRIPLES="$LOC/data-triples.tmp"
DATA_QUADS="$LOC/data-quads.tmp"
if [ ! -e "$DATA_TRIPLES" ]; then
- abort 1 "No triples text file found in location, please run the tdbloader2data script first"
+ abort 1 "No triples text file found in database location, please run the tdbloader2data script first"
fi
if [ ! -e "$DATA_QUADS" ]; then
- abort 1 "No quads text file found in location, please run the tdbloader2data script first"
+ abort 1 "No quads text file found in database location, please run the tdbloader2data script first"
fi
debug "Data text files are $DATA_TRIPLES and $DATA_QUADS"
-##--parallel is not always available.
-SORT_ARGS="${SORT_ARGS:---buffer-size=50%}"
+# Prepare sort arguments
+if [ -z "$SORT_ARGS" ]; then
+ SORT_ARGS="--buffer-size=50%"
+
+ ##--parallel is not always available.
+ # Temporarily disable exit on error while we check for --parallel support
+ set +e
+ sort --parallel=3 < /dev/null 2>/dev/null
+ if [ $? = 0 ]; then
+ SORT_ARGS="$SORT_ARGS --parallel=3"
+ fi
+ set -e
+fi
+
+# Prepare JVM arguments
JVM_ARGS=${JVM_ARGS:--Xmx1200M}
debug "JVM Arguments are $JVM_ARGS"
@@ -201,7 +251,7 @@ fi
SORT_TEMP_DIR=$(makeAbsolute "$SORT_TEMP_DIR")
debug "Sort Temp Directory: $SORT_TEMP_DIR"
SORT_DRIVE_INFO=($(getDriveInfo "${SORT_TEMP_DIR}"))
-debug "Sort Temp Directory ${DIR} is on disk ${SORT_DRIVE_INFO[0]} which has ${SORT_DRIVE_INFO[2]}% free space (${SORT_DRIVE_INFO[3]} bytes)"
+debug "Sort Temp Directory is on disk ${SORT_DRIVE_INFO[0]} which has ${SORT_DRIVE_INFO[2]}% free space (${SORT_DRIVE_INFO[3]} bytes)"
if [ "${SORT_DRIVE_INFO[2]}" -le 10 ]; then
warn "-----"
@@ -288,7 +338,7 @@ generate_index()
info "Build $IDX"
rm -f "$LOC/$IDX.dat"
rm -f "$LOC/$IDX.idn"
- java -cp "$JENA_CP" "$PKG".CmdIndexBuild "$LOC" "$IDX" "$WORK"
+ java $JVM_ARGS -cp "$JENA_CP" "$PKG".CmdIndexBuild "$LOC" "$IDX" "$WORK"
info "Build $IDX Completed"
# Remove work file unless keeping