You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by rv...@apache.org on 2015/07/07 14:53:00 UTC

[01/18] jena git commit: Initial work on refactoring tdbloader2 scripts (JENA-977)

Repository: jena
Updated Branches:
  refs/heads/jena2 b02309f6d -> 7e2c9527f
  refs/heads/master 459f06935 -> 2dc063f3b


Initial work on refactoring tdbloader2 scripts (JENA-977)

- Better option processing
- Split tdbloader2worker into a data and index phase script
- Support only running a specific phase


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/d92e3362
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/d92e3362
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/d92e3362

Branch: refs/heads/master
Commit: d92e336263da3f0f2a58dfc24cb9b5f23449cc5c
Parents: 13855a6
Author: Rob Vesse <rv...@apache.org>
Authored: Thu Jun 25 16:56:29 2015 +0100
Committer: Rob Vesse <rv...@apache.org>
Committed: Fri Jun 26 16:30:15 2015 +0100

----------------------------------------------------------------------
 apache-jena/bin/tdbloader2      |  72 +++++++++++++++-
 apache-jena/bin/tdbloader2data  | 107 ++++++++++++++++++++++++
 apache-jena/bin/tdbloader2index | 155 +++++++++++++++++++++++++++++++++++
 3 files changed, 333 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/d92e3362/apache-jena/bin/tdbloader2
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2 b/apache-jena/bin/tdbloader2
index c081074..37cc874 100755
--- a/apache-jena/bin/tdbloader2
+++ b/apache-jena/bin/tdbloader2
@@ -48,6 +48,7 @@ case "$(uname)" in
 esac
 
 export JENA_CP
+echo $JENA_CP
 if [ -z "$SORT_ARGS" ]
 then
     SORT_ARGS="--buffer-size=50%"
@@ -58,4 +59,73 @@ then
 fi
 export SORT_ARGS
 
-exec "$JENA_HOME/bin/tdbloader2worker" "$@"
+# Process arguments
+LOC=
+PHASE=
+
+while [ $# -gt 0 ]
+do
+  ARG=$1
+  case "$ARG" in
+    --loc|-loc)
+      # Location space separated
+      shift
+      LOC="$1"
+      shift
+      ;;
+    -*loc=*)
+      # Location = separated
+      LOC=${ARG/-*loc=/}
+      shift
+      ;;
+    --phase)
+      # Phase space separated
+      shift
+      PHASE="$1"
+      shift
+      ;;
+    *)
+      # Once we see an unrecognized argument treat as start of files to process
+      break
+      ;;
+  esac
+done
+
+if [ -z "$PHASE" ]; then
+  PHASE="all"
+fi
+
+echo "Location is '$LOC'"
+echo "Phase is '$PHASE'"
+
+log() { echo " $(date $DATE)" "$@" ; }
+
+#DATE="+%Y-%m-%dT%H:%M:%S%:z"
+DATE="+%H:%M:%S"
+
+# ---- Start
+log "-- TDB Bulk Loader Start"
+TIME1="$(date +%s)"
+
+case "$PHASE" in
+  all)
+    exec "$JENA_HOME/bin/tdbloader2data" --loc "$LOC" "$@"
+    exec "$JENA_HOME/bin/tdbloader2index" --loc "$LOC"
+    ;;
+  data)
+    exec "$JENA_HOME/bin/tdbloader2data" --loc "$LOC" "$@"
+    ;;
+  index)
+    exec "$JENA_HOME/bin/tdbloader2index" --loc "$LOC"
+    ;;
+  *)
+    echo "Unrecognized phase $PHASE" 1>&2
+    exit 1
+    ;;
+esac
+
+# ---- End
+TIME2="$(date +%s)"
+log "-- TDB Bulk Loader Finish"
+ELAPSED=$(($TIME2-$TIME1))
+log "-- $ELAPSED seconds"
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/jena/blob/d92e3362/apache-jena/bin/tdbloader2data
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2data b/apache-jena/bin/tdbloader2data
new file mode 100755
index 0000000..90200e4
--- /dev/null
+++ b/apache-jena/bin/tdbloader2data
@@ -0,0 +1,107 @@
+#!/usr/bin/env bash
+
+## Licensed to the Apache Software Foundation (ASF) under one
+## or more contributor license agreements.  See the NOTICE file
+## distributed with this work for additional information
+## regarding copyright ownership.  The ASF licenses this file
+## to you under the Apache License, Version 2.0 (the
+## "License"); you may not use this file except in compliance
+## with the License.  You may obtain a copy of the License at
+##
+##     http://www.apache.org/licenses/LICENSE-2.0
+##
+## Unless required by applicable law or agreed to in writing, software
+## distributed under the License is distributed on an "AS IS" BASIS,
+## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+## See the License for the specific language governing permissions and
+## limitations under the License.
+
+# The environment for this sub-script is setup by "tdbloader2"
+
+# Exit on error.
+set -e
+
+# Sort order is ASCII
+export LC_ALL="C"
+
+log() { echo " $(date $DATE)" "$@" ; }
+
+#DATE="+%Y-%m-%dT%H:%M:%S%:z"
+DATE="+%H:%M:%S"
+
+## JVM Arguments
+JVM_ARGS=${JVM_ARGS:--Xmx1200M}
+
+# Classpath set in "tdbloader2"
+if [ -z "$JENA_CP" ]
+then
+    echo "Classpath not provided : set JENA_CP" 1>&2
+    exit 1
+fi
+
+USAGE="Usage: tdbloader2data --loc location datafile ..."
+PKG=org.apache.jena.tdb.store.bulkloader2
+
+while [ $# -gt 0 ]
+do
+  ARG=$1
+  case "$ARG" in
+    --loc|-loc)
+      # Location space separated
+      shift
+      LOC="$1"
+      shift
+      ;;
+    -*loc=*)
+      # Location = separated
+      LOC=${ARG/-*loc=/}
+      shift
+      ;;
+    --help)
+      echo $USAGE
+      exit 0
+      ;;
+    *)
+      # Any further arguments are treated as data files
+      break
+      ;;
+  esac
+done
+
+# Verify arguments
+if [ -z "$LOC" ] ; then echo "No location specified" ; exit 1 ; fi
+if [ $# = 0 ]; then echo "No data files specified" ; exit 1 ; fi
+
+# Look for any index and data files in the directory.
+# Skip a possible configuration file
+if test -n "$(find "$LOC" -maxdepth 1 -type f ! -name 'this.*' -print -quit)"
+then 
+    echo "Location is not empty: $LOC"
+    exit 1
+fi
+
+if [ ! -e "$LOC" ] ; then
+  # If non-existent try to create
+  mkdir "$LOC"
+  if [ $? != 0 ]; then
+    echo "Failed to create new directory: $LOC"
+    exit 1
+  fi
+fi
+if [ ! -d "$LOC" ] ; then echo "Location is not a directory: $LOC" ; exit 1 ; fi
+
+FILES="$@"
+## Stdin?
+KEEPWORKFILES="${KEEPWORKFILES:-}"
+
+# ---- Data loading phase
+log "Data Load Phase"
+# Produce nodes file and triples/quads text file.
+
+DATA_TRIPLES="$LOC/data-triples.tmp"
+DATA_QUADS="$LOC/data-quads.tmp"
+
+java $JVM_ARGS -cp "$JENA_CP" "$PKG".CmdNodeTableBuilder \
+    "--loc=$LOC" "--triples=$DATA_TRIPLES" "--quads=$DATA_QUADS" $FILES
+
+log "Data Load Phase Completed"

http://git-wip-us.apache.org/repos/asf/jena/blob/d92e3362/apache-jena/bin/tdbloader2index
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2index b/apache-jena/bin/tdbloader2index
new file mode 100755
index 0000000..372aa5c
--- /dev/null
+++ b/apache-jena/bin/tdbloader2index
@@ -0,0 +1,155 @@
+#!/usr/bin/env bash
+
+## Licensed to the Apache Software Foundation (ASF) under one
+## or more contributor license agreements.  See the NOTICE file
+## distributed with this work for additional information
+## regarding copyright ownership.  The ASF licenses this file
+## to you under the Apache License, Version 2.0 (the
+## "License"); you may not use this file except in compliance
+## with the License.  You may obtain a copy of the License at
+##
+##     http://www.apache.org/licenses/LICENSE-2.0
+##
+## Unless required by applicable law or agreed to in writing, software
+## distributed under the License is distributed on an "AS IS" BASIS,
+## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+## See the License for the specific language governing permissions and
+## limitations under the License.
+
+# The environment for this sub-script is setup by "tdbloader2"
+
+# Exit on error.
+set -e
+
+# Sort order is ASCII
+export LC_ALL="C"
+
+log() { echo " $(date $DATE)" "$@" ; }
+
+TMP=$$
+#DATE="+%Y-%m-%dT%H:%M:%S%:z"
+DATE="+%H:%M:%S"
+
+##--parallel is not always available.
+SORT_ARGS="${SORT_ARGS:---buffer-size=50%}"
+JVM_ARGS=${JVM_ARGS:--Xmx1200M}
+
+# Classpath set in "tdbloader2"
+if [ -z "$JENA_CP" ]
+then
+    echo "Classpath not provided : set JENA_CP" 1>&2
+    exit 1
+fi
+
+USAGE="Usage: tdbloader2index --loc location"
+PKG=org.apache.jena.tdb.store.bulkloader2
+
+while [ $# -gt 0 ]
+do
+  ARG=$1
+  case "$ARG" in
+    --loc|-loc)
+      # Location space separated
+      shift
+      LOC="$1"
+      shift
+      ;;
+    -*loc=*)
+      # Location = separated
+      LOC=${ARG/-*loc=/}
+      shift
+      ;;
+    --help)
+      echo $USAGE
+      exit 0
+      ;;
+    *)
+      # Any further arguments are ignored
+      break
+      ;;
+  esac
+done
+
+# Verify arguments
+if [ -z "$LOC" ] ; then echo "No location specified" ; exit 1 ; fi
+if [ ! -e "$LOC" ] ; then echo "Location specified does not exist: $LOC" ; exit 1; fi
+if [ ! -d "$LOC" ] ; then echo "Location is not a directory: $LOC" ; exit 1 ; fi
+
+KEEPWORKFILES="${KEEPWORKFILES:-}"
+
+DATA_TRIPLES="$LOC/data-triples.tmp"
+DATA_QUADS="$LOC/data-quads.tmp"
+
+# ---- Index intermediates
+## All files are written S P O / G S P O columns per row but in different sort orders.
+log "Index Building Phase"
+
+which pv >/dev/null 2>&1
+HAS_PV=$?
+
+process_rows()
+{
+    local KEYS="$1"
+    local DATA="$2"
+    local IDX=$3
+    local WORK="$LOC/$IDX-txt"
+
+    if [ ! -s "$DATA" ]
+    then
+	    return
+	  fi
+
+    log "Creating Index $IDX"
+    log "  Sort $IDX"
+    if [ $HAS_PV = 0 ]; then
+      # Use pv (pipe viewer) to monitor sort progress
+      # Note that progress data will only be seen if running in the foreground
+      SIZE=$(du -k "$DATA" | cut -f 1)
+      pv -c -N data < "$DATA" | sort $SORT_ARGS -u $KEYS | pv -c -N sort -s $SIZE > $WORK
+    else
+      # Use sort without any progress monitoring
+      sort $SORT_ARGS -u $KEYS < "$DATA" > $WORK
+    fi
+    log "  Sort $IDX Completed"
+    log "  Build $IDX"
+    rm -f "$LOC/$IDX.dat"
+    rm -f "$LOC/$IDX.idn"
+    java -cp "$JENA_CP" "$PKG".CmdIndexBuild "$LOC" "$IDX" "$WORK"
+    log "  Build $IDX Completed"
+    # Remove intermediary file.
+    if [ "$KEEPWORKFILES" != "yes" ] 
+    then
+	    rm "$WORK"
+    fi
+}
+
+K1="-k 1,1"
+K2="-k 2,2"
+K3="-k 3,3"
+K4="-k 4,4"
+
+process_rows "$K1 $K2 $K3" "$DATA_TRIPLES" SPO
+
+process_rows "$K2 $K3 $K1" "$DATA_TRIPLES" POS
+
+process_rows "$K3 $K1 $K2" "$DATA_TRIPLES" OSP
+
+process_rows "$K1 $K2 $K3 $K4" "$DATA_QUADS" GSPO
+
+process_rows "$K1 $K3 $K4 $K2" "$DATA_QUADS" GPOS
+
+process_rows "$K1 $K4 $K2 $K3" "$DATA_QUADS" GOSP
+
+process_rows "$K2 $K3 $K4 $K1" "$DATA_QUADS" SPOG
+
+process_rows "$K3 $K4 $K2 $K1" "$DATA_QUADS" POSG
+
+process_rows "$K4 $K2 $K3 $K1" "$DATA_QUADS" OSPG
+
+log "Index Building Phase Completed"
+
+# ---- Clean up.
+if [ "$KEEPWORKFILES" != "yes" ] 
+then
+    rm -f "$DATA_TRIPLES" "$DATA_QUADS" 
+fi


[16/18] jena git commit: Regenerate commands with JENA_HOME fix (JENA-977)

Posted by rv...@apache.org.
Regenerate commands with JENA_HOME fix (JENA-977)

This commit regenerates the commands with the fix for JENA_HOME
resolution in the template


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/ea55883d
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/ea55883d
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/ea55883d

Branch: refs/heads/master
Commit: ea55883d3e3607e27a24e553caca8f8678020214
Parents: 40fe2cd
Author: Rob Vesse <rv...@apache.org>
Authored: Wed Jul 1 10:34:50 2015 +0100
Committer: Rob Vesse <rv...@apache.org>
Committed: Wed Jul 1 10:34:50 2015 +0100

----------------------------------------------------------------------
 apache-jena/bin/arq        | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/infer      | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/iri        | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/juuid      | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/nquads     | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/ntriples   | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/qparse     | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/rdfcat     | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/rdfcompare | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/rdfcopy    | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/rdfparse   | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/rdfxml     | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/riot       | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/rset       | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/rsparql    | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/rupdate    | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/schemagen  | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/sparql     | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/tdbbackup  | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/tdbdump    | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/tdbloader  | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/tdbquery   | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/tdbstats   | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/tdbupdate  | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/trig       | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/turtle     | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/uparse     | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/update     | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/utf8       | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/wwwdec     | 77 ++++++++++++++++++++++++++++++++---------
 apache-jena/bin/wwwenc     | 77 ++++++++++++++++++++++++++++++++---------
 31 files changed, 1891 insertions(+), 496 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/arq
----------------------------------------------------------------------
diff --git a/apache-jena/bin/arq b/apache-jena/bin/arq
index 00ad2b4..8d2f371 100755
--- a/apache-jena/bin/arq
+++ b/apache-jena/bin/arq
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+function resolveLink() {
+  local NAME=$1
+
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD style readlink behaves differently to GNU readlink
+        # Have to manually follow links
+        while [ -L "$NAME" ]; do
+          NAME=$(readlink "$NAME")
+        done
+        ;;
+      *)
+        # Assuming standard GNU readlink with -f for
+        # canonicalize and follow
+        NAME=$(readlink -f "$NAME")
+        ;;
+    esac
+  fi
+
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically"
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    # If link is relative
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative, make absolute
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}"
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME=$(resolveLink "$JENA_HOME")
+  # If link is relative
+  case "$JENA_HOME" in
+    /*)
+      # Already absolute
+      ;;
+    *)
+      # Relative, make absolute
+      JENA_HOME=$(dirname "$JENA_HOME")
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME"
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/infer
----------------------------------------------------------------------
diff --git a/apache-jena/bin/infer b/apache-jena/bin/infer
index 4f25d25..d0ba197 100755
--- a/apache-jena/bin/infer
+++ b/apache-jena/bin/infer
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+function resolveLink() {
+  local NAME=$1
+
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD style readlink behaves differently to GNU readlink
+        # Have to manually follow links
+        while [ -L "$NAME" ]; do
+          NAME=$(readlink "$NAME")
+        done
+        ;;
+      *)
+        # Assuming standard GNU readlink with -f for
+        # canonicalize and follow
+        NAME=$(readlink -f "$NAME")
+        ;;
+    esac
+  fi
+
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically"
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    # If link is relative
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative, make absolute
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}"
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME=$(resolveLink "$JENA_HOME")
+  # If link is relative
+  case "$JENA_HOME" in
+    /*)
+      # Already absolute
+      ;;
+    *)
+      # Relative, make absolute
+      JENA_HOME=$(dirname "$JENA_HOME")
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME"
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/iri
----------------------------------------------------------------------
diff --git a/apache-jena/bin/iri b/apache-jena/bin/iri
index d67bda1..f5e9e0b 100755
--- a/apache-jena/bin/iri
+++ b/apache-jena/bin/iri
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+function resolveLink() {
+  local NAME=$1
+
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD style readlink behaves differently to GNU readlink
+        # Have to manually follow links
+        while [ -L "$NAME" ]; do
+          NAME=$(readlink "$NAME")
+        done
+        ;;
+      *)
+        # Assuming standard GNU readlink with -f for
+        # canonicalize and follow
+        NAME=$(readlink -f "$NAME")
+        ;;
+    esac
+  fi
+
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically"
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    # If link is relative
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative, make absolute
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}"
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME=$(resolveLink "$JENA_HOME")
+  # If link is relative
+  case "$JENA_HOME" in
+    /*)
+      # Already absolute
+      ;;
+    *)
+      # Relative, make absolute
+      JENA_HOME=$(dirname "$JENA_HOME")
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME"
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/juuid
----------------------------------------------------------------------
diff --git a/apache-jena/bin/juuid b/apache-jena/bin/juuid
index f6d9c0b..dcf6b7f 100755
--- a/apache-jena/bin/juuid
+++ b/apache-jena/bin/juuid
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+function resolveLink() {
+  local NAME=$1
+
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD style readlink behaves differently to GNU readlink
+        # Have to manually follow links
+        while [ -L "$NAME" ]; do
+          NAME=$(readlink "$NAME")
+        done
+        ;;
+      *)
+        # Assuming standard GNU readlink with -f for
+        # canonicalize and follow
+        NAME=$(readlink -f "$NAME")
+        ;;
+    esac
+  fi
+
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically"
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    # If link is relative
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative, make absolute
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}"
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME=$(resolveLink "$JENA_HOME")
+  # If link is relative
+  case "$JENA_HOME" in
+    /*)
+      # Already absolute
+      ;;
+    *)
+      # Relative, make absolute
+      JENA_HOME=$(dirname "$JENA_HOME")
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME"
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/nquads
----------------------------------------------------------------------
diff --git a/apache-jena/bin/nquads b/apache-jena/bin/nquads
index 005bf03..592f6a9 100755
--- a/apache-jena/bin/nquads
+++ b/apache-jena/bin/nquads
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+function resolveLink() {
+  local NAME=$1
+
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD style readlink behaves differently to GNU readlink
+        # Have to manually follow links
+        while [ -L "$NAME" ]; do
+          NAME=$(readlink "$NAME")
+        done
+        ;;
+      *)
+        # Assuming standard GNU readlink with -f for
+        # canonicalize and follow
+        NAME=$(readlink -f "$NAME")
+        ;;
+    esac
+  fi
+
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically"
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    # If link is relative
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative, make absolute
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}"
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME=$(resolveLink "$JENA_HOME")
+  # If link is relative
+  case "$JENA_HOME" in
+    /*)
+      # Already absolute
+      ;;
+    *)
+      # Relative, make absolute
+      JENA_HOME=$(dirname "$JENA_HOME")
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME"
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/ntriples
----------------------------------------------------------------------
diff --git a/apache-jena/bin/ntriples b/apache-jena/bin/ntriples
index ca46add..34db759 100755
--- a/apache-jena/bin/ntriples
+++ b/apache-jena/bin/ntriples
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+function resolveLink() {
+  local NAME=$1
+
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD style readlink behaves differently to GNU readlink
+        # Have to manually follow links
+        while [ -L "$NAME" ]; do
+          NAME=$(readlink "$NAME")
+        done
+        ;;
+      *)
+        # Assuming standard GNU readlink with -f for
+        # canonicalize and follow
+        NAME=$(readlink -f "$NAME")
+        ;;
+    esac
+  fi
+
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically"
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    # If link is relative
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative, make absolute
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}"
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME=$(resolveLink "$JENA_HOME")
+  # If link is relative
+  case "$JENA_HOME" in
+    /*)
+      # Already absolute
+      ;;
+    *)
+      # Relative, make absolute
+      JENA_HOME=$(dirname "$JENA_HOME")
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME"
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/qparse
----------------------------------------------------------------------
diff --git a/apache-jena/bin/qparse b/apache-jena/bin/qparse
index 4888271..02560fc 100755
--- a/apache-jena/bin/qparse
+++ b/apache-jena/bin/qparse
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+function resolveLink() {
+  local NAME=$1
+
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD style readlink behaves differently to GNU readlink
+        # Have to manually follow links
+        while [ -L "$NAME" ]; do
+          NAME=$(readlink "$NAME")
+        done
+        ;;
+      *)
+        # Assuming standard GNU readlink with -f for
+        # canonicalize and follow
+        NAME=$(readlink -f "$NAME")
+        ;;
+    esac
+  fi
+
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically"
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    # If link is relative
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative, make absolute
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}"
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME=$(resolveLink "$JENA_HOME")
+  # If link is relative
+  case "$JENA_HOME" in
+    /*)
+      # Already absolute
+      ;;
+    *)
+      # Relative, make absolute
+      JENA_HOME=$(dirname "$JENA_HOME")
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME"
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/rdfcat
----------------------------------------------------------------------
diff --git a/apache-jena/bin/rdfcat b/apache-jena/bin/rdfcat
index d84cf6c..8553e94 100755
--- a/apache-jena/bin/rdfcat
+++ b/apache-jena/bin/rdfcat
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+# Print the canonical absolute path behind symlink $1 ($1 itself if not a link)
+resolveLink() { # POSIX form: the "function" keyword is a syntax error in dash
+  local NAME="$1" TARGET DIR
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD readlink lacks -f, so follow each hop by hand; a relative target
+        # is relative to the directory of the link it was read from, not cwd
+        while [ -L "$NAME" ]; do
+          TARGET=$(readlink "$NAME")
+          case "$TARGET" in /*) NAME=$TARGET ;; *) NAME=$(dirname "$NAME")/$TARGET ;; esac
+        done
+        # Canonicalise the directory so the result is absolute like readlink -f
+        DIR=$(cd "$(dirname "$NAME")" >/dev/null 2>&1 && pwd -P) && NAME=${DIR%/}/$(basename "$NAME")
+        ;;
+      *)
+        NAME=$(readlink -f "$NAME") # GNU readlink: canonicalize and follow
+        ;;
+    esac
+  fi
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  # Diagnostics go to stderr so they never mix into the tool's stdout output
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" >&2
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative link target, anchor it at the directory holding the link
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}" >&2
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME_TARGET=$(resolveLink "$JENA_HOME")
+  case "$JENA_HOME_TARGET" in
+    /*)
+      JENA_HOME=$JENA_HOME_TARGET
+      ;;
+    *)
+      # A relative target is relative to the directory holding the link, so
+      # resolve it from there; dirname alone only dropped the last component
+      JENA_HOME="$( cd "$( dirname "$JENA_HOME" )" && cd "$JENA_HOME_TARGET" && pwd )"
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" >&2
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/rdfcompare
----------------------------------------------------------------------
diff --git a/apache-jena/bin/rdfcompare b/apache-jena/bin/rdfcompare
index d231e38..e9ff1c1 100755
--- a/apache-jena/bin/rdfcompare
+++ b/apache-jena/bin/rdfcompare
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+# Print the canonical absolute path behind symlink $1 ($1 itself if not a link)
+resolveLink() { # POSIX form: the "function" keyword is a syntax error in dash
+  local NAME="$1" TARGET DIR
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD readlink lacks -f, so follow each hop by hand; a relative target
+        # is relative to the directory of the link it was read from, not cwd
+        while [ -L "$NAME" ]; do
+          TARGET=$(readlink "$NAME")
+          case "$TARGET" in /*) NAME=$TARGET ;; *) NAME=$(dirname "$NAME")/$TARGET ;; esac
+        done
+        # Canonicalise the directory so the result is absolute like readlink -f
+        DIR=$(cd "$(dirname "$NAME")" >/dev/null 2>&1 && pwd -P) && NAME=${DIR%/}/$(basename "$NAME")
+        ;;
+      *)
+        NAME=$(readlink -f "$NAME") # GNU readlink: canonicalize and follow
+        ;;
+    esac
+  fi
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  # Diagnostics go to stderr so they never mix into the tool's stdout output
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" >&2
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative link target, anchor it at the directory holding the link
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}" >&2
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME_TARGET=$(resolveLink "$JENA_HOME")
+  case "$JENA_HOME_TARGET" in
+    /*)
+      JENA_HOME=$JENA_HOME_TARGET
+      ;;
+    *)
+      # A relative target is relative to the directory holding the link, so
+      # resolve it from there; dirname alone only dropped the last component
+      JENA_HOME="$( cd "$( dirname "$JENA_HOME" )" && cd "$JENA_HOME_TARGET" && pwd )"
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" >&2
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/rdfcopy
----------------------------------------------------------------------
diff --git a/apache-jena/bin/rdfcopy b/apache-jena/bin/rdfcopy
index 44673ef..bd95e53 100755
--- a/apache-jena/bin/rdfcopy
+++ b/apache-jena/bin/rdfcopy
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+# Print the canonical absolute path behind symlink $1 ($1 itself if not a link)
+resolveLink() { # POSIX form: the "function" keyword is a syntax error in dash
+  local NAME="$1" TARGET DIR
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD readlink lacks -f, so follow each hop by hand; a relative target
+        # is relative to the directory of the link it was read from, not cwd
+        while [ -L "$NAME" ]; do
+          TARGET=$(readlink "$NAME")
+          case "$TARGET" in /*) NAME=$TARGET ;; *) NAME=$(dirname "$NAME")/$TARGET ;; esac
+        done
+        # Canonicalise the directory so the result is absolute like readlink -f
+        DIR=$(cd "$(dirname "$NAME")" >/dev/null 2>&1 && pwd -P) && NAME=${DIR%/}/$(basename "$NAME")
+        ;;
+      *)
+        NAME=$(readlink -f "$NAME") # GNU readlink: canonicalize and follow
+        ;;
+    esac
+  fi
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  # Diagnostics go to stderr so they never mix into the tool's stdout output
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" >&2
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative link target, anchor it at the directory holding the link
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}" >&2
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME_TARGET=$(resolveLink "$JENA_HOME")
+  case "$JENA_HOME_TARGET" in
+    /*)
+      JENA_HOME=$JENA_HOME_TARGET
+      ;;
+    *)
+      # A relative target is relative to the directory holding the link, so
+      # resolve it from there; dirname alone only dropped the last component
+      JENA_HOME="$( cd "$( dirname "$JENA_HOME" )" && cd "$JENA_HOME_TARGET" && pwd )"
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" >&2
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/rdfparse
----------------------------------------------------------------------
diff --git a/apache-jena/bin/rdfparse b/apache-jena/bin/rdfparse
index 57570ef..1d22621 100755
--- a/apache-jena/bin/rdfparse
+++ b/apache-jena/bin/rdfparse
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+# Print the canonical absolute path behind symlink $1 ($1 itself if not a link)
+resolveLink() { # POSIX form: the "function" keyword is a syntax error in dash
+  local NAME="$1" TARGET DIR
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD readlink lacks -f, so follow each hop by hand; a relative target
+        # is relative to the directory of the link it was read from, not cwd
+        while [ -L "$NAME" ]; do
+          TARGET=$(readlink "$NAME")
+          case "$TARGET" in /*) NAME=$TARGET ;; *) NAME=$(dirname "$NAME")/$TARGET ;; esac
+        done
+        # Canonicalise the directory so the result is absolute like readlink -f
+        DIR=$(cd "$(dirname "$NAME")" >/dev/null 2>&1 && pwd -P) && NAME=${DIR%/}/$(basename "$NAME")
+        ;;
+      *)
+        NAME=$(readlink -f "$NAME") # GNU readlink: canonicalize and follow
+        ;;
+    esac
+  fi
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  # Diagnostics go to stderr so they never mix into the tool's stdout output
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" >&2
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative link target, anchor it at the directory holding the link
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}" >&2
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME_TARGET=$(resolveLink "$JENA_HOME")
+  case "$JENA_HOME_TARGET" in
+    /*)
+      JENA_HOME=$JENA_HOME_TARGET
+      ;;
+    *)
+      # A relative target is relative to the directory holding the link, so
+      # resolve it from there; dirname alone only dropped the last component
+      JENA_HOME="$( cd "$( dirname "$JENA_HOME" )" && cd "$JENA_HOME_TARGET" && pwd )"
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" >&2
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/rdfxml
----------------------------------------------------------------------
diff --git a/apache-jena/bin/rdfxml b/apache-jena/bin/rdfxml
index 35a5840..bebb6e1 100755
--- a/apache-jena/bin/rdfxml
+++ b/apache-jena/bin/rdfxml
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+# Print the canonical absolute path behind symlink $1 ($1 itself if not a link)
+resolveLink() { # POSIX form: the "function" keyword is a syntax error in dash
+  local NAME="$1" TARGET DIR
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD readlink lacks -f, so follow each hop by hand; a relative target
+        # is relative to the directory of the link it was read from, not cwd
+        while [ -L "$NAME" ]; do
+          TARGET=$(readlink "$NAME")
+          case "$TARGET" in /*) NAME=$TARGET ;; *) NAME=$(dirname "$NAME")/$TARGET ;; esac
+        done
+        # Canonicalise the directory so the result is absolute like readlink -f
+        DIR=$(cd "$(dirname "$NAME")" >/dev/null 2>&1 && pwd -P) && NAME=${DIR%/}/$(basename "$NAME")
+        ;;
+      *)
+        NAME=$(readlink -f "$NAME") # GNU readlink: canonicalize and follow
+        ;;
+    esac
+  fi
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  # Diagnostics go to stderr so they never mix into the tool's stdout output
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" >&2
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative link target, anchor it at the directory holding the link
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}" >&2
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME_TARGET=$(resolveLink "$JENA_HOME")
+  case "$JENA_HOME_TARGET" in
+    /*)
+      JENA_HOME=$JENA_HOME_TARGET
+      ;;
+    *)
+      # A relative target is relative to the directory holding the link, so
+      # resolve it from there; dirname alone only dropped the last component
+      JENA_HOME="$( cd "$( dirname "$JENA_HOME" )" && cd "$JENA_HOME_TARGET" && pwd )"
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" >&2
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/riot
----------------------------------------------------------------------
diff --git a/apache-jena/bin/riot b/apache-jena/bin/riot
index 681c0ed..d3b21b5 100755
--- a/apache-jena/bin/riot
+++ b/apache-jena/bin/riot
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+# Print the canonical absolute path behind symlink $1 ($1 itself if not a link)
+resolveLink() { # POSIX form: the "function" keyword is a syntax error in dash
+  local NAME="$1" TARGET DIR
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD readlink lacks -f, so follow each hop by hand; a relative target
+        # is relative to the directory of the link it was read from, not cwd
+        while [ -L "$NAME" ]; do
+          TARGET=$(readlink "$NAME")
+          case "$TARGET" in /*) NAME=$TARGET ;; *) NAME=$(dirname "$NAME")/$TARGET ;; esac
+        done
+        # Canonicalise the directory so the result is absolute like readlink -f
+        DIR=$(cd "$(dirname "$NAME")" >/dev/null 2>&1 && pwd -P) && NAME=${DIR%/}/$(basename "$NAME")
+        ;;
+      *)
+        NAME=$(readlink -f "$NAME") # GNU readlink: canonicalize and follow
+        ;;
+    esac
+  fi
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  # Diagnostics go to stderr so they never mix into the tool's stdout output
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" >&2
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative link target, anchor it at the directory holding the link
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}" >&2
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME_TARGET=$(resolveLink "$JENA_HOME")
+  case "$JENA_HOME_TARGET" in
+    /*)
+      JENA_HOME=$JENA_HOME_TARGET
+      ;;
+    *)
+      # A relative target is relative to the directory holding the link, so
+      # resolve it from there; dirname alone only dropped the last component
+      JENA_HOME="$( cd "$( dirname "$JENA_HOME" )" && cd "$JENA_HOME_TARGET" && pwd )"
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" >&2
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/rset
----------------------------------------------------------------------
diff --git a/apache-jena/bin/rset b/apache-jena/bin/rset
index c14f584..7d494a8 100755
--- a/apache-jena/bin/rset
+++ b/apache-jena/bin/rset
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+# Print the canonical absolute path behind symlink $1 ($1 itself if not a link)
+resolveLink() { # POSIX form: the "function" keyword is a syntax error in dash
+  local NAME="$1" TARGET DIR
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD readlink lacks -f, so follow each hop by hand; a relative target
+        # is relative to the directory of the link it was read from, not cwd
+        while [ -L "$NAME" ]; do
+          TARGET=$(readlink "$NAME")
+          case "$TARGET" in /*) NAME=$TARGET ;; *) NAME=$(dirname "$NAME")/$TARGET ;; esac
+        done
+        # Canonicalise the directory so the result is absolute like readlink -f
+        DIR=$(cd "$(dirname "$NAME")" >/dev/null 2>&1 && pwd -P) && NAME=${DIR%/}/$(basename "$NAME")
+        ;;
+      *)
+        NAME=$(readlink -f "$NAME") # GNU readlink: canonicalize and follow
+        ;;
+    esac
+  fi
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  # Diagnostics go to stderr so they never mix into the tool's stdout output
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" >&2
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative link target, anchor it at the directory holding the link
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}" >&2
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME_TARGET=$(resolveLink "$JENA_HOME")
+  case "$JENA_HOME_TARGET" in
+    /*)
+      JENA_HOME=$JENA_HOME_TARGET
+      ;;
+    *)
+      # A relative target is relative to the directory holding the link, so
+      # resolve it from there; dirname alone only dropped the last component
+      JENA_HOME="$( cd "$( dirname "$JENA_HOME" )" && cd "$JENA_HOME_TARGET" && pwd )"
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" >&2
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/rsparql
----------------------------------------------------------------------
diff --git a/apache-jena/bin/rsparql b/apache-jena/bin/rsparql
index a9a0bc8..ddea695 100755
--- a/apache-jena/bin/rsparql
+++ b/apache-jena/bin/rsparql
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+# Print the canonical absolute path behind symlink $1 ($1 itself if not a link)
+resolveLink() { # POSIX form: the "function" keyword is a syntax error in dash
+  local NAME="$1" TARGET DIR
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD readlink lacks -f, so follow each hop by hand; a relative target
+        # is relative to the directory of the link it was read from, not cwd
+        while [ -L "$NAME" ]; do
+          TARGET=$(readlink "$NAME")
+          case "$TARGET" in /*) NAME=$TARGET ;; *) NAME=$(dirname "$NAME")/$TARGET ;; esac
+        done
+        # Canonicalise the directory so the result is absolute like readlink -f
+        DIR=$(cd "$(dirname "$NAME")" >/dev/null 2>&1 && pwd -P) && NAME=${DIR%/}/$(basename "$NAME")
+        ;;
+      *)
+        NAME=$(readlink -f "$NAME") # GNU readlink: canonicalize and follow
+        ;;
+    esac
+  fi
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  # Diagnostics go to stderr so they never mix into the tool's stdout output
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" >&2
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative link target, anchor it at the directory holding the link
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}" >&2
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME_TARGET=$(resolveLink "$JENA_HOME")
+  case "$JENA_HOME_TARGET" in
+    /*)
+      JENA_HOME=$JENA_HOME_TARGET
+      ;;
+    *)
+      # A relative target is relative to the directory holding the link, so
+      # resolve it from there; dirname alone only dropped the last component
+      JENA_HOME="$( cd "$( dirname "$JENA_HOME" )" && cd "$JENA_HOME_TARGET" && pwd )"
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" >&2
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/rupdate
----------------------------------------------------------------------
diff --git a/apache-jena/bin/rupdate b/apache-jena/bin/rupdate
index a569d04..7612084 100755
--- a/apache-jena/bin/rupdate
+++ b/apache-jena/bin/rupdate
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+# Print the canonical absolute path behind symlink $1 ($1 itself if not a link)
+resolveLink() { # POSIX form: the "function" keyword is a syntax error in dash
+  local NAME="$1" TARGET DIR
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD readlink lacks -f, so follow each hop by hand; a relative target
+        # is relative to the directory of the link it was read from, not cwd
+        while [ -L "$NAME" ]; do
+          TARGET=$(readlink "$NAME")
+          case "$TARGET" in /*) NAME=$TARGET ;; *) NAME=$(dirname "$NAME")/$TARGET ;; esac
+        done
+        # Canonicalise the directory so the result is absolute like readlink -f
+        DIR=$(cd "$(dirname "$NAME")" >/dev/null 2>&1 && pwd -P) && NAME=${DIR%/}/$(basename "$NAME")
+        ;;
+      *)
+        NAME=$(readlink -f "$NAME") # GNU readlink: canonicalize and follow
+        ;;
+    esac
+  fi
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  # Diagnostics go to stderr so they never mix into the tool's stdout output
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" >&2
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative link target, anchor it at the directory holding the link
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}" >&2
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME_TARGET=$(resolveLink "$JENA_HOME")
+  case "$JENA_HOME_TARGET" in
+    /*)
+      JENA_HOME=$JENA_HOME_TARGET
+      ;;
+    *)
+      # A relative target is relative to the directory holding the link, so
+      # resolve it from there; dirname alone only dropped the last component
+      JENA_HOME="$( cd "$( dirname "$JENA_HOME" )" && cd "$JENA_HOME_TARGET" && pwd )"
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" >&2
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/schemagen
----------------------------------------------------------------------
diff --git a/apache-jena/bin/schemagen b/apache-jena/bin/schemagen
index 0a87241..a2fbc3f 100755
--- a/apache-jena/bin/schemagen
+++ b/apache-jena/bin/schemagen
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+# Print the canonical absolute path behind symlink $1 ($1 itself if not a link)
+resolveLink() { # POSIX form: the "function" keyword is a syntax error in dash
+  local NAME="$1" TARGET DIR
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD readlink lacks -f, so follow each hop by hand; a relative target
+        # is relative to the directory of the link it was read from, not cwd
+        while [ -L "$NAME" ]; do
+          TARGET=$(readlink "$NAME")
+          case "$TARGET" in /*) NAME=$TARGET ;; *) NAME=$(dirname "$NAME")/$TARGET ;; esac
+        done
+        # Canonicalise the directory so the result is absolute like readlink -f
+        DIR=$(cd "$(dirname "$NAME")" >/dev/null 2>&1 && pwd -P) && NAME=${DIR%/}/$(basename "$NAME")
+        ;;
+      *)
+        NAME=$(readlink -f "$NAME") # GNU readlink: canonicalize and follow
+        ;;
+    esac
+  fi
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  # Diagnostics go to stderr so they never mix into the tool's stdout output
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" >&2
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative link target, anchor it at the directory holding the link
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}" >&2
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME_TARGET=$(resolveLink "$JENA_HOME")
+  case "$JENA_HOME_TARGET" in
+    /*)
+      JENA_HOME=$JENA_HOME_TARGET
+      ;;
+    *)
+      # A relative target is relative to the directory holding the link, so
+      # resolve it from there; dirname alone only dropped the last component
+      JENA_HOME="$( cd "$( dirname "$JENA_HOME" )" && cd "$JENA_HOME_TARGET" && pwd )"
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" >&2
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/sparql
----------------------------------------------------------------------
diff --git a/apache-jena/bin/sparql b/apache-jena/bin/sparql
index 85f0fa2..dc6b334 100755
--- a/apache-jena/bin/sparql
+++ b/apache-jena/bin/sparql
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+# Print the canonical absolute path behind symlink $1 ($1 itself if not a link)
+resolveLink() { # POSIX form: the "function" keyword is a syntax error in dash
+  local NAME="$1" TARGET DIR
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD readlink lacks -f, so follow each hop by hand; a relative target
+        # is relative to the directory of the link it was read from, not cwd
+        while [ -L "$NAME" ]; do
+          TARGET=$(readlink "$NAME")
+          case "$TARGET" in /*) NAME=$TARGET ;; *) NAME=$(dirname "$NAME")/$TARGET ;; esac
+        done
+        # Canonicalise the directory so the result is absolute like readlink -f
+        DIR=$(cd "$(dirname "$NAME")" >/dev/null 2>&1 && pwd -P) && NAME=${DIR%/}/$(basename "$NAME")
+        ;;
+      *)
+        NAME=$(readlink -f "$NAME") # GNU readlink: canonicalize and follow
+        ;;
+    esac
+  fi
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  # Diagnostics go to stderr so they never mix into the tool's stdout output
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" >&2
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative link target, anchor it at the directory holding the link
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}" >&2
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME_TARGET=$(resolveLink "$JENA_HOME")
+  case "$JENA_HOME_TARGET" in
+    /*)
+      JENA_HOME=$JENA_HOME_TARGET
+      ;;
+    *)
+      # A relative target is relative to the directory holding the link, so
+      # resolve it from there; dirname alone only dropped the last component
+      JENA_HOME="$( cd "$( dirname "$JENA_HOME" )" && cd "$JENA_HOME_TARGET" && pwd )"
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" >&2
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/tdbbackup
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbbackup b/apache-jena/bin/tdbbackup
index d6786a3..b5f3d9b 100755
--- a/apache-jena/bin/tdbbackup
+++ b/apache-jena/bin/tdbbackup
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+# Print the canonical absolute path behind symlink $1 ($1 itself if not a link)
+resolveLink() { # POSIX form: the "function" keyword is a syntax error in dash
+  local NAME="$1" TARGET DIR
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD readlink lacks -f, so follow each hop by hand; a relative target
+        # is relative to the directory of the link it was read from, not cwd
+        while [ -L "$NAME" ]; do
+          TARGET=$(readlink "$NAME")
+          case "$TARGET" in /*) NAME=$TARGET ;; *) NAME=$(dirname "$NAME")/$TARGET ;; esac
+        done
+        # Canonicalise the directory so the result is absolute like readlink -f
+        DIR=$(cd "$(dirname "$NAME")" >/dev/null 2>&1 && pwd -P) && NAME=${DIR%/}/$(basename "$NAME")
+        ;;
+      *)
+        NAME=$(readlink -f "$NAME") # GNU readlink: canonicalize and follow
+        ;;
+    esac
+  fi
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  # Diagnostics go to stderr so they never mix into the tool's stdout output
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" >&2
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative link target, anchor it at the directory holding the link
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}" >&2
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME_TARGET=$(resolveLink "$JENA_HOME")
+  case "$JENA_HOME_TARGET" in
+    /*)
+      JENA_HOME=$JENA_HOME_TARGET
+      ;;
+    *)
+      # A relative target is relative to the directory holding the link, so
+      # resolve it from there; dirname alone only dropped the last component
+      JENA_HOME="$( cd "$( dirname "$JENA_HOME" )" && cd "$JENA_HOME_TARGET" && pwd )"
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" >&2
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/tdbdump
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbdump b/apache-jena/bin/tdbdump
index 89b4c2f..2ead54e 100755
--- a/apache-jena/bin/tdbdump
+++ b/apache-jena/bin/tdbdump
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+# Print the canonical absolute path behind symlink $1 ($1 itself if not a link)
+resolveLink() { # POSIX form: the "function" keyword is a syntax error in dash
+  local NAME="$1" TARGET DIR
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD readlink lacks -f, so follow each hop by hand; a relative target
+        # is relative to the directory of the link it was read from, not cwd
+        while [ -L "$NAME" ]; do
+          TARGET=$(readlink "$NAME")
+          case "$TARGET" in /*) NAME=$TARGET ;; *) NAME=$(dirname "$NAME")/$TARGET ;; esac
+        done
+        # Canonicalise the directory so the result is absolute like readlink -f
+        DIR=$(cd "$(dirname "$NAME")" >/dev/null 2>&1 && pwd -P) && NAME=${DIR%/}/$(basename "$NAME")
+        ;;
+      *)
+        NAME=$(readlink -f "$NAME") # GNU readlink: canonicalize and follow
+        ;;
+    esac
+  fi
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  # Diagnostics go to stderr so they never mix into the tool's stdout output
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" >&2
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative link target, anchor it at the directory holding the link
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}" >&2
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME_TARGET=$(resolveLink "$JENA_HOME")
+  case "$JENA_HOME_TARGET" in
+    /*)
+      JENA_HOME=$JENA_HOME_TARGET
+      ;;
+    *)
+      # A relative target is relative to the directory holding the link, so
+      # resolve it from there; dirname alone only dropped the last component
+      JENA_HOME="$( cd "$( dirname "$JENA_HOME" )" && cd "$JENA_HOME_TARGET" && pwd )"
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" >&2
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/tdbloader
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader b/apache-jena/bin/tdbloader
index 187888a..06b4356 100755
--- a/apache-jena/bin/tdbloader
+++ b/apache-jena/bin/tdbloader
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+# Print the canonical absolute path behind symlink $1 ($1 itself if not a link)
+resolveLink() { # POSIX form: the "function" keyword is a syntax error in dash
+  local NAME="$1" TARGET DIR
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD readlink lacks -f, so follow each hop by hand; a relative target
+        # is relative to the directory of the link it was read from, not cwd
+        while [ -L "$NAME" ]; do
+          TARGET=$(readlink "$NAME")
+          case "$TARGET" in /*) NAME=$TARGET ;; *) NAME=$(dirname "$NAME")/$TARGET ;; esac
+        done
+        # Canonicalise the directory so the result is absolute like readlink -f
+        DIR=$(cd "$(dirname "$NAME")" >/dev/null 2>&1 && pwd -P) && NAME=${DIR%/}/$(basename "$NAME")
+        ;;
+      *)
+        NAME=$(readlink -f "$NAME") # GNU readlink: canonicalize and follow
+        ;;
+    esac
+  fi
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  # Diagnostics go to stderr so they never mix into the tool's stdout output
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" >&2
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative link target, anchor it at the directory holding the link
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}" >&2
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME_TARGET=$(resolveLink "$JENA_HOME")
+  case "$JENA_HOME_TARGET" in
+    /*)
+      JENA_HOME=$JENA_HOME_TARGET
+      ;;
+    *)
+      # A relative target is relative to the directory holding the link, so
+      # resolve it from there; dirname alone only dropped the last component
+      JENA_HOME="$( cd "$( dirname "$JENA_HOME" )" && cd "$JENA_HOME_TARGET" && pwd )"
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" >&2
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/tdbquery
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbquery b/apache-jena/bin/tdbquery
index 18c6fb6..b4bd8fa 100755
--- a/apache-jena/bin/tdbquery
+++ b/apache-jena/bin/tdbquery
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+function resolveLink() {
+  local NAME=$1
+
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD style readlink behaves differently to GNU readlink
+        # Have to manually follow links
+        while [ -L "$NAME" ]; do
+          NAME=$(readlink "$NAME")
+        done
+        ;;
+      *)
+        # Assuming standard GNU readlink with -f for
+        # canonicalize and follow
+        NAME=$(readlink -f "$NAME")
+        ;;
+    esac
+  fi
+
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically"
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    # If link is relative
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative, make absolute
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}"
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME=$(resolveLink "$JENA_HOME")
+  # If link is relative
+  case "$JENA_HOME" in
+    /*)
+      # Already absolute
+      ;;
+    *)
+      # Relative, make absolute
+      JENA_HOME=$(dirname "$JENA_HOME")
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME"
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/tdbstats
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbstats b/apache-jena/bin/tdbstats
index a9a9cd9..aa40445 100755
--- a/apache-jena/bin/tdbstats
+++ b/apache-jena/bin/tdbstats
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+function resolveLink() {
+  local NAME=$1
+
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD style readlink behaves differently to GNU readlink
+        # Have to manually follow links
+        while [ -L "$NAME" ]; do
+          NAME=$(readlink "$NAME")
+        done
+        ;;
+      *)
+        # Assuming standard GNU readlink with -f for
+        # canonicalize and follow
+        NAME=$(readlink -f "$NAME")
+        ;;
+    esac
+  fi
+
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically"
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    # If link is relative
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative, make absolute
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}"
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME=$(resolveLink "$JENA_HOME")
+  # If link is relative
+  case "$JENA_HOME" in
+    /*)
+      # Already absolute
+      ;;
+    *)
+      # Relative, make absolute
+      JENA_HOME=$(dirname "$JENA_HOME")
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME"
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/tdbupdate
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbupdate b/apache-jena/bin/tdbupdate
index fe5aa52..3c94419 100755
--- a/apache-jena/bin/tdbupdate
+++ b/apache-jena/bin/tdbupdate
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+function resolveLink() {
+  local NAME=$1
+
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD style readlink behaves differently to GNU readlink
+        # Have to manually follow links
+        while [ -L "$NAME" ]; do
+          NAME=$(readlink "$NAME")
+        done
+        ;;
+      *)
+        # Assuming standard GNU readlink with -f for
+        # canonicalize and follow
+        NAME=$(readlink -f "$NAME")
+        ;;
+    esac
+  fi
+
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically"
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    # If link is relative
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative, make absolute
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}"
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME=$(resolveLink "$JENA_HOME")
+  # If link is relative
+  case "$JENA_HOME" in
+    /*)
+      # Already absolute
+      ;;
+    *)
+      # Relative, make absolute
+      JENA_HOME=$(dirname "$JENA_HOME")
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME"
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/trig
----------------------------------------------------------------------
diff --git a/apache-jena/bin/trig b/apache-jena/bin/trig
index b3a9121..e984da0 100755
--- a/apache-jena/bin/trig
+++ b/apache-jena/bin/trig
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+function resolveLink() {
+  local NAME=$1
+
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD style readlink behaves differently to GNU readlink
+        # Have to manually follow links
+        while [ -L "$NAME" ]; do
+          NAME=$(readlink "$NAME")
+        done
+        ;;
+      *)
+        # Assuming standard GNU readlink with -f for
+        # canonicalize and follow
+        NAME=$(readlink -f "$NAME")
+        ;;
+    esac
+  fi
+
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically"
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    # If link is relative
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative, make absolute
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}"
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME=$(resolveLink "$JENA_HOME")
+  # If link is relative
+  case "$JENA_HOME" in
+    /*)
+      # Already absolute
+      ;;
+    *)
+      # Relative, make absolute
+      JENA_HOME=$(dirname "$JENA_HOME")
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME"
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/turtle
----------------------------------------------------------------------
diff --git a/apache-jena/bin/turtle b/apache-jena/bin/turtle
index 716eb7c..9adb88f 100755
--- a/apache-jena/bin/turtle
+++ b/apache-jena/bin/turtle
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+function resolveLink() {
+  local NAME=$1
+
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD style readlink behaves differently to GNU readlink
+        # Have to manually follow links
+        while [ -L "$NAME" ]; do
+          NAME=$(readlink "$NAME")
+        done
+        ;;
+      *)
+        # Assuming standard GNU readlink with -f for
+        # canonicalize and follow
+        NAME=$(readlink -f "$NAME")
+        ;;
+    esac
+  fi
+
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically"
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    # If link is relative
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative, make absolute
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}"
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME=$(resolveLink "$JENA_HOME")
+  # If link is relative
+  case "$JENA_HOME" in
+    /*)
+      # Already absolute
+      ;;
+    *)
+      # Relative, make absolute
+      JENA_HOME=$(dirname "$JENA_HOME")
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME"
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/uparse
----------------------------------------------------------------------
diff --git a/apache-jena/bin/uparse b/apache-jena/bin/uparse
index d8b7226..d2eef61 100755
--- a/apache-jena/bin/uparse
+++ b/apache-jena/bin/uparse
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+function resolveLink() {
+  local NAME=$1
+
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD style readlink behaves differently to GNU readlink
+        # Have to manually follow links
+        while [ -L "$NAME" ]; do
+          NAME=$(readlink "$NAME")
+        done
+        ;;
+      *)
+        # Assuming standard GNU readlink with -f for
+        # canonicalize and follow
+        NAME=$(readlink -f "$NAME")
+        ;;
+    esac
+  fi
+
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically"
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    # If link is relative
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative, make absolute
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}"
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME=$(resolveLink "$JENA_HOME")
+  # If link is relative
+  case "$JENA_HOME" in
+    /*)
+      # Already absolute
+      ;;
+    *)
+      # Relative, make absolute
+      JENA_HOME=$(dirname "$JENA_HOME")
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME"
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/update
----------------------------------------------------------------------
diff --git a/apache-jena/bin/update b/apache-jena/bin/update
index 7d3c164..a7eac43 100755
--- a/apache-jena/bin/update
+++ b/apache-jena/bin/update
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+function resolveLink() {
+  local NAME=$1
+
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD style readlink behaves differently to GNU readlink
+        # Have to manually follow links
+        while [ -L "$NAME" ]; do
+          NAME=$(readlink "$NAME")
+        done
+        ;;
+      *)
+        # Assuming standard GNU readlink with -f for
+        # canonicalize and follow
+        NAME=$(readlink -f "$NAME")
+        ;;
+    esac
+  fi
+
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically"
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    # If link is relative
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative, make absolute
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}"
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME=$(resolveLink "$JENA_HOME")
+  # If link is relative
+  case "$JENA_HOME" in
+    /*)
+      # Already absolute
+      ;;
+    *)
+      # Relative, make absolute
+      JENA_HOME=$(dirname "$JENA_HOME")
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME"
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/utf8
----------------------------------------------------------------------
diff --git a/apache-jena/bin/utf8 b/apache-jena/bin/utf8
index 090bcb0..67ea2e3 100755
--- a/apache-jena/bin/utf8
+++ b/apache-jena/bin/utf8
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+function resolveLink() {
+  local NAME=$1
+
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD style readlink behaves differently to GNU readlink
+        # Have to manually follow links
+        while [ -L "$NAME" ]; do
+          NAME=$(readlink "$NAME")
+        done
+        ;;
+      *)
+        # Assuming standard GNU readlink with -f for
+        # canonicalize and follow
+        NAME=$(readlink -f "$NAME")
+        ;;
+    esac
+  fi
+
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically"
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    # If link is relative
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative, make absolute
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}"
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME=$(resolveLink "$JENA_HOME")
+  # If link is relative
+  case "$JENA_HOME" in
+    /*)
+      # Already absolute
+      ;;
+    *)
+      # Relative, make absolute
+      JENA_HOME=$(dirname "$JENA_HOME")
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME"
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/wwwdec
----------------------------------------------------------------------
diff --git a/apache-jena/bin/wwwdec b/apache-jena/bin/wwwdec
index 12e2c8d..f979d5c 100755
--- a/apache-jena/bin/wwwdec
+++ b/apache-jena/bin/wwwdec
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+function resolveLink() {
+  local NAME=$1
+
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD style readlink behaves differently to GNU readlink
+        # Have to manually follow links
+        while [ -L "$NAME" ]; do
+          NAME=$(readlink "$NAME")
+        done
+        ;;
+      *)
+        # Assuming standard GNU readlink with -f for
+        # canonicalize and follow
+        NAME=$(readlink -f "$NAME")
+        ;;
+    esac
+  fi
+
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically"
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    # If link is relative
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative, make absolute
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}"
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME=$(resolveLink "$JENA_HOME")
+  # If link is relative
+  case "$JENA_HOME" in
+    /*)
+      # Already absolute
+      ;;
+    *)
+      # Relative, make absolute
+      JENA_HOME=$(dirname "$JENA_HOME")
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME"
 fi
 
 # ---- Setup

http://git-wip-us.apache.org/repos/asf/jena/blob/ea55883d/apache-jena/bin/wwwenc
----------------------------------------------------------------------
diff --git a/apache-jena/bin/wwwenc b/apache-jena/bin/wwwenc
index c34ba5e..9e4a9d4 100755
--- a/apache-jena/bin/wwwenc
+++ b/apache-jena/bin/wwwenc
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+function resolveLink() {
+  local NAME=$1
+
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD style readlink behaves differently to GNU readlink
+        # Have to manually follow links
+        while [ -L "$NAME" ]; do
+          NAME=$(readlink "$NAME")
+        done
+        ;;
+      *)
+        # Assuming standard GNU readlink with -f for
+        # canonicalize and follow
+        NAME=$(readlink -f "$NAME")
+        ;;
+    esac
+  fi
+
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically"
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    # If link is relative
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative, make absolute
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}"
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME=$(resolveLink "$JENA_HOME")
+  # If link is relative
+  case "$JENA_HOME" in
+    /*)
+      # Already absolute
+      ;;
+    *)
+      # Relative, make absolute
+      JENA_HOME=$(dirname "$JENA_HOME")
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME"
 fi
 
 # ---- Setup


[02/18] jena git commit: Further tweak new tdbloader2 scripts (JENA-977)

Posted by rv...@apache.org.
Further tweak new tdbloader2 scripts (JENA-977)

- Add proper usage to tdbloader2
- Check for temporary data files needed for index phase in
  tdbloader2index


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/7b61a144
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/7b61a144
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/7b61a144

Branch: refs/heads/master
Commit: 7b61a144854d81acbd180b5debfd5c8638d2af57
Parents: d92e336
Author: Rob Vesse <rv...@apache.org>
Authored: Thu Jun 25 17:04:36 2015 +0100
Committer: Rob Vesse <rv...@apache.org>
Committed: Fri Jun 26 16:30:45 2015 +0100

----------------------------------------------------------------------
 apache-jena/bin/tdbloader2      | 35 ++++++++++++++++++++++++++++++++---
 apache-jena/bin/tdbloader2index |  9 +++++++++
 2 files changed, 41 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/7b61a144/apache-jena/bin/tdbloader2
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2 b/apache-jena/bin/tdbloader2
index 37cc874..34ee029 100755
--- a/apache-jena/bin/tdbloader2
+++ b/apache-jena/bin/tdbloader2
@@ -15,6 +15,30 @@
 ## See the License for the specific language governing permissions and
 ## limitations under the License.
 
+function printUsage() {
+  cat << EOF
+Usage: tdbloader2 <Options> <Data>
+
+Options are as follows:
+
+  --help
+    Prints this help summary and exits
+
+  --loc <DatabaseDirectory>
+    Sets the location in which the database should be created
+
+  --phase <Phase>
+    Sets the phase of the build to run, supported values are:
+
+      all    Full bulk load
+      data   Data phase only
+      index  Index phase only, requires the data phase to previously have been run
+
+    When not specified defaults to all
+
+EOF
+}
+
 # If JENA_HOME is empty
 if [ -z "$JENA_HOME" ]
 	then
@@ -48,7 +72,7 @@ case "$(uname)" in
 esac
 
 export JENA_CP
-echo $JENA_CP
+#echo $JENA_CP
 if [ -z "$SORT_ARGS" ]
 then
     SORT_ARGS="--buffer-size=50%"
@@ -84,6 +108,11 @@ do
       PHASE="$1"
       shift
       ;;
+    --help)
+      # Help
+      printUsage
+      exit 0
+      ;;
     *)
       # Once we see an unrecognized argument treat as start of files to process
       break
@@ -95,8 +124,8 @@ if [ -z "$PHASE" ]; then
   PHASE="all"
 fi
 
-echo "Location is '$LOC'"
-echo "Phase is '$PHASE'"
+#echo "Location is '$LOC'"
+#echo "Phase is '$PHASE'"
 
 log() { echo " $(date $DATE)" "$@" ; }
 

http://git-wip-us.apache.org/repos/asf/jena/blob/7b61a144/apache-jena/bin/tdbloader2index
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2index b/apache-jena/bin/tdbloader2index
index 372aa5c..5624854 100755
--- a/apache-jena/bin/tdbloader2index
+++ b/apache-jena/bin/tdbloader2index
@@ -80,6 +80,15 @@ KEEPWORKFILES="${KEEPWORKFILES:-}"
 DATA_TRIPLES="$LOC/data-triples.tmp"
 DATA_QUADS="$LOC/data-quads.tmp"
 
+if [ ! -e "$DATA_TRIPLES" ] ; then
+  echo "No triples data file found in location, please run the tdbloader2data script first"
+  exit 1
+fi
+if [ ! -e "$DATA_QUADS" ]; then
+  echo "No quads data file found in location, please run the tdbloader2data script first"
+  exit 1
+fi
+
 # ---- Index intermediates
 ## All files are written S P O / G S P O columns per row but in different sort orders.
 log "Index Building Phase"


[04/18] jena git commit: Various further improvements to the scripts (JENA-977)

Posted by rv...@apache.org.
Various further improvements to the scripts (JENA-977)

- Validate sort temporary directory when indexing and WARN if the disk
  it is on is low on space (10% or less free)
- Support --debug and --trace flags in all scripts, add various debug
  output throughout scripts
- Fix a bug with not detecting sort failure when pv is used to monitor
  progress
- Fix a bug in size calculations used for progress monitoring and sort
  failure detection

This commit includes some temporary DEV changes that will be reverted
later


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/7770596b
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/7770596b
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/7770596b

Branch: refs/heads/master
Commit: 7770596bc94613409fe2753240b603ae22a38b57
Parents: a96b016
Author: Rob Vesse <rv...@apache.org>
Authored: Fri Jun 26 16:15:18 2015 +0100
Committer: Rob Vesse <rv...@apache.org>
Committed: Fri Jun 26 16:31:05 2015 +0100

----------------------------------------------------------------------
 apache-jena/bin/tdbloader2      |  59 +++++++++++-----
 apache-jena/bin/tdbloader2data  |  43 ++++++++++--
 apache-jena/bin/tdbloader2index | 126 ++++++++++++++++++++++++++++++-----
 3 files changed, 192 insertions(+), 36 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/7770596b/apache-jena/bin/tdbloader2
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2 b/apache-jena/bin/tdbloader2
index 9ff2727..9508031 100755
--- a/apache-jena/bin/tdbloader2
+++ b/apache-jena/bin/tdbloader2
@@ -49,6 +49,10 @@ Common additional options are as follows:
 
 Advanced additional options are as follows:
 
+  -d
+  --debug
+    Enable debug mode, adds extra debug output
+
   -k
   --keep-work
     Keeps the temporary work files around after they are no longer
@@ -65,6 +69,10 @@ Advanced additional options are as follows:
 
     When no phase is specified it defaults to all
 
+  -t
+  --trace
+    Enable trace mode, essentially sets -x within the scripts
+
 EOF
 }
 
@@ -101,13 +109,12 @@ case "$(uname)" in
 esac
 
 export JENA_CP
-#echo $JENA_CP
-if [ -z "$SORT_ARGS" ]
-then
+# echo JENA_CP
+if [ -z "$SORT_ARGS" ]; then
     SORT_ARGS="--buffer-size=50%"
-    if $(sort --parallel=3 < /dev/null 2>/dev/null) 
-    then
-	SORT_ARGS="$SORT_ARGS --parallel=3"
+    sort --parallel=3 < /dev/null 2>/dev/null
+    if [ $? = 0 ]; then
+    	SORT_ARGS="$SORT_ARGS --parallel=3"
     fi
 fi
 export SORT_ARGS
@@ -116,11 +123,23 @@ export SORT_ARGS
 LOC=
 PHASE=
 KEEP_WORK=0
+DEBUG=0
+TRACE=0
 
 while [ $# -gt 0 ]
 do
   ARG=$1
   case "$ARG" in
+    -d|--debug)
+      # Debug Mode
+      shift
+      DEBUG=1
+      ;;
+    -h|--help)
+      # Help
+      printUsage
+      exit 0
+      ;;
     -k|--keep-work)
       # Keep work files
       shift
@@ -143,10 +162,11 @@ do
       PHASE="$1"
       shift
       ;;
-    -h|--help)
-      # Help
-      printUsage
-      exit 0
+    -t|--trace)
+      # Trace mode
+      shift
+      TRACE=1
+      set -x
       ;;
     *)
       # Once we see an unrecognized argument treat as start of files to process
@@ -159,9 +179,15 @@ if [ -z "$PHASE" ]; then
   PHASE="all"
 fi
 COMMON_ARGS=
-if [ $KEEP_WORK = 0 ]; then
+if [ $KEEP_WORK = 1 ]; then
   COMMON_ARGS="--keep-work"
 fi
+if [ $DEBUG = 1 ]; then
+  COMMON_ARGS="$COMMON_ARGS --debug"
+fi
+if [ $TRACE = 1 ]; then
+  COMMON_ARGS="$COMMON_ARGS --trace"
+fi
 
 log() { echo " $(date $DATE)" "$@" ; }
 
@@ -172,16 +198,19 @@ DATE="+%H:%M:%S"
 log "-- TDB Bulk Loader Start"
 TIME1="$(date +%s)"
 
+TOOL_DIR=$JENA_HOME/bin/
+# DEV - Following is just for debugging
+TOOL_DIR=
 case "$PHASE" in
   all)
-    exec "$JENA_HOME/bin/tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@"
-    exec "$JENA_HOME/bin/tdbloader2index" $COMMON_ARGS --loc "$LOC"
+    exec "${TOOL_DIR}tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@"
+    exec "${TOOL_DIR}tdbloader2index" $COMMON_ARGS --loc "$LOC"
     ;;
   data)
-    exec "$JENA_HOME/bin/tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@"
+    exec "${TOOL_DIR}tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@"
     ;;
   index)
-    exec "$JENA_HOME/bin/tdbloader2index" $COMMON_ARGS --loc "$LOC"
+    exec "${TOOL_DIR}tdbloader2index" $COMMON_ARGS --loc "$LOC"
     ;;
   *)
     echo "Unrecognized phase $PHASE" 1>&2

http://git-wip-us.apache.org/repos/asf/jena/blob/7770596b/apache-jena/bin/tdbloader2data
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2data b/apache-jena/bin/tdbloader2data
index 5aceb27..efb590a 100755
--- a/apache-jena/bin/tdbloader2data
+++ b/apache-jena/bin/tdbloader2data
@@ -52,35 +52,58 @@ Common additional options are as follows:
 
 Advanced additional options are as follows:
 
+  -d
+  --debug
+    Enable debug mode, adds extra debug output
+
   -k
   --keep-work
     Keeps the temporary work files around after they are no longer
     needed.  May be useful for debugging.
 
+  -t
+  --trace
+    Enable trace mode, essentially sets -x within the scripts
+
 EOF
 }
 
 # Exit on error.
 set -e
 
-# Sort order is ASCII
-export LC_ALL="C"
-
 log() { echo " $(date $DATE)" "$@" ; }
 
+function debug() {
+ if [ $DEBUG = 1 ]; then
+   log "DEBUG" "$@"
+ fi
+}
+
 #DATE="+%Y-%m-%dT%H:%M:%S%:z"
 DATE="+%H:%M:%S"
 
 PKG=org.apache.jena.tdb.store.bulkloader2
+#DEV - Allows use against Jena 2 API
+PKG=com.hp.hpl.jena.tdb.store.bulkloader2
 
 # Process Arguments
 LOC=
 KEEP_WORK=0
+DEBUG=0
 
 while [ $# -gt 0 ]
 do
   ARG=$1
   case "$ARG" in
+    -d|--debug)
+      # Debug Mode
+      shift
+      DEBUG=1
+      ;;
+    -h|--help)
+      printUsage
+      exit 0
+      ;;
     -k|--keep-work)
       # Keep work files
       # This option is actually not used by this script but may be passed in
@@ -99,9 +122,10 @@ do
       LOC=${ARG/-*loc=/}
       shift
       ;;
-    -h|--help)
-      printUsage
-      exit 0
+    -t|--trace)
+      # Trace mode
+      shift
+      set -x
       ;;
     *)
       # Any further arguments are treated as data files
@@ -124,18 +148,23 @@ fi
 
 if [ ! -e "$LOC" ] ; then
   # If non-existent try to create
+  debug "Trying to create new database directory: $LOC"
   mkdir "$LOC"
   if [ $? != 0 ]; then
     echo "Failed to create new directory: $LOC"
     exit 1
   fi
+  debug "New database directory created: $LOC"
 fi
 if [ ! -d "$LOC" ] ; then echo "Location is not a directory: $LOC" ; exit 1 ; fi
 
+# TODO Make LOC absolute
+
 FILES="$@"
 
 ## JVM Arguments
 JVM_ARGS=${JVM_ARGS:--Xmx1200M}
+debug "JVM Arguments are $JVM_ARGS"
 
 # Classpath set in "tdbloader2"
 if [ -z "$JENA_CP" ]
@@ -151,6 +180,8 @@ log "Data Load Phase"
 DATA_TRIPLES="$LOC/data-triples.tmp"
 DATA_QUADS="$LOC/data-quads.tmp"
 
+debug "Data files are $DATA_TRIPLES and $DATA_QUADS"
+
 java $JVM_ARGS -cp "$JENA_CP" "$PKG".CmdNodeTableBuilder \
     "--loc=$LOC" "--triples=$DATA_TRIPLES" "--quads=$DATA_QUADS" $FILES
 

http://git-wip-us.apache.org/repos/asf/jena/blob/7770596b/apache-jena/bin/tdbloader2index
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2index b/apache-jena/bin/tdbloader2index
index 2730af1..971b824 100755
--- a/apache-jena/bin/tdbloader2index
+++ b/apache-jena/bin/tdbloader2index
@@ -49,11 +49,18 @@ Common additional options are as follows:
 
 Advanced additional options are as follows:
 
+  -d
+  --debug
+    Enable debug mode, adds extra debug output
+
   -k
   --keep-work
     Keeps the temporary work files around after they are no longer
     needed.  May be useful for debugging.
 
+  -t
+  --trace
+    Enable trace mode, essentially sets -x within the scripts
 EOF
 }
 
@@ -65,20 +72,45 @@ export LC_ALL="C"
 
 log() { echo " $(date $DATE)" "$@" ; }
 
-TMP=$$
+function debug() {
+ if [ $DEBUG = 1 ]; then
+   log "DEBUG" "$@"
+ fi
+}
+
+function warn() {
+  log "WARN" "$@"
+}
+
+function getSize() {
+  ls -l $1 | awk '{print $5}'
+}
+
 #DATE="+%Y-%m-%dT%H:%M:%S%:z"
 DATE="+%H:%M:%S"
 
 PKG=org.apache.jena.tdb.store.bulkloader2
+#DEV - Allows use against Jena 2 API
+PKG=com.hp.hpl.jena.tdb.store.bulkloader2
 
 # Process Arguments
 LOC=
 KEEP_WORK=0
+DEBUG=0
 
 while [ $# -gt 0 ]
 do
   ARG=$1
   case "$ARG" in
+    -d|--debug)
+      # Debug Mode
+      shift
+      DEBUG=1
+      ;;
+    -h|--help)
+      printUsage
+      exit 0
+      ;;
     -k|--keep-work)
       # Keep work files
       shift
@@ -95,9 +127,10 @@ do
       LOC=${ARG/-*loc=/}
       shift
       ;;
-    -h|--help)
-      printUsage
-      exit 0
+    -t|--trace)
+      # Trace mode
+      shift
+      set -x
       ;;
     *)
       # Any further arguments are ignored
@@ -111,6 +144,8 @@ if [ -z "$LOC" ] ; then echo "No location specified" ; exit 1 ; fi
 if [ ! -e "$LOC" ] ; then echo "Location specified does not exist: $LOC" ; exit 1; fi
 if [ ! -d "$LOC" ] ; then echo "Location is not a directory: $LOC" ; exit 1 ; fi
 
+# TODO Make LOC absolute
+
 DATA_TRIPLES="$LOC/data-triples.tmp"
 DATA_QUADS="$LOC/data-quads.tmp"
 
@@ -123,9 +158,12 @@ if [ ! -e "$DATA_QUADS" ]; then
   exit 1
 fi
 
+debug "Data files are $DATA_TRIPLES and $DATA_QUADS"
+
 ##--parallel is not always available.
 SORT_ARGS="${SORT_ARGS:---buffer-size=50%}"
 JVM_ARGS=${JVM_ARGS:--Xmx1200M}
+debug "JVM Arguments are $JVM_ARGS"
 
 # Classpath set in "tdbloader2"
 if [ -z "$JENA_CP" ]
@@ -133,17 +171,57 @@ then
     echo "Classpath not provided : set JENA_CP" 1>&2
     exit 1
 fi
+debug "Jena Classpath is $JENA_CP"
 
 # ---- Index intermediates
 ## All files are writtern S P O / G S P O columns per row but in different sort orders.
 log "Index Building Phase"
 
 # Check whether Pipe Viewer is available
-# Needs to temporarily disable exit on error
+# Needs to temporarily disable exit on error as which produces an error
+# if the given command is not found
 set +e
 which pv >/dev/null 2>&1
 HAS_PV=$?
 set -e
+if [ $HAS_PV = 0 ]; then
+  debug "pv (Pipe Viewer) available on your system so sorts will show progres"
+else
+  debug "No pv (Pipe Viewer) on your system so sorts will show no progress"
+fi
+
+# Check where we are storing temporary sort files
+debug "Sort Arguments: $SORT_ARGS"
+SORT_TEMP_DIR=
+if [[ "$SORT_ARGS" == *"-T "* ]]; then
+  # Specified via -T argument
+  SORT_TEMP_DIR=(${SORT_ARGS/-T /})
+  SORT_TEMP_DIR=${SORT_TEMP_DIR[0]}
+elif [[ "$SORT_ARGS" == *"--temporary-directory="* ]]; then
+  # Specified via --temporary-directory argument
+  SORT_TEMP_DIR=(${SORT_ARGS/--temporary-directory=/})
+  SORT_TEMP_DIR=${SORT_TEMP_DIR[0]}
+else
+  # Using the system temp directory
+  SORT_TEMP_DIR="$TMPDIR"
+fi
+debug "Sort Temp Directory: $SORT_TEMP_DIR"
+
+# Find out how much space is on the sort directory
+SORT_DRIVE_INFO=$(df "$SORT_TEMP_DIR" | tail -n +2)
+SORT_DRIVE_DISK=$(echo $SORT_DRIVE_INFO | awk '{print $1}')
+SORT_DRIVE_FREE_SPACE=$(echo $SORT_DRIVE_INFO | awk '{print $4}')
+SORT_DRIVE_USED=$(echo $SORT_DRIVE_INFO | awk '{print $5}')
+SORT_DRIVE_FREE=${SORT_DRIVE_USED/"%"/}
+SORT_DRIVE_FREE=$((100 - $SORT_DRIVE_FREE))
+debug "Sort Temp Directory ${SORT_TEMP_DIR} is on disk ${SORT_DRIVE_DISK} which has ${SORT_DRIVE_FREE}% free space (${SORT_DRIVE_FREE_SPACE} bytes)"
+
+if [ $SORT_DRIVE_FREE -le 10 ]; then
+  echo
+  warn "Sort Temp Directory ${SORT_TEMP_DIR} is on disk ${SORT_DRIVE_DISK} which only has ${SORT_DRIVE_FREE}% free space (${SORT_DRIVE_FREE_SPACE} bytes) available"
+  warn "This may result in sort failures if the data to be indexed is large"
+  echo
+fi
 
 generate_index()
 {
@@ -152,35 +230,52 @@ generate_index()
     local IDX=$3
     local WORK="$LOC/$IDX-txt"
 
-    if [ ! -s "$DATA" ]
-    then
+    if [ ! -s "$DATA" ]; then
+      debug "Skipping Index $IDX as no relevant data to index"
 	    return
 	  fi
 
     log "Creating Index $IDX"
 
     # Sort the input data
-    log "  Sort $IDX"
+    log "Sort $IDX"
+    debug "Sorting $DATA into work file $WORK"
     if [ $HAS_PV = 0 ]; then
       # Use pv (pipe viewer) to monitor sort progress
       # Note that progress data will only be seen if running in the foreground
-      SIZE=$(du -k "$DATA" | cut -f 1)
+      # To report progress need to know size of input data
+      SIZE=$(getSize "$DATA")
+      debug "Size of data to be sorted is $SIZE bytes"
+
       pv -c -N data < "$DATA" | sort $SORT_ARGS -u $KEYS | pv -c -N sort -s $SIZE > $WORK
+
+      # CAUTION
+      # If sort errors here then the piping through pv will stop us from seeing the error
+      # and we'll continue onwards
+      # Therefore we need to check that the output size is same as input size as this is
+      # the only way to tell if sort suceeded
+      OUTPUT_SIZE=$(getSize "$WORK")
+      debug "Size of sorted data is $OUTPUT_SIZE bytes"
+      if [ $SIZE != $OUTPUT_SIZE ]; then
+        log "Aborting due to sort error"
+        exit 1
+      fi
     else
       # Use sort without any progress monitoring
       sort $SORT_ARGS -u $KEYS < "$DATA" > $WORK
     fi
-    log "  Sort $IDX Completed"
+    log "Sort $IDX Completed"
 
     # Build into an index
-    log "  Build $IDX"
+    log "Build $IDX"
     rm -f "$LOC/$IDX.dat"
     rm -f "$LOC/$IDX.idn"
     java -cp "$JENA_CP" "$PKG".CmdIndexBuild "$LOC" "$IDX" "$WORK"
-    log "  Build $IDX Completed"
+    log "Build $IDX Completed"
 
     # Remove work file unless keeping
-    if [ $KEEP_WORK = 1 ]; then
+    if [ $KEEP_WORK = 0 ]; then
+      debug "Cleaning up work file $WORK"
 	    rm "$WORK"
     fi
 }
@@ -211,6 +306,7 @@ generate_index "$K4 $K2 $K3 $K1" "$DATA_QUADS" OSPG
 log "Index Building Phase Completed"
 
 # ---- Clean up.
-if [ $KEEP_WORK = 1 ]; then
-    rm -f "$DATA_TRIPLES" "$DATA_QUADS" 
+if [ $KEEP_WORK = 0 ]; then
+  debug "Cleaning up data files $DATA_TRIPLES and $DATA_QUADS"
+  rm -f "$DATA_TRIPLES" "$DATA_QUADS"
 fi


[11/18] jena git commit: Fix bug where JENA_HOME is a symbolic link (JENA-977)

Posted by rv...@apache.org.
Fix bug where JENA_HOME is a symbolic link (JENA-977)

This commit fixes a bug that can occur when JENA_HOME is a symbolic
link: the scripts need to resolve the link, as otherwise they cannot
source the common functions script successfully.

Scripts now also bail out if they can't find the common functions script
to source.


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/d9ff26ec
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/d9ff26ec
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/d9ff26ec

Branch: refs/heads/master
Commit: d9ff26ec96b6cbf15d6649704dbcfe7f1d8d09eb
Parents: f64dbdc
Author: Rob Vesse <rv...@apache.org>
Authored: Tue Jun 30 15:59:33 2015 +0100
Committer: Rob Vesse <rv...@apache.org>
Committed: Tue Jun 30 15:59:33 2015 +0100

----------------------------------------------------------------------
 apache-jena/bin/tdbloader2      | 59 +++++++++++++++++++++++++++++++++---
 apache-jena/bin/tdbloader2data  | 48 ++++++++++++++++++++++++++++-
 apache-jena/bin/tdbloader2index | 48 ++++++++++++++++++++++++++++-
 3 files changed, 149 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/d9ff26ec/apache-jena/bin/tdbloader2
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2 b/apache-jena/bin/tdbloader2
index e598aeb..d8b375c 100755
--- a/apache-jena/bin/tdbloader2
+++ b/apache-jena/bin/tdbloader2
@@ -98,17 +98,45 @@ Advanced additional options are as follows:
 EOF
 }
 
+function resolveLink() {
+  local NAME=$1
+
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|*BSB*|*BSD|BSD*)
+        # BSD style readlink behaves differently to GNU readlink
+        # Have to manually follow links
+        while [ -L "$NAME" ]; do
+          NAME=$(readlink "$NAME")
+        done
+        ;;
+      *)
+        # Assuming standard GNU readlink with -f for
+        # canonicalize
+        NAME=$(readlink -f "$NAME")
+        ;;
+    esac
+  fi
+
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
 if [ -z "$JENA_HOME" ];	then
   echo "JENA_HOME not set, attempting to locate JENA_HOME automatically"
   SCRIPT="$0"
   # Catch common issue: script has been symlinked
 	if [ -L "$SCRIPT" ]; then
-		SCRIPT="$(readlink -f "$0")"
+		SCRIPT=$(resolveLink "$0")
 		# If link is relative
 		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative, make absolute
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
 		esac
 	fi
 
@@ -117,7 +145,30 @@ if [ -z "$JENA_HOME" ];	then
   export JENA_HOME
   echo "Located JENA_HOME at ${JENA_HOME}"
 fi
-source "${JENA_HOME}/bin/tdbloader2common"
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME=$(resolveLink "$JENA_HOME")
+  # If link is relative
+	case "$JENA_HOME" in
+    /*)
+      # Already absolute
+      ;;
+    *)
+      # Relative, make absolute
+      JENA_HOME=$(dirname "$JENA_HOME")
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME"
+fi
+
+if [ -e "${JENA_HOME}/bin/tdbloader2common" ]; then
+  # Can source common functions
+  source "${JENA_HOME}/bin/tdbloader2common"
+else
+  echo "Unable to locate common functions script tdbloader2common"
+  exit 1
+fi
 
 # ---- Setup
 JVM_ARGS=${JVM_ARGS:--Xmx1024M}

http://git-wip-us.apache.org/repos/asf/jena/blob/d9ff26ec/apache-jena/bin/tdbloader2data
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2data b/apache-jena/bin/tdbloader2data
index ab0fe87..2f8ffa7 100755
--- a/apache-jena/bin/tdbloader2data
+++ b/apache-jena/bin/tdbloader2data
@@ -18,12 +18,58 @@
 
 # The environment for this sub-script is setup by "tdbloader2"
 
+function resolveLink() {
+  local NAME=$1
+
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|*BSB*|*BSD|BSD*)
+        # BSD style readlink behaves differently to GNU readlink
+        # Have to manually follow links
+        while [ -L "$NAME" ]; do
+          NAME=$(readlink "$NAME")
+        done
+        ;;
+      *)
+        # Assuming standard GNU readlink with -f for
+        # canonicalize
+        NAME=$(readlink -f "$NAME")
+        ;;
+    esac
+  fi
+
+  echo "$NAME"
+}
+
 # Pull in common functions
 if [ -z "$JENA_HOME" ]; then
   echo "JENA_HOME is not set"
   exit 1
 fi
-source "${JENA_HOME}/bin/tdbloader2common"
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME=$(resolveLink "$JENA_HOME")
+  # If link is relative
+	case "$JENA_HOME" in
+    /*)
+      # Already absolute
+      ;;
+    *)
+      # Relative, make absolute
+      JENA_HOME=$(dirname "$JENA_HOME")
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME"
+fi
+
+if [ -e "${JENA_HOME}/bin/tdbloader2common" ]; then
+  # Can source common functions
+  source "${JENA_HOME}/bin/tdbloader2common"
+else
+  echo "Unable to locate common functions script tdbloader2common"
+  exit 1
+fi
 
 function printUsage() {
   cat << EOF

http://git-wip-us.apache.org/repos/asf/jena/blob/d9ff26ec/apache-jena/bin/tdbloader2index
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2index b/apache-jena/bin/tdbloader2index
index b997b39..78f82b5 100755
--- a/apache-jena/bin/tdbloader2index
+++ b/apache-jena/bin/tdbloader2index
@@ -18,12 +18,58 @@
 
 # The environment for this sub-script is setup by "tdbloader2"
 
+function resolveLink() {
+  local NAME=$1
+
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|*BSB*|*BSD|BSD*)
+        # BSD style readlink behaves differently to GNU readlink
+        # Have to manually follow links
+        while [ -L "$NAME" ]; do
+          NAME=$(readlink "$NAME")
+        done
+        ;;
+      *)
+        # Assuming standard GNU readlink with -f for
+        # canonicalize
+        NAME=$(readlink -f "$NAME")
+        ;;
+    esac
+  fi
+
+  echo "$NAME"
+}
+
 # Pull in common functions
 if [ -z "$JENA_HOME" ]; then
   echo "JENA_HOME is not set"
   exit 1
 fi
-source "${JENA_HOME}/bin/tdbloader2common"
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME=$(resolveLink "$JENA_HOME")
+  # If link is relative
+	case "$JENA_HOME" in
+    /*)
+      # Already absolute
+      ;;
+    *)
+      # Relative, make absolute
+      JENA_HOME=$(dirname "$JENA_HOME")
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME"
+fi
+
+if [ -e "${JENA_HOME}/bin/tdbloader2common" ]; then
+  # Can source common functions
+  source "${JENA_HOME}/bin/tdbloader2common"
+else
+  echo "Unable to locate common functions script tdbloader2common"
+  exit 1
+fi
 
 function printUsage() {
   cat << EOF


[06/18] jena git commit: Further refactoring of tdbloader2 scripts (JENA-977)

Posted by rv...@apache.org.
Further refactoring of tdbloader2 scripts (JENA-977)

- Move common functions into tdbloader2common script
- Remove duplicated definitions from other scripts and source in the new
  common script
- Add helper function for getting drive information
- Add check in tdbloader2index script which will abort the build if
  there is insufficient free space to sort the data file. The sorted
  output will be the same size as the input, so if there are fewer
  bytes free than the size of the input we can abort early


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/c55c1f74
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/c55c1f74
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/c55c1f74

Branch: refs/heads/master
Commit: c55c1f74b4571eee2c9e333967b5671e862adff7
Parents: 3c59213
Author: Rob Vesse <rv...@apache.org>
Authored: Mon Jun 29 17:21:18 2015 +0100
Committer: Rob Vesse <rv...@apache.org>
Committed: Mon Jun 29 17:21:18 2015 +0100

----------------------------------------------------------------------
 apache-jena/bin/tdbloader2       |  29 ++++-----
 apache-jena/bin/tdbloader2common |  85 +++++++++++++++++++++++++++
 apache-jena/bin/tdbloader2data   |  53 ++++++++---------
 apache-jena/bin/tdbloader2index  | 107 +++++++++++++++-------------------
 4 files changed, 169 insertions(+), 105 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/c55c1f74/apache-jena/bin/tdbloader2
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2 b/apache-jena/bin/tdbloader2
index 55a0faf..b7a1af2 100755
--- a/apache-jena/bin/tdbloader2
+++ b/apache-jena/bin/tdbloader2
@@ -77,12 +77,10 @@ EOF
 }
 
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
+if [ -z "$JENA_HOME" ];	then
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+	if [ -L "$SCRIPT" ]; then
 		SCRIPT="$(readlink "$0")"
 		# If link is relative
 		case "$SCRIPT" in
@@ -91,9 +89,10 @@ if [ -z "$JENA_HOME" ]
 		esac
 	fi
 
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
 fi
+source "${JENA_HOME}/bin/tdbloader2common"
 
 # ---- Setup
 JVM_ARGS=${JVM_ARGS:--Xmx1024M}
@@ -189,13 +188,8 @@ if [ $TRACE = 1 ]; then
   COMMON_ARGS="$COMMON_ARGS --trace"
 fi
 
-log() { echo " $(date $DATE)" "$@" ; }
-
-#DATE="+%Y-%m-%dT%H:%M:%S%:z"
-DATE="+%H:%M:%S"
-
 # ---- Start
-log "-- TDB Bulk Loader Start"
+info "-- TDB Bulk Loader Start"
 TIME1="$(date +%s)"
 
 TOOL_DIR="$JENA_HOME/bin"
@@ -211,13 +205,12 @@ case "$PHASE" in
     exec "${TOOL_DIR}/tdbloader2index" $COMMON_ARGS --loc "$LOC"
     ;;
   *)
-    echo "Unrecognized phase $PHASE" 1>&2
-    exit 1
+    abort 1 "Unrecognized phase $PHASE"
     ;;
 esac
 
 # ---- End
 TIME2="$(date +%s)"
-log "-- TDB Bulk Loader Finish"
+info "-- TDB Bulk Loader Finish"
 ELAPSED=$(($TIME2-$TIME1))
-log "-- $ELAPSED seconds"
\ No newline at end of file
+info "-- $ELAPSED seconds"
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/jena/blob/c55c1f74/apache-jena/bin/tdbloader2common
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2common b/apache-jena/bin/tdbloader2common
new file mode 100644
index 0000000..beae115
--- /dev/null
+++ b/apache-jena/bin/tdbloader2common
@@ -0,0 +1,85 @@
+#!/usr/bin/env bash
+
+## Licensed to the Apache Software Foundation (ASF) under one
+## or more contributor license agreements.  See the NOTICE file
+## distributed with this work for additional information
+## regarding copyright ownership.  The ASF licenses this file
+## to you under the Apache License, Version 2.0 (the
+## "License"); you may not use this file except in compliance
+## with the License.  You may obtain a copy of the License at
+##
+##     http://www.apache.org/licenses/LICENSE-2.0
+##
+## Unless required by applicable law or agreed to in writing, software
+## distributed under the License is distributed on an "AS IS" BASIS,
+## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+## See the License for the specific language governing permissions and
+## limitations under the License.
+
+function log() {
+  echo " $(date $DATE)" "$@"
+}
+
+function debug() {
+ if [ $DEBUG = 1 ]; then
+   log "DEBUG" "$@"
+ fi
+}
+
+function info() {
+  log "INFO" "$@"
+}
+
+function warn() {
+  log "WARN" "$@" 1>&2
+}
+
+function error() {
+  log "ERROR" "$@" 1>&2
+}
+
+function abort() {
+  local EXIT=$1
+
+  # Trick to check for numeric
+  # -eq only returns true if the value is integer equals
+  if [ "$EXIT" -eq "$EXIT" ]; then
+    # Can use the provided exit code
+    shift
+  else
+    # Caller forgot to provide an exit code so use default of 1
+    EXIT=1
+  fi
+
+  # Log error and exit
+  error "$@"
+  exit $EXIT
+}
+
+function getSize() {
+  ls -l $1 | awk '{print $5}'
+}
+
+function getDriveInfo() {
+  local DIR=$1
+
+  local DRIVE_INFO=$(df "$DIR" | tail -n +2)
+  local DISK=$(echo $DRIVE_INFO | awk '{print $1}')
+  local FREE_BYTES=$(echo $DRIVE_INFO | awk '{print $4}')
+  local USED_PERCENT=$(echo $DRIVE_INFO | awk '{print $5}')
+  USED_PERCENT=${USED_PERCENT/"%"/}
+  local FREE_PERCENT=$((100 - $USED_PERCENT))
+
+  local INFO=()
+  INFO[0]=$DISK
+  INFO[1]=$USED_PERCENT
+  INFO[2]=$FREE_PERCENT
+  INFO[3]=$FREE_BYTES
+
+  echo ${INFO[@]}
+}
+
+#DATE="+%Y-%m-%dT%H:%M:%S%:z"
+DATE="+%H:%M:%S"
+
+PKG=org.apache.jena.tdb.store.bulkloader2
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/jena/blob/c55c1f74/apache-jena/bin/tdbloader2data
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2data b/apache-jena/bin/tdbloader2data
index eaf9069..6904c83 100755
--- a/apache-jena/bin/tdbloader2data
+++ b/apache-jena/bin/tdbloader2data
@@ -18,6 +18,13 @@
 
 # The environment for this sub-script is setup by "tdbloader2"
 
+# Pull in common functions
+if [ -z "$JENA_HOME" ]; then
+  echo "JENA_HOME is not set"
+  exit 1
+fi
+source "${JENA_HOME}/bin/tdbloader2common"
+
 function printUsage() {
   cat << EOF
 tdbloader2data - TDB Bulk Loader - Data Phase
@@ -71,19 +78,6 @@ EOF
 # Exit on error.
 set -e
 
-log() { echo " $(date $DATE)" "$@" ; }
-
-function debug() {
- if [ $DEBUG = 1 ]; then
-   log "DEBUG" "$@"
- fi
-}
-
-#DATE="+%Y-%m-%dT%H:%M:%S%:z"
-DATE="+%H:%M:%S"
-
-PKG=org.apache.jena.tdb.store.bulkloader2
-
 # Process Arguments
 LOC=
 KEEP_WORK=0
@@ -133,15 +127,18 @@ do
 done
 
 # Verify arguments
-if [ -z "$LOC" ] ; then echo "No location specified" ; exit 1 ; fi
-if [ $# = 0 ]; then echo "No data files specified" ; exit 1 ; fi
+if [ -z "$LOC" ]; then
+  abort 1 "No location specified"
+fi
+if [ $# = 0 ]; then
+  abort 1 "No data files specified"
+fi
 
 # Look for any index and data files in the directory.
 # Skip a possible configuration file
 if test -n "$(find "$LOC" -maxdepth 1 -type f ! -name 'this.*' -print -quit)"
 then 
-    echo "Location is not empty: $LOC"
-    exit 1
+    abort 1 "Location is not empty: $LOC"
 fi
 
 if [ ! -e "$LOC" ] ; then
@@ -149,12 +146,13 @@ if [ ! -e "$LOC" ] ; then
   debug "Trying to create new database directory: $LOC"
   mkdir "$LOC"
   if [ $? != 0 ]; then
-    echo "Failed to create new directory: $LOC"
-    exit 1
+    abort 1 "Failed to create new directory: $LOC"
   fi
   debug "New database directory created: $LOC"
 fi
-if [ ! -d "$LOC" ] ; then echo "Location is not a directory: $LOC" ; exit 1 ; fi
+if [ ! -d "$LOC" ]; then
+  abort 1 "Location is not a directory: $LOC"
+fi
 
 # TODO Make LOC absolute
 
@@ -165,22 +163,21 @@ JVM_ARGS=${JVM_ARGS:--Xmx1200M}
 debug "JVM Arguments are $JVM_ARGS"
 
 # Classpath set in "tdbloader2"
-if [ -z "$JENA_CP" ]
-then
-    echo "Classpath not provided : set JENA_CP" 1>&2
-    exit 1
+if [ -z "$JENA_CP" ]; then
+  abort 1 "Classpath not provided : set JENA_CP"
 fi
 
 # ---- Data loading phase
-log "Data Load Phase"
-# Produce nodes file and triples/quads text file.
+info "Data Load Phase"
 
+# Produce nodes file and triples/quads text file.
 DATA_TRIPLES="$LOC/data-triples.tmp"
 DATA_QUADS="$LOC/data-quads.tmp"
 
-debug "Data files are $DATA_TRIPLES and $DATA_QUADS"
+debug "Triples text files is $DATA_TRIPLES"
+debug "Quads text file is $DATA_QUADS"
 
 java $JVM_ARGS -cp "$JENA_CP" "$PKG".CmdNodeTableBuilder \
     "--loc=$LOC" "--triples=$DATA_TRIPLES" "--quads=$DATA_QUADS" $FILES
 
-log "Data Load Phase Completed"
+info "Data Load Phase Completed"

http://git-wip-us.apache.org/repos/asf/jena/blob/c55c1f74/apache-jena/bin/tdbloader2index
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2index b/apache-jena/bin/tdbloader2index
index f506df9..5de8d6a 100755
--- a/apache-jena/bin/tdbloader2index
+++ b/apache-jena/bin/tdbloader2index
@@ -18,6 +18,13 @@
 
 # The environment for this sub-script is setup by "tdbloader2"
 
+# Pull in common functions
+if [ -z "$JENA_HOME" ]; then
+  echo "JENA_HOME is not set"
+  exit 1
+fi
+source "${JENA_HOME}/bin/tdbloader2common"
+
 function printUsage() {
   cat << EOF
 tdbloader2index - TDB Bulk Loader - Index Phase
@@ -70,27 +77,6 @@ set -e
 # Sort order is ASCII
 export LC_ALL="C"
 
-log() { echo " $(date $DATE)" "$@" ; }
-
-function debug() {
- if [ $DEBUG = 1 ]; then
-   log "DEBUG" "$@"
- fi
-}
-
-function warn() {
-  log "WARN" "$@"
-}
-
-function getSize() {
-  ls -l $1 | awk '{print $5}'
-}
-
-#DATE="+%Y-%m-%dT%H:%M:%S%:z"
-DATE="+%H:%M:%S"
-
-PKG=org.apache.jena.tdb.store.bulkloader2
-
 # Process Arguments
 LOC=
 KEEP_WORK=0
@@ -138,22 +124,26 @@ do
 done
 
 # Verify arguments
-if [ -z "$LOC" ] ; then echo "No location specified" ; exit 1 ; fi
-if [ ! -e "$LOC" ] ; then echo "Location specified does not exist: $LOC" ; exit 1; fi
-if [ ! -d "$LOC" ] ; then echo "Location is not a directory: $LOC" ; exit 1 ; fi
+if [ -z "$LOC" ]; then
+  abort 1 "No location specified"
+fi
+if [ ! -e "$LOC" ]; then
+  abort 1 "Location specified does not exist: $LOC"
+fi
+if [ ! -d "$LOC" ]; then
+  abort 1 "Location is not a directory: $LOC"
+fi
 
 # TODO Make LOC absolute
 
 DATA_TRIPLES="$LOC/data-triples.tmp"
 DATA_QUADS="$LOC/data-quads.tmp"
 
-if [ ! -e "$DATA_TRIPLES" ] ; then
-  echo "No triples data file found in location, please run the tdbloader2data script first"
-  exit 1
+if [ ! -e "$DATA_TRIPLES" ]; then
+  abort 1 "No triples text file found in location, please run the tdbloader2data script first"
 fi
 if [ ! -e "$DATA_QUADS" ]; then
-  echo "No quads data file found in location, please run the tdbloader2data script first"
-  exit 1
+  abort 1 "No quads text file found in location, please run the tdbloader2data script first"
 fi
 
 debug "Data files are $DATA_TRIPLES and $DATA_QUADS"
@@ -164,16 +154,14 @@ JVM_ARGS=${JVM_ARGS:--Xmx1200M}
 debug "JVM Arguments are $JVM_ARGS"
 
 # Classpath set in "tdbloader2"
-if [ -z "$JENA_CP" ]
-then
-    echo "Classpath not provided : set JENA_CP" 1>&2
-    exit 1
+if [ -z "$JENA_CP" ]; then
+  abort 1 "Classpath not provided : set JENA_CP"
 fi
 debug "Jena Classpath is $JENA_CP"
 
 # ---- Index intermediates
 ## All files are writtern S P O / G S P O columns per row but in different sort orders.
-log "Index Building Phase"
+info "Index Building Phase"
 
 # Check whether Pipe Viewer is available
 # Needs to temporarily disable exit on error as which produces an error
@@ -204,21 +192,14 @@ else
   SORT_TEMP_DIR="$TMPDIR"
 fi
 debug "Sort Temp Directory: $SORT_TEMP_DIR"
+SORT_DRIVE_INFO=($(getDriveInfo "${SORT_TEMP_DIR}"))
+debug "Sort Temp Directory ${DIR} is on disk ${SORT_DRIVE_INFO[0]} which has ${SORT_DRIVE_INFO[2]}% free space (${SORT_DRIVE_INFO[3]} bytes)"
 
-# Find out how much space is on the sort directory
-SORT_DRIVE_INFO=$(df "$SORT_TEMP_DIR" | tail -n +2)
-SORT_DRIVE_DISK=$(echo $SORT_DRIVE_INFO | awk '{print $1}')
-SORT_DRIVE_FREE_SPACE=$(echo $SORT_DRIVE_INFO | awk '{print $4}')
-SORT_DRIVE_USED=$(echo $SORT_DRIVE_INFO | awk '{print $5}')
-SORT_DRIVE_FREE=${SORT_DRIVE_USED/"%"/}
-SORT_DRIVE_FREE=$((100 - $SORT_DRIVE_FREE))
-debug "Sort Temp Directory ${SORT_TEMP_DIR} is on disk ${SORT_DRIVE_DISK} which has ${SORT_DRIVE_FREE}% free space (${SORT_DRIVE_FREE_SPACE} bytes)"
-
-if [ $SORT_DRIVE_FREE -le 10 ]; then
-  echo
-  warn "Sort Temp Directory ${SORT_TEMP_DIR} is on disk ${SORT_DRIVE_DISK} which only has ${SORT_DRIVE_FREE}% free space (${SORT_DRIVE_FREE_SPACE} bytes) available"
+if [ "${SORT_DRIVE_INFO[2]}" -le 10 ]; then
+  warn "-----"
+  warn "Sort Temp Directory ${SORT_TEMP_DIR} is on disk ${SORT_DRIVE_INFO[0]} which only has ${SORT_DRIVE_INFO[2]}% free space (${SORT_DRIVE_INFO[3]} bytes) available"
   warn "This may result in sort failures if the data to be indexed is large"
-  echo
+  warn "-----"
 fi
 
 generate_index()
@@ -233,17 +214,26 @@ generate_index()
 	    return
 	  fi
 
-    log "Creating Index $IDX"
+    info "Creating Index $IDX"
+
+    # For various purposes we need to know the size of the input data
+    local SIZE=$(getSize "$DATA")
+    debug "Size of data to be sorted is $SIZE bytes"
+
+    # Verify that we have enough space to sort the data
+    local WORK_DRIVE_INFO=($(getDriveInfo "${WORK}"))
+    if [ "${SIZE}" -ge "${WORK_DRIVE_INFO[3]}" ]; then
+      abort 1 "Insufficient free space on database drive ${WORK_DRIVE_INFO[0]}, there are ${WORK_DRIVE_INFO[3]} bytes free but ${SIZE} bytes are required"
+    else
+      debug "Sufficient free space on database drive ${WORK_DRIVE_INFO[0]} to attempt sorting data file ${DATA} (${SIZE} bytes required from ${WORK_DRIVE_INFO[3]} bytes free)"
+    fi
 
     # Sort the input data
-    log "Sort $IDX"
+    info "Sort $IDX"
     debug "Sorting $DATA into work file $WORK"
     if [ $HAS_PV = 0 ]; then
       # Use pv (pipe viewer) to monitor sort progress
       # Note that progress data will only be seen if running in the foreground
-      # To report progress need to know size of input data
-      SIZE=$(getSize "$DATA")
-      debug "Size of data to be sorted is $SIZE bytes"
 
       pv -c -N data < "$DATA" | sort $SORT_ARGS -u $KEYS | pv -c -N sort -s $SIZE > $WORK
 
@@ -252,24 +242,23 @@ generate_index()
       # and we'll continue onwards
       # Therefore we need to check that the output size is same as input size as this is
       # the only way to tell if sort suceeded
-      OUTPUT_SIZE=$(getSize "$WORK")
+      local OUTPUT_SIZE=$(getSize "$WORK")
       debug "Size of sorted data is $OUTPUT_SIZE bytes"
       if [ $SIZE != $OUTPUT_SIZE ]; then
-        log "Aborting due to sort error"
-        exit 1
+        abort 1 "Aborting due to sort error, see preceding output for error from sort"
       fi
     else
       # Use sort without any progress monitoring
       sort $SORT_ARGS -u $KEYS < "$DATA" > $WORK
     fi
-    log "Sort $IDX Completed"
+    info "Sort $IDX Completed"
 
     # Build into an index
-    log "Build $IDX"
+    info "Build $IDX"
     rm -f "$LOC/$IDX.dat"
     rm -f "$LOC/$IDX.idn"
     java -cp "$JENA_CP" "$PKG".CmdIndexBuild "$LOC" "$IDX" "$WORK"
-    log "Build $IDX Completed"
+    info "Build $IDX Completed"
 
     # Remove work file unless keeping
     if [ $KEEP_WORK = 0 ]; then
@@ -301,7 +290,7 @@ generate_index "$K3 $K4 $K2 $K1" "$DATA_QUADS" POSG
 
 generate_index "$K4 $K2 $K3 $K1" "$DATA_QUADS" OSPG
 
-log "Index Building Phase Completed"
+info "Index Building Phase Completed"
 
 # ---- Clean up.
 if [ $KEEP_WORK = 0 ]; then


[10/18] jena git commit: Ensure data file paths are absolute (JENA-977)

Posted by rv...@apache.org.
Ensure data file paths are absolute (JENA-977)

This commit improves the tdbloader2 script to ensure that data file
paths are made absolute and any symbolic links are resolved.


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/f64dbdcb
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/f64dbdcb
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/f64dbdcb

Branch: refs/heads/master
Commit: f64dbdcb6ac77cfb6654916e43797fdca3d4fb5c
Parents: d4a0bc5
Author: Rob Vesse <rv...@apache.org>
Authored: Tue Jun 30 15:33:09 2015 +0100
Committer: Rob Vesse <rv...@apache.org>
Committed: Tue Jun 30 15:33:09 2015 +0100

----------------------------------------------------------------------
 apache-jena/bin/tdbloader2data | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/f64dbdcb/apache-jena/bin/tdbloader2data
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2data b/apache-jena/bin/tdbloader2data
index f942e20..ab0fe87 100755
--- a/apache-jena/bin/tdbloader2data
+++ b/apache-jena/bin/tdbloader2data
@@ -191,8 +191,6 @@ if [ ! -d "$LOC" ]; then
   abort 1 "Database location is not a directory: $LOC"
 fi
 
-FILES="$@"
-
 ## Prepare JVM Arguments
 JVM_ARGS=${JVM_ARGS:--Xmx1200M}
 debug "JVM Arguments are $JVM_ARGS"
@@ -205,6 +203,32 @@ fi
 # ---- Data loading phase
 info "Data Load Phase"
 
+# Prepare Files
+FILES=()
+F=0
+while [ $# -gt 0 ]; do
+  FILE=$1
+  shift
+
+  ABS_FILE=$(makeAbsolute "$FILE")
+  if [ "$FILE" != "$ABS_FILE" ]; then
+    # Relative path was resolved
+    FILES[$F]="$ABS_FILE"
+    debug "Relative data file $FILE was resolved to absolute data file $ABS_FILE"
+  else
+    # Already absolute
+    FILES[$F]="$FILE"
+  fi
+
+  F=$(($F + 1))
+done
+info "Got ${#FILES[@]} data files to load"
+F=1
+for file in ${FILES[@]}; do
+  info "Data file $F: $file"
+  F=$(($F + 1))
+done
+
 # Produce nodes file and triples/quads text file.
 DATA_TRIPLES="$LOC/data-triples.tmp"
 DATA_QUADS="$LOC/data-quads.tmp"
@@ -213,6 +237,6 @@ debug "Triples text files is $DATA_TRIPLES"
 debug "Quads text file is $DATA_QUADS"
 
 java $JVM_ARGS -cp "$JENA_CP" "$PKG".CmdNodeTableBuilder \
-    "--loc=$LOC" "--triples=$DATA_TRIPLES" "--quads=$DATA_QUADS" $FILES
+    "--loc=$LOC" "--triples=$DATA_TRIPLES" "--quads=$DATA_QUADS" "${FILES[@]}"
 
 info "Data Load Phase Completed"


[09/18] jena git commit: Finish up first pass of work on tdbloader2 script refactoring (JENA-977)

Posted by rv...@apache.org.
Finish up first pass of work on tdbloader2 script refactoring (JENA-977)

- Add options for setting the JVM and sort arguments that do not rely on
  environment variables.  NB - For backwards compatibility the existing
  environment variables are still honoured if the new command line
  options are not used
- Improve some error messages
- Explicitly support -- for separating data files from options for cases
  where file names may be confused with options


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/d4a0bc50
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/d4a0bc50
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/d4a0bc50

Branch: refs/heads/master
Commit: d4a0bc50a6d82ab5bbb43ab90e65216e5b165621
Parents: cc4a80a
Author: Rob Vesse <rv...@apache.org>
Authored: Tue Jun 30 15:04:50 2015 +0100
Committer: Rob Vesse <rv...@apache.org>
Committed: Tue Jun 30 15:04:50 2015 +0100

----------------------------------------------------------------------
 apache-jena/bin/tdbloader2       | 76 ++++++++++++++++++++++++++++-------
 apache-jena/bin/tdbloader2common |  4 ++
 apache-jena/bin/tdbloader2data   | 40 +++++++++++++++---
 apache-jena/bin/tdbloader2index  | 72 ++++++++++++++++++++++++++++-----
 4 files changed, 162 insertions(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/d4a0bc50/apache-jena/bin/tdbloader2
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2 b/apache-jena/bin/tdbloader2
index d0d906c..e598aeb 100755
--- a/apache-jena/bin/tdbloader2
+++ b/apache-jena/bin/tdbloader2
@@ -53,6 +53,20 @@ Advanced additional options are as follows:
   --debug
     Enable debug mode, adds extra debug output
 
+  -j <JvmArgs>
+  --jvm-args <JvmArgs>
+    Sets the arguments that should be passed to the JVM for the
+    JVM based portions of the build.
+
+    Generally it is best to not change these unless you have been
+    specifically advised to.  The scripts will use appropriate
+    defaults if this is not specified.
+
+    In particular be careful increasing the heap size since many
+    parts of TDB actually use memory mapped files that live
+    outside the heap so if the heap is too large the heap may
+    conflict with the memory mapped files for memory space.
+
   -k
   --keep-work
     Keeps the temporary work files around after they are no longer
@@ -69,6 +83,14 @@ Advanced additional options are as follows:
 
     When no phase is specified it defaults to all
 
+  -s <SortArgs>
+  --sort-args <SortArgs>
+    Sets the arguments that should be passed to sort for the sort
+    based portions of the build.
+
+    Generally it is best not to change these as the scripts will
+    use appropriate defaults for your system.
+
   -t
   --trace
     Enable trace mode, essentially sets -x within the scripts
@@ -111,15 +133,7 @@ case "$(uname)" in
 esac
 
 export JENA_CP
-# echo JENA_CP
-if [ -z "$SORT_ARGS" ]; then
-    SORT_ARGS="--buffer-size=50%"
-    sort --parallel=3 < /dev/null 2>/dev/null
-    if [ $? = 0 ]; then
-    	SORT_ARGS="$SORT_ARGS --parallel=3"
-    fi
-fi
-export SORT_ARGS
+
 
 # Process arguments
 LOC=
@@ -127,6 +141,8 @@ PHASE=
 KEEP_WORK=0
 DEBUG=0
 TRACE=0
+JVM_ARGS=
+SORT_ARGS=
 
 while [ $# -gt 0 ]
 do
@@ -142,6 +158,12 @@ do
       printUsage
       exit 0
       ;;
+    -j|--jvm-args)
+      # JVM Arguments
+      shift
+      JVM_ARGS="$1"
+      shift
+      ;;
     -k|--keep-work)
       # Keep work files
       shift
@@ -164,14 +186,30 @@ do
       PHASE="$1"
       shift
       ;;
+    -s|--sort-args)
+      # Sort arguments
+      shift
+      SORT_ARGS=$1
+      shift
+      ;;
     -t|--trace)
       # Trace mode
       shift
       TRACE=1
       set -x
       ;;
+    --)
+      # Arguments separator
+      # All further arguments are treated as data files
+      shift
+      break
+      ;;
+    -*)
+      # Looks like an option but not known
+      abort 1 "Unrecognized option $ARG, if this was meant to be a data file separate options from data files with --"
+      ;;
     *)
-      # Once we see an unrecognized argument treat as start of files to process
+      # Once we see an unrecognized argument that doesn't look like an option treat as start of files to process
       break
       ;;
   esac
@@ -180,7 +218,11 @@ done
 if [ -z "$PHASE" ]; then
   PHASE="all"
 fi
+
+# Prepare arguments to pass to children
 COMMON_ARGS=
+DATA_ARGS=
+INDEX_ARGS=
 if [ $KEEP_WORK = 1 ]; then
   COMMON_ARGS="--keep-work"
 fi
@@ -190,6 +232,12 @@ fi
 if [ $TRACE = 1 ]; then
   COMMON_ARGS="$COMMON_ARGS --trace"
 fi
+if [ -n "$JVM_ARGS" ]; then
+  COMMON_ARGS="$COMMON_ARGS --jvm-args $JVM_ARGS"
+fi
+if [ -n "$SORT_ARGS" ]; then
+  INDEX_ARGS="--sort-args $SORT_ARGS"
+fi
 
 # ---- Start
 info "-- TDB Bulk Loader Start"
@@ -200,14 +248,14 @@ case "$PHASE" in
   all)
     # All Phases
     # Data Phase
-    "${TOOL_DIR}/tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@"
+    "${TOOL_DIR}/tdbloader2data" $COMMON_ARGS $DATA_ARGS --loc "$LOC" -- "$@"
     RET=$?
     if [ $RET -ne 0 ]; then
       abort $RET "Failed during data phase"
     fi
 
     # Index Phase
-    "${TOOL_DIR}/tdbloader2index" $COMMON_ARGS --loc "$LOC"
+    "${TOOL_DIR}/tdbloader2index" $COMMON_ARGS $INDEX_ARGS --loc "$LOC"
     RET=$?
     if [ $RET -ne 0 ]; then
       abort $RET "Failed during data phase"
@@ -216,7 +264,7 @@ case "$PHASE" in
 
   data)
     # Data Phase
-    "${TOOL_DIR}/tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@"
+    "${TOOL_DIR}/tdbloader2data" $COMMON_ARGS $DATA_ARGS --loc "$LOC" -- "$@"
     RET=$?
     if [ $RET -ne 0 ]; then
       abort $RET "Failed during data phase"
@@ -225,7 +273,7 @@ case "$PHASE" in
 
   index)
     # Index Phase
-    "${TOOL_DIR}/tdbloader2index" $COMMON_ARGS --loc "$LOC"
+    "${TOOL_DIR}/tdbloader2index" $COMMON_ARGS $INDEX_ARGS --loc "$LOC"
     RET=$?
     if [ $RET -ne 0 ]; then
       abort $RET "Failed during index phase"

http://git-wip-us.apache.org/repos/asf/jena/blob/d4a0bc50/apache-jena/bin/tdbloader2common
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2common b/apache-jena/bin/tdbloader2common
index 2830545..2c116ad 100644
--- a/apache-jena/bin/tdbloader2common
+++ b/apache-jena/bin/tdbloader2common
@@ -80,7 +80,10 @@ function getDriveInfo() {
 }
 
 function getFreeMem() {
+  # May be called from a script where exit on error is set
+  # in which case disable for the life of this function
   set +e
+
   local FREE_MEM=-1
   case "$OSTYPE" in
     darwin*)
@@ -98,6 +101,7 @@ function getFreeMem() {
       fi
       ;;
   esac
+
   set -e
 
   echo "$FREE_MEM"

http://git-wip-us.apache.org/repos/asf/jena/blob/d4a0bc50/apache-jena/bin/tdbloader2data
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2data b/apache-jena/bin/tdbloader2data
index 2c48a50..f942e20 100755
--- a/apache-jena/bin/tdbloader2data
+++ b/apache-jena/bin/tdbloader2data
@@ -63,6 +63,20 @@ Advanced additional options are as follows:
   --debug
     Enable debug mode, adds extra debug output
 
+  -j <JvmArgs>
+  --jvm-args <JvmArgs>
+    Sets the arguments that should be passed to the JVM for the
+    JVM based portions of the build.
+
+    Generally it is best to not change these unless you have been
+    specifically advised to.  The scripts will use appropriate
+    defaults if this is not specified.
+
+    In particular be careful increasing the heap size since many
+    parts of TDB actually use memory mapped files that live
+    outside the heap so if the heap is too large the heap may
+    conflict with the memory mapped files for memory space.
+
   -k
   --keep-work
     Keeps the temporary work files around after they are no longer
@@ -96,6 +110,12 @@ do
       printUsage
       exit 0
       ;;
+    -j|--jvm-args)
+      # JVM Arguments
+      shift
+      JVM_ARGS="$1"
+      shift
+      ;;
     -k|--keep-work)
       # Keep work files
       # This option is actually not used by this script but may be passed in
@@ -119,6 +139,16 @@ do
       shift
       set -x
       ;;
+    --)
+      # Arguments separator
+      # All further arguments are treated as data files
+      shift
+      break
+      ;;
+    -*)
+      # Unrecognized
+      abort 1 "Unrecognized option $ARG, if this was meant to be a data file separate options from data files with --"
+      ;;
     *)
       # Any further arguments are treated as data files
       break
@@ -128,10 +158,10 @@ done
 
 # Verify arguments
 if [ -z "$LOC" ]; then
-  abort 1 "No location specified"
+  abort 1 "Required database location not specified"
 fi
 if [ $# = 0 ]; then
-  abort 1 "No data files specified"
+  abort 1 "No data files specified, one/more data files must be specified"
 fi
 
 # Make LOC absolute
@@ -145,7 +175,7 @@ fi
 # Skip a possible configuration file
 if test -n "$(find "$LOC" -maxdepth 1 -type f ! -name 'this.*' -print -quit)"
 then 
-    abort 1 "Location is not empty: $LOC"
+    abort 1 "Database location is not empty: $LOC"
 fi
 
 if [ ! -e "$LOC" ] ; then
@@ -158,12 +188,12 @@ if [ ! -e "$LOC" ] ; then
   debug "New database directory created: $LOC"
 fi
 if [ ! -d "$LOC" ]; then
-  abort 1 "Location is not a directory: $LOC"
+  abort 1 "Database location is not a directory: $LOC"
 fi
 
 FILES="$@"
 
-## JVM Arguments
+## Prepare JVM Arguments
 JVM_ARGS=${JVM_ARGS:--Xmx1200M}
 debug "JVM Arguments are $JVM_ARGS"
 

http://git-wip-us.apache.org/repos/asf/jena/blob/d4a0bc50/apache-jena/bin/tdbloader2index
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2index b/apache-jena/bin/tdbloader2index
index 15a5832..b997b39 100755
--- a/apache-jena/bin/tdbloader2index
+++ b/apache-jena/bin/tdbloader2index
@@ -60,11 +60,33 @@ Advanced additional options are as follows:
   --debug
     Enable debug mode, adds extra debug output
 
+  -j <JvmArgs>
+  --jvm-args <JvmArgs>
+    Sets the arguments that should be passed to the JVM for the
+    JVM based portions of the build.
+
+    Generally it is best to not change these unless you have been
+    specifically advised to.  The scripts will use appropriate
+    defaults if this is not specified.
+
+    In particular be careful increasing the heap size since many
+    parts of TDB actually use memory mapped files that live
+    outside the heap so if the heap is too large the heap may
+    conflict with the memory mapped files for memory space.
+
   -k
   --keep-work
     Keeps the temporary work files around after they are no longer
     needed.  May be useful for debugging.
 
+  -s <SortArgs>
+  --sort-args <SortArgs>
+    Sets the arguments that should be passed to sort for the sort
+    based portions of the build.
+
+    Generally it is best not to change these as the scripts will
+    use appropriate defaults for your system.
+
   -t
   --trace
     Enable trace mode, essentially sets -x within the scripts
@@ -81,6 +103,8 @@ export LC_ALL="C"
 LOC=
 KEEP_WORK=0
 DEBUG=0
+JVM_ARGS=
+SORT_ARGS=
 
 while [ $# -gt 0 ]
 do
@@ -95,6 +119,12 @@ do
       printUsage
       exit 0
       ;;
+    -j|--jvm-args)
+      # JVM Arguments
+      shift
+      JVM_ARGS="$1"
+      shift
+      ;;
     -k|--keep-work)
       # Keep work files
       shift
@@ -111,21 +141,27 @@ do
       LOC=${ARG/-*loc=/}
       shift
       ;;
+    -s|--sort-args)
+      # Sort arguments
+      shift
+      SORT_ARGS=$1
+      shift
+      ;;
     -t|--trace)
       # Trace mode
       shift
       set -x
       ;;
     *)
-      # Any further arguments are ignored
-      break
+      # Additional options are not supported
+      abort 1 "Unrecognized option $ARG"
       ;;
   esac
 done
 
 # Verify arguments
 if [ -z "$LOC" ]; then
-  abort 1 "No location specified"
+  abort 1 "Required database location not specified"
 fi
 
 # Make LOC absolute
@@ -137,26 +173,40 @@ fi
 
 # Check location
 if [ ! -e "$LOC" ]; then
-  abort 1 "Location specified does not exist: $LOC"
+  abort 1 "Database location specified does not exist: $LOC"
 fi
 if [ ! -d "$LOC" ]; then
-  abort 1 "Location is not a directory: $LOC"
+  abort 1 "Database location is not a directory: $LOC"
 fi
 
+# Locate and check data text files
 DATA_TRIPLES="$LOC/data-triples.tmp"
 DATA_QUADS="$LOC/data-quads.tmp"
 
 if [ ! -e "$DATA_TRIPLES" ]; then
-  abort 1 "No triples text file found in location, please run the tdbloader2data script first"
+  abort 1 "No triples text file found in database location, please run the tdbloader2data script first"
 fi
 if [ ! -e "$DATA_QUADS" ]; then
-  abort 1 "No quads text file found in location, please run the tdbloader2data script first"
+  abort 1 "No quads text file found in database location, please run the tdbloader2data script first"
 fi
 
 debug "Data text files are $DATA_TRIPLES and $DATA_QUADS"
 
-##--parallel is not always available.
-SORT_ARGS="${SORT_ARGS:---buffer-size=50%}"
+# Prepare sort arguments
+if [ -z "$SORT_ARGS" ]; then
+    SORT_ARGS="--buffer-size=50%"
+
+    ##--parallel is not always available.
+    # Temporarily disable exit on error while we check for --parallel support
+    set +e
+    sort --parallel=3 < /dev/null 2>/dev/null
+    if [ $? = 0 ]; then
+    	SORT_ARGS="$SORT_ARGS --parallel=3"
+    fi
+    set -e
+fi
+
+# Prepare JVM arguments
 JVM_ARGS=${JVM_ARGS:--Xmx1200M}
 debug "JVM Arguments are $JVM_ARGS"
 
@@ -201,7 +251,7 @@ fi
 SORT_TEMP_DIR=$(makeAbsolute "$SORT_TEMP_DIR")
 debug "Sort Temp Directory: $SORT_TEMP_DIR"
 SORT_DRIVE_INFO=($(getDriveInfo "${SORT_TEMP_DIR}"))
-debug "Sort Temp Directory ${DIR} is on disk ${SORT_DRIVE_INFO[0]} which has ${SORT_DRIVE_INFO[2]}% free space (${SORT_DRIVE_INFO[3]} bytes)"
+debug "Sort Temp Directory is on disk ${SORT_DRIVE_INFO[0]} which has ${SORT_DRIVE_INFO[2]}% free space (${SORT_DRIVE_INFO[3]} bytes)"
 
 if [ "${SORT_DRIVE_INFO[2]}" -le 10 ]; then
   warn "-----"
@@ -288,7 +338,7 @@ generate_index()
     info "Build $IDX"
     rm -f "$LOC/$IDX.dat"
     rm -f "$LOC/$IDX.idn"
-    java -cp "$JENA_CP" "$PKG".CmdIndexBuild "$LOC" "$IDX" "$WORK"
+    java $JVM_ARGS -cp "$JENA_CP" "$PKG".CmdIndexBuild "$LOC" "$IDX" "$WORK"
     info "Build $IDX Completed"
 
     # Remove work file unless keeping


[07/18] jena git commit: Further improvements to tdbloader2 scripts (JENA-977)

Posted by rv...@apache.org.
Further improvements to tdbloader2 scripts (JENA-977)

- Auto-detection of JENA_HOME now exports it so it is visible to the
  child scripts
- Force making database directory path absolute and resolving any
  symbolic links in the path
- Additional checks in tdbloader2index to warn if sort is going to be
  external and it may run out of temporary disk space for the sort


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/a7ac2797
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/a7ac2797
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/a7ac2797

Branch: refs/heads/master
Commit: a7ac2797856bf60476204b8997b5a5bf4cfa15c5
Parents: c55c1f7
Author: Rob Vesse <rv...@apache.org>
Authored: Tue Jun 30 13:44:29 2015 +0100
Committer: Rob Vesse <rv...@apache.org>
Committed: Tue Jun 30 13:44:29 2015 +0100

----------------------------------------------------------------------
 apache-jena/bin/tdbloader2       |   5 +-
 apache-jena/bin/tdbloader2common | 106 ++++++++++++++++++++++++++++++++++
 apache-jena/bin/tdbloader2data   |   9 ++-
 apache-jena/bin/tdbloader2index  |  39 +++++++++++--
 4 files changed, 152 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/a7ac2797/apache-jena/bin/tdbloader2
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2 b/apache-jena/bin/tdbloader2
index b7a1af2..310ee66 100755
--- a/apache-jena/bin/tdbloader2
+++ b/apache-jena/bin/tdbloader2
@@ -78,10 +78,11 @@ EOF
 
 # If JENA_HOME is empty
 if [ -z "$JENA_HOME" ];	then
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically"
   SCRIPT="$0"
   # Catch common issue: script has been symlinked
 	if [ -L "$SCRIPT" ]; then
-		SCRIPT="$(readlink "$0")"
+		SCRIPT="$(readlink -f "$0")"
 		# If link is relative
 		case "$SCRIPT" in
    			/*) ;; # fine
@@ -91,6 +92,8 @@ if [ -z "$JENA_HOME" ];	then
 
   # Work out root from script location
   JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}"
 fi
 source "${JENA_HOME}/bin/tdbloader2common"
 

http://git-wip-us.apache.org/repos/asf/jena/blob/a7ac2797/apache-jena/bin/tdbloader2common
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2common b/apache-jena/bin/tdbloader2common
index beae115..2830545 100644
--- a/apache-jena/bin/tdbloader2common
+++ b/apache-jena/bin/tdbloader2common
@@ -79,6 +79,112 @@ function getDriveInfo() {
   echo ${INFO[@]}
 }
 
+function getFreeMem() {
+  set +e
+  local FREE_MEM=-1
+  case "$OSTYPE" in
+    darwin*)
+      # Have to get this from top
+      FREE_MEM=$(top -l 1 | grep PhysMem | awk '{print $6}')
+      FREE_MEM=${FREE_MEM%M}
+      FREE_MEM=$(($FREE_MEM * 1024 * 1024))
+      ;;
+    *)
+      # Try to use free if available
+      which free >/dev/null 2>&1
+      if [ $? -eq 0 ]; then
+        # Have free available
+        FREE_MEM=$(free -b)
+      fi
+      ;;
+  esac
+  set -e
+
+  echo "$FREE_MEM"
+}
+
+function resolveLink() {
+  local NAME=$1
+
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|*BSB*|*BSD|BSD*)
+        # BSD style readlink behaves differently to GNU readlink
+        # Have to manually follow links
+        while [ -L "$NAME" ]; do
+          NAME=$(readlink "$NAME")
+        done
+        ;;
+      *)
+        # Assuming standard GNU readlink with -f for
+        # canonicalize
+        NAME=$(readlink -f "$NAME")
+        ;;
+    esac
+  fi
+
+  echo "$NAME"
+}
+
+function resolveLinks() {
+  local NAME=$1
+
+  if [ -L "$NAME" ]; then
+    NAME=$(resolveLink "$NAME")
+  elif [[ "$NAME" == *"/" ]]; then
+    # If the path ends in a / test -L will report false even
+    # if the path is actually a symbolic link
+    # So check if the name without the trailing / is a link and if
+    # so resolve it
+    if [ -L "${NAME%/}" ]; then
+      NAME=${NAME%/}
+      NAME=$(resolveLink "$NAME")
+    fi
+  fi
+  echo "$NAME"
+}
+
+function makeAbsolute() {
+  local NAME=$1
+
+  # Follow links
+  NAME=$(resolveLinks "$NAME")
+
+  # Put back trailing slash
+  # Do this before we make the path absolute or we'll absolutize wrong
+  if [ -d "$NAME" ]; then
+    if [[ "$NAME" != *"/" ]]; then
+      NAME="${NAME}/"
+    fi
+  fi
+
+  if [[ "$NAME" != "/"* ]]; then
+    # Now make absolute
+    case "$OSTYPE" in
+      darwin*|*BSB*|*BSD|BSD*)
+        # BSD style readlink does not support the -f for canonicalization
+        # so have to do this via cd, pwd and basename
+        local FILENAME=$(basename "$NAME")
+        NAME=$(cd $(dirname "$NAME"); pwd)
+        NAME="$NAME/$FILENAME"
+        ;;
+      *)
+        # Otherwise assume standard GNU readlink
+        NAME=$(readlink -f "$NAME")
+        ;;
+    esac
+
+    # Put back trailing slash
+    if [ -d "$NAME" ]; then
+      if [[ "$NAME" != *"/" ]]; then
+        NAME="${NAME}/"
+      fi
+    fi
+  fi
+
+  echo "$NAME"
+}
+
 #DATE="+%Y-%m-%dT%H:%M:%S%:z"
 DATE="+%H:%M:%S"
 

http://git-wip-us.apache.org/repos/asf/jena/blob/a7ac2797/apache-jena/bin/tdbloader2data
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2data b/apache-jena/bin/tdbloader2data
index 6904c83..2c48a50 100755
--- a/apache-jena/bin/tdbloader2data
+++ b/apache-jena/bin/tdbloader2data
@@ -134,6 +134,13 @@ if [ $# = 0 ]; then
   abort 1 "No data files specified"
 fi
 
+# Make LOC absolute
+ABS_LOC=$(makeAbsolute "$LOC")
+if [ "$ABS_LOC" != "$LOC" ]; then
+  LOC="$ABS_LOC"
+  debug "Absolute database location is $LOC"
+fi
+
 # Look for any index and data files in the directory.
 # Skip a possible configuration file
 if test -n "$(find "$LOC" -maxdepth 1 -type f ! -name 'this.*' -print -quit)"
@@ -154,8 +161,6 @@ if [ ! -d "$LOC" ]; then
   abort 1 "Location is not a directory: $LOC"
 fi
 
-# TODO Make LOC absolute
-
 FILES="$@"
 
 ## JVM Arguments

http://git-wip-us.apache.org/repos/asf/jena/blob/a7ac2797/apache-jena/bin/tdbloader2index
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2index b/apache-jena/bin/tdbloader2index
index 5de8d6a..15a5832 100755
--- a/apache-jena/bin/tdbloader2index
+++ b/apache-jena/bin/tdbloader2index
@@ -127,6 +127,15 @@ done
 if [ -z "$LOC" ]; then
   abort 1 "No location specified"
 fi
+
+# Make LOC absolute
+ABS_LOC=$(makeAbsolute "$LOC")
+if [ "$ABS_LOC" != "$LOC" ]; then
+  LOC="$ABS_LOC"
+  debug "Absolute database location is $LOC"
+fi
+
+# Check location
 if [ ! -e "$LOC" ]; then
   abort 1 "Location specified does not exist: $LOC"
 fi
@@ -134,8 +143,6 @@ if [ ! -d "$LOC" ]; then
   abort 1 "Location is not a directory: $LOC"
 fi
 
-# TODO Make LOC absolute
-
 DATA_TRIPLES="$LOC/data-triples.tmp"
 DATA_QUADS="$LOC/data-quads.tmp"
 
@@ -146,7 +153,7 @@ if [ ! -e "$DATA_QUADS" ]; then
   abort 1 "No quads text file found in location, please run the tdbloader2data script first"
 fi
 
-debug "Data files are $DATA_TRIPLES and $DATA_QUADS"
+debug "Data text files are $DATA_TRIPLES and $DATA_QUADS"
 
 ##--parallel is not always available.
 SORT_ARGS="${SORT_ARGS:---buffer-size=50%}"
@@ -160,7 +167,7 @@ fi
 debug "Jena Classpath is $JENA_CP"
 
 # ---- Index intermediates
-## All files are writtern S P O / G S P O columns per row but in different sort orders.
+## All files are written S P O / G S P O columns per row but in different sort orders.
 info "Index Building Phase"
 
 # Check whether Pipe Viewer is available
@@ -191,6 +198,7 @@ else
   # Using the system temp directory
   SORT_TEMP_DIR="$TMPDIR"
 fi
+SORT_TEMP_DIR=$(makeAbsolute "$SORT_TEMP_DIR")
 debug "Sort Temp Directory: $SORT_TEMP_DIR"
 SORT_DRIVE_INFO=($(getDriveInfo "${SORT_TEMP_DIR}"))
 debug "Sort Temp Directory ${DIR} is on disk ${SORT_DRIVE_INFO[0]} which has ${SORT_DRIVE_INFO[2]}% free space (${SORT_DRIVE_INFO[3]} bytes)"
@@ -221,13 +229,36 @@ generate_index()
     debug "Size of data to be sorted is $SIZE bytes"
 
     # Verify that we have enough space to sort the data
+
+    # Firstly check that the output disk has sufficient space
     local WORK_DRIVE_INFO=($(getDriveInfo "${WORK}"))
     if [ "${SIZE}" -ge "${WORK_DRIVE_INFO[3]}" ]; then
+      # If there is insufficient disk space then we can abort now
       abort 1 "Insufficient free space on database drive ${WORK_DRIVE_INFO[0]}, there are ${WORK_DRIVE_INFO[3]} bytes free but ${SIZE} bytes are required"
     else
       debug "Sufficient free space on database drive ${WORK_DRIVE_INFO[0]} to attempt sorting data file ${DATA} (${SIZE} bytes required from ${WORK_DRIVE_INFO[3]} bytes free)"
     fi
 
+    # Secondly check if there is enough space to sort in-memory or if sort may need to do an external sort
+    # We only issue warnings when the sort is likely to be external because there are various factors
+    # such as virtual memory and OS file caching that may complicate this
+    FREE_MEM=$(getFreeMem)
+    if [ "$FREE_MEM" -ge 0 ]; then
+      if [ "$SIZE" -ge "$FREE_MEM" ]; then
+        warn "Insufficient free memory to sort data in-memory, sort will need to perform an external sort using Temp Directory ${SORT_TEMP_DIR}"
+
+        # Check for disk space on temporary disk
+        SORT_DRIVE_INFO=($(getDriveInfo "${SORT_TEMP_DIR}"))
+        if [ "$SIZE" -ge "${SORT_DRIVE_INFO[3]}" ]; then
+          warn "There may be insufficient for sort to perform an external sort using Tempo Directory ${SORT_TEMP_DIR} (${SIZE} bytes required but only ${SORT_DRIVE_INFO[3]} bytes free)"
+        fi
+      else
+        debug "Should be sufficient free memory ($FREE_MEM bytes) for sort to be fully in-memory"
+      fi
+    else
+      warn "Unable to determine free memory on your OS, can't check whether sort will be in-memory or external sort using Temp Directory ${SORT_TEMP_DIR}"
+    fi
+
     # Sort the input data
     info "Sort $IDX"
     debug "Sorting $DATA into work file $WORK"


[12/18] jena git commit: Minor clean up of OS type testing (JENA-977)

Posted by rv...@apache.org.
Minor clean up of OS type testing (JENA-977)


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/c25ad5d8
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/c25ad5d8
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/c25ad5d8

Branch: refs/heads/master
Commit: c25ad5d800779ca829a7bde581f98d62c417719b
Parents: d9ff26e
Author: Rob Vesse <rv...@apache.org>
Authored: Tue Jun 30 16:04:42 2015 +0100
Committer: Rob Vesse <rv...@apache.org>
Committed: Tue Jun 30 16:04:42 2015 +0100

----------------------------------------------------------------------
 apache-jena/bin/tdbloader2       | 2 +-
 apache-jena/bin/tdbloader2common | 4 ++--
 apache-jena/bin/tdbloader2data   | 2 +-
 apache-jena/bin/tdbloader2index  | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/c25ad5d8/apache-jena/bin/tdbloader2
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2 b/apache-jena/bin/tdbloader2
index d8b375c..12168fa 100755
--- a/apache-jena/bin/tdbloader2
+++ b/apache-jena/bin/tdbloader2
@@ -103,7 +103,7 @@ function resolveLink() {
 
   if [ -L "$NAME" ]; then
     case "$OSTYPE" in
-      darwin*|*BSB*|*BSD|BSD*)
+      darwin*|bsd*)
         # BSD style readlink behaves differently to GNU readlink
         # Have to manually follow links
         while [ -L "$NAME" ]; do

http://git-wip-us.apache.org/repos/asf/jena/blob/c25ad5d8/apache-jena/bin/tdbloader2common
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2common b/apache-jena/bin/tdbloader2common
index 2c116ad..2c73f7f 100644
--- a/apache-jena/bin/tdbloader2common
+++ b/apache-jena/bin/tdbloader2common
@@ -112,7 +112,7 @@ function resolveLink() {
 
   if [ -L "$NAME" ]; then
     case "$OSTYPE" in
-      darwin*|*BSB*|*BSD|BSD*)
+      darwin*|bsd*)
         # BSD style readlink behaves differently to GNU readlink
         # Have to manually follow links
         while [ -L "$NAME" ]; do
@@ -165,7 +165,7 @@ function makeAbsolute() {
   if [[ "$NAME" != "/"* ]]; then
     # Now make absolute
     case "$OSTYPE" in
-      darwin*|*BSB*|*BSD|BSD*)
+      darwin*|bsd*)
         # BSD style readlink does not support the -f for canonicalization
         # so have to do this via cd, pwd and basename
         local FILENAME=$(basename "$NAME")

http://git-wip-us.apache.org/repos/asf/jena/blob/c25ad5d8/apache-jena/bin/tdbloader2data
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2data b/apache-jena/bin/tdbloader2data
index 2f8ffa7..d0ca066 100755
--- a/apache-jena/bin/tdbloader2data
+++ b/apache-jena/bin/tdbloader2data
@@ -23,7 +23,7 @@ function resolveLink() {
 
   if [ -L "$NAME" ]; then
     case "$OSTYPE" in
-      darwin*|*BSB*|*BSD|BSD*)
+      darwin*|bsd*)
         # BSD style readlink behaves differently to GNU readlink
         # Have to manually follow links
         while [ -L "$NAME" ]; do

http://git-wip-us.apache.org/repos/asf/jena/blob/c25ad5d8/apache-jena/bin/tdbloader2index
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2index b/apache-jena/bin/tdbloader2index
index 78f82b5..4d50f93 100755
--- a/apache-jena/bin/tdbloader2index
+++ b/apache-jena/bin/tdbloader2index
@@ -23,7 +23,7 @@ function resolveLink() {
 
   if [ -L "$NAME" ]; then
     case "$OSTYPE" in
-      darwin*|*BSB*|*BSD|BSD*)
+      darwin*|bsd*)
         # BSD style readlink behaves differently to GNU readlink
         # Have to manually follow links
         while [ -L "$NAME" ]; do


[15/18] jena git commit: Update template.bin to improve JENA_HOME resolution (JENA-977)

Posted by rv...@apache.org.
Update template.bin to improve JENA_HOME resolution (JENA-977)

Applies the JENA_HOME resolution fixes from the tdbloader2 script
changes to the template.bin template that is used to generate the
various Jena command scripts


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/40fe2cd9
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/40fe2cd9
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/40fe2cd9

Branch: refs/heads/master
Commit: 40fe2cd93861e99e53184f76383323565407e372
Parents: 9b07039
Author: Rob Vesse <rv...@apache.org>
Authored: Wed Jul 1 10:32:45 2015 +0100
Committer: Rob Vesse <rv...@apache.org>
Committed: Wed Jul 1 10:32:45 2015 +0100

----------------------------------------------------------------------
 apache-jena/README       |  9 +++--
 apache-jena/cmd-maker    | 12 ++-----
 apache-jena/template.bin | 77 ++++++++++++++++++++++++++++++++++---------
 3 files changed, 69 insertions(+), 29 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/40fe2cd9/apache-jena/README
----------------------------------------------------------------------
diff --git a/apache-jena/README b/apache-jena/README
index 5f80997..e283a8c 100644
--- a/apache-jena/README
+++ b/apache-jena/README
@@ -27,20 +27,23 @@
   line tools.  The scripts can be copied to a convenient place on the
   command path.
 
-  To use the Jena tools from the command line you need to set the JENAROOT
+  To use the Jena tools from the command line you need to set the JENA_HOME
   environment variable to point to the location where you have 
   unzipped the Jena distribution:
 
   Windows:
-    set JENA_HOME=\path\to\apache-jena-2.7.5
+    set JENA_HOME=\path\to\apache-jena-x.y.z
     bat\sparql.bat --version    
 
   Linux:
     The command scripts automatically set JENA_HOME but if you want
     to switch to a different version fro the same scripts:
 
-    export JENA_HOME=/path/to/apache-jena-2.7.5
+    export JENA_HOME=/path/to/apache-jena-x.y.z
     bin/sparql --version    
+
+  Where x.y.z is the version of the Jena command line tools you have 
+  downloaded
     
   If you receive a class not found exception when trying to run one of the 
   scripts then you may have set JENA_HOME incorrectly. A quick and easy way

http://git-wip-us.apache.org/repos/asf/jena/blob/40fe2cd9/apache-jena/cmd-maker
----------------------------------------------------------------------
diff --git a/apache-jena/cmd-maker b/apache-jena/cmd-maker
index eb2a4e4..71e297c 100755
--- a/apache-jena/cmd-maker
+++ b/apache-jena/cmd-maker
@@ -19,14 +19,8 @@
 # Not tdbloader2.
 ## tdbloader2 is slightly different.
 ##   The main program is not a java program
-##   It is split into tdbloader2 and tdbloader2worker
-##   tdbloader2worker (the mainporgram) is the same in 
-##   developement and here. tdbloader2 is like the script 
-##   wrappers except it execs tdbloader2worker, not
-##   java.  It needs manually updating.
-##   Replace the java exec with:
-##     export JENA_CP
-##     exec tdbloader2worker "$@"
+##   It is split into several scripts that leverage a mixture of 
+##   POSIX and java tools and should be maintained separately
 
 CMDS=$(cat <<EOF
 jena.rdfcat
@@ -86,5 +80,3 @@ do
     make_bat $cmd
 done
 
-## Specials
-cp ../jena-tdb/bin/tdbloader2worker bin/tdbloader2worker

http://git-wip-us.apache.org/repos/asf/jena/blob/40fe2cd9/apache-jena/template.bin
----------------------------------------------------------------------
diff --git a/apache-jena/template.bin b/apache-jena/template.bin
index aad767a..bbc91fb 100644
--- a/apache-jena/template.bin
+++ b/apache-jena/template.bin
@@ -1,23 +1,68 @@
 #!/bin/sh
 ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
+function resolveLink() {
+  local NAME=$1
+
+  if [ -L "$NAME" ]; then
+    case "$OSTYPE" in
+      darwin*|bsd*)
+        # BSD style readlink behaves differently to GNU readlink
+        # Have to manually follow links
+        while [ -L "$NAME" ]; do
+          NAME=$(readlink "$NAME")
+        done
+        ;;
+      *)
+        # Assuming standard GNU readlink with -f for
+        # canonicalize and follow
+        NAME=$(readlink -f "$NAME")
+        ;;
+    esac
+  fi
+
+  echo "$NAME"
+}
+
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ]
-	then
-    SCRIPT="$0"
-    # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]
-		then
-		SCRIPT="$(readlink "$0")"
-		# If link is relative
-		case "$SCRIPT" in
-   			/*) ;; # fine
-			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
-		esac
-	fi
-
-    # Work out root from script location
-    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+if [ -z "$JENA_HOME" ]; then
+  echo "JENA_HOME not set, attempting to locate JENA_HOME automatically"
+  SCRIPT="$0"
+  # Catch common issue: script has been symlinked
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    # If link is relative
+    case "$SCRIPT" in
+      /*)
+        # Already absolute
+        ;;
+      *)
+        # Relative, make absolute
+        SCRIPT=$( dirname "$0" )/$SCRIPT
+        ;;
+    esac
+  fi
+
+  # Work out root from script location
+  JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
+  export JENA_HOME
+  echo "Located JENA_HOME at ${JENA_HOME}"
+fi
+# If JENA_HOME is a symbolic link need to resolve
+if [ -L "${JENA_HOME}" ]; then
+  JENA_HOME=$(resolveLink "$JENA_HOME")
+  # If link is relative
+  case "$JENA_HOME" in
+    /*)
+      # Already absolute
+      ;;
+    *)
+      # Relative, make absolute
+      JENA_HOME=$(dirname "$JENA_HOME")
+      ;;
+  esac
+  export JENA_HOME
+  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME"
 fi
 
 # ---- Setup


[03/18] jena git commit: Further refactoring of tdbloader2 scripts (JENA-977)

Posted by rv...@apache.org.
Further refactoring of tdbloader2 scripts (JENA-977)

- Proper usage summaries in all scripts
- -k/--keep-work option instead of hidden environment variable
  for keeping work
- Short forms for all options


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/a96b0164
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/a96b0164
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/a96b0164

Branch: refs/heads/master
Commit: a96b0164c43142791ac030e5332b3f54df6fb4ba
Parents: 7b61a14
Author: Rob Vesse <rv...@apache.org>
Authored: Fri Jun 26 12:25:57 2015 +0100
Committer: Rob Vesse <rv...@apache.org>
Committed: Fri Jun 26 16:30:53 2015 +0100

----------------------------------------------------------------------
 apache-jena/bin/tdbloader2      |  72 ++++++++++++++++------
 apache-jena/bin/tdbloader2data  |  82 ++++++++++++++++++++-----
 apache-jena/bin/tdbloader2index | 116 +++++++++++++++++++++++++----------
 3 files changed, 204 insertions(+), 66 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/a96b0164/apache-jena/bin/tdbloader2
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2 b/apache-jena/bin/tdbloader2
index 34ee029..9ff2727 100755
--- a/apache-jena/bin/tdbloader2
+++ b/apache-jena/bin/tdbloader2
@@ -17,24 +17,53 @@
 
 function printUsage() {
   cat << EOF
-Usage: tdbloader2 <Options> <Data>
+tdbloader2 - TDB Bulk Loader
 
-Options are as follows:
+Usage: tdbloader2 --loc <Directory> [Options] <Data> ...
 
+Bulk loader for TDB which manipulates the data files directly and so
+can only be used to create new databases.  This command relies on
+POSIX utilities so will only work on POSIX operating systems.
+
+If you wish to bulk load to an existing database please use tdbloader
+instead.
+
+Required options are as follows:
+
+  -l <DatabaseDirectory>
+  --loc <DatabaseDirectory>
+    Sets the location in which the database should be created.
+
+    This location must be a directory and must be empty, if a
+    non-existent path is specified it will be created as a new
+    directory.
+
+  <Data>
+    Specifies the path to one/more data files to load
+
+Common additional options are as follows:
+
+  -h
   --help
     Prints this help summary and exits
 
-  --loc <DatabaseDirectory>
-    Sets the location in which the database should be created
+Advanced additional options are as follows:
 
+  -k
+  --keep-work
+    Keeps the temporary work files around after they are no longer
+    needed.  May be useful for debugging.
+
+  -p <Phase>
   --phase <Phase>
     Sets the phase of the build to run, supported values are:
 
-      all    Full bulk load
-      data   Data phase only
-      index  Index phase only, requires the data phase to previously have been run
+      all      Full bulk load
+      data     Data phase only
+      index    Index phase only, requires the data phase to
+               previously have been run
 
-    When not specified defaults to all
+    When no phase is specified it defaults to all
 
 EOF
 }
@@ -86,12 +115,18 @@ export SORT_ARGS
 # Process arguments
 LOC=
 PHASE=
+KEEP_WORK=0
 
 while [ $# -gt 0 ]
 do
   ARG=$1
   case "$ARG" in
-    --loc|-loc)
+    -k|--keep-work)
+      # Keep work files
+      shift
+      KEEP_WORK=1
+      ;;
+    -l|--loc|-loc)
       # Location space separated
       shift
       LOC="$1"
@@ -102,13 +137,13 @@ do
       LOC=${ARG/-*loc=/}
       shift
       ;;
-    --phase)
+    -p|--phase)
       # Phase space separated
       shift
       PHASE="$1"
       shift
       ;;
-    --help)
+    -h|--help)
       # Help
       printUsage
       exit 0
@@ -123,9 +158,10 @@ done
 if [ -z "$PHASE" ]; then
   PHASE="all"
 fi
-
-#echo "Location is '$LOC'"
-#echo "Phase is '$PHASE'"
+COMMON_ARGS=
+if [ $KEEP_WORK = 0 ]; then
+  COMMON_ARGS="--keep-work"
+fi
 
 log() { echo " $(date $DATE)" "$@" ; }
 
@@ -138,14 +174,14 @@ TIME1="$(date +%s)"
 
 case "$PHASE" in
   all)
-    exec "$JENA_HOME/bin/tdbloader2data" --loc "$LOC" "$@"
-    exec "$JENA_HOME/bin/tdbloader2index" --loc "$LOC"
+    exec "$JENA_HOME/bin/tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@"
+    exec "$JENA_HOME/bin/tdbloader2index" $COMMON_ARGS --loc "$LOC"
     ;;
   data)
-    exec "$JENA_HOME/bin/tdbloader2data" --loc "$LOC" "$@"
+    exec "$JENA_HOME/bin/tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@"
     ;;
   index)
-    exec "$JENA_HOME/bin/tdbloader2index" --loc "$LOC"
+    exec "$JENA_HOME/bin/tdbloader2index" $COMMON_ARGS --loc "$LOC"
     ;;
   *)
     echo "Unrecognized phase $PHASE" 1>&2

http://git-wip-us.apache.org/repos/asf/jena/blob/a96b0164/apache-jena/bin/tdbloader2data
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2data b/apache-jena/bin/tdbloader2data
index 90200e4..5aceb27 100755
--- a/apache-jena/bin/tdbloader2data
+++ b/apache-jena/bin/tdbloader2data
@@ -18,6 +18,48 @@
 
 # The environment for this sub-script is setup by "tdbloader2"
 
+function printUsage() {
+  cat << EOF
+tdbloader2data - TDB Bulk Loader - Data Phase
+
+Usage tdbloader2data --loc <Directory> [Options] <Data> ...
+
+Bulk Loader for TDB which generates the Node Table.  This command
+relies on POSIX utilities so will only work on POSIX operating
+systems.
+
+This command can only be used to create new database. If you wish to
+bulk load to an existing database please use tdbloader instead.
+
+Required options are as follows:
+
+  -l <DatabaseDirectory>
+  --loc <DatabaseDirectory>
+    Sets the location in which the database should be created.
+
+    This location must be a directory and must be empty, if a
+    non-existent path is specified it will be created as a new
+    directory.
+
+  <Data>
+    Specifies the path to one/more data files to load
+
+Common additional options are as follows:
+
+  -h
+  --help
+    Prints this help summary and exits
+
+Advanced additional options are as follows:
+
+  -k
+  --keep-work
+    Keeps the temporary work files around after they are no longer
+    needed.  May be useful for debugging.
+
+EOF
+}
+
 # Exit on error.
 set -e
 
@@ -29,24 +71,24 @@ log() { echo " $(date $DATE)" "$@" ; }
 #DATE="+%Y-%m-%dT%H:%M:%S%:z"
 DATE="+%H:%M:%S"
 
-## JVM Arguments
-JVM_ARGS=${JVM_ARGS:--Xmx1200M}
-
-# Classpath set in "tdbloader2"
-if [ -z "$JENA_CP" ]
-then
-    echo "Classpath not provided : set JENA_CP" 1>&2
-    exit 1
-fi
-
-USAGE="Usage: tdbloader2data --loc location datafile ..."
 PKG=org.apache.jena.tdb.store.bulkloader2
 
+# Process Arguments
+LOC=
+KEEP_WORK=0
+
 while [ $# -gt 0 ]
 do
   ARG=$1
   case "$ARG" in
-    --loc|-loc)
+    -k|--keep-work)
+      # Keep work files
+      # This option is actually not used by this script but may be passed in
+      # by the parent tdbloader2 script
+      shift
+      KEEP_WORK=1
+      ;;
+    -l|--loc|-loc)
       # Location space separated
       shift
       LOC="$1"
@@ -57,8 +99,8 @@ do
       LOC=${ARG/-*loc=/}
       shift
       ;;
-    --help)
-      echo $USAGE
+    -h|--help)
+      printUsage
       exit 0
       ;;
     *)
@@ -91,8 +133,16 @@ fi
 if [ ! -d "$LOC" ] ; then echo "Location is not a directory: $LOC" ; exit 1 ; fi
 
 FILES="$@"
-## Stdin?
-KEEPWORKFILES="${KEEPWORKFILES:-}"
+
+## JVM Arguments
+JVM_ARGS=${JVM_ARGS:--Xmx1200M}
+
+# Classpath set in "tdbloader2"
+if [ -z "$JENA_CP" ]
+then
+    echo "Classpath not provided : set JENA_CP" 1>&2
+    exit 1
+fi
 
 # ---- Data loading phase
 log "Data Load Phase"

http://git-wip-us.apache.org/repos/asf/jena/blob/a96b0164/apache-jena/bin/tdbloader2index
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2index b/apache-jena/bin/tdbloader2index
index 5624854..2730af1 100755
--- a/apache-jena/bin/tdbloader2index
+++ b/apache-jena/bin/tdbloader2index
@@ -18,6 +18,45 @@
 
 # The environment for this sub-script is setup by "tdbloader2"
 
+function printUsage() {
+  cat << EOF
+tdbloader2index - TDB Bulk Loader - Index Phase
+
+Usage: tdbloader2index --loc <Directory> [Options]
+
+Bulk Loader for TDB which generates the Index files based upon the
+temporary data files generated by tdbloader2data.  This command relies
+on POSIX utilities so will only work on POSIX operating systems.
+
+This command can only be used to create new database. If you wish to
+bulk load to an existing database please use tdbloader instead.
+
+Required options are as follows:
+
+  -l <DatabaseDirectory>
+  --loc <DatabaseDirectory>
+    Sets the location in which the database should be created.
+
+    This location must be a directory and must be empty, if a
+    non-existent path is specified it will be created as a new
+    directory.
+
+Common additional options are as follows:
+
+  -h
+  --help
+    Prints this help summary and exits
+
+Advanced additional options are as follows:
+
+  -k
+  --keep-work
+    Keeps the temporary work files around after they are no longer
+    needed.  May be useful for debugging.
+
+EOF
+}
+
 # Exit on error.
 set -e
 
@@ -30,25 +69,22 @@ TMP=$$
 #DATE="+%Y-%m-%dT%H:%M:%S%:z"
 DATE="+%H:%M:%S"
 
-##--parallel is not always available.
-SORT_ARGS="${SORT_ARGS:---buffer-size=50%}"
-JVM_ARGS=${JVM_ARGS:--Xmx1200M}
-
-# Classpath set in "tdbloader2"
-if [ -z "$JENA_CP" ]
-then
-    echo "Classpath not provided : set JENA_CP" 1>&2
-    exit 1
-fi
-
-USAGE="Usage: tdbloader2index --loc location"
 PKG=org.apache.jena.tdb.store.bulkloader2
 
+# Process Arguments
+LOC=
+KEEP_WORK=0
+
 while [ $# -gt 0 ]
 do
   ARG=$1
   case "$ARG" in
-    --loc|-loc)
+    -k|--keep-work)
+      # Keep work files
+      shift
+      KEEP_WORK=1
+      ;;
+    -l|--loc|-loc)
       # Location space separated
       shift
       LOC="$1"
@@ -59,8 +95,8 @@ do
       LOC=${ARG/-*loc=/}
       shift
       ;;
-    --help)
-      echo $USAGE
+    -h|--help)
+      printUsage
       exit 0
       ;;
     *)
@@ -75,8 +111,6 @@ if [ -z "$LOC" ] ; then echo "No location specified" ; exit 1 ; fi
 if [ ! -e "$LOC" ] ; then echo "Location specified does not exist: $LOC" ; exit 1; fi
 if [ ! -d "$LOC" ] ; then echo "Location is not a directory: $LOC" ; exit 1 ; fi
 
-KEEPWORKFILES="${KEEPWORKFILES:-}"
-
 DATA_TRIPLES="$LOC/data-triples.tmp"
 DATA_QUADS="$LOC/data-quads.tmp"
 
@@ -89,14 +123,29 @@ if [ ! -e "$DATA_QUADS" ]; then
   exit 1
 fi
 
+##--parallel is not always available.
+SORT_ARGS="${SORT_ARGS:---buffer-size=50%}"
+JVM_ARGS=${JVM_ARGS:--Xmx1200M}
+
+# Classpath set in "tdbloader2"
+if [ -z "$JENA_CP" ]
+then
+    echo "Classpath not provided : set JENA_CP" 1>&2
+    exit 1
+fi
+
 # ---- Index intermediates
 ## All files are writtern S P O / G S P O columns per row but in different sort orders.
 log "Index Building Phase"
 
+# Check whether Pipe Viewer is available
+# Needs to temporarily disable exit on error
+set +e
 which pv >/dev/null 2>&1
 HAS_PV=$?
+set -e
 
-process_rows()
+generate_index()
 {
     local KEYS="$1"
     local DATA="$2"
@@ -109,6 +158,8 @@ process_rows()
 	  fi
 
     log "Creating Index $IDX"
+
+    # Sort the input data
     log "  Sort $IDX"
     if [ $HAS_PV = 0 ]; then
       # Use pv (pipe viewer) to monitor sort progress
@@ -120,14 +171,16 @@ process_rows()
       sort $SORT_ARGS -u $KEYS < "$DATA" > $WORK
     fi
     log "  Sort $IDX Completed"
+
+    # Build into an index
     log "  Build $IDX"
     rm -f "$LOC/$IDX.dat"
     rm -f "$LOC/$IDX.idn"
     java -cp "$JENA_CP" "$PKG".CmdIndexBuild "$LOC" "$IDX" "$WORK"
     log "  Build $IDX Completed"
-    # Remove intermediary file.
-    if [ "$KEEPWORKFILES" != "yes" ] 
-    then
+
+    # Remove work file unless keeping
+    if [ $KEEP_WORK = 1 ]; then
 	    rm "$WORK"
     fi
 }
@@ -137,28 +190,27 @@ K2="-k 2,2"
 K3="-k 3,3"
 K4="-k 4,4"
 
-process_rows "$K1 $K2 $K3" "$DATA_TRIPLES" SPO
+generate_index "$K1 $K2 $K3" "$DATA_TRIPLES" SPO
 
-process_rows "$K2 $K3 $K1" "$DATA_TRIPLES" POS
+generate_index "$K2 $K3 $K1" "$DATA_TRIPLES" POS
 
-process_rows "$K3 $K1 $K2" "$DATA_TRIPLES" OSP
+generate_index "$K3 $K1 $K2" "$DATA_TRIPLES" OSP
 
-process_rows "$K1 $K2 $K3 $K4" "$DATA_QUADS" GSPO
+generate_index "$K1 $K2 $K3 $K4" "$DATA_QUADS" GSPO
 
-process_rows "$K1 $K3 $K4 $K2" "$DATA_QUADS" GPOS
+generate_index "$K1 $K3 $K4 $K2" "$DATA_QUADS" GPOS
 
-process_rows "$K1 $K4 $K2 $K3" "$DATA_QUADS" GOSP
+generate_index "$K1 $K4 $K2 $K3" "$DATA_QUADS" GOSP
 
-process_rows "$K2 $K3 $K4 $K1" "$DATA_QUADS" SPOG
+generate_index "$K2 $K3 $K4 $K1" "$DATA_QUADS" SPOG
 
-process_rows "$K3 $K4 $K2 $K1" "$DATA_QUADS" POSG
+generate_index "$K3 $K4 $K2 $K1" "$DATA_QUADS" POSG
 
-process_rows "$K4 $K2 $K3 $K1" "$DATA_QUADS" OSPG
+generate_index "$K4 $K2 $K3 $K1" "$DATA_QUADS" OSPG
 
 log "Index Building Phase Completed"
 
 # ---- Clean up.
-if [ "$KEEPWORKFILES" != "yes" ] 
-then
+if [ $KEEP_WORK = 1 ]; then
     rm -f "$DATA_TRIPLES" "$DATA_QUADS" 
 fi


[17/18] jena git commit: Merge branch 'JENA-977'

Posted by rv...@apache.org.
Merge branch 'JENA-977'

This closes #84


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/2dc063f3
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/2dc063f3
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/2dc063f3

Branch: refs/heads/master
Commit: 2dc063f3b8496538e84ee55b3e3189881ab84072
Parents: 459f069 ea55883
Author: Rob Vesse <rv...@apache.org>
Authored: Tue Jul 7 10:28:43 2015 +0100
Committer: Rob Vesse <rv...@apache.org>
Committed: Tue Jul 7 10:28:43 2015 +0100

----------------------------------------------------------------------
 apache-jena/README               |   9 +-
 apache-jena/bin/arq              |  77 ++++--
 apache-jena/bin/infer            |  77 ++++--
 apache-jena/bin/iri              |  77 ++++--
 apache-jena/bin/juuid            |  77 ++++--
 apache-jena/bin/nquads           |  77 ++++--
 apache-jena/bin/ntriples         |  77 ++++--
 apache-jena/bin/qparse           |  77 ++++--
 apache-jena/bin/rdfcat           |  77 ++++--
 apache-jena/bin/rdfcompare       |  77 ++++--
 apache-jena/bin/rdfcopy          |  77 ++++--
 apache-jena/bin/rdfparse         |  77 ++++--
 apache-jena/bin/rdfxml           |  77 ++++--
 apache-jena/bin/riot             |  77 ++++--
 apache-jena/bin/rset             |  77 ++++--
 apache-jena/bin/rsparql          |  77 ++++--
 apache-jena/bin/rupdate          |  77 ++++--
 apache-jena/bin/schemagen        |  77 ++++--
 apache-jena/bin/sparql           |  77 ++++--
 apache-jena/bin/tdbbackup        |  77 ++++--
 apache-jena/bin/tdbdump          |  77 ++++--
 apache-jena/bin/tdbloader        |  77 ++++--
 apache-jena/bin/tdbloader2       | 331 ++++++++++++++++++++++++--
 apache-jena/bin/tdbloader2common | 195 ++++++++++++++++
 apache-jena/bin/tdbloader2data   | 288 +++++++++++++++++++++++
 apache-jena/bin/tdbloader2index  | 426 ++++++++++++++++++++++++++++++++++
 apache-jena/bin/tdbloader2worker | 154 ------------
 apache-jena/bin/tdbquery         |  77 ++++--
 apache-jena/bin/tdbstats         |  77 ++++--
 apache-jena/bin/tdbupdate        |  77 ++++--
 apache-jena/bin/trig             |  77 ++++--
 apache-jena/bin/turtle           |  77 ++++--
 apache-jena/bin/uparse           |  77 ++++--
 apache-jena/bin/update           |  77 ++++--
 apache-jena/bin/utf8             |  77 ++++--
 apache-jena/bin/wwwdec           |  77 ++++--
 apache-jena/bin/wwwenc           |  77 ++++--
 apache-jena/cmd-maker            |  12 +-
 apache-jena/pom.xml              |  78 +++++--
 apache-jena/template.bin         |  77 ++++--
 jena-tdb/bin/tdbloader2          |  47 ----
 jena-tdb/bin/tdbloader2worker    | 154 ------------
 42 files changed, 3234 insertions(+), 924 deletions(-)
----------------------------------------------------------------------



[05/18] jena git commit: Fix script usage in dev environment (JENA-977)

Posted by rv...@apache.org.
Fix script usage in dev environment (JENA-977)

This commit enhances the distribution module to make it much easier to
use in dev environments.  The dependency plugin is used with the
copy-dependencies goal to produce the lib/ directory during the package
phase, and the clean plugin is configured to remove the lib/ directory
during a clean.  This means that developers can now set JENA_HOME to the
distribution module directory in their working copy and, provided they
have run mvn package, all the scripts should work.

This also allows the temporary hacks in the new tdbloader2 scripts to be
removed so these scripts now run against Jena 3 libraries and don't need
the path to the new scripts to be hacked.


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/3c59213e
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/3c59213e
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/3c59213e

Branch: refs/heads/master
Commit: 3c59213e273711836628d9d030df23dac142ee1b
Parents: 7770596
Author: Rob Vesse <rv...@apache.org>
Authored: Mon Jun 29 13:12:03 2015 +0100
Committer: Rob Vesse <rv...@apache.org>
Committed: Mon Jun 29 13:12:03 2015 +0100

----------------------------------------------------------------------
 apache-jena/bin/tdbloader2      | 12 +++---
 apache-jena/bin/tdbloader2data  |  2 -
 apache-jena/bin/tdbloader2index |  2 -
 apache-jena/pom.xml             | 78 +++++++++++++++++++++++++++---------
 4 files changed, 64 insertions(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/3c59213e/apache-jena/bin/tdbloader2
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2 b/apache-jena/bin/tdbloader2
index 9508031..55a0faf 100755
--- a/apache-jena/bin/tdbloader2
+++ b/apache-jena/bin/tdbloader2
@@ -198,19 +198,17 @@ DATE="+%H:%M:%S"
 log "-- TDB Bulk Loader Start"
 TIME1="$(date +%s)"
 
-TOOL_DIR=$JENA_HOME/bin/
-# DEV - Following is just for debugging
-TOOL_DIR=
+TOOL_DIR="$JENA_HOME/bin"
 case "$PHASE" in
   all)
-    exec "${TOOL_DIR}tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@"
-    exec "${TOOL_DIR}tdbloader2index" $COMMON_ARGS --loc "$LOC"
+    exec "${TOOL_DIR}/tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@"
+    exec "${TOOL_DIR}/tdbloader2index" $COMMON_ARGS --loc "$LOC"
     ;;
   data)
-    exec "${TOOL_DIR}tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@"
+    exec "${TOOL_DIR}/tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@"
     ;;
   index)
-    exec "${TOOL_DIR}tdbloader2index" $COMMON_ARGS --loc "$LOC"
+    exec "${TOOL_DIR}/tdbloader2index" $COMMON_ARGS --loc "$LOC"
     ;;
   *)
     echo "Unrecognized phase $PHASE" 1>&2

http://git-wip-us.apache.org/repos/asf/jena/blob/3c59213e/apache-jena/bin/tdbloader2data
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2data b/apache-jena/bin/tdbloader2data
index efb590a..eaf9069 100755
--- a/apache-jena/bin/tdbloader2data
+++ b/apache-jena/bin/tdbloader2data
@@ -83,8 +83,6 @@ function debug() {
 DATE="+%H:%M:%S"
 
 PKG=org.apache.jena.tdb.store.bulkloader2
-#DEV - Allows use against Jena 2 API
-PKG=com.hp.hpl.jena.tdb.store.bulkloader2
 
 # Process Arguments
 LOC=

http://git-wip-us.apache.org/repos/asf/jena/blob/3c59213e/apache-jena/bin/tdbloader2index
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2index b/apache-jena/bin/tdbloader2index
index 971b824..f506df9 100755
--- a/apache-jena/bin/tdbloader2index
+++ b/apache-jena/bin/tdbloader2index
@@ -90,8 +90,6 @@ function getSize() {
 DATE="+%H:%M:%S"
 
 PKG=org.apache.jena.tdb.store.bulkloader2
-#DEV - Allows use against Jena 2 API
-PKG=com.hp.hpl.jena.tdb.store.bulkloader2
 
 # Process Arguments
 LOC=

http://git-wip-us.apache.org/repos/asf/jena/blob/3c59213e/apache-jena/pom.xml
----------------------------------------------------------------------
diff --git a/apache-jena/pom.xml b/apache-jena/pom.xml
index b718501..cd91792 100644
--- a/apache-jena/pom.xml
+++ b/apache-jena/pom.xml
@@ -16,7 +16,8 @@
    limitations under the License.
 -->
 
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
 
   <!-- Build the Jena download file.
        Assumes it is run as part of the module build (not run standalone)
@@ -39,7 +40,7 @@
     <artifactId>jena-parent</artifactId>
     <version>14-SNAPSHOT</version>
     <relativePath>../jena-parent</relativePath>
-  </parent> 
+  </parent>
 
   <organization>
     <name>Apache Jena</name>
@@ -123,23 +124,62 @@
     <plugins>
       <plugin>
         <artifactId>maven-assembly-plugin</artifactId>
-	<configuration>
-	  <tarLongFileMode>gnu</tarLongFileMode>    
-	</configuration>
-	<executions>
-	  <execution>
-	    <id>create-jena-download</id>
-	    <phase>package</phase>
-	    <goals><goal>single</goal></goals>
-	    <configuration>
-	      <appendAssemblyId>false</appendAssemblyId>
-	      <descriptors>
-		<descriptor>assembly-jena-zip.xml</descriptor>
-	      </descriptors>
-	      <tarLongFileFormat>gnu</tarLongFileFormat>
-	    </configuration>
-	  </execution>
-	</executions>
+        <configuration>
+          <tarLongFileMode>gnu</tarLongFileMode>
+        </configuration>
+        <executions>
+          <execution>
+            <id>create-jena-download</id>
+            <phase>package</phase>
+            <goals>
+              <goal>single</goal>
+            </goals>
+            <configuration>
+              <appendAssemblyId>false</appendAssemblyId>
+              <descriptors>
+                <descriptor>assembly-jena-zip.xml</descriptor>
+              </descriptors>
+              <tarLongFileFormat>gnu</tarLongFileFormat>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+      
+      <!-- 
+      Use the dependencies plugin to copy the dependencies into the lib/ directory which makes the scripts work in dev environments 
+      -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-dependency-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>copy-libs-for-scripts</id>
+            <phase>package</phase>
+            <goals>
+              <goal>copy-dependencies</goal>
+            </goals>
+            <configuration>
+              <overWriteSnapshots>true</overWriteSnapshots>
+              <includeScope>runtime</includeScope>
+              <includeTypes>jar</includeTypes>
+              <outputDirectory>lib/</outputDirectory>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+
+      <!-- 
+      As we copy dependencies into lib/ to make scripts work in dev environments we also need to clean up that directory
+      -->
+      <plugin>
+        <artifactId>maven-clean-plugin</artifactId>
+        <configuration>
+          <filesets>
+            <fileset>
+              <directory>lib/</directory>
+            </fileset>
+          </filesets>
+        </configuration>
       </plugin>
     </plugins>
   </build>


[13/18] jena git commit: Final pieces of tdbloader2 script clean up (JENA-977)

Posted by rv...@apache.org.
Final pieces of tdbloader2 script clean up (JENA-977)

- Fix white space inconsistencies in tdbloader2 scripts
- Removed defunct tdbloader2worker script
- Removed defunct and broken scripts from jena-tdb/bin/


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/12dc2cc6
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/12dc2cc6
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/12dc2cc6

Branch: refs/heads/master
Commit: 12dc2cc66640e432a4e2f5b45ebf2fb16c995440
Parents: c25ad5d
Author: Rob Vesse <rv...@apache.org>
Authored: Tue Jun 30 16:08:52 2015 +0100
Committer: Rob Vesse <rv...@apache.org>
Committed: Tue Jun 30 16:08:52 2015 +0100

----------------------------------------------------------------------
 apache-jena/bin/tdbloader2       |  16 ++--
 apache-jena/bin/tdbloader2data   |   2 +-
 apache-jena/bin/tdbloader2index  |  10 +--
 apache-jena/bin/tdbloader2worker | 154 ----------------------------------
 jena-tdb/bin/tdbloader2          |  47 -----------
 jena-tdb/bin/tdbloader2worker    | 154 ----------------------------------
 6 files changed, 14 insertions(+), 369 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/12dc2cc6/apache-jena/bin/tdbloader2
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2 b/apache-jena/bin/tdbloader2
index 12168fa..52950bf 100755
--- a/apache-jena/bin/tdbloader2
+++ b/apache-jena/bin/tdbloader2
@@ -122,14 +122,14 @@ function resolveLink() {
 }
 
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ];	then
+if [ -z "$JENA_HOME" ]; then
   echo "JENA_HOME not set, attempting to locate JENA_HOME automatically"
   SCRIPT="$0"
   # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]; then
-		SCRIPT=$(resolveLink "$0")
-		# If link is relative
-		case "$SCRIPT" in
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    # If link is relative
+    case "$SCRIPT" in
       /*)
         # Already absolute
         ;;
@@ -137,8 +137,8 @@ if [ -z "$JENA_HOME" ];	then
         # Relative, make absolute
         SCRIPT=$( dirname "$0" )/$SCRIPT
         ;;
-		esac
-	fi
+    esac
+  fi
 
   # Work out root from script location
   JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
@@ -149,7 +149,7 @@ fi
 if [ -L "${JENA_HOME}" ]; then
   JENA_HOME=$(resolveLink "$JENA_HOME")
   # If link is relative
-	case "$JENA_HOME" in
+  case "$JENA_HOME" in
     /*)
       # Already absolute
       ;;

http://git-wip-us.apache.org/repos/asf/jena/blob/12dc2cc6/apache-jena/bin/tdbloader2data
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2data b/apache-jena/bin/tdbloader2data
index d0ca066..ff44695 100755
--- a/apache-jena/bin/tdbloader2data
+++ b/apache-jena/bin/tdbloader2data
@@ -50,7 +50,7 @@ fi
 if [ -L "${JENA_HOME}" ]; then
   JENA_HOME=$(resolveLink "$JENA_HOME")
   # If link is relative
-	case "$JENA_HOME" in
+  case "$JENA_HOME" in
     /*)
       # Already absolute
       ;;

http://git-wip-us.apache.org/repos/asf/jena/blob/12dc2cc6/apache-jena/bin/tdbloader2index
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2index b/apache-jena/bin/tdbloader2index
index 4d50f93..c057b49 100755
--- a/apache-jena/bin/tdbloader2index
+++ b/apache-jena/bin/tdbloader2index
@@ -50,7 +50,7 @@ fi
 if [ -L "${JENA_HOME}" ]; then
   JENA_HOME=$(resolveLink "$JENA_HOME")
   # If link is relative
-	case "$JENA_HOME" in
+  case "$JENA_HOME" in
     /*)
       # Already absolute
       ;;
@@ -247,7 +247,7 @@ if [ -z "$SORT_ARGS" ]; then
     set +e
     sort --parallel=3 < /dev/null 2>/dev/null
     if [ $? = 0 ]; then
-    	SORT_ARGS="$SORT_ARGS --parallel=3"
+      SORT_ARGS="$SORT_ARGS --parallel=3"
     fi
     set -e
 fi
@@ -315,8 +315,8 @@ generate_index()
 
     if [ ! -s "$DATA" ]; then
       debug "Skipping Index $IDX as no relevant data to index"
-	    return
-	  fi
+      return
+    fi
 
     info "Creating Index $IDX"
 
@@ -390,7 +390,7 @@ generate_index()
     # Remove work file unless keeping
     if [ $KEEP_WORK = 0 ]; then
       debug "Cleaning up work file $WORK"
-	    rm "$WORK"
+      rm "$WORK"
     fi
 }
 

http://git-wip-us.apache.org/repos/asf/jena/blob/12dc2cc6/apache-jena/bin/tdbloader2worker
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2worker b/apache-jena/bin/tdbloader2worker
deleted file mode 100755
index ca26d82..0000000
--- a/apache-jena/bin/tdbloader2worker
+++ /dev/null
@@ -1,154 +0,0 @@
-#!/usr/bin/env bash
-
-## Licensed to the Apache Software Foundation (ASF) under one
-## or more contributor license agreements.  See the NOTICE file
-## distributed with this work for additional information
-## regarding copyright ownership.  The ASF licenses this file
-## to you under the Apache License, Version 2.0 (the
-## "License"); you may not use this file except in compliance
-## with the License.  You may obtain a copy of the License at
-##
-##     http://www.apache.org/licenses/LICENSE-2.0
-##
-## Unless required by applicable law or agreed to in writing, software
-## distributed under the License is distributed on an "AS IS" BASIS,
-## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-## See the License for the specific language governing permissions and
-## limitations under the License.
-
-# The environment for this sub-script is setup by "tdbloader2"
-
-# Exit on error.
-set -e
-
-# Sort order is ASCII
-export LC_ALL="C"
-
-log() { echo " $(date $DATE)" "$@" ; }
-
-TMP=$$
-#DATE="+%Y-%m-%dT%H:%M:%S%:z"
-DATE="+%H:%M:%S"
-
-##--parallel is not always available.
-SORT_ARGS="${SORT_ARGS:---buffer-size=50%}"
-JVM_ARGS=${JVM_ARGS:--Xmx1200M}
-
-# Classpath set in "tdbloader2"
-if [ -z "$JENA_CP" ]
-then
-    echo "Classpath not provided : set JENA_CP" 1>&2
-    exit 1
-fi
-
-USAGE="Usage: tdbloader2 --loc location datafile ..."
-PKG=org.apache.jena.tdb.store.bulkloader2
-
-if [ "$#" -lt 2 ] ; then echo "$USAGE" 1>&2 ; exit 1 ; fi
-
-## Process --loc. Yuk.
-ARG1="$1"
-shift
-if [ "$ARG1" = "-loc" -o  "$ARG1" = "--loc" ]
-then
-    LOC="$1"
-    shift
-else 
-    LOC="${ARG1/-*loc=/}"
-    if [ "$ARG1" = "$LOC" ] ; then echo $USAGE 1>&2 ; exit 1 ; fi
-fi
-
-# Look for any index and data files in the directory.
-# Skip a possible configuration file
-if test -n "$(find "$LOC" -maxdepth 1 -type f ! -name 'this.*' -print -quit)"
-then 
-    echo "Not empty: $LOC"
-    exit 1
-fi
-
-if [ ! -e "$LOC" ] ; then mkdir "$LOC" ; fi
-if [ ! -d "$LOC" ] ; then echo "Not a directory: $LOC" ; exit 1 ; fi
-
-FILES="$@"
-## Stdin?
-KEEPWORKFILES="${KEEPWORKFILES:-}"
-# ---- Start
-log "-- TDB Bulk Loader Start"
-TIME1="$(date +%s)"
-
-# ---- Data loading phase
-log "Data phase"
-# Produce nodes file and triples/quads text file.
-
-DATA_TRIPLES="$LOC/data-triples.$TMP"
-DATA_QUADS="$LOC/data-quads.$TMP"
-
-java $JVM_ARGS -cp "$JENA_CP" "$PKG".CmdNodeTableBuilder \
-    "--loc=$LOC" "--triples=$DATA_TRIPLES" "--quads=$DATA_QUADS" $FILES
-
-# ---- Index intermediates
-## All files are writtern S P O / G S P O columns per row but in different sort orders.
-log "Index phase"
-
-process_rows()
-{
-    local KEYS="$1"
-    local DATA="$2"
-    local IDX=$3
-    local WORK="$LOC/$IDX-txt"
-
-    if [ ! -s "$DATA" ]
-    then
-	return
-	fi
-
-    log "Index $IDX"
-    sort $SORT_ARGS -u $KEYS < "$DATA" > $WORK
-    log "Build $IDX"
-    rm -f "$LOC/$IDX.dat"
-    rm -f "$LOC/$IDX.idn"
-    java -cp "$JENA_CP" "$PKG".CmdIndexBuild "$LOC" "$IDX" "$WORK"
-    # Remove intermediary file.
-    if [ "$KEEPWORKFILES" != "yes" ] 
-    then
-	rm "$WORK" 
-    fi
-}
-
-K1="-k 1,1"
-K2="-k 2,2"
-K3="-k 3,3"
-K4="-k 4,4"
-
-process_rows "$K1 $K2 $K3" "$DATA_TRIPLES" SPO
-
-process_rows "$K2 $K3 $K1" "$DATA_TRIPLES" POS
-
-process_rows "$K3 $K1 $K2" "$DATA_TRIPLES" OSP
-
-process_rows "$K1 $K2 $K3 $K4" "$DATA_QUADS" GSPO
-
-process_rows "$K1 $K3 $K4 $K2" "$DATA_QUADS" GPOS
-
-process_rows "$K1 $K4 $K2 $K3" "$DATA_QUADS" GOSP
-
-process_rows "$K2 $K3 $K4 $K1" "$DATA_QUADS" SPOG
-
-process_rows "$K3 $K4 $K2 $K1" "$DATA_QUADS" POSG
-
-process_rows "$K4 $K2 $K3 $K1" "$DATA_QUADS" OSPG
-
-log "Index phase end"
-TIME2="$(date +%s)"
-
-# ---- Clean up.
-
-if [ "$KEEPWORKFILES" != "yes" ] 
-then
-    rm -f "$DATA_TRIPLES" "$DATA_QUADS" 
-fi
-
-# ---- End
-log "-- TDB Bulk Loader Finish"
-ELAPSED=$(($TIME2-$TIME1))
-log "-- $ELAPSED seconds"

http://git-wip-us.apache.org/repos/asf/jena/blob/12dc2cc6/jena-tdb/bin/tdbloader2
----------------------------------------------------------------------
diff --git a/jena-tdb/bin/tdbloader2 b/jena-tdb/bin/tdbloader2
deleted file mode 100755
index fff1358..0000000
--- a/jena-tdb/bin/tdbloader2
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/bin/bash
-
-## Licensed to the Apache Software Foundation (ASF) under one
-## or more contributor license agreements.  See the NOTICE file
-## distributed with this work for additional information
-## regarding copyright ownership.  The ASF licenses this file
-## to you under the Apache License, Version 2.0 (the
-## "License"); you may not use this file except in compliance
-## with the License.  You may obtain a copy of the License at
-##
-##     http://www.apache.org/licenses/LICENSE-2.0
-##
-## Unless required by applicable law or agreed to in writing, software
-## distributed under the License is distributed on an "AS IS" BASIS,
-## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-## See the License for the specific language governing permissions and
-## limitations under the License.
-
-if [ "$TDBROOT" = "" ]
-    then 
-    echo "TDBROOT is not set" 1>&2
-    exit 1
-fi
-
-INIT="$TDBROOT/bin/tdb_init"
-
-if [ ! -r "$INIT" ] 
-then 
-    echo "Script $INIT (\$TDBROOT/bin/tdb_init) does not exist or is not readable"
-    exit 1
-fi
-
-. "$INIT"
-
-#echo "$TDB_CP"
-export JENA_CP="$TDB_CP"
-if [ -z "$SORT_ARGS" ]
-then
-    SORT_ARGS="--buffer-size=50%"
-    if $(sort --parallel=3 < /dev/null 2>/dev/null) 
-    then
-	SORT_ARGS="$SORT_ARGS --parallel=3"
-    fi
-fi
-export SORT_ARGS
-
-exec "$TDBROOT/bin/tdbloader2worker" "$@"

http://git-wip-us.apache.org/repos/asf/jena/blob/12dc2cc6/jena-tdb/bin/tdbloader2worker
----------------------------------------------------------------------
diff --git a/jena-tdb/bin/tdbloader2worker b/jena-tdb/bin/tdbloader2worker
deleted file mode 100755
index ca26d82..0000000
--- a/jena-tdb/bin/tdbloader2worker
+++ /dev/null
@@ -1,154 +0,0 @@
-#!/usr/bin/env bash
-
-## Licensed to the Apache Software Foundation (ASF) under one
-## or more contributor license agreements.  See the NOTICE file
-## distributed with this work for additional information
-## regarding copyright ownership.  The ASF licenses this file
-## to you under the Apache License, Version 2.0 (the
-## "License"); you may not use this file except in compliance
-## with the License.  You may obtain a copy of the License at
-##
-##     http://www.apache.org/licenses/LICENSE-2.0
-##
-## Unless required by applicable law or agreed to in writing, software
-## distributed under the License is distributed on an "AS IS" BASIS,
-## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-## See the License for the specific language governing permissions and
-## limitations under the License.
-
-# The environment for this sub-script is setup by "tdbloader2"
-
-# Exit on error.
-set -e
-
-# Sort order is ASCII
-export LC_ALL="C"
-
-log() { echo " $(date $DATE)" "$@" ; }
-
-TMP=$$
-#DATE="+%Y-%m-%dT%H:%M:%S%:z"
-DATE="+%H:%M:%S"
-
-##--parallel is not always available.
-SORT_ARGS="${SORT_ARGS:---buffer-size=50%}"
-JVM_ARGS=${JVM_ARGS:--Xmx1200M}
-
-# Classpath set in "tdbloader2"
-if [ -z "$JENA_CP" ]
-then
-    echo "Classpath not provided : set JENA_CP" 1>&2
-    exit 1
-fi
-
-USAGE="Usage: tdbloader2 --loc location datafile ..."
-PKG=org.apache.jena.tdb.store.bulkloader2
-
-if [ "$#" -lt 2 ] ; then echo "$USAGE" 1>&2 ; exit 1 ; fi
-
-## Process --loc. Yuk.
-ARG1="$1"
-shift
-if [ "$ARG1" = "-loc" -o  "$ARG1" = "--loc" ]
-then
-    LOC="$1"
-    shift
-else 
-    LOC="${ARG1/-*loc=/}"
-    if [ "$ARG1" = "$LOC" ] ; then echo $USAGE 1>&2 ; exit 1 ; fi
-fi
-
-# Look for any index and data files in the directory.
-# Skip a possible configuration file
-if test -n "$(find "$LOC" -maxdepth 1 -type f ! -name 'this.*' -print -quit)"
-then 
-    echo "Not empty: $LOC"
-    exit 1
-fi
-
-if [ ! -e "$LOC" ] ; then mkdir "$LOC" ; fi
-if [ ! -d "$LOC" ] ; then echo "Not a directory: $LOC" ; exit 1 ; fi
-
-FILES="$@"
-## Stdin?
-KEEPWORKFILES="${KEEPWORKFILES:-}"
-# ---- Start
-log "-- TDB Bulk Loader Start"
-TIME1="$(date +%s)"
-
-# ---- Data loading phase
-log "Data phase"
-# Produce nodes file and triples/quads text file.
-
-DATA_TRIPLES="$LOC/data-triples.$TMP"
-DATA_QUADS="$LOC/data-quads.$TMP"
-
-java $JVM_ARGS -cp "$JENA_CP" "$PKG".CmdNodeTableBuilder \
-    "--loc=$LOC" "--triples=$DATA_TRIPLES" "--quads=$DATA_QUADS" $FILES
-
-# ---- Index intermediates
-## All files are writtern S P O / G S P O columns per row but in different sort orders.
-log "Index phase"
-
-process_rows()
-{
-    local KEYS="$1"
-    local DATA="$2"
-    local IDX=$3
-    local WORK="$LOC/$IDX-txt"
-
-    if [ ! -s "$DATA" ]
-    then
-	return
-	fi
-
-    log "Index $IDX"
-    sort $SORT_ARGS -u $KEYS < "$DATA" > $WORK
-    log "Build $IDX"
-    rm -f "$LOC/$IDX.dat"
-    rm -f "$LOC/$IDX.idn"
-    java -cp "$JENA_CP" "$PKG".CmdIndexBuild "$LOC" "$IDX" "$WORK"
-    # Remove intermediary file.
-    if [ "$KEEPWORKFILES" != "yes" ] 
-    then
-	rm "$WORK" 
-    fi
-}
-
-K1="-k 1,1"
-K2="-k 2,2"
-K3="-k 3,3"
-K4="-k 4,4"
-
-process_rows "$K1 $K2 $K3" "$DATA_TRIPLES" SPO
-
-process_rows "$K2 $K3 $K1" "$DATA_TRIPLES" POS
-
-process_rows "$K3 $K1 $K2" "$DATA_TRIPLES" OSP
-
-process_rows "$K1 $K2 $K3 $K4" "$DATA_QUADS" GSPO
-
-process_rows "$K1 $K3 $K4 $K2" "$DATA_QUADS" GPOS
-
-process_rows "$K1 $K4 $K2 $K3" "$DATA_QUADS" GOSP
-
-process_rows "$K2 $K3 $K4 $K1" "$DATA_QUADS" SPOG
-
-process_rows "$K3 $K4 $K2 $K1" "$DATA_QUADS" POSG
-
-process_rows "$K4 $K2 $K3 $K1" "$DATA_QUADS" OSPG
-
-log "Index phase end"
-TIME2="$(date +%s)"
-
-# ---- Clean up.
-
-if [ "$KEEPWORKFILES" != "yes" ] 
-then
-    rm -f "$DATA_TRIPLES" "$DATA_QUADS" 
-fi
-
-# ---- End
-log "-- TDB Bulk Loader Finish"
-ELAPSED=$(($TIME2-$TIME1))
-log "-- $ELAPSED seconds"


[08/18] jena git commit: Check for return codes from children in tdbloader2 (JENA-977)

Posted by rv...@apache.org.
Check for return codes from children in tdbloader2 (JENA-977)

Ensures that the main script checks for the return code of the child
scripts and aborts if they fail


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/cc4a80ac
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/cc4a80ac
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/cc4a80ac

Branch: refs/heads/master
Commit: cc4a80ac3c44d738a8904ac91b1ece71b446d74a
Parents: a7ac279
Author: Rob Vesse <rv...@apache.org>
Authored: Tue Jun 30 14:25:46 2015 +0100
Committer: Rob Vesse <rv...@apache.org>
Committed: Tue Jun 30 14:29:52 2015 +0100

----------------------------------------------------------------------
 apache-jena/bin/tdbloader2 | 32 ++++++++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/cc4a80ac/apache-jena/bin/tdbloader2
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2 b/apache-jena/bin/tdbloader2
index 310ee66..d0d906c 100755
--- a/apache-jena/bin/tdbloader2
+++ b/apache-jena/bin/tdbloader2
@@ -198,14 +198,38 @@ TIME1="$(date +%s)"
 TOOL_DIR="$JENA_HOME/bin"
 case "$PHASE" in
   all)
-    exec "${TOOL_DIR}/tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@"
-    exec "${TOOL_DIR}/tdbloader2index" $COMMON_ARGS --loc "$LOC"
+    # All Phases
+    # Data Phase
+    "${TOOL_DIR}/tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@"
+    RET=$?
+    if [ $RET -ne 0 ]; then
+      abort $RET "Failed during data phase"
+    fi
+
+    # Index Phase: run the index child script; abort with its exit code on failure
+    "${TOOL_DIR}/tdbloader2index" $COMMON_ARGS --loc "$LOC"
+    RET=$?
+    if [ $RET -ne 0 ]; then
+      abort $RET "Failed during index phase"
+    fi
     ;;
+
   data)
-    exec "${TOOL_DIR}/tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@"
+    # Data Phase
+    "${TOOL_DIR}/tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@"
+    RET=$?
+    if [ $RET -ne 0 ]; then
+      abort $RET "Failed during data phase"
+    fi
     ;;
+
   index)
-    exec "${TOOL_DIR}/tdbloader2index" $COMMON_ARGS --loc "$LOC"
+    # Index Phase
+    "${TOOL_DIR}/tdbloader2index" $COMMON_ARGS --loc "$LOC"
+    RET=$?
+    if [ $RET -ne 0 ]; then
+      abort $RET "Failed during index phase"
+    fi
     ;;
   *)
     abort 1 "Unrecognized phase $PHASE"


[18/18] jena git commit: Don't merge extends when looking for inlining opportunities (JENA-780)

Posted by rv...@apache.org.
Don't merge extends when looking for inlining opportunities (JENA-780)

While checking the new optimization, it was noticed that it introduces a
regression in the query from JENA-779 that originally spawned this proposal.
This was because we used OpExtend.extend() rather than OpExtend.create(),
which could result in other optimizations being blocked.

We now use OpExtend.create() instead and add a test to check that we
don't merge extends.


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/7e2c9527
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/7e2c9527
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/7e2c9527

Branch: refs/heads/jena2
Commit: 7e2c9527fc4807c14175a529ac9c598f6aab35e2
Parents: b02309f
Author: Rob Vesse <rv...@apache.org>
Authored: Tue Jul 7 13:22:07 2015 +0100
Committer: Rob Vesse <rv...@apache.org>
Committed: Tue Jul 7 13:22:07 2015 +0100

----------------------------------------------------------------------
 .../algebra/optimize/TransformEliminateAssignments.java |  2 +-
 .../algebra/optimize/TransformRemoveAssignment.java     |  4 ++--
 .../optimize/TestTransformEliminateAssignments.java     | 12 ++++++++++++
 3 files changed, 15 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/7e2c9527/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformEliminateAssignments.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformEliminateAssignments.java b/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformEliminateAssignments.java
index c468272..e984c07 100644
--- a/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformEliminateAssignments.java
+++ b/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformEliminateAssignments.java
@@ -257,7 +257,7 @@ public class TransformEliminateAssignments extends TransformCopy {
 
         // May be able to eliminate the extend entirely in some cases
         if (newAssignments.size() > 0) {
-            return OpExtend.extend(subOp, newAssignments);
+            return OpExtend.create(subOp, newAssignments);
         } else {
             return subOp;
         }

http://git-wip-us.apache.org/repos/asf/jena/blob/7e2c9527/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformRemoveAssignment.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformRemoveAssignment.java b/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformRemoveAssignment.java
index d7c08d4..88bd048 100644
--- a/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformRemoveAssignment.java
+++ b/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformRemoveAssignment.java
@@ -99,14 +99,14 @@ public class TransformRemoveAssignment extends TransformCopy {
             // If topmost only ignore any transformations lower down the tree
             // hence call getSubOp() rather than using the provided subOp
             if (assignments.size() > 0) {
-                return OpExtend.extend(opExtend.getSubOp(), assignments);
+                return OpExtend.create(opExtend.getSubOp(), assignments);
             } else {
                 return opExtend.getSubOp();
             }
         } else {
             // Otherwise preserve any transformations from lower down the tree
             if (assignments.size() > 0) {
-                return OpExtend.extend(subOp, assignments);
+                return OpExtend.create(subOp, assignments);
             } else {
                 return subOp;
             }

http://git-wip-us.apache.org/repos/asf/jena/blob/7e2c9527/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestTransformEliminateAssignments.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestTransformEliminateAssignments.java b/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestTransformEliminateAssignments.java
index 7d6cf40..fa16c94 100644
--- a/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestTransformEliminateAssignments.java
+++ b/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestTransformEliminateAssignments.java
@@ -379,6 +379,18 @@ public class TestTransformEliminateAssignments {
                      "      (bgp (triple ?x ?y ?z)))))");
         //@formatter:on
     }
+    
+    @Test
+    public void no_merge_01() {
+        // We should not merge extends
+        //@formatter:off
+        testNoChange("(project (?x ?y)",
+                     "  (filter (exprlist ?x)",
+                     "    (extend (?x true)",
+                     "      (extend (?y false)",
+                     "        (table unit)))))");
+        //@formatter:on
+    }
 
     @Test
     public void scope_01() {


[14/18] jena git commit: Comment style consistent (JENA-977)

Posted by rv...@apache.org.
Comment style consistent (JENA-977)


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/9b070391
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/9b070391
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/9b070391

Branch: refs/heads/master
Commit: 9b0703919277bf4197241504feef355a14f1fe56
Parents: 12dc2cc
Author: Rob Vesse <rv...@apache.org>
Authored: Tue Jun 30 16:36:38 2015 +0100
Committer: Rob Vesse <rv...@apache.org>
Committed: Tue Jun 30 16:36:38 2015 +0100

----------------------------------------------------------------------
 apache-jena/bin/tdbloader2data  | 2 +-
 apache-jena/bin/tdbloader2index | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/9b070391/apache-jena/bin/tdbloader2data
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2data b/apache-jena/bin/tdbloader2data
index ff44695..d0dde29 100755
--- a/apache-jena/bin/tdbloader2data
+++ b/apache-jena/bin/tdbloader2data
@@ -237,7 +237,7 @@ if [ ! -d "$LOC" ]; then
   abort 1 "Database location is not a directory: $LOC"
 fi
 
-## Prepare JVM Arguments
+# Prepare JVM Arguments
 JVM_ARGS=${JVM_ARGS:--Xmx1200M}
 debug "JVM Arguments are $JVM_ARGS"
 

http://git-wip-us.apache.org/repos/asf/jena/blob/9b070391/apache-jena/bin/tdbloader2index
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2index b/apache-jena/bin/tdbloader2index
index c057b49..458698c 100755
--- a/apache-jena/bin/tdbloader2index
+++ b/apache-jena/bin/tdbloader2index
@@ -242,7 +242,7 @@ debug "Data text files are $DATA_TRIPLES and $DATA_QUADS"
 if [ -z "$SORT_ARGS" ]; then
     SORT_ARGS="--buffer-size=50%"
 
-    ##--parallel is not always available.
+    # --parallel is not always available.
     # Temporarily disable exit on error while we check for --parallel support
     set +e
     sort --parallel=3 < /dev/null 2>/dev/null
@@ -263,7 +263,7 @@ fi
 debug "Jena Classpath is $JENA_CP"
 
 # ---- Index intermediates
-## All files are written S P O / G S P O columns per row but in different sort orders.
+# All files are written S P O / G S P O columns per row but in different sort orders.
 info "Index Building Phase"
 
 # Check whether Pipe Viewer is available