You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2012/08/10 13:41:57 UTC

svn commit: r1371666 - in /jena/trunk/apache-jena: README bin/tdbloader2 bin/tdbloader2worker cmd-maker template.bat

Author: andy
Date: Fri Aug 10 11:41:56 2012
New Revision: 1371666

URL: http://svn.apache.org/viewvc?rev=1371666&view=rev
Log:
Updates for tdbloader2

Added:
    jena/trunk/apache-jena/bin/tdbloader2worker   (with props)
Modified:
    jena/trunk/apache-jena/README
    jena/trunk/apache-jena/bin/tdbloader2
    jena/trunk/apache-jena/cmd-maker
    jena/trunk/apache-jena/template.bat

Modified: jena/trunk/apache-jena/README
URL: http://svn.apache.org/viewvc/jena/trunk/apache-jena/README?rev=1371666&r1=1371665&r2=1371666&view=diff
==============================================================================
--- jena/trunk/apache-jena/README (original)
+++ jena/trunk/apache-jena/README Fri Aug 10 11:41:56 2012
@@ -32,24 +32,27 @@
   unzipped the Jena distribution:
 
   Windows:
-    set JENAROOT=\path\to\apache-jena-2.7.2
+    set JENA_HOME=\path\to\apache-jena-2.7.5
     bat\sparql.bat --version    
 
-  Linux
-    export JENAROOT=/path/to/apache-jena-2.7.2
+  Linux:
+    The command scripts automatically set JENA_HOME but if you want
+    to switch to a different version for the same scripts:
+
+    export JENA_HOME=/path/to/apache-jena-2.7.5
     bin/sparql --version    
     
   If you receive a class not found exception when trying to run one of the 
-  scripts then you may have set JENAROOT incorrectly. A quick and easy way
-  to validate that JENAROOT is set correctly is to run the following:
+  scripts then you may have set JENA_HOME incorrectly. A quick and easy way
+  to validate that JENA_HOME is set correctly is to run the following:
   
   Windows:
-    cd %JENAROOT%
+    cd %JENA_HOME%
     
   Linux:
-    cd $JENAROOT
+    cd $JENA_HOME
     
-  If this command returns an error then JENAROOT is not pointed to a valid directory
+  If this command returns an error then JENA_HOME is not pointed to a valid directory
 
 
   Website

Modified: jena/trunk/apache-jena/bin/tdbloader2
URL: http://svn.apache.org/viewvc/jena/trunk/apache-jena/bin/tdbloader2?rev=1371666&r1=1371665&r2=1371666&view=diff
==============================================================================
--- jena/trunk/apache-jena/bin/tdbloader2 (original)
+++ jena/trunk/apache-jena/bin/tdbloader2 Fri Aug 10 11:41:56 2012
@@ -1,134 +1,37 @@
-#!/bin/bash
+#!/bin/sh
+## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
-## Licensed to the Apache Software Foundation (ASF) under one
-## or more contributor license agreements.  See the NOTICE file
-## distributed with this work for additional information
-## regarding copyright ownership.  The ASF licenses this file
-## to you under the Apache License, Version 2.0 (the
-## "License"); you may not use this file except in compliance
-## with the License.  You may obtain a copy of the License at
-##
-##     http://www.apache.org/licenses/LICENSE-2.0
-##
-## Unless required by applicable law or agreed to in writing, software
-## distributed under the License is distributed on an "AS IS" BASIS,
-## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-## See the License for the specific language governing permissions and
-## limitations under the License.
-
-# Exit on error.
-set -e
-
-# Sort order is ASCII
-export LC_LOCALE="C"
-
-log() { echo " $(date $DATE)" "$@" ; }
-
-TMP=$$
-#DATE="+%Y-%m-%dT%H:%M:%S%:z"
-DATE="+%H:%M:%S"
-CP="$($TDBROOT/bin/tdb_path $TDBROOT)"
-USAGE="Usage: $(basename $0) --loc location datafile ..."
-PKG=com.hp.hpl.jena.tdb.store.bulkloader2
-
-if [ "$#" -lt 2 ] ; then echo "$USAGE" 1>&2 ; exit 1 ; fi
-
-## Process --loc. Yuk.
-ARG1="$1"
-shift
-if [ "$ARG1" = "-loc" -o  "$ARG1" = "--loc" ]
-then
-    LOC="$1"
-    shift
-else 
-    LOC="${ARG1/-*loc=/}"
-    if [ "$ARG1" = "$LOC" ] ; then echo $USAGE 1>&2 ; exit 1 ; fi
-fi
-
-if [ ! -e "$LOC" ] ; then mkdir "$LOC" ; fi
-if [ ! -d "$LOC" ] ; then echo "Not a directory: $LOC" ; exit 1 ; fi
-
-FILES="$@"
-## Stdin?
-KEEPWORKFILES="${KEEPWORKFILES:-}"
-# ---- Start
-log "-- TDB Bulk Loader Start"
-TIME1="$(date +%s)"
-
-# ---- Data loading phase
-log "Data phase"
-# Produce nodes file and triples/quads text file.
-
-DATA_TRIPLES="$LOC/data-triples.$TMP"
-DATA_QUADS="$LOC/data-quads.$TMP"
-
-JVM_ARGS=${JVM_ARGS:--Xmx1200M}
-java $JVM_ARGS -cp "$CP" "$PKG".CmdNodeTableBuilder \
-    "--loc=$LOC" "--triples=$DATA_TRIPLES" "--quads=$DATA_QUADS" $FILES
-
-# ---- Index intermediates
-## All files are writtern S P O / G S P O columns per row but in different sort orders.
-log "Index phase"
-
-process_rows()
-{
-    local KEYS="$1"
-    local DATA="$2"
-    local IDX=$3
-    local WORK="$LOC/$IDX-txt"
-
-    if [ ! -s "$DATA" ]
-    then
-	return
+# If JENA_HOME is empty
+if [ -z "$JENA_HOME" ]
+	then
+    SCRIPT="$0"
+    # Catch common issue: script has been symlinked
+	if [ -L "$SCRIPT" ]
+		then
+		SCRIPT="$(readlink "$0")"
+		# If link is relative
+		case "$SCRIPT" in
+   			/*) ;; # fine
+			*) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix
+		esac
 	fi
 
-    log "Index $IDX"
-    sort -u $KEYS < "$DATA" > $WORK
-    log "Build $IDX"
-    rm -f "$LOC/$IDX.dat"
-    rm -f "$LOC/$IDX.idn"
-    java -cp "$CP" "$PKG".CmdIndexBuild "$LOC" "$IDX" "$WORK"
-    # Remove intermediary file.
-    if [ "$KEEPWORKFILES" != "yes" ] 
-    then
-	rm "$WORK" 
-    fi
-}
-
-K1="-k 1,1"
-K2="-k 2,2"
-K3="-k 3,3"
-K4="-k 4,4"
-
-process_rows "$K1 $K2 $K3" "$DATA_TRIPLES" SPO
-
-process_rows "$K2 $K3 $K1" "$DATA_TRIPLES" POS
-
-process_rows "$K3 $K1 $K2" "$DATA_TRIPLES" OSP
-
-process_rows "$K1 $K2 $K3 $K4" "$DATA_QUADS" GSPO
-
-process_rows "$K1 $K3 $K4 $K2" "$DATA_QUADS" GPOS
-
-process_rows "$K1 $K4 $K2 $K3" "$DATA_QUADS" GOSP
-
-process_rows "$K2 $K3 $K4 $K1" "$DATA_QUADS" SPOG
-
-process_rows "$K3 $K4 $K2 $K1" "$DATA_QUADS" POSG
-
-process_rows "$K4 $K2 $K3 $K1" "$DATA_QUADS" OSPG
-
-log "Index phase end"
-TIME2="$(date +%s)"
-
-# ---- Clean up.
-
-if [ "$KEEPWORKFILES" != "yes" ] 
-then
-    rm -f "$DATA_TRIPLES" "$DATA_QUADS" 
+    # Work out root from script location
+    JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
 fi
 
-# ---- End
-log "-- TDB Bulk Loader Finish"
-ELAPSED=$(($TIME2-$TIME1))
-log "-- $ELAPSED seconds"
+# ---- Setup
+JVM_ARGS=${JVM_ARGS:--Xmx1024M}
+# Expand JENA_HOME but literal *
+JENA_CP="$JENA_HOME"'/lib/*'
+SOCKS=
+LOGGING="-Dlog4j.configuration=file:$JENA_HOME/jena-log4j.properties"
+
+# Platform specific fixup: on CYGWIN convert path and end with a ';'
+case "$(uname)" in
+   CYGWIN*) JENA_CP="$(cygpath -wp "$JENA_CP");";;
+esac
+
+# Hand over to the worker in $JENA_HOME/bin; do not rely on it being on PATH.
+export JENA_CP
+exec "$JENA_HOME/bin/tdbloader2worker" "$@"

Added: jena/trunk/apache-jena/bin/tdbloader2worker
URL: http://svn.apache.org/viewvc/jena/trunk/apache-jena/bin/tdbloader2worker?rev=1371666&view=auto
==============================================================================
--- jena/trunk/apache-jena/bin/tdbloader2worker (added)
+++ jena/trunk/apache-jena/bin/tdbloader2worker Fri Aug 10 11:41:56 2012
@@ -0,0 +1,143 @@
+#!/bin/sh
+
+## Licensed to the Apache Software Foundation (ASF) under one
+## or more contributor license agreements.  See the NOTICE file
+## distributed with this work for additional information
+## regarding copyright ownership.  The ASF licenses this file
+## to you under the Apache License, Version 2.0 (the
+## "License"); you may not use this file except in compliance
+## with the License.  You may obtain a copy of the License at
+##
+##     http://www.apache.org/licenses/LICENSE-2.0
+##
+## Unless required by applicable law or agreed to in writing, software
+## distributed under the License is distributed on an "AS IS" BASIS,
+## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+## See the License for the specific language governing permissions and
+## limitations under the License.
+
+# The environment for this sub-script is setup by "tdbloader2"
+
+# Exit on error.
+set -e
+
+# Sort order is ASCII: LC_ALL=C makes sort(1) compare bytes in any user locale.
+export LC_ALL="C"
+# log MESSAGE...  Print the arguments prefixed with the time in $DATE format.
+log() { echo " $(date $DATE)" "$@" ; }
+# Process id of this run; used as the suffix of the work file names.
+TMP=$$
+#DATE="+%Y-%m-%dT%H:%M:%S%:z"
+DATE="+%H:%M:%S"
+# Classpath set in "tdbloader2" and passed in via the JENA_CP environment variable.
+
+if [ -z "$JENA_CP" ]
+then
+    echo "Classpath not provided : set JENA_CP" 1>&2
+    exit 1
+fi
+
+USAGE="Usage: $(basename "$0") --loc location datafile ..."
+# Java package holding the bulk loader command classes run below.
+PKG=com.hp.hpl.jena.tdb.store.bulkloader2
+
+if [ "$#" -lt 2 ] ; then echo "$USAGE" 1>&2 ; exit 1 ; fi
+
+## Process --loc. Accepts "--loc DIR", "-loc DIR", "--loc=DIR" and "-loc=DIR".
+## Uses only POSIX sh constructs (no ${var/pat/} substitution) to suit #!/bin/sh.
+ARG1="$1"
+shift
+case "$ARG1" in
+    -loc|--loc)     LOC="$1" ; shift ;;
+    -loc=*|--loc=*) LOC="${ARG1#*=}" ;;
+    *)              echo "$USAGE" 1>&2 ; exit 1 ;;
+esac
+
+# Create the location if it is missing; it must end up being a directory.
+if [ ! -e "$LOC" ] ; then mkdir "$LOC" ; fi
+if [ ! -d "$LOC" ] ; then echo "Not a directory: $LOC" 1>&2 ; exit 1 ; fi
+
+## Data files are whatever remains in "$@"; passed on quoted, so names with spaces work.
+## Stdin?
+# Set KEEPWORKFILES=yes in the environment to keep the intermediate files.
+KEEPWORKFILES="${KEEPWORKFILES:-}"
+# ---- Start
+log "-- TDB Bulk Loader Start"
+TIME1="$(date +%s)"
+
+# ---- Data loading phase
+log "Data phase"
+# Produce nodes file and triples/quads text file.
+DATA_TRIPLES="$LOC/data-triples.$TMP"
+DATA_QUADS="$LOC/data-quads.$TMP"
+# JVM_ARGS is left unquoted on purpose: it may hold several JVM options.
+JVM_ARGS=${JVM_ARGS:--Xmx1200M}
+java $JVM_ARGS -cp "$JENA_CP" "$PKG".CmdNodeTableBuilder \
+    "--loc=$LOC" "--triples=$DATA_TRIPLES" "--quads=$DATA_QUADS" "$@"
+
+# ---- Index intermediates
+## All files are written S P O / G S P O columns per row but in different sort orders.
+log "Index phase"
+
+# process_rows KEYS DATA IDX
+#   Sort the rows of file DATA by the sort(1) options KEYS, dropping duplicates,
+#   then run CmdIndexBuild for index IDX in $LOC. Does nothing if DATA is missing or empty.
+process_rows()
+{
+    local KEYS="$1"
+    local DATA="$2"
+    local IDX="$3"
+    local WORK="$LOC/$IDX-txt"
+
+    if [ ! -s "$DATA" ] ; then
+        return
+    fi
+
+    log "Index $IDX"
+    # KEYS is unquoted on purpose (several "-k n,n" options); paths are quoted.
+    sort -u $KEYS < "$DATA" > "$WORK"
+    log "Build $IDX"
+    rm -f "$LOC/$IDX.dat"
+    rm -f "$LOC/$IDX.idn"
+    java -cp "$JENA_CP" "$PKG".CmdIndexBuild "$LOC" "$IDX" "$WORK"
+    # Remove intermediary file.
+    if [ "$KEEPWORKFILES" != "yes" ] ; then rm "$WORK" ; fi
+}
+
+# sort(1) key options selecting columns 1 to 4 of the data files.
+K1="-k 1,1"
+K2="-k 2,2"
+K3="-k 3,3"
+K4="-k 4,4"
+
+# index_triples KEYS IDX / index_quads KEYS IDX
+#   Run process_rows for index IDX over the triples / quads data file.
+index_triples() { process_rows "$1" "$DATA_TRIPLES" "$2" ; }
+index_quads()   { process_rows "$1" "$DATA_QUADS" "$2" ; }
+
+# Triple indexes.
+index_triples "$K1 $K2 $K3" SPO
+index_triples "$K2 $K3 $K1" POS
+index_triples "$K3 $K1 $K2" OSP
+
+# Quad indexes.
+index_quads "$K1 $K2 $K3 $K4" GSPO
+index_quads "$K1 $K3 $K4 $K2" GPOS
+index_quads "$K1 $K4 $K2 $K3" GOSP
+index_quads "$K2 $K3 $K4 $K1" SPOG
+index_quads "$K3 $K4 $K2 $K1" POSG
+index_quads "$K4 $K2 $K3 $K1" OSPG
+
+log "Index phase end"
+TIME2="$(date +%s)"
+
+# ---- Clean up.
+
+# The data files are kept only when KEEPWORKFILES is exactly "yes".
+[ "$KEEPWORKFILES" = "yes" ] || rm -f "$DATA_TRIPLES" "$DATA_QUADS"
+
+# ---- End
+log "-- TDB Bulk Loader Finish"
+# Wall-clock duration of the load in whole seconds.
+ELAPSED=$((TIME2 - TIME1))
+log "-- $ELAPSED seconds"

Propchange: jena/trunk/apache-jena/bin/tdbloader2worker
------------------------------------------------------------------------------
    svn:executable = *

Modified: jena/trunk/apache-jena/cmd-maker
URL: http://svn.apache.org/viewvc/jena/trunk/apache-jena/cmd-maker?rev=1371666&r1=1371665&r2=1371666&view=diff
==============================================================================
--- jena/trunk/apache-jena/cmd-maker (original)
+++ jena/trunk/apache-jena/cmd-maker Fri Aug 10 11:41:56 2012
@@ -16,7 +16,18 @@
 ## See the License for the specific language governing permissions and
 ## limitations under the License.
 
-# Not tdbloader2
+# Not tdbloader2.
+## tdbloader2 is slightly different.
+##   The main program is not a java program
+##   It is split into tdbloader2 and tdbloader2worker
+##   tdbloader2worker (the main program) is the same in 
+##   development and here. tdbloader2 is like the script 
+##   wrappers except it execs tdbloader2worker, not
+##   java.  It needs manually updating.
+##   Replace the java exec with:
+##     export JENA_CP
+##     exec tdbloader2worker "$@"
+
 CMDS=$(cat <<EOF
 jena.rdfcat
 jena.rdfcompare

Modified: jena/trunk/apache-jena/template.bat
URL: http://svn.apache.org/viewvc/jena/trunk/apache-jena/template.bat?rev=1371666&r1=1371665&r2=1371666&view=diff
==============================================================================
--- jena/trunk/apache-jena/template.bat (original)
+++ jena/trunk/apache-jena/template.bat Fri Aug 10 11:41:56 2012
@@ -1,15 +1,18 @@
 @echo off
 @rem Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0
 
-if NOT "%JENAROOT%" == "" goto :okRoot
-echo JENAROOT not set
-exit /B
+if "%JENAROOT%" == "" goto :rootNotSet
+set JENA_HOME=%JENAROOT%
+:rootNotSet
 
-:okRoot
+if NOT "%JENA_HOME%" == "" goto :okHome
+echo JENA_HOME not set
+exit /B
 
+:okHome
 set JVM_ARGS=-Xmx1024M
-set JENA_CP=%JENAROOT%\lib\*;
-set LOGGING=file:%JENAROOT%/jena-log4j.properties
+set JENA_CP=%JENA_HOME%\lib\*;
+set LOGGING=file:%JENA_HOME%/jena-log4j.properties
 
 java %JVM_ARGS% -Dlog4j.configuration="%LOGGING%" -cp "%JENA_CP%" JENA_CMD %*
 exit /B