You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2013/01/01 12:55:28 UTC

svn commit: r1427394 - in /jena/trunk/apache-jena: bin/tdbloader2worker cmd-maker

Author: andy
Date: Tue Jan  1 11:55:28 2013
New Revision: 1427394

URL: http://svn.apache.org/viewvc?rev=1427394&view=rev
Log:
Copy main tdbloader2 script

Modified:
    jena/trunk/apache-jena/bin/tdbloader2worker
    jena/trunk/apache-jena/cmd-maker

Modified: jena/trunk/apache-jena/bin/tdbloader2worker
URL: http://svn.apache.org/viewvc/jena/trunk/apache-jena/bin/tdbloader2worker?rev=1427394&r1=1427393&r2=1427394&view=diff
==============================================================================
--- jena/trunk/apache-jena/bin/tdbloader2worker (original)
+++ jena/trunk/apache-jena/bin/tdbloader2worker Tue Jan  1 11:55:28 2013
@@ -1,4 +1,4 @@
-#!/usr/bin/env bash
+#!/bin/bash
 
 ## Licensed to the Apache Software Foundation (ASF) under one
 ## or more contributor license agreements.  See the NOTICE file
@@ -16,131 +16,24 @@
 ## See the License for the specific language governing permissions and
 ## limitations under the License.
 
-# The environment for this sub-script is setup by "tdbloader2"
-
-# Exit on error.
-set -e
-
-# Sort order is ASCII
-export LC_LOCALE="C"
-
-log() { echo " $(date $DATE)" "$@" ; }
-
-TMP=$$
-#DATE="+%Y-%m-%dT%H:%M:%S%:z"
-DATE="+%H:%M:%S"
-
-##--parallel is not always available.
-SORT_ARGS="${SORT_ARGS:---buffer-size=50%}"
-JVM_ARGS=${JVM_ARGS:--Xmx1200M}
-
-# Classpath set in "tdbloader2"
-if [ -z "$JENA_CP" ]
-then
-    echo "Classpath not provided : set JENA_CP" 1>&2
+if [ "$TDBROOT" = "" ]
+    then 
+    echo "TDBROOT is not set" 1>&2
     exit 1
 fi
 
-USAGE="Usage: $(basename $0) --loc location datafile ..."
-PKG=com.hp.hpl.jena.tdb.store.bulkloader2
+INIT="$TDBROOT/bin/tdb_init"
 
-if [ "$#" -lt 2 ] ; then echo "$USAGE" 1>&2 ; exit 1 ; fi
-
-## Process --loc. Yuk.
-ARG1="$1"
-shift
-if [ "$ARG1" = "-loc" -o  "$ARG1" = "--loc" ]
-then
-    LOC="$1"
-    shift
-else 
-    LOC="${ARG1/-*loc=/}"
-    if [ "$ARG1" = "$LOC" ] ; then echo $USAGE 1>&2 ; exit 1 ; fi
+if [ ! -r "$INIT" ] 
+then 
+    echo "Script $INIT (\$TDBROOT/bin/tdb_init) does not exist or is not readable"
+    exit 1
 fi
 
-if [ ! -e "$LOC" ] ; then mkdir "$LOC" ; fi
-if [ ! -d "$LOC" ] ; then echo "Not a directory: $LOC" ; exit 1 ; fi
-
-FILES="$@"
-## Stdin?
-KEEPWORKFILES="${KEEPWORKFILES:-}"
-# ---- Start
-log "-- TDB Bulk Loader Start"
-TIME1="$(date +%s)"
-
-# ---- Data loading phase
-log "Data phase"
-# Produce nodes file and triples/quads text file.
-
-DATA_TRIPLES="$LOC/data-triples.$TMP"
-DATA_QUADS="$LOC/data-quads.$TMP"
-
-java $JVM_ARGS -cp "$JENA_CP" "$PKG".CmdNodeTableBuilder \
-    "--loc=$LOC" "--triples=$DATA_TRIPLES" "--quads=$DATA_QUADS" $FILES
-
-# ---- Index intermediates
-## All files are written with S P O / G S P O columns per row, but in different sort orders.
-log "Index phase"
-
-process_rows()
-{
-    local KEYS="$1"
-    local DATA="$2"
-    local IDX=$3
-    local WORK="$LOC/$IDX-txt"
-
-    if [ ! -s "$DATA" ]
-    then
-	return
-	fi
-
-    log "Index $IDX"
-    sort $SORT_ARGS -u $KEYS < "$DATA" > $WORK
-    log "Build $IDX"
-    rm -f "$LOC/$IDX.dat"
-    rm -f "$LOC/$IDX.idn"
-    java -cp "$JENA_CP" "$PKG".CmdIndexBuild "$LOC" "$IDX" "$WORK"
-    # Remove intermediary file.
-    if [ "$KEEPWORKFILES" != "yes" ] 
-    then
-	rm "$WORK" 
-    fi
-}
-
-K1="-k 1,1"
-K2="-k 2,2"
-K3="-k 3,3"
-K4="-k 4,4"
-
-process_rows "$K1 $K2 $K3" "$DATA_TRIPLES" SPO
-
-process_rows "$K2 $K3 $K1" "$DATA_TRIPLES" POS
-
-process_rows "$K3 $K1 $K2" "$DATA_TRIPLES" OSP
-
-process_rows "$K1 $K2 $K3 $K4" "$DATA_QUADS" GSPO
-
-process_rows "$K1 $K3 $K4 $K2" "$DATA_QUADS" GPOS
-
-process_rows "$K1 $K4 $K2 $K3" "$DATA_QUADS" GOSP
-
-process_rows "$K2 $K3 $K4 $K1" "$DATA_QUADS" SPOG
-
-process_rows "$K3 $K4 $K2 $K1" "$DATA_QUADS" POSG
-
-process_rows "$K4 $K2 $K3 $K1" "$DATA_QUADS" OSPG
-
-log "Index phase end"
-TIME2="$(date +%s)"
-
-# ---- Clean up.
-
-if [ "$KEEPWORKFILES" != "yes" ] 
-then
-    rm -f "$DATA_TRIPLES" "$DATA_QUADS" 
-fi
+. "$INIT"
 
-# ---- End
-log "-- TDB Bulk Loader Finish"
-ELAPSED=$(($TIME2-$TIME1))
-log "-- $ELAPSED seconds"
+#echo "$TDB_CP"
+export JENA_CP="$TDB_CP"
+## And --parallel=3 if available.
+export SORT_ARGS="${SORT_ARGS:---buffer-size=50%}"
+exec "$TDBROOT/bin/tdbloader2worker" "$@"

Modified: jena/trunk/apache-jena/cmd-maker
URL: http://svn.apache.org/viewvc/jena/trunk/apache-jena/cmd-maker?rev=1427394&r1=1427393&r2=1427394&view=diff
==============================================================================
--- jena/trunk/apache-jena/cmd-maker (original)
+++ jena/trunk/apache-jena/cmd-maker Tue Jan  1 11:55:28 2013
@@ -89,4 +89,4 @@ do
 done
 
 ## Specials
-cp ../jena-tdb/bin/tdbloader2 bin
+cp ../jena-tdb/bin/tdbloader2 bin/tdbloader2worker