You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by rv...@apache.org on 2015/06/30 17:09:48 UTC

jena git commit: Final pieces of tdbloader2 script clean up (JENA-977)

Repository: jena
Updated Branches:
  refs/heads/JENA-977 c25ad5d80 -> 12dc2cc66


Final pieces of tdbloader2 script clean up (JENA-977)

- Fix whitespace inconsistencies in tdbloader2 scripts
- Remove defunct tdbloader2worker script
- Remove defunct and broken scripts from jena-tdb/bin/


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/12dc2cc6
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/12dc2cc6
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/12dc2cc6

Branch: refs/heads/JENA-977
Commit: 12dc2cc66640e432a4e2f5b45ebf2fb16c995440
Parents: c25ad5d
Author: Rob Vesse <rv...@apache.org>
Authored: Tue Jun 30 16:08:52 2015 +0100
Committer: Rob Vesse <rv...@apache.org>
Committed: Tue Jun 30 16:08:52 2015 +0100

----------------------------------------------------------------------
 apache-jena/bin/tdbloader2       |  16 ++--
 apache-jena/bin/tdbloader2data   |   2 +-
 apache-jena/bin/tdbloader2index  |  10 +--
 apache-jena/bin/tdbloader2worker | 154 ----------------------------------
 jena-tdb/bin/tdbloader2          |  47 -----------
 jena-tdb/bin/tdbloader2worker    | 154 ----------------------------------
 6 files changed, 14 insertions(+), 369 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/12dc2cc6/apache-jena/bin/tdbloader2
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2 b/apache-jena/bin/tdbloader2
index 12168fa..52950bf 100755
--- a/apache-jena/bin/tdbloader2
+++ b/apache-jena/bin/tdbloader2
@@ -122,14 +122,14 @@ function resolveLink() {
 }
 
 # If JENA_HOME is empty
-if [ -z "$JENA_HOME" ];	then
+if [ -z "$JENA_HOME" ]; then
   echo "JENA_HOME not set, attempting to locate JENA_HOME automatically"
   SCRIPT="$0"
   # Catch common issue: script has been symlinked
-	if [ -L "$SCRIPT" ]; then
-		SCRIPT=$(resolveLink "$0")
-		# If link is relative
-		case "$SCRIPT" in
+  if [ -L "$SCRIPT" ]; then
+    SCRIPT=$(resolveLink "$0")
+    # If link is relative
+    case "$SCRIPT" in
       /*)
         # Already absolute
         ;;
@@ -137,8 +137,8 @@ if [ -z "$JENA_HOME" ];	then
         # Relative, make absolute
         SCRIPT=$( dirname "$0" )/$SCRIPT
         ;;
-		esac
-	fi
+    esac
+  fi
 
   # Work out root from script location
   JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )"
@@ -149,7 +149,7 @@ fi
 if [ -L "${JENA_HOME}" ]; then
   JENA_HOME=$(resolveLink "$JENA_HOME")
   # If link is relative
-	case "$JENA_HOME" in
+  case "$JENA_HOME" in
     /*)
       # Already absolute
       ;;

http://git-wip-us.apache.org/repos/asf/jena/blob/12dc2cc6/apache-jena/bin/tdbloader2data
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2data b/apache-jena/bin/tdbloader2data
index d0ca066..ff44695 100755
--- a/apache-jena/bin/tdbloader2data
+++ b/apache-jena/bin/tdbloader2data
@@ -50,7 +50,7 @@ fi
 if [ -L "${JENA_HOME}" ]; then
   JENA_HOME=$(resolveLink "$JENA_HOME")
   # If link is relative
-	case "$JENA_HOME" in
+  case "$JENA_HOME" in
     /*)
       # Already absolute
       ;;

http://git-wip-us.apache.org/repos/asf/jena/blob/12dc2cc6/apache-jena/bin/tdbloader2index
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2index b/apache-jena/bin/tdbloader2index
index 4d50f93..c057b49 100755
--- a/apache-jena/bin/tdbloader2index
+++ b/apache-jena/bin/tdbloader2index
@@ -50,7 +50,7 @@ fi
 if [ -L "${JENA_HOME}" ]; then
   JENA_HOME=$(resolveLink "$JENA_HOME")
   # If link is relative
-	case "$JENA_HOME" in
+  case "$JENA_HOME" in
     /*)
       # Already absolute
       ;;
@@ -247,7 +247,7 @@ if [ -z "$SORT_ARGS" ]; then
     set +e
     sort --parallel=3 < /dev/null 2>/dev/null
     if [ $? = 0 ]; then
-    	SORT_ARGS="$SORT_ARGS --parallel=3"
+      SORT_ARGS="$SORT_ARGS --parallel=3"
     fi
     set -e
 fi
@@ -315,8 +315,8 @@ generate_index()
 
     if [ ! -s "$DATA" ]; then
       debug "Skipping Index $IDX as no relevant data to index"
-	    return
-	  fi
+      return
+    fi
 
     info "Creating Index $IDX"
 
@@ -390,7 +390,7 @@ generate_index()
     # Remove work file unless keeping
     if [ $KEEP_WORK = 0 ]; then
       debug "Cleaning up work file $WORK"
-	    rm "$WORK"
+      rm "$WORK"
     fi
 }
 

http://git-wip-us.apache.org/repos/asf/jena/blob/12dc2cc6/apache-jena/bin/tdbloader2worker
----------------------------------------------------------------------
diff --git a/apache-jena/bin/tdbloader2worker b/apache-jena/bin/tdbloader2worker
deleted file mode 100755
index ca26d82..0000000
--- a/apache-jena/bin/tdbloader2worker
+++ /dev/null
@@ -1,154 +0,0 @@
-#!/usr/bin/env bash
-
-## Licensed to the Apache Software Foundation (ASF) under one
-## or more contributor license agreements.  See the NOTICE file
-## distributed with this work for additional information
-## regarding copyright ownership.  The ASF licenses this file
-## to you under the Apache License, Version 2.0 (the
-## "License"); you may not use this file except in compliance
-## with the License.  You may obtain a copy of the License at
-##
-##     http://www.apache.org/licenses/LICENSE-2.0
-##
-## Unless required by applicable law or agreed to in writing, software
-## distributed under the License is distributed on an "AS IS" BASIS,
-## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-## See the License for the specific language governing permissions and
-## limitations under the License.
-
-# The environment for this sub-script is setup by "tdbloader2"
-
-# Exit on error.
-set -e
-
-# Sort order is ASCII
-export LC_ALL="C"
-
-log() { echo " $(date $DATE)" "$@" ; }
-
-TMP=$$
-#DATE="+%Y-%m-%dT%H:%M:%S%:z"
-DATE="+%H:%M:%S"
-
-##--parallel is not always available.
-SORT_ARGS="${SORT_ARGS:---buffer-size=50%}"
-JVM_ARGS=${JVM_ARGS:--Xmx1200M}
-
-# Classpath set in "tdbloader2"
-if [ -z "$JENA_CP" ]
-then
-    echo "Classpath not provided : set JENA_CP" 1>&2
-    exit 1
-fi
-
-USAGE="Usage: tdbloader2 --loc location datafile ..."
-PKG=org.apache.jena.tdb.store.bulkloader2
-
-if [ "$#" -lt 2 ] ; then echo "$USAGE" 1>&2 ; exit 1 ; fi
-
-## Process --loc. Yuk.
-ARG1="$1"
-shift
-if [ "$ARG1" = "-loc" -o  "$ARG1" = "--loc" ]
-then
-    LOC="$1"
-    shift
-else 
-    LOC="${ARG1/-*loc=/}"
-    if [ "$ARG1" = "$LOC" ] ; then echo $USAGE 1>&2 ; exit 1 ; fi
-fi
-
-# Look for any index and data files in the directory.
-# Skip a possible configuration file
-if test -n "$(find "$LOC" -maxdepth 1 -type f ! -name 'this.*' -print -quit)"
-then 
-    echo "Not empty: $LOC"
-    exit 1
-fi
-
-if [ ! -e "$LOC" ] ; then mkdir "$LOC" ; fi
-if [ ! -d "$LOC" ] ; then echo "Not a directory: $LOC" ; exit 1 ; fi
-
-FILES="$@"
-## Stdin?
-KEEPWORKFILES="${KEEPWORKFILES:-}"
-# ---- Start
-log "-- TDB Bulk Loader Start"
-TIME1="$(date +%s)"
-
-# ---- Data loading phase
-log "Data phase"
-# Produce nodes file and triples/quads text file.
-
-DATA_TRIPLES="$LOC/data-triples.$TMP"
-DATA_QUADS="$LOC/data-quads.$TMP"
-
-java $JVM_ARGS -cp "$JENA_CP" "$PKG".CmdNodeTableBuilder \
-    "--loc=$LOC" "--triples=$DATA_TRIPLES" "--quads=$DATA_QUADS" $FILES
-
-# ---- Index intermediates
-## All files are writtern S P O / G S P O columns per row but in different sort orders.
-log "Index phase"
-
-process_rows()
-{
-    local KEYS="$1"
-    local DATA="$2"
-    local IDX=$3
-    local WORK="$LOC/$IDX-txt"
-
-    if [ ! -s "$DATA" ]
-    then
-	return
-	fi
-
-    log "Index $IDX"
-    sort $SORT_ARGS -u $KEYS < "$DATA" > $WORK
-    log "Build $IDX"
-    rm -f "$LOC/$IDX.dat"
-    rm -f "$LOC/$IDX.idn"
-    java -cp "$JENA_CP" "$PKG".CmdIndexBuild "$LOC" "$IDX" "$WORK"
-    # Remove intermediary file.
-    if [ "$KEEPWORKFILES" != "yes" ] 
-    then
-	rm "$WORK" 
-    fi
-}
-
-K1="-k 1,1"
-K2="-k 2,2"
-K3="-k 3,3"
-K4="-k 4,4"
-
-process_rows "$K1 $K2 $K3" "$DATA_TRIPLES" SPO
-
-process_rows "$K2 $K3 $K1" "$DATA_TRIPLES" POS
-
-process_rows "$K3 $K1 $K2" "$DATA_TRIPLES" OSP
-
-process_rows "$K1 $K2 $K3 $K4" "$DATA_QUADS" GSPO
-
-process_rows "$K1 $K3 $K4 $K2" "$DATA_QUADS" GPOS
-
-process_rows "$K1 $K4 $K2 $K3" "$DATA_QUADS" GOSP
-
-process_rows "$K2 $K3 $K4 $K1" "$DATA_QUADS" SPOG
-
-process_rows "$K3 $K4 $K2 $K1" "$DATA_QUADS" POSG
-
-process_rows "$K4 $K2 $K3 $K1" "$DATA_QUADS" OSPG
-
-log "Index phase end"
-TIME2="$(date +%s)"
-
-# ---- Clean up.
-
-if [ "$KEEPWORKFILES" != "yes" ] 
-then
-    rm -f "$DATA_TRIPLES" "$DATA_QUADS" 
-fi
-
-# ---- End
-log "-- TDB Bulk Loader Finish"
-ELAPSED=$(($TIME2-$TIME1))
-log "-- $ELAPSED seconds"

http://git-wip-us.apache.org/repos/asf/jena/blob/12dc2cc6/jena-tdb/bin/tdbloader2
----------------------------------------------------------------------
diff --git a/jena-tdb/bin/tdbloader2 b/jena-tdb/bin/tdbloader2
deleted file mode 100755
index fff1358..0000000
--- a/jena-tdb/bin/tdbloader2
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/bin/bash
-
-## Licensed to the Apache Software Foundation (ASF) under one
-## or more contributor license agreements.  See the NOTICE file
-## distributed with this work for additional information
-## regarding copyright ownership.  The ASF licenses this file
-## to you under the Apache License, Version 2.0 (the
-## "License"); you may not use this file except in compliance
-## with the License.  You may obtain a copy of the License at
-##
-##     http://www.apache.org/licenses/LICENSE-2.0
-##
-## Unless required by applicable law or agreed to in writing, software
-## distributed under the License is distributed on an "AS IS" BASIS,
-## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-## See the License for the specific language governing permissions and
-## limitations under the License.
-
-if [ "$TDBROOT" = "" ]
-    then 
-    echo "TDBROOT is not set" 1>&2
-    exit 1
-fi
-
-INIT="$TDBROOT/bin/tdb_init"
-
-if [ ! -r "$INIT" ] 
-then 
-    echo "Script $INIT (\$TDBROOT/bin/tdb_init) does not exist or is not readable"
-    exit 1
-fi
-
-. "$INIT"
-
-#echo "$TDB_CP"
-export JENA_CP="$TDB_CP"
-if [ -z "$SORT_ARGS" ]
-then
-    SORT_ARGS="--buffer-size=50%"
-    if $(sort --parallel=3 < /dev/null 2>/dev/null) 
-    then
-	SORT_ARGS="$SORT_ARGS --parallel=3"
-    fi
-fi
-export SORT_ARGS
-
-exec "$TDBROOT/bin/tdbloader2worker" "$@"

http://git-wip-us.apache.org/repos/asf/jena/blob/12dc2cc6/jena-tdb/bin/tdbloader2worker
----------------------------------------------------------------------
diff --git a/jena-tdb/bin/tdbloader2worker b/jena-tdb/bin/tdbloader2worker
deleted file mode 100755
index ca26d82..0000000
--- a/jena-tdb/bin/tdbloader2worker
+++ /dev/null
@@ -1,154 +0,0 @@
-#!/usr/bin/env bash
-
-## Licensed to the Apache Software Foundation (ASF) under one
-## or more contributor license agreements.  See the NOTICE file
-## distributed with this work for additional information
-## regarding copyright ownership.  The ASF licenses this file
-## to you under the Apache License, Version 2.0 (the
-## "License"); you may not use this file except in compliance
-## with the License.  You may obtain a copy of the License at
-##
-##     http://www.apache.org/licenses/LICENSE-2.0
-##
-## Unless required by applicable law or agreed to in writing, software
-## distributed under the License is distributed on an "AS IS" BASIS,
-## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-## See the License for the specific language governing permissions and
-## limitations under the License.
-
-# The environment for this sub-script is setup by "tdbloader2"
-
-# Exit on error.
-set -e
-
-# Sort order is ASCII
-export LC_ALL="C"
-
-log() { echo " $(date $DATE)" "$@" ; }
-
-TMP=$$
-#DATE="+%Y-%m-%dT%H:%M:%S%:z"
-DATE="+%H:%M:%S"
-
-##--parallel is not always available.
-SORT_ARGS="${SORT_ARGS:---buffer-size=50%}"
-JVM_ARGS=${JVM_ARGS:--Xmx1200M}
-
-# Classpath set in "tdbloader2"
-if [ -z "$JENA_CP" ]
-then
-    echo "Classpath not provided : set JENA_CP" 1>&2
-    exit 1
-fi
-
-USAGE="Usage: tdbloader2 --loc location datafile ..."
-PKG=org.apache.jena.tdb.store.bulkloader2
-
-if [ "$#" -lt 2 ] ; then echo "$USAGE" 1>&2 ; exit 1 ; fi
-
-## Process --loc. Yuk.
-ARG1="$1"
-shift
-if [ "$ARG1" = "-loc" -o  "$ARG1" = "--loc" ]
-then
-    LOC="$1"
-    shift
-else 
-    LOC="${ARG1/-*loc=/}"
-    if [ "$ARG1" = "$LOC" ] ; then echo $USAGE 1>&2 ; exit 1 ; fi
-fi
-
-# Look for any index and data files in the directory.
-# Skip a possible configuration file
-if test -n "$(find "$LOC" -maxdepth 1 -type f ! -name 'this.*' -print -quit)"
-then 
-    echo "Not empty: $LOC"
-    exit 1
-fi
-
-if [ ! -e "$LOC" ] ; then mkdir "$LOC" ; fi
-if [ ! -d "$LOC" ] ; then echo "Not a directory: $LOC" ; exit 1 ; fi
-
-FILES="$@"
-## Stdin?
-KEEPWORKFILES="${KEEPWORKFILES:-}"
-# ---- Start
-log "-- TDB Bulk Loader Start"
-TIME1="$(date +%s)"
-
-# ---- Data loading phase
-log "Data phase"
-# Produce nodes file and triples/quads text file.
-
-DATA_TRIPLES="$LOC/data-triples.$TMP"
-DATA_QUADS="$LOC/data-quads.$TMP"
-
-java $JVM_ARGS -cp "$JENA_CP" "$PKG".CmdNodeTableBuilder \
-    "--loc=$LOC" "--triples=$DATA_TRIPLES" "--quads=$DATA_QUADS" $FILES
-
-# ---- Index intermediates
-## All files are writtern S P O / G S P O columns per row but in different sort orders.
-log "Index phase"
-
-process_rows()
-{
-    local KEYS="$1"
-    local DATA="$2"
-    local IDX=$3
-    local WORK="$LOC/$IDX-txt"
-
-    if [ ! -s "$DATA" ]
-    then
-	return
-	fi
-
-    log "Index $IDX"
-    sort $SORT_ARGS -u $KEYS < "$DATA" > $WORK
-    log "Build $IDX"
-    rm -f "$LOC/$IDX.dat"
-    rm -f "$LOC/$IDX.idn"
-    java -cp "$JENA_CP" "$PKG".CmdIndexBuild "$LOC" "$IDX" "$WORK"
-    # Remove intermediary file.
-    if [ "$KEEPWORKFILES" != "yes" ] 
-    then
-	rm "$WORK" 
-    fi
-}
-
-K1="-k 1,1"
-K2="-k 2,2"
-K3="-k 3,3"
-K4="-k 4,4"
-
-process_rows "$K1 $K2 $K3" "$DATA_TRIPLES" SPO
-
-process_rows "$K2 $K3 $K1" "$DATA_TRIPLES" POS
-
-process_rows "$K3 $K1 $K2" "$DATA_TRIPLES" OSP
-
-process_rows "$K1 $K2 $K3 $K4" "$DATA_QUADS" GSPO
-
-process_rows "$K1 $K3 $K4 $K2" "$DATA_QUADS" GPOS
-
-process_rows "$K1 $K4 $K2 $K3" "$DATA_QUADS" GOSP
-
-process_rows "$K2 $K3 $K4 $K1" "$DATA_QUADS" SPOG
-
-process_rows "$K3 $K4 $K2 $K1" "$DATA_QUADS" POSG
-
-process_rows "$K4 $K2 $K3 $K1" "$DATA_QUADS" OSPG
-
-log "Index phase end"
-TIME2="$(date +%s)"
-
-# ---- Clean up.
-
-if [ "$KEEPWORKFILES" != "yes" ] 
-then
-    rm -f "$DATA_TRIPLES" "$DATA_QUADS" 
-fi
-
-# ---- End
-log "-- TDB Bulk Loader Finish"
-ELAPSED=$(($TIME2-$TIME1))
-log "-- $ELAPSED seconds"