You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2012/08/10 15:35:52 UTC

svn commit: r1371696 - in /jena/trunk: apache-jena/bin/tdbloader2 apache-jena/bin/tdbloader2worker jena-tdb/bin/tdbloader2 jena-tdb/bin/tdbloader2worker

Author: andy
Date: Fri Aug 10 13:35:52 2012
New Revision: 1371696

URL: http://svn.apache.org/viewvc?rev=1371696&view=rev
Log:
Allocate more resources to sorting.

Modified:
    jena/trunk/apache-jena/bin/tdbloader2
    jena/trunk/apache-jena/bin/tdbloader2worker
    jena/trunk/jena-tdb/bin/tdbloader2
    jena/trunk/jena-tdb/bin/tdbloader2worker

Modified: jena/trunk/apache-jena/bin/tdbloader2
URL: http://svn.apache.org/viewvc/jena/trunk/apache-jena/bin/tdbloader2?rev=1371696&r1=1371695&r2=1371696&view=diff
==============================================================================
--- jena/trunk/apache-jena/bin/tdbloader2 (original)
+++ jena/trunk/apache-jena/bin/tdbloader2 Fri Aug 10 13:35:52 2012
@@ -34,4 +34,8 @@ case "$(uname)" in
 esac
 
 export JENA_CP
-exec tdbloader2worker "$@"
+
+## Also add --parallel=3 to SORT_ARGS if the local sort supports it.
+export SORT_ARGS="${SORT_ARGS:---buffer-size=50%}"
+
+exec "$JENA_HOME/bin/tdbloader2worker" "$@"

Modified: jena/trunk/apache-jena/bin/tdbloader2worker
URL: http://svn.apache.org/viewvc/jena/trunk/apache-jena/bin/tdbloader2worker?rev=1371696&r1=1371695&r2=1371696&view=diff
==============================================================================
--- jena/trunk/apache-jena/bin/tdbloader2worker (original)
+++ jena/trunk/apache-jena/bin/tdbloader2worker Fri Aug 10 13:35:52 2012
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/usr/bin/env bash
 
 ## Licensed to the Apache Software Foundation (ASF) under one
 ## or more contributor license agreements.  See the NOTICE file
@@ -29,8 +29,12 @@ log() { echo " $(date $DATE)" "$@" ; }
 TMP=$$
 #DATE="+%Y-%m-%dT%H:%M:%S%:z"
 DATE="+%H:%M:%S"
-# Classpath set in "tdbloader2"
 
+## sort's --parallel option is not always available, so it is left out of the default SORT_ARGS.
+SORT_ARGS="${SORT_ARGS:---buffer-size=50%}"
+JVM_ARGS=${JVM_ARGS:--Xmx1200M}
+
+# Classpath set in "tdbloader2"
 if [ -z "$JENA_CP" ]
 then
     echo "Classpath not provided : set JENA_CP" 1>&2
@@ -71,7 +75,6 @@ log "Data phase"
 DATA_TRIPLES="$LOC/data-triples.$TMP"
 DATA_QUADS="$LOC/data-quads.$TMP"
 
-JVM_ARGS=${JVM_ARGS:--Xmx1200M}
 java $JVM_ARGS -cp "$JENA_CP" "$PKG".CmdNodeTableBuilder \
     "--loc=$LOC" "--triples=$DATA_TRIPLES" "--quads=$DATA_QUADS" $FILES
 
@@ -92,7 +95,7 @@ process_rows()
 	fi
 
     log "Index $IDX"
-    sort -u $KEYS < "$DATA" > $WORK
+    sort $SORT_ARGS -u $KEYS < "$DATA" > $WORK
     log "Build $IDX"
     rm -f "$LOC/$IDX.dat"
     rm -f "$LOC/$IDX.idn"

Modified: jena/trunk/jena-tdb/bin/tdbloader2
URL: http://svn.apache.org/viewvc/jena/trunk/jena-tdb/bin/tdbloader2?rev=1371696&r1=1371695&r2=1371696&view=diff
==============================================================================
--- jena/trunk/jena-tdb/bin/tdbloader2 (original)
+++ jena/trunk/jena-tdb/bin/tdbloader2 Fri Aug 10 13:35:52 2012
@@ -34,4 +34,6 @@ fi
 
 #echo "$TDB_CP"
 export JENA_CP="$TDB_CP"
-exec tdbloader2worker "$@"
+## Also add --parallel=3 to SORT_ARGS if the local sort supports it.
+export SORT_ARGS="${SORT_ARGS:---buffer-size=50%}"
+exec "$TDBROOT/bin/tdbloader2worker" "$@"

Modified: jena/trunk/jena-tdb/bin/tdbloader2worker
URL: http://svn.apache.org/viewvc/jena/trunk/jena-tdb/bin/tdbloader2worker?rev=1371696&r1=1371695&r2=1371696&view=diff
==============================================================================
--- jena/trunk/jena-tdb/bin/tdbloader2worker (original)
+++ jena/trunk/jena-tdb/bin/tdbloader2worker Fri Aug 10 13:35:52 2012
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/usr/bin/env bash
 
 ## Licensed to the Apache Software Foundation (ASF) under one
 ## or more contributor license agreements.  See the NOTICE file
@@ -29,8 +29,12 @@ log() { echo " $(date $DATE)" "$@" ; }
 TMP=$$
 #DATE="+%Y-%m-%dT%H:%M:%S%:z"
 DATE="+%H:%M:%S"
-# Classpath set in "tdbloader2"
 
+## sort's --parallel option is not always available, so it is left out of the default SORT_ARGS.
+SORT_ARGS="${SORT_ARGS:---buffer-size=50%}"
+JVM_ARGS=${JVM_ARGS:--Xmx1200M}
+
+# Classpath set in "tdbloader2"
 if [ -z "$JENA_CP" ]
 then
     echo "Classpath not provided : set JENA_CP" 1>&2
@@ -71,7 +75,6 @@ log "Data phase"
 DATA_TRIPLES="$LOC/data-triples.$TMP"
 DATA_QUADS="$LOC/data-quads.$TMP"
 
-JVM_ARGS=${JVM_ARGS:--Xmx1200M}
 java $JVM_ARGS -cp "$JENA_CP" "$PKG".CmdNodeTableBuilder \
     "--loc=$LOC" "--triples=$DATA_TRIPLES" "--quads=$DATA_QUADS" $FILES
 
@@ -92,7 +95,7 @@ process_rows()
 	fi
 
     log "Index $IDX"
-    sort -u $KEYS < "$DATA" > $WORK
+    sort $SORT_ARGS -u $KEYS < "$DATA" > $WORK
     log "Build $IDX"
     rm -f "$LOC/$IDX.dat"
     rm -f "$LOC/$IDX.idn"