You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2012/08/10 15:35:52 UTC
svn commit: r1371696 - in /jena/trunk: apache-jena/bin/tdbloader2 apache-jena/bin/tdbloader2worker jena-tdb/bin/tdbloader2 jena-tdb/bin/tdbloader2worker
Author: andy
Date: Fri Aug 10 13:35:52 2012
New Revision: 1371696
URL: http://svn.apache.org/viewvc?rev=1371696&view=rev
Log:
Allocate more resources to sorting.
Modified:
jena/trunk/apache-jena/bin/tdbloader2
jena/trunk/apache-jena/bin/tdbloader2worker
jena/trunk/jena-tdb/bin/tdbloader2
jena/trunk/jena-tdb/bin/tdbloader2worker
Modified: jena/trunk/apache-jena/bin/tdbloader2
URL: http://svn.apache.org/viewvc/jena/trunk/apache-jena/bin/tdbloader2?rev=1371696&r1=1371695&r2=1371696&view=diff
==============================================================================
--- jena/trunk/apache-jena/bin/tdbloader2 (original)
+++ jena/trunk/apache-jena/bin/tdbloader2 Fri Aug 10 13:35:52 2012
@@ -34,4 +34,8 @@ case "$(uname)" in
esac
export JENA_CP
-exec tdbloader2worker "$@"
+
+## And --parallel=3 if available.
+export SORT_ARGS="${SORT_ARGS:---buffer-size=50%}"
+
+exec "$JENA_HOME/bin/tdbloader2worker" "$@"
Modified: jena/trunk/apache-jena/bin/tdbloader2worker
URL: http://svn.apache.org/viewvc/jena/trunk/apache-jena/bin/tdbloader2worker?rev=1371696&r1=1371695&r2=1371696&view=diff
==============================================================================
--- jena/trunk/apache-jena/bin/tdbloader2worker (original)
+++ jena/trunk/apache-jena/bin/tdbloader2worker Fri Aug 10 13:35:52 2012
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/usr/bin/env bash
## Licensed to the Apache Software Foundation (ASF) under one
## or more contributor license agreements. See the NOTICE file
@@ -29,8 +29,12 @@ log() { echo " $(date $DATE)" "$@" ; }
TMP=$$
#DATE="+%Y-%m-%dT%H:%M:%S%:z"
DATE="+%H:%M:%S"
-# Classpath set in "tdbloader2"
+##--parallel is not always available.
+SORT_ARGS="${SORT_ARGS:---buffer-size=50%}"
+JVM_ARGS=${JVM_ARGS:--Xmx1200M}
+
+# Classpath set in "tdbloader2"
if [ -z "$JENA_CP" ]
then
echo "Classpath not provided : set JENA_CP" 1>&2
@@ -71,7 +75,6 @@ log "Data phase"
DATA_TRIPLES="$LOC/data-triples.$TMP"
DATA_QUADS="$LOC/data-quads.$TMP"
-JVM_ARGS=${JVM_ARGS:--Xmx1200M}
java $JVM_ARGS -cp "$JENA_CP" "$PKG".CmdNodeTableBuilder \
"--loc=$LOC" "--triples=$DATA_TRIPLES" "--quads=$DATA_QUADS" $FILES
@@ -92,7 +95,7 @@ process_rows()
fi
log "Index $IDX"
- sort -u $KEYS < "$DATA" > $WORK
+ sort $SORT_ARGS -u $KEYS < "$DATA" > $WORK
log "Build $IDX"
rm -f "$LOC/$IDX.dat"
rm -f "$LOC/$IDX.idn"
Modified: jena/trunk/jena-tdb/bin/tdbloader2
URL: http://svn.apache.org/viewvc/jena/trunk/jena-tdb/bin/tdbloader2?rev=1371696&r1=1371695&r2=1371696&view=diff
==============================================================================
--- jena/trunk/jena-tdb/bin/tdbloader2 (original)
+++ jena/trunk/jena-tdb/bin/tdbloader2 Fri Aug 10 13:35:52 2012
@@ -34,4 +34,6 @@ fi
#echo "$TDB_CP"
export JENA_CP="$TDB_CP"
-exec tdbloader2worker "$@"
+## And --parallel=3 if available.
+export SORT_ARGS="${SORT_ARGS:---buffer-size=50%}"
+exec "$TDBROOT/bin/tdbloader2worker" "$@"
Modified: jena/trunk/jena-tdb/bin/tdbloader2worker
URL: http://svn.apache.org/viewvc/jena/trunk/jena-tdb/bin/tdbloader2worker?rev=1371696&r1=1371695&r2=1371696&view=diff
==============================================================================
--- jena/trunk/jena-tdb/bin/tdbloader2worker (original)
+++ jena/trunk/jena-tdb/bin/tdbloader2worker Fri Aug 10 13:35:52 2012
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/usr/bin/env bash
## Licensed to the Apache Software Foundation (ASF) under one
## or more contributor license agreements. See the NOTICE file
@@ -29,8 +29,12 @@ log() { echo " $(date $DATE)" "$@" ; }
TMP=$$
#DATE="+%Y-%m-%dT%H:%M:%S%:z"
DATE="+%H:%M:%S"
-# Classpath set in "tdbloader2"
+##--parallel is not always available.
+SORT_ARGS="${SORT_ARGS:---buffer-size=50%}"
+JVM_ARGS=${JVM_ARGS:--Xmx1200M}
+
+# Classpath set in "tdbloader2"
if [ -z "$JENA_CP" ]
then
echo "Classpath not provided : set JENA_CP" 1>&2
@@ -71,7 +75,6 @@ log "Data phase"
DATA_TRIPLES="$LOC/data-triples.$TMP"
DATA_QUADS="$LOC/data-quads.$TMP"
-JVM_ARGS=${JVM_ARGS:--Xmx1200M}
java $JVM_ARGS -cp "$JENA_CP" "$PKG".CmdNodeTableBuilder \
"--loc=$LOC" "--triples=$DATA_TRIPLES" "--quads=$DATA_QUADS" $FILES
@@ -92,7 +95,7 @@ process_rows()
fi
log "Index $IDX"
- sort -u $KEYS < "$DATA" > $WORK
+ sort $SORT_ARGS -u $KEYS < "$DATA" > $WORK
log "Build $IDX"
rm -f "$LOC/$IDX.dat"
rm -f "$LOC/$IDX.idn"