You are viewing a plain text version of this content. The canonical link for it is here.
Posted to pr@jena.apache.org by GitBox <gi...@apache.org> on 2021/12/10 09:39:16 UTC

[GitHub] [jena] kinow commented on a change in pull request #1127: Xloader (more)

kinow commented on a change in pull request #1127:
URL: https://github.com/apache/jena/pull/1127#discussion_r766517320



##########
File path: apache-jena/bin/tdb2.xloader
##########
@@ -278,65 +287,94 @@ fi
 JAVA="${JAVA:-java}"
 
 info "Setup:"
-info "  Data:     $DATAFILES"
 info "  Database: $LOC"
-info "  Tmpdir:   $TMPDIR"
+info "  Data:     $DATAFILES"
+info "  TMPDIR:   $TMPDIR"
 
 # Large heap not required.
 JVM_ARGS="${JVM_ARGS:--Xmx2G}"
 
-## Time points.
+## Time point.
 
 TIME_START="$(now)"
 
-## Node table loading.
+## ======== Node table loading.
 if [ "$SYSTEM" == "TDB2" ]; then
+    ## TDB2 only.
+    info
     T="$(now)"
     info "Load node table"
     exec_java $PKG.CmdxBuildNodeTable --loc $LOC --tmpdir "$TMPDIR" $DATAFILES
     TIME_NODE_TABLE=$(($(now)-$T))
 fi
 
-## Ingest data, create workfiles
+## ======== Ingest data, creates workfiles
 info
 info "Ingest data"
 T="$(now)"
 exec_java $PKG.CmdxIngestData --loc $LOC --tmpdir "$TMPDIR" --triples "$TMPDIR/triples.tmp" --quads "$TMPDIR/quads.tmp" $DATAFILES
 TIME_INGEST=$(($(now)-$T))
 
-## @@ triples.tmp quads.tmp
+## ======== Indexes
+INFO="$TMPDIR/load.json"
+
+## Bash associative array
+declare -A TIME_IDX
 
 function index() {
     local IDX="$1"
+    info
+    info "Build $IDX"
+    local T="$(now)"
     exec_java $PKG.CmdxBuildIndex --loc $LOC --tmpdir "$TMPDIR" --index $IDX \
 	      "$TMPDIR/triples.tmp" "$TMPDIR/quads.tmp"
+    local T_IDX=$(($(now)-$T))
+    TIME_IDX[$IDX]=$T_IDX
 }
 
-info
-info "Build SPO"
-T="$(now)"
-index SPO
-TIME_IDX_SPO=$(($(now)-$T))
+## Decide which indexes to generate.
+TRIPLES_DFT="SPO POS OSP"
+QUADS_DFT="GSPO GPOS GOSP SPOG POSG OSPG"
 
-info
-info "Build POS"
-T="$(now)"
-index POS
-TIME_IDX_POS=$(($(now)-$T))
+TRIPLES_IDX="${TRIPLES_IDX:-$TRIPLES_DFT}"
+QUADS_IDX="${QUADS_IDX:-$QUADS_DFT}"
 
-info
-info "Build OSP"
-T="$(now)"
-index OSP
-let TIME_IDX_OSP=$(($(now)-$T))
+if [ -e "$INFO" ] ; then
+    ## Skip a phase if there are no items to index.
+    TRIPLES="$(jq .triples < $INFO)"

Review comment:
      Or tell users that they must install `jq` beforehand. In containers it's common to have only a very limited number of utilities installed.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: pr-unsubscribe@jena.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: pr-unsubscribe@jena.apache.org
For additional commands, e-mail: pr-help@jena.apache.org