You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2015/08/16 18:03:47 UTC
jena git commit: JENA-959 : Add --compress.
Repository: jena
Updated Branches:
refs/heads/master b2045b222 -> 43efe529c
JENA-959 : Add --compress.
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/43efe529
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/43efe529
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/43efe529
Branch: refs/heads/master
Commit: 43efe529cb99aeaca6e94e91ac819e5ce4c20c2c
Parents: b2045b2
Author: Andy Seaborne <an...@apache.org>
Authored: Sun Aug 16 17:01:41 2015 +0100
Committer: Andy Seaborne <an...@apache.org>
Committed: Sun Aug 16 17:01:41 2015 +0100
----------------------------------------------------------------------
.../main/java/arq/cmdline/ModLangOutput.java | 33 +++-----
.../src/main/java/riotcmd/CmdLangParse.java | 79 ++++++++++----------
2 files changed, 51 insertions(+), 61 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/43efe529/jena-arq/src/main/java/arq/cmdline/ModLangOutput.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/arq/cmdline/ModLangOutput.java b/jena-arq/src/main/java/arq/cmdline/ModLangOutput.java
index 7b12231..a4ed89d 100644
--- a/jena-arq/src/main/java/arq/cmdline/ModLangOutput.java
+++ b/jena-arq/src/main/java/arq/cmdline/ModLangOutput.java
@@ -39,6 +39,8 @@ public class ModLangOutput extends ModBase
protected ArgDecl argOutput = new ArgDecl(ArgDecl.HasValue, "out", "output") ;
protected ArgDecl argPretty = new ArgDecl(ArgDecl.HasValue, "formatted", "pretty", "fmt") ;
protected ArgDecl argStream = new ArgDecl(ArgDecl.HasValue, "stream") ;
+ protected ArgDecl argCompress = new ArgDecl(ArgDecl.NoValue, "compress") ;
+ private boolean compressedOutput = false ;
private RDFFormat streamOutput = null ;
private RDFFormat formattedOutput = null ;
@@ -48,6 +50,7 @@ public class ModLangOutput extends ModBase
cmdLine.add(argOutput, "--output=FMT", "Output in the given format, streaming if possible.") ;
cmdLine.add(argPretty, "--formatted=FMT", "Output, using pretty printing (consumes memory)") ;
cmdLine.add(argStream, "--stream=FMT", "Output, using a streaming format") ;
+ cmdLine.add(argCompress, "--compress", "Compress the output with gzip") ;
}
@Override
@@ -96,6 +99,9 @@ public class ModLangOutput extends ModBase
}
}
+ if ( cmdLine.contains(argCompress))
+ compressedOutput = true ;
+
if ( streamOutput == null && formattedOutput == null )
streamOutput = RDFFormat.NQUADS ;
}
@@ -129,28 +135,7 @@ public class ModLangOutput extends ModBase
out.println(" "+lang.getLabel()) ;
}
}
- // Stream-only code.
-// if ( ! StreamRDFWriter.registered(output) ) {
-// // ** Java8
-//// StreamRDFWriter.registered().stream()
-//// .map(fmt -> fmt.getLang())
-//// .distinct()
-//// .forEach(x -> System.err.println(" "+x.getLabel())) ;
-//
-// System.err.println("Language '"+output.getLabel()+"' can not be used for streamed out (try rdfcat)") ;
-// System.err.println("Streaming languages are:") ;
-// Set<Lang> seen = new HashSet<>() ;
-// for ( RDFFormat fmt : StreamRDFWriter.registered()) {
-// if ( seen.contains(fmt.getLang()) )
-// continue ;
-// seen.add(fmt.getLang()) ;
-// System.err.println(" "+fmt.getLang().getLabel()) ;
-// }
-//
-// throw new CmdException("Not a streaming RDF language : '"+langName+"'") ;
-// }
-// format = StreamRDFWriter.defaultSerialization(output) ;
-
+
public RDFFormat getOutputStreamFormat() {
return streamOutput ;
}
@@ -158,4 +143,8 @@ public class ModLangOutput extends ModBase
public RDFFormat getOutputFormatted() {
return formattedOutput ;
}
+
+ public boolean compressedOutput() {
+ return compressedOutput ;
+ }
}
http://git-wip-us.apache.org/repos/asf/jena/blob/43efe529/jena-arq/src/main/java/riotcmd/CmdLangParse.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/riotcmd/CmdLangParse.java b/jena-arq/src/main/java/riotcmd/CmdLangParse.java
index 754f749..aa04052 100644
--- a/jena-arq/src/main/java/riotcmd/CmdLangParse.java
+++ b/jena-arq/src/main/java/riotcmd/CmdLangParse.java
@@ -18,9 +18,15 @@
package riotcmd;
+import java.io.IOException ;
import java.io.InputStream ;
import java.io.OutputStream ;
+import java.util.zip.GZIPOutputStream ;
+import arq.cmdline.ModLangOutput ;
+import arq.cmdline.ModLangParse ;
+import arq.cmdline.ModSymbol ;
+import arq.cmdline.ModTime ;
import jena.cmd.ArgDecl ;
import jena.cmd.CmdException;
import jena.cmd.CmdGeneral ;
@@ -42,7 +48,6 @@ import org.apache.jena.riot.tokens.Tokenizer ;
import org.apache.jena.riot.tokens.TokenizerFactory ;
import org.apache.jena.sparql.core.DatasetGraph ;
import org.apache.jena.sparql.core.DatasetGraphFactory ;
-import arq.cmdline.* ;
/** Common framework for running RIOT parsers */
public abstract class CmdLangParse extends CmdGeneral
@@ -62,22 +67,19 @@ public abstract class CmdLangParse extends CmdGeneral
String getRateName() ;
}
- static LangHandler langHandlerQuads = new LangHandler()
- {
+ static LangHandler langHandlerQuads = new LangHandler() {
@Override
public String getItemsName() { return "quads" ; }
@Override
public String getRateName() { return "QPS" ; }
} ;
- static LangHandler langHandlerTriples = new LangHandler()
- {
+ static LangHandler langHandlerTriples = new LangHandler() {
@Override
public String getItemsName() { return "triples" ; }
@Override
public String getRateName() { return "TPS" ; }
} ;
- static LangHandler langHandlerAny = new LangHandler()
- {
+ static LangHandler langHandlerAny = new LangHandler() {
@Override
public String getItemsName() { return "tuples" ; }
@Override
@@ -104,10 +106,8 @@ public abstract class CmdLangParse extends CmdGeneral
}
@Override
- protected String getSummary()
- {
- //return getCommandName()+" [--time] [--check|--noCheck] [--sink] [--base=IRI] [--skip | --stopOnError] file ..." ;
- return getCommandName()+" [--time] [--check|--noCheck] [--sink] [--base=IRI] [--out=FORMAT] file ..." ;
+ protected String getSummary() {
+ return getCommandName()+" [--time] [--check|--noCheck] [--sink] [--base=IRI] [--out=FORMAT] [--compress] file ..." ;
}
protected long totalMillis = 0 ;
@@ -115,7 +115,6 @@ public abstract class CmdLangParse extends CmdGeneral
OutputStream output = System.out ;
StreamRDF outputStream = null ;
-
@Override
protected void processModulesAndArgs() {
@@ -125,14 +124,18 @@ public abstract class CmdLangParse extends CmdGeneral
protected interface PostParseHandler { void postParse(); }
@Override
- protected void exec()
- {
+ protected void exec() {
if ( modLangParse.strictMode() )
RIOT.setStrictMode(true) ;
if ( modLangParse.getRDFSVocab() != null )
setup = new InferenceSetupRDFS(modLangParse.getRDFSVocab()) ;
+ if ( modLangOutput.compressedOutput() ) {
+ try { output = new GZIPOutputStream(output, true) ; }
+ catch (IOException e) { IO.exception(e);}
+ }
+
outputStream = null ;
PostParseHandler postParse = null ;
@@ -145,20 +148,23 @@ public abstract class CmdLangParse extends CmdGeneral
try {
if ( super.getPositional().isEmpty() )
- parseFile("-") ;
- else
- {
- boolean b = super.getPositional().size() > 1 ;
- for ( String fn : super.getPositional() )
- {
- if ( b && ! super.isQuiet() )
- SysRIOT.getLogger().info("File: "+fn) ;
- parseFile(fn) ;
+ parseFile("-");
+ else {
+ boolean b = super.getPositional().size() > 1;
+ for ( String fn : super.getPositional() ) {
+ if ( b && !super.isQuiet() )
+ SysRIOT.getLogger().info("File: " + fn);
+ parseFile(fn);
}
}
+ if ( postParse != null )
+ postParse.postParse();
} finally {
+ if ( output != System.out )
+ IO.close(output) ;
+ else
+ IO.flush(output);
System.err.flush() ;
- System.out.flush() ;
if ( super.getPositional().size() > 1 && modTime.timingEnabled() )
output("Total", totalTuples, totalMillis, langHandlerOverall) ;
}
@@ -167,8 +173,7 @@ public abstract class CmdLangParse extends CmdGeneral
postParse.postParse() ;
}
- public void parseFile(String filename)
- {
+ public void parseFile(String filename) {
TypedInputStream in = null ;
if ( filename.equals("-") ) {
in = new TypedInputStream(System.in) ;
@@ -182,11 +187,11 @@ public abstract class CmdLangParse extends CmdGeneral
}
parseFile(null, filename, in) ;
IO.close(in) ;
+
}
}
- public void parseFile(String defaultBaseURI, String filename, TypedInputStream in)
- {
+ public void parseFile(String defaultBaseURI, String filename, TypedInputStream in) {
String baseURI = modLangParse.getBaseIRI() ;
if ( baseURI == null )
baseURI = defaultBaseURI ;
@@ -195,8 +200,7 @@ public abstract class CmdLangParse extends CmdGeneral
protected abstract Lang selectLang(String filename, ContentType contentType, Lang dftLang ) ;
- protected void parseRIOT(String baseURI, String filename, TypedInputStream in)
- {
+ protected void parseRIOT(String baseURI, String filename, TypedInputStream in) {
ContentType ct = in.getMediaType() ;
baseURI = SysRIOT.chooseBaseIRI(baseURI, filename) ;
@@ -293,7 +297,6 @@ public abstract class CmdLangParse extends CmdGeneral
totalTuples += n ;
}
-
/** Create a streaming output sink if possible */
protected StreamRDF createStreamSink() {
if ( modLangParse.toBitBucket() )
@@ -302,7 +305,8 @@ public abstract class CmdLangParse extends CmdGeneral
RDFFormat fmt = modLangOutput.getOutputStreamFormat() ;
if ( fmt == null )
return null ;
- return StreamRDFWriter.getWriterStream(System.out, fmt) ;
+ // Stream to the selected output (gzip-wrapped when --compress was given).
+ return StreamRDFWriter.getWriterStream(output, fmt) ;
}
/** Create an accumulating output stream for later pretty printing */
@@ -316,7 +320,7 @@ public abstract class CmdLangParse extends CmdGeneral
// Try as dataset, then as graph.
WriterDatasetRIOTFactory w = RDFWriterRegistry.getWriterDatasetFactory(fmt) ;
if ( w != null ) {
- RDFDataMgr.write(System.out, dsg, fmt) ;
+ RDFDataMgr.write(output, dsg, fmt) ;
return ;
}
WriterGraphRIOTFactory wg = RDFWriterRegistry.getWriterGraphFactory(fmt) ;
@@ -330,14 +334,12 @@ public abstract class CmdLangParse extends CmdGeneral
return Pair.create(sink, handler) ;
}
- protected Tokenizer makeTokenizer(InputStream in)
- {
+ protected Tokenizer makeTokenizer(InputStream in) {
Tokenizer tokenizer = TokenizerFactory.makeTokenizerUTF8(in) ;
return tokenizer ;
}
- protected void output(String label, long numberTriples, long timeMillis, LangHandler handler)
- {
+ protected void output(String label, long numberTriples, long timeMillis, LangHandler handler) {
double timeSec = timeMillis/1000.0 ;
System.out.flush() ;
@@ -349,8 +351,7 @@ public abstract class CmdLangParse extends CmdGeneral
handler.getRateName()) ;
}
- protected void output(String label)
- {
+ protected void output(String label) {
System.err.printf("%s : \n", label) ;
}
}