You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2015/08/17 17:10:26 UTC

[47/50] [abbrv] jena git commit: JENA-959 : Add --compress.

JENA-959 : Add --compress.

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/43efe529
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/43efe529
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/43efe529

Branch: refs/heads/JENA-491-construct-quads
Commit: 43efe529cb99aeaca6e94e91ac819e5ce4c20c2c
Parents: b2045b2
Author: Andy Seaborne <an...@apache.org>
Authored: Sun Aug 16 17:01:41 2015 +0100
Committer: Andy Seaborne <an...@apache.org>
Committed: Sun Aug 16 17:01:41 2015 +0100

----------------------------------------------------------------------
 .../main/java/arq/cmdline/ModLangOutput.java    | 33 +++-----
 .../src/main/java/riotcmd/CmdLangParse.java     | 79 ++++++++++----------
 2 files changed, 51 insertions(+), 61 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/43efe529/jena-arq/src/main/java/arq/cmdline/ModLangOutput.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/arq/cmdline/ModLangOutput.java b/jena-arq/src/main/java/arq/cmdline/ModLangOutput.java
index 7b12231..a4ed89d 100644
--- a/jena-arq/src/main/java/arq/cmdline/ModLangOutput.java
+++ b/jena-arq/src/main/java/arq/cmdline/ModLangOutput.java
@@ -39,6 +39,8 @@ public class ModLangOutput extends ModBase
     protected ArgDecl argOutput       = new ArgDecl(ArgDecl.HasValue, "out", "output") ;
     protected ArgDecl argPretty       = new ArgDecl(ArgDecl.HasValue, "formatted", "pretty", "fmt") ;
     protected ArgDecl argStream       = new ArgDecl(ArgDecl.HasValue, "stream") ;
+    protected ArgDecl argCompress     = new ArgDecl(ArgDecl.NoValue, "compress") ;
+    private boolean compressedOutput = false ;
     private RDFFormat streamOutput    = null ;
     private RDFFormat formattedOutput = null ;
 
@@ -48,6 +50,7 @@ public class ModLangOutput extends ModBase
         cmdLine.add(argOutput,    "--output=FMT",     "Output in the given format, streaming if possible.") ;
         cmdLine.add(argPretty,    "--formatted=FMT",  "Output, using pretty printing (consumes memory)") ;
         cmdLine.add(argStream,    "--stream=FMT",     "Output, using a streaming format") ;
+        cmdLine.add(argCompress,  "--compress",       "Compress the output with gzip") ;
     }
 
     @Override
@@ -96,6 +99,9 @@ public class ModLangOutput extends ModBase
             }
         }
         
+        if ( cmdLine.contains(argCompress))
+            compressedOutput = true ;
+        
         if ( streamOutput == null && formattedOutput == null )
             streamOutput = RDFFormat.NQUADS ;
     }
@@ -129,28 +135,7 @@ public class ModLangOutput extends ModBase
             out.println("   "+lang.getLabel()) ;
         }
     }
-            // Stream-only code.
-//            if ( ! StreamRDFWriter.registered(output) ) {
-//                // ** Java8
-////                StreamRDFWriter.registered().stream()
-////                    .map(fmt -> fmt.getLang()) 
-////                    .distinct()
-////                    .forEach(x -> System.err.println("   "+x.getLabel())) ;
-//                
-//                System.err.println("Language '"+output.getLabel()+"' can not be used for streamed out (try rdfcat)") ;
-//                System.err.println("Streaming languages are:") ;
-//                Set<Lang> seen = new HashSet<>() ;
-//                for ( RDFFormat fmt : StreamRDFWriter.registered()) {
-//                    if ( seen.contains(fmt.getLang()) )
-//                        continue ;
-//                    seen.add(fmt.getLang()) ;
-//                    System.err.println("   "+fmt.getLang().getLabel()) ;
-//                }
-//                
-//                throw new CmdException("Not a streaming RDF language : '"+langName+"'") ;
-//            }
-//            format = StreamRDFWriter.defaultSerialization(output) ;
-
+    
     public RDFFormat getOutputStreamFormat() {
         return streamOutput ;
     }
@@ -158,4 +143,8 @@ public class ModLangOutput extends ModBase
     public RDFFormat getOutputFormatted() {
         return formattedOutput ;
     }
+    
+    public boolean compressedOutput() {
+        return compressedOutput ;
+    }
 }

http://git-wip-us.apache.org/repos/asf/jena/blob/43efe529/jena-arq/src/main/java/riotcmd/CmdLangParse.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/riotcmd/CmdLangParse.java b/jena-arq/src/main/java/riotcmd/CmdLangParse.java
index 754f749..aa04052 100644
--- a/jena-arq/src/main/java/riotcmd/CmdLangParse.java
+++ b/jena-arq/src/main/java/riotcmd/CmdLangParse.java
@@ -18,9 +18,15 @@
 
 package riotcmd;
 
+import java.io.IOException ;
 import java.io.InputStream ;
 import java.io.OutputStream ;
+import java.util.zip.GZIPOutputStream ;
 
+import arq.cmdline.ModLangOutput ;
+import arq.cmdline.ModLangParse ;
+import arq.cmdline.ModSymbol ;
+import arq.cmdline.ModTime ;
 import jena.cmd.ArgDecl ;
 import jena.cmd.CmdException;
 import jena.cmd.CmdGeneral ;
@@ -42,7 +48,6 @@ import org.apache.jena.riot.tokens.Tokenizer ;
 import org.apache.jena.riot.tokens.TokenizerFactory ;
 import org.apache.jena.sparql.core.DatasetGraph ;
 import org.apache.jena.sparql.core.DatasetGraphFactory ;
-import arq.cmdline.* ;
 
 /** Common framework for running RIOT parsers */
 public abstract class CmdLangParse extends CmdGeneral
@@ -62,22 +67,19 @@ public abstract class CmdLangParse extends CmdGeneral
         String getRateName() ;
     }
 
-    static LangHandler langHandlerQuads = new LangHandler()
-    {
+    static LangHandler langHandlerQuads = new LangHandler() {
         @Override
         public String getItemsName()        { return "quads" ; }
         @Override
         public String getRateName()         { return "QPS" ; }
     } ;
-    static LangHandler langHandlerTriples = new LangHandler()
-    {
+    static LangHandler langHandlerTriples = new LangHandler() {
         @Override
         public String getItemsName()        { return "triples" ; }
         @Override
         public String getRateName()         { return "TPS" ; }
     } ;
-    static LangHandler langHandlerAny = new LangHandler()
-    {
+    static LangHandler langHandlerAny = new LangHandler() {
         @Override
         public String getItemsName()        { return "tuples" ; }
         @Override
@@ -104,10 +106,8 @@ public abstract class CmdLangParse extends CmdGeneral
     }
 
     @Override
-    protected String getSummary()
-    {
-        //return getCommandName()+" [--time] [--check|--noCheck] [--sink] [--base=IRI] [--skip | --stopOnError] file ..." ;
-        return getCommandName()+" [--time] [--check|--noCheck] [--sink] [--base=IRI] [--out=FORMAT] file ..." ;
+    protected String getSummary() {
+        return getCommandName()+" [--time] [--check|--noCheck] [--sink] [--base=IRI] [--out=FORMAT] [--compress] file ..." ;
     }
 
     protected long totalMillis = 0 ; 
@@ -115,7 +115,6 @@ public abstract class CmdLangParse extends CmdGeneral
     
     OutputStream output = System.out ;
     StreamRDF outputStream = null ;
-    
 
     @Override
     protected void processModulesAndArgs() {
@@ -125,14 +124,18 @@ public abstract class CmdLangParse extends CmdGeneral
     protected interface PostParseHandler { void postParse(); }
     
     @Override
-    protected void exec()
-    {
+    protected void exec() {
         if ( modLangParse.strictMode() )
             RIOT.setStrictMode(true) ; 
         
         if ( modLangParse.getRDFSVocab() != null )
             setup = new InferenceSetupRDFS(modLangParse.getRDFSVocab()) ;
      
+        if ( modLangOutput.compressedOutput() ) {
+            try { output = new GZIPOutputStream(output, true) ; }
+            catch (IOException e) { IO.exception(e);}
+        }
+            
         outputStream = null ;
         PostParseHandler postParse = null ;
 
@@ -145,20 +148,23 @@ public abstract class CmdLangParse extends CmdGeneral
         
         try {
             if ( super.getPositional().isEmpty() )
-                parseFile("-") ;
-            else
-            {
-                boolean b = super.getPositional().size() > 1 ;
-                for ( String fn : super.getPositional() )
-                {
-                    if ( b && ! super.isQuiet() )
-                        SysRIOT.getLogger().info("File: "+fn) ;
-                    parseFile(fn) ;
+                parseFile("-");
+            else {
+                boolean b = super.getPositional().size() > 1;
+                for ( String fn : super.getPositional() ) {
+                    if ( b && !super.isQuiet() )
+                        SysRIOT.getLogger().info("File: " + fn);
+                    parseFile(fn);
                 }
             }
+            // Write once while output is still open. NOTE(review): assumes the later postParse call is null-guarded - confirm.
+            if ( postParse != null ) { postParse.postParse(); postParse = null ; }
         } finally {
+            if ( output != System.out )
+                IO.close(output) ;
+            else
+                IO.flush(output);    
             System.err.flush() ;
-            System.out.flush() ;
             if ( super.getPositional().size() > 1 && modTime.timingEnabled() )
                 output("Total", totalTuples, totalMillis, langHandlerOverall) ;
         }
@@ -167,8 +173,7 @@ public abstract class CmdLangParse extends CmdGeneral
             postParse.postParse() ;
     }
     
-    public void parseFile(String filename)
-    {
+    public void parseFile(String filename) {
         TypedInputStream in = null ;
         if ( filename.equals("-") ) {
             in = new TypedInputStream(System.in) ;
@@ -182,11 +187,11 @@ public abstract class CmdLangParse extends CmdGeneral
             }
             parseFile(null, filename, in) ;
             IO.close(in) ;
+            
         }
     }
 
-    public void parseFile(String defaultBaseURI, String filename, TypedInputStream in)
-    {   
+    public void parseFile(String defaultBaseURI, String filename, TypedInputStream in) {   
         String baseURI = modLangParse.getBaseIRI() ;
         if ( baseURI == null )
             baseURI = defaultBaseURI ;
@@ -195,8 +200,7 @@ public abstract class CmdLangParse extends CmdGeneral
     
     protected abstract Lang selectLang(String filename, ContentType contentType, Lang dftLang  ) ;
 
-    protected void parseRIOT(String baseURI, String filename, TypedInputStream in)
-    {
+    protected void parseRIOT(String baseURI, String filename, TypedInputStream in) {
         ContentType ct = in.getMediaType() ;
         
         baseURI = SysRIOT.chooseBaseIRI(baseURI, filename) ;
@@ -293,7 +297,6 @@ public abstract class CmdLangParse extends CmdGeneral
         totalTuples += n ;
     }
     
-    
     /** Create a streaming output sink if possible */
     protected StreamRDF createStreamSink() {
         if ( modLangParse.toBitBucket() )
@@ -302,7 +305,8 @@ public abstract class CmdLangParse extends CmdGeneral
         RDFFormat fmt = modLangOutput.getOutputStreamFormat() ;
         if ( fmt == null )
             return null ;
-        return StreamRDFWriter.getWriterStream(System.out, fmt) ;
+        // Write to 'output' (System.out, or its gzip wrapper when --compress is given), not System.out directly.
+        return StreamRDFWriter.getWriterStream(output, fmt) ;
     }
     
     /** Create an accumulating output stream for later pretty printing */
@@ -316,7 +320,7 @@ public abstract class CmdLangParse extends CmdGeneral
                 // Try as dataset, then as graph.
                 WriterDatasetRIOTFactory w = RDFWriterRegistry.getWriterDatasetFactory(fmt) ;
                 if ( w != null ) {
-                    RDFDataMgr.write(System.out, dsg, fmt) ;
+                    RDFDataMgr.write(output, dsg, fmt) ; // dataset writer found: write the dataset itself, as before this change
                     return ;
                 }
                 WriterGraphRIOTFactory wg = RDFWriterRegistry.getWriterGraphFactory(fmt) ;
@@ -330,14 +334,12 @@ public abstract class CmdLangParse extends CmdGeneral
         return Pair.create(sink, handler) ;
     }
     
-    protected Tokenizer makeTokenizer(InputStream in)
-    {
+    protected Tokenizer makeTokenizer(InputStream in) {
         Tokenizer tokenizer = TokenizerFactory.makeTokenizerUTF8(in) ;
         return tokenizer ;
     }
     
-    protected void output(String label, long numberTriples, long timeMillis, LangHandler handler)
-    {
+    protected void output(String label, long numberTriples, long timeMillis, LangHandler handler) {
         double timeSec = timeMillis/1000.0 ;
         
         System.out.flush() ;
@@ -349,8 +351,7 @@ public abstract class CmdLangParse extends CmdGeneral
                           handler.getRateName()) ;
     }
     
-    protected void output(String label)
-    {
+    protected void output(String label) {
         System.err.printf("%s : \n", label) ;
     }
 }