You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2020/11/24 23:42:27 UTC

[jena] branch windows-file updated (13dffc1 -> c8c91f8)

This is an automated email from the ASF dual-hosted git repository.

andy pushed a change to branch windows-file
in repository https://gitbox.apache.org/repos/asf/jena.git.


 discard 13dffc1  JENA-2003: Handle file URIs with URI scheme name
     new c8c91f8  JENA-2003: Handle file URIs with URI scheme name

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (13dffc1)
            \
             N -- N -- N   refs/heads/windows-file (c8c91f8)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 jena-base/src/main/java/org/apache/jena/atlas/io/IO.java | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)


[jena] 01/01: JENA-2003: Handle file URIs with URI scheme name

Posted by an...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

andy pushed a commit to branch windows-file
in repository https://gitbox.apache.org/repos/asf/jena.git

commit c8c91f8b2e1c4b54e004b55f2cf50f8feb8b4b90
Author: Andy Seaborne <an...@apache.org>
AuthorDate: Tue Nov 24 22:43:22 2020 +0000

    JENA-2003: Handle file URIs with URI scheme name
---
 .../main/java/org/apache/jena/riot/RDFDataMgr.java |  2 +-
 .../java/org/apache/jena/riot/RDFLanguages.java    | 28 ++++----
 .../jena/riot/system/stream/LocatorFile.java       | 39 +++++------
 .../src/main/java/org/apache/jena/atlas/io/IO.java | 76 ++++++++++++++++++----
 .../java/org/apache/jena/atlas/lib/IRILib.java     |  2 +-
 .../java/org/apache/jena/fuseki/system/Upload.java |  4 +-
 6 files changed, 103 insertions(+), 48 deletions(-)

diff --git a/jena-arq/src/main/java/org/apache/jena/riot/RDFDataMgr.java b/jena-arq/src/main/java/org/apache/jena/riot/RDFDataMgr.java
index 6e98e1a..aa432af 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/RDFDataMgr.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/RDFDataMgr.java
@@ -707,7 +707,7 @@ public class RDFDataMgr
         if ( base == null )
             base = SysRIOT.chooseBaseIRI(uri) ;
         if ( hintLang == null )
-            hintLang = RDFLanguages.filenameToLang(uri) ;
+            hintLang = RDFLanguages.pathnameToLang(uri) ;
         parseFromURI(sink, uri, base, hintLang, context);
     }
 
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/RDFLanguages.java b/jena-arq/src/main/java/org/apache/jena/riot/RDFLanguages.java
index 0c8393e..33a1a38 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/RDFLanguages.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/RDFLanguages.java
@@ -28,7 +28,8 @@ import org.apache.jena.atlas.web.ContentType ;
 import org.apache.jena.atlas.web.MediaType ;
 import org.apache.jena.util.FileUtils ;
 
-/** Central registry of RDF languages and syntaxes.
+/**
+ * Central registry of RDF languages and syntaxes.
  * @see RDFParserRegistry
  * @see RDFFormat
  */
@@ -399,31 +400,34 @@ public class RDFLanguages
     }
 
     /** Try to map a resource name to a {@link Lang}; return null on no registered mapping */
-    public static Lang resourceNameToLang(String resourceName) { return filenameToLang(resourceName) ; }
+    public static Lang resourceNameToLang(String resourceName) { return pathnameToLang(resourceName) ; }
 
     /** Try to map a resource name to a {@link Lang}; return the given default where there is no registered mapping */
     public static Lang resourceNameToLang(String resourceName, Lang dftLang) { return filenameToLang(resourceName, dftLang) ; }
 
-    /** Try to map a URI or file name to a {@link Lang}; return null on no registered mapping. */
-    public static Lang filenameToLang(String filename)
+    /** Try to map a file name to a {@link Lang}; return null on no registered mapping. */
+    public static Lang filenameToLang(String uriOrFilename) { return pathnameToLang(uriOrFilename); }
+
+    /** Try to map a URI or URI path name to a {@link Lang}; return null on no registered mapping. */
+    public static Lang pathnameToLang(String pathname)
     {
-        if ( filename == null )
+        if ( pathname == null )
             return null;
         // Remove any URI fragment (there can be only one # in a URI).
         // Pragmatically, assume any # is URI related.
         // URIs can be relative.
-        int iHash = filename.indexOf('#');
+        int iHash = pathname.indexOf('#');
         if ( iHash  > 0 )
-            filename = filename.substring(0, iHash);
-        // Gzip or BZip2 compressed?
-        filename = IO.filenameNoCompression(filename);
-        return fileExtToLang(FileUtils.getFilenameExt(filename));
+            pathname = pathname.substring(0, iHash);
+        // Compressed?
+        pathname = IO.filenameNoCompression(pathname);
+        return fileExtToLang(FileUtils.getFilenameExt(pathname));
     }
 
     /** Try to map a file name to a {@link Lang}; return the given default where there is no registered mapping */
     public static Lang filenameToLang(String filename, Lang dftLang)
     {
-        Lang lang = filenameToLang(filename) ;
+        Lang lang = pathnameToLang(filename) ;
         return (lang == null) ? dftLang : lang ;
     }
 
@@ -449,7 +453,7 @@ public class RDFLanguages
     {
         if ( resourceName == null )
             return null ;
-        Lang lang = filenameToLang(resourceName) ;
+        Lang lang = pathnameToLang(resourceName) ;
         if ( lang == null )
             return null ;
         return lang.getContentType() ;
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/system/stream/LocatorFile.java b/jena-arq/src/main/java/org/apache/jena/riot/system/stream/LocatorFile.java
index 984846a..9b45f4f 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/system/stream/LocatorFile.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/system/stream/LocatorFile.java
@@ -34,14 +34,11 @@ import org.slf4j.LoggerFactory ;
 
 /** Location files in the filing system.
  *  A FileLocator can have a "current directory" - this is separate from any
- *  location mapping (see @link{LocationMapping}) as it applies only to files.
+ *  location mapping (see {@link LocationMapper}) as it applies only to files.
  */
 
 public class LocatorFile implements Locator
 {
-    // Implementation note:
-    // Java7: Path.resolve may provide an answer from the intricies of MS Windows
-    
     static Logger log = LoggerFactory.getLogger(LocatorFile.class) ;
     private final String thisDir ;
     private final String thisDirLogStr ;
@@ -50,7 +47,7 @@ public class LocatorFile implements Locator
      * Relative file names are relative to the working directory of the JVM.
      */
     public LocatorFile() { this(null) ; }
-    
+
     /** Create a LocatorFile that uses the argument as it's working directory.
      * <p>
      * The working directory should be a UNIX style file name,
@@ -58,7 +55,7 @@ public class LocatorFile implements Locator
      * <p>
      * For MS Window, if asked to {@link #open} a file name with a drive letter,
      * the code assumes it is not relative to the working directory
-     * of this {@code LocatorFile}.  
+     * of this {@code LocatorFile}.
      */
     public LocatorFile(String dir)
     {
@@ -74,24 +71,24 @@ public class LocatorFile implements Locator
     }
 
     /** Processing the filename for file: or relative filename
-     *  and return a filename suitable for file operations. 
+     *  and return a filename suitable for file operations.
      */
     public String toFileName(String filenameIRI)
     {
-        // Do not use directly : it will ignore the directory. 
+        // Do not use directly : it will ignore the directory.
         //IRILib.filenameToIRI
-        
+
         String scheme = FileUtils.getScheme(filenameIRI) ;
         String fn = filenameIRI ;
         // Windows : C:\\ is not a scheme name!
-        if ( scheme != null ) 
+        if ( scheme != null )
         {
             if ( scheme.length() == 1 )
             {
                 // Not perfect for MS Windows but if thisDir is set then
                 // the main use case is resolving relative (no drive)
                 // filenames against thisDir. Treat the presence of a
-                // drive letter as making this a JVM relative filename. 
+                // drive letter as making this a JVM relative filename.
                 return fn ;
             }
             else if ( scheme.length() > 1 )
@@ -101,14 +98,14 @@ public class LocatorFile implements Locator
                     return null ;
                 fn = IRILib.IRIToFilename(filenameIRI) ;
                 // fall through
-            } 
+            }
         }
         // fn is the file name to use.
         return absolute(fn) ;
     }
 
     /** Make a filename (no URI scheme, no windows drive) absolute if there is
-     * a setting for directory name thisDir  
+     * a setting for directory name thisDir
      */
     private String absolute(String fn)
     {
@@ -116,7 +113,7 @@ public class LocatorFile implements Locator
             fn = thisDir+File.separator+fn ;
         return fn ;
     }
-    
+
     public String getThisDir()
     {
         return thisDir ;
@@ -133,10 +130,10 @@ public class LocatorFile implements Locator
         String fn = toFileName(fileIRI) ;
         if ( fn == null )
             return false ;
-        
+
         return exists$(fn) ;
     }
-    
+
     private boolean exists$(String fn)
     {
         if ( fn.equals("-") )
@@ -144,14 +141,14 @@ public class LocatorFile implements Locator
         return new File(fn).exists() ;
     }
 
-    /** Open anything that looks a bit like a file name */ 
+    /** Open anything that looks a bit like a file name */
     @Override
     public TypedInputStream open(String filenameIRI)
     {
         String fn = toFileName(filenameIRI) ;
         if ( fn == null )
             return null ;
-        
+
         try {
             if ( ! exists$(fn) )
             {
@@ -163,13 +160,13 @@ public class LocatorFile implements Locator
             log.warn("Security problem testing for file", e);
             return null;
         }
-        
+
         try {
             InputStream in = IO.openFileEx(fn) ;
 
             if ( StreamManager.logAllLookups && log.isTraceEnabled() )
                 log.trace("Found: "+filenameIRI+thisDirLogStr) ;
-            
+
             ContentType ct = RDFLanguages.guessContentType(filenameIRI) ;
             return new TypedInputStream(in, ct, filenameIRI) ;
         } catch (IOException ioEx)
@@ -180,7 +177,7 @@ public class LocatorFile implements Locator
             return null ;
         }
     }
-    
+
     @Override
     public String getName()
     {
diff --git a/jena-base/src/main/java/org/apache/jena/atlas/io/IO.java b/jena-base/src/main/java/org/apache/jena/atlas/io/IO.java
index fbc5e15..8b2f336 100644
--- a/jena-base/src/main/java/org/apache/jena/atlas/io/IO.java
+++ b/jena-base/src/main/java/org/apache/jena/atlas/io/IO.java
@@ -30,7 +30,6 @@ import java.util.zip.GZIPOutputStream;
 import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
 import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
 import org.apache.commons.compress.compressors.snappy.SnappyCompressorInputStream;
-import org.apache.commons.io.FilenameUtils;
 import org.apache.jena.atlas.RuntimeIOException;
 import org.apache.jena.atlas.lib.IRILib;
 import org.apache.jena.atlas.lib.StrUtils;
@@ -69,6 +68,10 @@ public class IO
         } catch (IOException ex) { IO.exception(ex); return null; }
     }
 
+    private static final String ext_gz = "gz";
+    private static final String ext_bz2 = "bz2";
+    private static final String ext_sz = "sz";
+
     /** Open an input stream to a file; do not mask IOExceptions.
      * If the filename is null or "-", return System.in
      * If the filename ends in .gz, wrap in GZIPInputStream
@@ -85,24 +88,75 @@ public class IO
             filename = IRILib.decodeHex(filename);
         }
         InputStream in = new FileInputStream(filename);
-        String ext = FilenameUtils.getExtension(filename);
+        String ext = getExtension(filename);
         switch ( ext ) {
             case "":        return in;
-            case "gz":      return new GZIPInputStream(in);
-            case "bz2":     return new BZip2CompressorInputStream(in);
-            case "sz":      return new SnappyCompressorInputStream(in);
+            case ext_gz:    return new GZIPInputStream(in);
+            case ext_bz2:   return new BZip2CompressorInputStream(in);
+            case ext_sz:    return new SnappyCompressorInputStream(in);
         }
         return in;
     }
 
-    private static String[] extensions = { "gz", "bz2", "sz" };
+    // ---- Extracted from Apache CommonsIO : FilenameUtils (2.8.0) because of the drive letter handling.
+    private static final int NOT_FOUND = -1;
+    private static final String EMPTY_STRING = "";
+    private static final String EXTENSION_SEPARATOR = ".";
+    private static final char UNIX_SEPARATOR = '/';
+    private static final char WINDOWS_SEPARATOR = '\\';
+
+    private static int indexOfLastSeparator(final String fileName) {
+        if (fileName == null) {
+            return NOT_FOUND;
+        }
+        final int lastUnixPos = fileName.lastIndexOf(UNIX_SEPARATOR);
+        final int lastWindowsPos = fileName.lastIndexOf(WINDOWS_SEPARATOR);
+        return Math.max(lastUnixPos, lastWindowsPos);
+    }
 
-    /** The filename without any compression extension, or the original filename.
-     *  It tests for compression types handled by {@link #openFileEx}.
+    private static int indexOfExtension(final String fileName) throws IllegalArgumentException {
+        if (fileName == null) {
+            return NOT_FOUND;
+        }
+//        if (isSystemWindows()) {
+//            // Special handling for NTFS ADS: Don't accept colon in the fileName.
+//            final int offset = fileName.indexOf(':', getAdsCriticalOffset(fileName));
+//            if (offset != -1) {
+//                throw new IllegalArgumentException("NTFS ADS separator (':') in file name is forbidden.");
+//            }
+//        }
+        final int extensionPos = fileName.lastIndexOf(EXTENSION_SEPARATOR);
+        final int lastSeparator = indexOfLastSeparator(fileName);
+        return lastSeparator > extensionPos ? NOT_FOUND : extensionPos;
+    }
+
+    private static String getExtension(final String fileName) {
+        if (fileName == null) {
+            return null;
+        }
+        final int index = indexOfExtension(fileName);
+        if (index == -1) {
+            return "";
+        }
+        return fileName.substring(index + 1);
+    }
+
+    // ---- Apache CommonsIO : FilenameUtils
+
+    /**
+     * The filename without any compression extension, or the original filename.
+     * It tests for compression types handled by {@link #openFileEx}.
      */
     static public String filenameNoCompression(String filename) {
-        if ( FilenameUtils.isExtension(filename, extensions) ) {
-            return FilenameUtils.removeExtension(filename);
+        String ext = getExtension(filename);
+        switch ( ext ) {
+            case EMPTY_STRING:
+                return filename;
+            case ext_gz:
+            case ext_bz2:
+            case ext_sz:
+                // +1 for the "."
+                return filename.substring(0, filename.length()-(ext.length()+1));
         }
         return filename;
     }
@@ -180,7 +234,7 @@ public class IO
             filename = IRILib.decodeHex(filename);
         }
         OutputStream out = new FileOutputStream(filename);
-        String ext = FilenameUtils.getExtension(filename);
+        String ext = getExtension(filename);
         switch ( ext ) {
             case "":        return out;
             case "gz":      return new GZIPOutputStream(out);
diff --git a/jena-base/src/main/java/org/apache/jena/atlas/lib/IRILib.java b/jena-base/src/main/java/org/apache/jena/atlas/lib/IRILib.java
index 878eca1..3d35dd8 100644
--- a/jena-base/src/main/java/org/apache/jena/atlas/lib/IRILib.java
+++ b/jena-base/src/main/java/org/apache/jena/atlas/lib/IRILib.java
@@ -181,7 +181,7 @@ public class IRILib
         // so need strip the leading "/"
         fn = fixupWindows(fn);
 
-        return decode(fn) ;
+        return decodeHex(fn) ;
     }
 
     /** Convert a plain file name (no file:) to a file: URL */
diff --git a/jena-fuseki2/jena-fuseki-core/src/main/java/org/apache/jena/fuseki/system/Upload.java b/jena-fuseki2/jena-fuseki-core/src/main/java/org/apache/jena/fuseki/system/Upload.java
index ae619fd..034ed31 100644
--- a/jena-fuseki2/jena-fuseki-core/src/main/java/org/apache/jena/fuseki/system/Upload.java
+++ b/jena-fuseki2/jena-fuseki-core/src/main/java/org/apache/jena/fuseki/system/Upload.java
@@ -134,7 +134,7 @@ public class Upload {
                     String name = fileStream.getName();
                     if ( name == null || name.equals("") )
                         ServletOps.errorBadRequest("No name for content - can't determine RDF syntax");
-                    lang = RDFLanguages.filenameToLang(name);
+                    lang = RDFLanguages.pathnameToLang(name);
                     if (name.endsWith(".gz"))
                         input = new GZIPInputStream(input);
                 }
@@ -240,7 +240,7 @@ public class Upload {
 
                     lang = RDFLanguages.contentTypeToLang(ct.getContentTypeStr());
                     if ( lang == null ) {
-                        lang = RDFLanguages.filenameToLang(name);
+                        lang = RDFLanguages.pathnameToLang(name);
 
                         // JENA-600 filenameToLang() strips off certain
                         // extensions such as .gz and