You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2020/11/24 22:52:37 UTC

[jena] 01/01: JENA-2003: Handle file URIs with URI scheme name

This is an automated email from the ASF dual-hosted git repository.

andy pushed a commit to branch windows-file
in repository https://gitbox.apache.org/repos/asf/jena.git

commit 13dffc12e7e0877ceb9c13f86f1e076608ddf887
Author: Andy Seaborne <an...@apache.org>
AuthorDate: Tue Nov 24 22:43:22 2020 +0000

    JENA-2003: Handle file URIs with URI scheme name
---
 .../main/java/org/apache/jena/riot/RDFDataMgr.java |  2 +-
 .../java/org/apache/jena/riot/RDFLanguages.java    | 28 ++++----
 .../jena/riot/system/stream/LocatorFile.java       | 39 +++++------
 .../src/main/java/org/apache/jena/atlas/io/IO.java | 76 +++++++++++++++++++---
 .../java/org/apache/jena/atlas/lib/IRILib.java     |  2 +-
 .../java/org/apache/jena/fuseki/system/Upload.java |  4 +-
 6 files changed, 105 insertions(+), 46 deletions(-)

diff --git a/jena-arq/src/main/java/org/apache/jena/riot/RDFDataMgr.java b/jena-arq/src/main/java/org/apache/jena/riot/RDFDataMgr.java
index 6e98e1a..aa432af 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/RDFDataMgr.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/RDFDataMgr.java
@@ -707,7 +707,7 @@ public class RDFDataMgr
         if ( base == null )
             base = SysRIOT.chooseBaseIRI(uri) ;
         if ( hintLang == null )
-            hintLang = RDFLanguages.filenameToLang(uri) ;
+            hintLang = RDFLanguages.pathnameToLang(uri) ;
         parseFromURI(sink, uri, base, hintLang, context);
     }
 
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/RDFLanguages.java b/jena-arq/src/main/java/org/apache/jena/riot/RDFLanguages.java
index 0c8393e..33a1a38 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/RDFLanguages.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/RDFLanguages.java
@@ -28,7 +28,8 @@ import org.apache.jena.atlas.web.ContentType ;
 import org.apache.jena.atlas.web.MediaType ;
 import org.apache.jena.util.FileUtils ;
 
-/** Central registry of RDF languages and syntaxes.
+/**
+ * Central registry of RDF languages and syntaxes.
  * @see RDFParserRegistry
  * @see RDFFormat
  */
@@ -399,31 +400,34 @@ public class RDFLanguages
     }
 
     /** Try to map a resource name to a {@link Lang}; return null on no registered mapping */
-    public static Lang resourceNameToLang(String resourceName) { return filenameToLang(resourceName) ; }
+    public static Lang resourceNameToLang(String resourceName) { return pathnameToLang(resourceName) ; }
 
     /** Try to map a resource name to a {@link Lang}; return the given default where there is no registered mapping */
     public static Lang resourceNameToLang(String resourceName, Lang dftLang) { return filenameToLang(resourceName, dftLang) ; }
 
-    /** Try to map a URI or file name to a {@link Lang}; return null on no registered mapping. */
-    public static Lang filenameToLang(String filename)
+    /** Try to map a file name to a {@link Lang}; return null on no registered mapping. */
+    public static Lang filenameToLang(String uriOrFilename) { return pathnameToLang(uriOrFilename); }
+
+    /** Try to map a URI or URI path name to a {@link Lang}; return null on no registered mapping. */
+    public static Lang pathnameToLang(String pathname)
     {
-        if ( filename == null )
+        if ( pathname == null )
             return null;
         // Remove any URI fragment (there can be only one # in a URI).
         // Pragmatically, assume any # is URI related.
         // URIs can be relative.
-        int iHash = filename.indexOf('#');
+        int iHash = pathname.indexOf('#');
         if ( iHash  > 0 )
-            filename = filename.substring(0, iHash);
-        // Gzip or BZip2 compressed?
-        filename = IO.filenameNoCompression(filename);
-        return fileExtToLang(FileUtils.getFilenameExt(filename));
+            pathname = pathname.substring(0, iHash);
+        // Compressed?
+        pathname = IO.filenameNoCompression(pathname);
+        return fileExtToLang(FileUtils.getFilenameExt(pathname));
     }
 
     /** Try to map a file name to a {@link Lang}; return the given default where there is no registered mapping */
     public static Lang filenameToLang(String filename, Lang dftLang)
     {
-        Lang lang = filenameToLang(filename) ;
+        Lang lang = pathnameToLang(filename) ;
         return (lang == null) ? dftLang : lang ;
     }
 
@@ -449,7 +453,7 @@ public class RDFLanguages
     {
         if ( resourceName == null )
             return null ;
-        Lang lang = filenameToLang(resourceName) ;
+        Lang lang = pathnameToLang(resourceName) ;
         if ( lang == null )
             return null ;
         return lang.getContentType() ;
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/system/stream/LocatorFile.java b/jena-arq/src/main/java/org/apache/jena/riot/system/stream/LocatorFile.java
index 984846a..9b45f4f 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/system/stream/LocatorFile.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/system/stream/LocatorFile.java
@@ -34,14 +34,11 @@ import org.slf4j.LoggerFactory ;
 
 /** Location files in the filing system.
  *  A FileLocator can have a "current directory" - this is separate from any
- *  location mapping (see @link{LocationMapping}) as it applies only to files.
+ *  location mapping (see {@link LocationMapper}) as it applies only to files.
  */
 
 public class LocatorFile implements Locator
 {
-    // Implementation note:
-    // Java7: Path.resolve may provide an answer from the intricies of MS Windows
-    
     static Logger log = LoggerFactory.getLogger(LocatorFile.class) ;
     private final String thisDir ;
     private final String thisDirLogStr ;
@@ -50,7 +47,7 @@ public class LocatorFile implements Locator
      * Relative file names are relative to the working directory of the JVM.
      */
     public LocatorFile() { this(null) ; }
-    
+
     /** Create a LocatorFile that uses the argument as it's working directory.
      * <p>
      * The working directory should be a UNIX style file name,
@@ -58,7 +55,7 @@ public class LocatorFile implements Locator
      * <p>
      * For MS Window, if asked to {@link #open} a file name with a drive letter,
      * the code assumes it is not relative to the working directory
-     * of this {@code LocatorFile}.  
+     * of this {@code LocatorFile}.
      */
     public LocatorFile(String dir)
     {
@@ -74,24 +71,24 @@ public class LocatorFile implements Locator
     }
 
     /** Processing the filename for file: or relative filename
-     *  and return a filename suitable for file operations. 
+     *  and return a filename suitable for file operations.
      */
     public String toFileName(String filenameIRI)
     {
-        // Do not use directly : it will ignore the directory. 
+        // Do not use directly : it will ignore the directory.
         //IRILib.filenameToIRI
-        
+
         String scheme = FileUtils.getScheme(filenameIRI) ;
         String fn = filenameIRI ;
         // Windows : C:\\ is not a scheme name!
-        if ( scheme != null ) 
+        if ( scheme != null )
         {
             if ( scheme.length() == 1 )
             {
                 // Not perfect for MS Windows but if thisDir is set then
                 // the main use case is resolving relative (no drive)
                 // filenames against thisDir. Treat the presence of a
-                // drive letter as making this a JVM relative filename. 
+                // drive letter as making this a JVM relative filename.
                 return fn ;
             }
             else if ( scheme.length() > 1 )
@@ -101,14 +98,14 @@ public class LocatorFile implements Locator
                     return null ;
                 fn = IRILib.IRIToFilename(filenameIRI) ;
                 // fall through
-            } 
+            }
         }
         // fn is the file name to use.
         return absolute(fn) ;
     }
 
     /** Make a filename (no URI scheme, no windows drive) absolute if there is
-     * a setting for directory name thisDir  
+     * a setting for directory name thisDir
      */
     private String absolute(String fn)
     {
@@ -116,7 +113,7 @@ public class LocatorFile implements Locator
             fn = thisDir+File.separator+fn ;
         return fn ;
     }
-    
+
     public String getThisDir()
     {
         return thisDir ;
@@ -133,10 +130,10 @@ public class LocatorFile implements Locator
         String fn = toFileName(fileIRI) ;
         if ( fn == null )
             return false ;
-        
+
         return exists$(fn) ;
     }
-    
+
     private boolean exists$(String fn)
     {
         if ( fn.equals("-") )
@@ -144,14 +141,14 @@ public class LocatorFile implements Locator
         return new File(fn).exists() ;
     }
 
-    /** Open anything that looks a bit like a file name */ 
+    /** Open anything that looks a bit like a file name */
     @Override
     public TypedInputStream open(String filenameIRI)
     {
         String fn = toFileName(filenameIRI) ;
         if ( fn == null )
             return null ;
-        
+
         try {
             if ( ! exists$(fn) )
             {
@@ -163,13 +160,13 @@ public class LocatorFile implements Locator
             log.warn("Security problem testing for file", e);
             return null;
         }
-        
+
         try {
             InputStream in = IO.openFileEx(fn) ;
 
             if ( StreamManager.logAllLookups && log.isTraceEnabled() )
                 log.trace("Found: "+filenameIRI+thisDirLogStr) ;
-            
+
             ContentType ct = RDFLanguages.guessContentType(filenameIRI) ;
             return new TypedInputStream(in, ct, filenameIRI) ;
         } catch (IOException ioEx)
@@ -180,7 +177,7 @@ public class LocatorFile implements Locator
             return null ;
         }
     }
-    
+
     @Override
     public String getName()
     {
diff --git a/jena-base/src/main/java/org/apache/jena/atlas/io/IO.java b/jena-base/src/main/java/org/apache/jena/atlas/io/IO.java
index fbc5e15..dd57a87 100644
--- a/jena-base/src/main/java/org/apache/jena/atlas/io/IO.java
+++ b/jena-base/src/main/java/org/apache/jena/atlas/io/IO.java
@@ -69,6 +69,10 @@ public class IO
         } catch (IOException ex) { IO.exception(ex); return null; }
     }
 
+    private static final String ext_gz = "gz";
+    private static final String ext_bz2 = "bz2";
+    private static final String ext_sz = "sz";
+
     /** Open an input stream to a file; do not mask IOExceptions.
      * If the filename is null or "-", return System.in
      * If the filename ends in .gz, wrap in GZIPInputStream
@@ -85,24 +89,78 @@ public class IO
             filename = IRILib.decodeHex(filename);
         }
         InputStream in = new FileInputStream(filename);
-        String ext = FilenameUtils.getExtension(filename);
+        String ext = getExtension(filename);
         switch ( ext ) {
             case "":        return in;
-            case "gz":      return new GZIPInputStream(in);
-            case "bz2":     return new BZip2CompressorInputStream(in);
-            case "sz":      return new SnappyCompressorInputStream(in);
+            case ext_gz:    return new GZIPInputStream(in);
+            case ext_bz2:   return new BZip2CompressorInputStream(in);
+            case ext_sz:    return new SnappyCompressorInputStream(in);
         }
         return in;
     }
 
-    private static String[] extensions = { "gz", "bz2", "sz" };
+    // ---- Extracted from Apache CommonsIO : FilenameUtils (2.8.0) because of the drive letter handling.
+    private static final int NOT_FOUND = -1;
+    private static final String EMPTY_STRING = "";
+    private static final String EXTENSION_SEPARATOR = ".";
+    private static final char UNIX_SEPARATOR = '/';
+    private static final char WINDOWS_SEPARATOR = '\\';
+
+    private static int indexOfLastSeparator(final String fileName) {
+        if (fileName == null) {
+            return NOT_FOUND;
+        }
+        final int lastUnixPos = fileName.lastIndexOf(UNIX_SEPARATOR);
+        final int lastWindowsPos = fileName.lastIndexOf(WINDOWS_SEPARATOR);
+        return Math.max(lastUnixPos, lastWindowsPos);
+    }
+
+    private static int indexOfExtension(final String fileName) throws IllegalArgumentException {
+        if (fileName == null) {
+            return NOT_FOUND;
+        }
+//        if (isSystemWindows()) {
+//            // Special handling for NTFS ADS: Don't accept colon in the fileName.
+//            final int offset = fileName.indexOf(':', getAdsCriticalOffset(fileName));
+//            if (offset != -1) {
+//                throw new IllegalArgumentException("NTFS ADS separator (':') in file name is forbidden.");
+//            }
+//        }
+        final int extensionPos = fileName.lastIndexOf(EXTENSION_SEPARATOR);
+        final int lastSeparator = indexOfLastSeparator(fileName);
+        return lastSeparator > extensionPos ? NOT_FOUND : extensionPos;
+    }
+
+    private static String getExtension(final String fileName) {
+        if (fileName == null) {
+            return null;
+        }
+        final int index = indexOfExtension(fileName);
+        if (index == -1) {
+            return "";
+        }
+        return fileName.substring(index + 1);
+    }
+
+    // ---- Apache CommonsIO : FilenameUtils
 
-    /** The filename without any compression extension, or the original filename.
-     *  It tests for compression types handled by {@link #openFileEx}.
+    /**
+     * The filename without any compression extension, or the original filename.
+     * It tests for compression types handled by {@link #openFileEx}.
      */
     static public String filenameNoCompression(String filename) {
-        if ( FilenameUtils.isExtension(filename, extensions) ) {
-            return FilenameUtils.removeExtension(filename);
+        // Apache CommonsIO 2.7+ rejects file names containing ':' where it
+        // is not part of a drive letter (e.g. C:/) -- "file:D.ttl" is rejected.
+        // Code extracted.
+        String ext = getExtension(filename);
+        switch ( ext ) {
+            case EMPTY_STRING:
+                return filename;
+            case ext_gz:
+            case ext_bz2:
+            case ext_sz:
+                // +1 for the "."
+                return filename.substring(0, filename.length()-(ext.length()+1));
         }
         return filename;
     }
diff --git a/jena-base/src/main/java/org/apache/jena/atlas/lib/IRILib.java b/jena-base/src/main/java/org/apache/jena/atlas/lib/IRILib.java
index 878eca1..3d35dd8 100644
--- a/jena-base/src/main/java/org/apache/jena/atlas/lib/IRILib.java
+++ b/jena-base/src/main/java/org/apache/jena/atlas/lib/IRILib.java
@@ -181,7 +181,7 @@ public class IRILib
         // so need strip the leading "/"
         fn = fixupWindows(fn);
 
-        return decode(fn) ;
+        return decodeHex(fn) ;
     }
 
     /** Convert a plain file name (no file:) to a file: URL */
diff --git a/jena-fuseki2/jena-fuseki-core/src/main/java/org/apache/jena/fuseki/system/Upload.java b/jena-fuseki2/jena-fuseki-core/src/main/java/org/apache/jena/fuseki/system/Upload.java
index ae619fd..034ed31 100644
--- a/jena-fuseki2/jena-fuseki-core/src/main/java/org/apache/jena/fuseki/system/Upload.java
+++ b/jena-fuseki2/jena-fuseki-core/src/main/java/org/apache/jena/fuseki/system/Upload.java
@@ -134,7 +134,7 @@ public class Upload {
                     String name = fileStream.getName();
                     if ( name == null || name.equals("") )
                         ServletOps.errorBadRequest("No name for content - can't determine RDF syntax");
-                    lang = RDFLanguages.filenameToLang(name);
+                    lang = RDFLanguages.pathnameToLang(name);
                     if (name.endsWith(".gz"))
                         input = new GZIPInputStream(input);
                 }
@@ -240,7 +240,7 @@ public class Upload {
 
                     lang = RDFLanguages.contentTypeToLang(ct.getContentTypeStr());
                     if ( lang == null ) {
-                        lang = RDFLanguages.filenameToLang(name);
+                        lang = RDFLanguages.pathnameToLang(name);
 
                         // JENA-600 filenameToLang() strips off certain
                         // extensions such as .gz and