You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2020/11/24 22:52:37 UTC
[jena] 01/01: JENA-2003: Handle file URIs with URI scheme name
This is an automated email from the ASF dual-hosted git repository.
andy pushed a commit to branch windows-file
in repository https://gitbox.apache.org/repos/asf/jena.git
commit 13dffc12e7e0877ceb9c13f86f1e076608ddf887
Author: Andy Seaborne <an...@apache.org>
AuthorDate: Tue Nov 24 22:43:22 2020 +0000
JENA-2003: Handle file URIs with URI scheme name
---
.../main/java/org/apache/jena/riot/RDFDataMgr.java | 2 +-
.../java/org/apache/jena/riot/RDFLanguages.java | 28 ++++----
.../jena/riot/system/stream/LocatorFile.java | 39 +++++------
.../src/main/java/org/apache/jena/atlas/io/IO.java | 76 +++++++++++++++++++---
.../java/org/apache/jena/atlas/lib/IRILib.java | 2 +-
.../java/org/apache/jena/fuseki/system/Upload.java | 4 +-
6 files changed, 105 insertions(+), 46 deletions(-)
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/RDFDataMgr.java b/jena-arq/src/main/java/org/apache/jena/riot/RDFDataMgr.java
index 6e98e1a..aa432af 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/RDFDataMgr.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/RDFDataMgr.java
@@ -707,7 +707,7 @@ public class RDFDataMgr
if ( base == null )
base = SysRIOT.chooseBaseIRI(uri) ;
if ( hintLang == null )
- hintLang = RDFLanguages.filenameToLang(uri) ;
+ hintLang = RDFLanguages.pathnameToLang(uri) ;
parseFromURI(sink, uri, base, hintLang, context);
}
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/RDFLanguages.java b/jena-arq/src/main/java/org/apache/jena/riot/RDFLanguages.java
index 0c8393e..33a1a38 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/RDFLanguages.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/RDFLanguages.java
@@ -28,7 +28,8 @@ import org.apache.jena.atlas.web.ContentType ;
import org.apache.jena.atlas.web.MediaType ;
import org.apache.jena.util.FileUtils ;
-/** Central registry of RDF languages and syntaxes.
+/**
+ * Central registry of RDF languages and syntaxes.
* @see RDFParserRegistry
* @see RDFFormat
*/
@@ -399,31 +400,34 @@ public class RDFLanguages
}
/** Try to map a resource name to a {@link Lang}; return null on no registered mapping */
- public static Lang resourceNameToLang(String resourceName) { return filenameToLang(resourceName) ; }
+ public static Lang resourceNameToLang(String resourceName) { return pathnameToLang(resourceName) ; }
/** Try to map a resource name to a {@link Lang}; return the given default where there is no registered mapping */
public static Lang resourceNameToLang(String resourceName, Lang dftLang) { return filenameToLang(resourceName, dftLang) ; }
- /** Try to map a URI or file name to a {@link Lang}; return null on no registered mapping. */
- public static Lang filenameToLang(String filename)
+ /** Try to map a file name to a {@link Lang}; return null on no registered mapping. */
+ public static Lang filenameToLang(String uriOrFilename) { return pathnameToLang(uriOrFilename); }
+
+ /** Try to map a URI or URI path name to a {@link Lang}; return null on no registered mapping. */
+ public static Lang pathnameToLang(String pathname)
{
- if ( filename == null )
+ if ( pathname == null )
return null;
// Remove any URI fragment (there can be only one # in a URI).
// Pragmatically, assume any # is URI related.
// URIs can be relative.
- int iHash = filename.indexOf('#');
+ int iHash = pathname.indexOf('#');
if ( iHash > 0 )
- filename = filename.substring(0, iHash);
- // Gzip or BZip2 compressed?
- filename = IO.filenameNoCompression(filename);
- return fileExtToLang(FileUtils.getFilenameExt(filename));
+ pathname = pathname.substring(0, iHash);
+ // Compressed?
+ pathname = IO.filenameNoCompression(pathname);
+ return fileExtToLang(FileUtils.getFilenameExt(pathname));
}
/** Try to map a file name to a {@link Lang}; return the given default where there is no registered mapping */
public static Lang filenameToLang(String filename, Lang dftLang)
{
- Lang lang = filenameToLang(filename) ;
+ Lang lang = pathnameToLang(filename) ;
return (lang == null) ? dftLang : lang ;
}
@@ -449,7 +453,7 @@ public class RDFLanguages
{
if ( resourceName == null )
return null ;
- Lang lang = filenameToLang(resourceName) ;
+ Lang lang = pathnameToLang(resourceName) ;
if ( lang == null )
return null ;
return lang.getContentType() ;
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/system/stream/LocatorFile.java b/jena-arq/src/main/java/org/apache/jena/riot/system/stream/LocatorFile.java
index 984846a..9b45f4f 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/system/stream/LocatorFile.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/system/stream/LocatorFile.java
@@ -34,14 +34,11 @@ import org.slf4j.LoggerFactory ;
/** Locates files in the filing system.
* A LocatorFile can have a "current directory" - this is separate from any
- * location mapping (see @link{LocationMapping}) as it applies only to files.
+ * location mapping (see {@link LocationMapper}) as it applies only to files.
*/
public class LocatorFile implements Locator
{
- // Implementation note:
- // Java7: Path.resolve may provide an answer from the intricies of MS Windows
-
static Logger log = LoggerFactory.getLogger(LocatorFile.class) ;
private final String thisDir ;
private final String thisDirLogStr ;
@@ -50,7 +47,7 @@ public class LocatorFile implements Locator
* Relative file names are relative to the working directory of the JVM.
*/
public LocatorFile() { this(null) ; }
-
+
/** Create a LocatorFile that uses the argument as its working directory.
* <p>
* The working directory should be a UNIX style file name,
@@ -58,7 +55,7 @@ public class LocatorFile implements Locator
* <p>
* For MS Windows, if asked to {@link #open} a file name with a drive letter,
* the code assumes it is not relative to the working directory
- * of this {@code LocatorFile}.
+ * of this {@code LocatorFile}.
*/
public LocatorFile(String dir)
{
@@ -74,24 +71,24 @@ public class LocatorFile implements Locator
}
/** Process the filename (a file: IRI or a relative filename)
- * and return a filename suitable for file operations.
+ * and return a filename suitable for file operations.
*/
public String toFileName(String filenameIRI)
{
- // Do not use directly : it will ignore the directory.
+ // Do not use directly : it will ignore the directory.
//IRILib.filenameToIRI
-
+
String scheme = FileUtils.getScheme(filenameIRI) ;
String fn = filenameIRI ;
// Windows : C:\\ is not a scheme name!
- if ( scheme != null )
+ if ( scheme != null )
{
if ( scheme.length() == 1 )
{
// Not perfect for MS Windows but if thisDir is set then
// the main use case is resolving relative (no drive)
// filenames against thisDir. Treat the presence of a
- // drive letter as making this a JVM relative filename.
+ // drive letter as making this a JVM relative filename.
return fn ;
}
else if ( scheme.length() > 1 )
@@ -101,14 +98,14 @@ public class LocatorFile implements Locator
return null ;
fn = IRILib.IRIToFilename(filenameIRI) ;
// fall through
- }
+ }
}
// fn is the file name to use.
return absolute(fn) ;
}
/** Make a filename (no URI scheme, no windows drive) absolute if there is
- * a setting for directory name thisDir
+ * a setting for directory name thisDir
*/
private String absolute(String fn)
{
@@ -116,7 +113,7 @@ public class LocatorFile implements Locator
fn = thisDir+File.separator+fn ;
return fn ;
}
-
+
public String getThisDir()
{
return thisDir ;
@@ -133,10 +130,10 @@ public class LocatorFile implements Locator
String fn = toFileName(fileIRI) ;
if ( fn == null )
return false ;
-
+
return exists$(fn) ;
}
-
+
private boolean exists$(String fn)
{
if ( fn.equals("-") )
@@ -144,14 +141,14 @@ public class LocatorFile implements Locator
return new File(fn).exists() ;
}
- /** Open anything that looks a bit like a file name */
+ /** Open anything that looks a bit like a file name */
@Override
public TypedInputStream open(String filenameIRI)
{
String fn = toFileName(filenameIRI) ;
if ( fn == null )
return null ;
-
+
try {
if ( ! exists$(fn) )
{
@@ -163,13 +160,13 @@ public class LocatorFile implements Locator
log.warn("Security problem testing for file", e);
return null;
}
-
+
try {
InputStream in = IO.openFileEx(fn) ;
if ( StreamManager.logAllLookups && log.isTraceEnabled() )
log.trace("Found: "+filenameIRI+thisDirLogStr) ;
-
+
ContentType ct = RDFLanguages.guessContentType(filenameIRI) ;
return new TypedInputStream(in, ct, filenameIRI) ;
} catch (IOException ioEx)
@@ -180,7 +177,7 @@ public class LocatorFile implements Locator
return null ;
}
}
-
+
@Override
public String getName()
{
diff --git a/jena-base/src/main/java/org/apache/jena/atlas/io/IO.java b/jena-base/src/main/java/org/apache/jena/atlas/io/IO.java
index fbc5e15..dd57a87 100644
--- a/jena-base/src/main/java/org/apache/jena/atlas/io/IO.java
+++ b/jena-base/src/main/java/org/apache/jena/atlas/io/IO.java
@@ -69,6 +69,10 @@ public class IO
} catch (IOException ex) { IO.exception(ex); return null; }
}
+ private static final String ext_gz = "gz";
+ private static final String ext_bz2 = "bz2";
+ private static final String ext_sz = "sz";
+
/** Open an input stream to a file; do not mask IOExceptions.
* If the filename is null or "-", return System.in
* If the filename ends in .gz, wrap in GZIPInputStream
@@ -85,24 +89,78 @@ public class IO
filename = IRILib.decodeHex(filename);
}
InputStream in = new FileInputStream(filename);
- String ext = FilenameUtils.getExtension(filename);
+ String ext = getExtension(filename);
switch ( ext ) {
case "": return in;
- case "gz": return new GZIPInputStream(in);
- case "bz2": return new BZip2CompressorInputStream(in);
- case "sz": return new SnappyCompressorInputStream(in);
+ case ext_gz: return new GZIPInputStream(in);
+ case ext_bz2: return new BZip2CompressorInputStream(in);
+ case ext_sz: return new SnappyCompressorInputStream(in);
}
return in;
}
- private static String[] extensions = { "gz", "bz2", "sz" };
+ // ---- Extracted from Apache CommonsIO : FilenameUtils (2.8.0) because of the drive letter handling.
+ private static final int NOT_FOUND = -1;
+ private static final String EMPTY_STRING = "";
+ private static final String EXTENSION_SEPARATOR = ".";
+ private static final char UNIX_SEPARATOR = '/';
+ private static final char WINDOWS_SEPARATOR = '\\';
+
+ private static int indexOfLastSeparator(final String fileName) {
+ if (fileName == null) {
+ return NOT_FOUND;
+ }
+ final int lastUnixPos = fileName.lastIndexOf(UNIX_SEPARATOR);
+ final int lastWindowsPos = fileName.lastIndexOf(WINDOWS_SEPARATOR);
+ return Math.max(lastUnixPos, lastWindowsPos);
+ }
+
+ private static int indexOfExtension(final String fileName) throws IllegalArgumentException {
+ if (fileName == null) {
+ return NOT_FOUND;
+ }
+// if (isSystemWindows()) {
+// // Special handling for NTFS ADS: Don't accept colon in the fileName.
+// final int offset = fileName.indexOf(':', getAdsCriticalOffset(fileName));
+// if (offset != -1) {
+// throw new IllegalArgumentException("NTFS ADS separator (':') in file name is forbidden.");
+// }
+// }
+ final int extensionPos = fileName.lastIndexOf(EXTENSION_SEPARATOR);
+ final int lastSeparator = indexOfLastSeparator(fileName);
+ return lastSeparator > extensionPos ? NOT_FOUND : extensionPos;
+ }
+
+ private static String getExtension(final String fileName) {
+ if (fileName == null) {
+ return null;
+ }
+ final int index = indexOfExtension(fileName);
+ if (index == -1) {
+ return "";
+ }
+ return fileName.substring(index + 1);
+ }
+
+ // ---- Apache CommonsIO : FilenameUtils
- /** The filename without any compression extension, or the original filename.
- * It tests for compression types handled by {@link #openFileEx}.
+ /**
+ * The filename without any compression extension, or the original filename.
+ * It tests for compression types handled by {@link #openFileEx}.
*/
static public String filenameNoCompression(String filename) {
- if ( FilenameUtils.isExtension(filename, extensions) ) {
- return FilenameUtils.removeExtension(filename);
+ // Apache CommonsIO 2.7+ rejects file names containing ':' where it is
+ // not part of a drive letter (e.g. C:/) -- so "file:D.ttl" is rejected.
+ // The extension-handling code was therefore extracted into this class.
+ String ext = getExtension(filename);
+ switch ( ext ) {
+ case EMPTY_STRING:
+ return filename;
+ case ext_gz:
+ case ext_bz2:
+ case ext_sz:
+ // +1 for the "."
+ return filename.substring(0, filename.length()-(ext.length()+1));
}
return filename;
}
diff --git a/jena-base/src/main/java/org/apache/jena/atlas/lib/IRILib.java b/jena-base/src/main/java/org/apache/jena/atlas/lib/IRILib.java
index 878eca1..3d35dd8 100644
--- a/jena-base/src/main/java/org/apache/jena/atlas/lib/IRILib.java
+++ b/jena-base/src/main/java/org/apache/jena/atlas/lib/IRILib.java
@@ -181,7 +181,7 @@ public class IRILib
// so need strip the leading "/"
fn = fixupWindows(fn);
- return decode(fn) ;
+ return decodeHex(fn) ;
}
/** Convert a plain file name (no file:) to a file: URL */
diff --git a/jena-fuseki2/jena-fuseki-core/src/main/java/org/apache/jena/fuseki/system/Upload.java b/jena-fuseki2/jena-fuseki-core/src/main/java/org/apache/jena/fuseki/system/Upload.java
index ae619fd..034ed31 100644
--- a/jena-fuseki2/jena-fuseki-core/src/main/java/org/apache/jena/fuseki/system/Upload.java
+++ b/jena-fuseki2/jena-fuseki-core/src/main/java/org/apache/jena/fuseki/system/Upload.java
@@ -134,7 +134,7 @@ public class Upload {
String name = fileStream.getName();
if ( name == null || name.equals("") )
ServletOps.errorBadRequest("No name for content - can't determine RDF syntax");
- lang = RDFLanguages.filenameToLang(name);
+ lang = RDFLanguages.pathnameToLang(name);
if (name.endsWith(".gz"))
input = new GZIPInputStream(input);
}
@@ -240,7 +240,7 @@ public class Upload {
lang = RDFLanguages.contentTypeToLang(ct.getContentTypeStr());
if ( lang == null ) {
- lang = RDFLanguages.filenameToLang(name);
+ lang = RDFLanguages.pathnameToLang(name);
// JENA-600 filenameToLang() strips off certain
// extensions such as .gz and