You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/11/09 03:40:27 UTC

[1/2] systemml git commit: [SYSTEMML-1990] New rewrite for order operation chains

Repository: systemml
Updated Branches:
  refs/heads/master a03065299 -> a66126d49


[SYSTEMML-1990] New rewrite for order operation chains

This patch introduces a new rewrite that merges chains of consecutive
order operations (returning data, with scalar order-by columns, a
consistent descending configuration, and single consumers) into a single
order operation with multiple order-by columns.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/f366c469
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/f366c469
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/f366c469

Branch: refs/heads/master
Commit: f366c46960aac412a862c20e07e5f844b58b05a7
Parents: a030652
Author: Matthias Boehm <mb...@gmail.com>
Authored: Wed Nov 8 17:41:35 2017 -0800
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Wed Nov 8 17:41:35 2017 -0800

----------------------------------------------------------------------
 .../sysml/hops/rewrite/HopRewriteUtils.java     | 72 +++++++++++++++++---
 .../RewriteAlgebraicSimplificationStatic.java   | 60 +++++++++++++++-
 .../cp/StringInitCPInstruction.java             |  2 +-
 .../reorg/MultipleOrderByColsTest.java          | 30 +++++++-
 .../scripts/functions/reorg/OrderMultiBy.dml    |  5 --
 .../scripts/functions/reorg/OrderMultiBy2.R     | 42 ++++++++++++
 .../scripts/functions/reorg/OrderMultiBy2.dml   | 29 ++++++++
 7 files changed, 223 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/f366c469/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
index 15cc2cb..28b2189 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
@@ -67,6 +67,7 @@ import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.parser.Expression.ValueType;
 import org.apache.sysml.runtime.instructions.cp.ScalarObject;
 import org.apache.sysml.runtime.instructions.cp.ScalarObjectFactory;
+import org.apache.sysml.runtime.instructions.cp.StringInitCPInstruction;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 import org.apache.sysml.runtime.util.UtilFunctions;
 
@@ -199,6 +200,17 @@ public class HopRewriteUtils
 			&& getDoubleValueSafe((LiteralOp)hop)==val);
 	}
 	
+	public static boolean isLiteralOfValue( Hop hop, boolean val ) {
+		if( !(hop instanceof LiteralOp) || hop.getValueType()!=ValueType.BOOLEAN )
+			return false; //only boolean literals can match
+		try {
+			return ((LiteralOp)hop).getBooleanValue() == val;
+		}
+		catch(HopsException e) {
+			throw new RuntimeException(e);
+		}
+	}
+	
 	public static ScalarObject getScalarObject( LiteralOp op )
 	{
 		try {
@@ -481,6 +493,32 @@ public class HopRewriteUtils
 		return datagen;
 	}
 	
+	/** Creates a string-initialized (SINIT) datagen hop with the given rows and cols
+	 *  parameters, whose data string holds the given literals in list order. */
+	public static Hop createDataGenOpByVal( ArrayList<LiteralOp> values, long rows, long cols ) throws HopsException {
+		//serialize the literals into one delimiter-separated string
+		StringBuilder buff = new StringBuilder();
+		for( LiteralOp value : values ) {
+			if( buff.length() > 0 )
+				buff.append(StringInitCPInstruction.DELIM);
+			buff.append(value.getStringValue());
+		}
+		LiteralOp data = new LiteralOp(buff.toString());
+		//the serialized string is passed as both the min and the max parameter
+		HashMap<String, Hop> args = new HashMap<>();
+		args.put(DataExpression.RAND_ROWS, new LiteralOp(rows));
+		args.put(DataExpression.RAND_COLS, new LiteralOp(cols));
+		args.put(DataExpression.RAND_MIN, data);
+		args.put(DataExpression.RAND_MAX, data);
+		args.put(DataExpression.RAND_SEED, new LiteralOp(DataGenOp.UNSPECIFIED_SEED));
+		//build the operator with the configured blocksize and the first literal's line info
+		Hop ret = new DataGenOp(DataGenMethod.SINIT, new DataIdentifier("tmp"), args);
+		int blen = ConfigurationManager.getBlocksize();
+		ret.setOutputBlocksizes(blen, blen);
+		copyLineNumbers(values.get(0), ret);
+		return ret;
+	}
+	
 	public static boolean isDataGenOp(Hop hop, DataGenMethod... ops) {
 		return (hop instanceof DataGenOp 
 			&& ArrayUtils.contains(ops, ((DataGenOp)hop).getOp()));
@@ -506,14 +544,21 @@ public class HopRewriteUtils
 		return createReorg(input, ReOrgOp.TRANSPOSE);
 	}
 	
-	public static ReorgOp createReorg(Hop input, ReOrgOp rop)
-	{
-		ReorgOp transpose = new ReorgOp(input.getName(), input.getDataType(), input.getValueType(), rop, input);
-		transpose.setOutputBlocksizes(input.getRowsInBlock(), input.getColsInBlock());
-		copyLineNumbers(input, transpose);
-		transpose.refreshSizeInformation();	
-		
-		return transpose;
+	public static ReorgOp createReorg(Hop input, ReOrgOp rop) {
+		ReorgOp reorg = new ReorgOp(input.getName(), input.getDataType(), input.getValueType(), rop, input);
+		reorg.setOutputBlocksizes(input.getRowsInBlock(), input.getColsInBlock());
+		copyLineNumbers(input, reorg);
+		reorg.refreshSizeInformation();
+		return reorg;
+	}
+	
+	public static ReorgOp createReorg(ArrayList<Hop> inputs, ReOrgOp rop) {
+		Hop main = inputs.get(0);
+		ReorgOp reorg = new ReorgOp(main.getName(), main.getDataType(), main.getValueType(), rop, inputs);
+		reorg.setOutputBlocksizes(main.getRowsInBlock(), main.getColsInBlock());
+		copyLineNumbers(main, reorg);
+		reorg.refreshSizeInformation();
+		return reorg;
 	}
 	
 	public static UnaryOp createUnary(Hop input, OpOp1 type) 
@@ -831,8 +876,17 @@ public class HopRewriteUtils
 		return ret;
 	}
 
+	public static boolean isReorg(Hop hop, ReOrgOp type) {
+		return hop instanceof ReorgOp && ((ReorgOp)hop).getOp()==type;
+	}
+	
+	public static boolean isReorg(Hop hop, ReOrgOp... types) {
+		return ( hop instanceof ReorgOp 
+			&& ArrayUtils.contains(types, ((ReorgOp) hop).getOp()));
+	}
+	
 	public static boolean isTransposeOperation(Hop hop) {
-		return (hop instanceof ReorgOp && ((ReorgOp)hop).getOp()==ReOrgOp.TRANSPOSE);
+		return isReorg(hop, ReOrgOp.TRANSPOSE);
 	}
 	
 	public static boolean isTransposeOperation(Hop hop, int maxParents) {

http://git-wip-us.apache.org/repos/asf/systemml/blob/f366c469/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationStatic.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationStatic.java b/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationStatic.java
index 2d5d881..4c68fe2 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationStatic.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationStatic.java
@@ -171,6 +171,7 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 			hi = simplifySlicedMatrixMult(hop, hi, i);           //e.g., (X%*%Y)[1,1] -> X[1,] %*% Y[,1];
 			hi = simplifyConstantSort(hop, hi, i);               //e.g., order(matrix())->matrix/seq; 
 			hi = simplifyOrderedSort(hop, hi, i);                //e.g., order(matrix())->seq; 
+			hi = fuseOrderOperationChain(hi);                    //e.g., order(order(X,2),1) -> order(X,(12))
 			hi = removeUnnecessaryReorgOperation(hop, hi, i);    //e.g., t(t(X))->X; rev(rev(X))->X potentially introduced by other rewrites
 			hi = simplifyTransposeAggBinBinaryChains(hop, hi, i);//e.g., t(t(A)%*%t(B)+C) -> B%*%A+t(C)
 			hi = removeUnnecessaryMinus(hop, hi, i);             //e.g., -(-X)->X; potentially introduced by simplify binary or dyn rewrites
@@ -1475,12 +1476,69 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 						LOG.debug("Applied simplifyOrderedSort2.");
 					}
 				}
-			}	   
+			}
 		}
 		
 		return hi;
 	}
 
+	/** Fuses a chain of order operations that return data, use scalar order-by columns,
+	 *  and share the descending flag, e.g., order(order(X,2),1) -> order(X,(1 2)).
+	 *  Returns the fused order hop, or hi unchanged if the pattern does not apply. */
+	private static Hop fuseOrderOperationChain(Hop hi) throws HopsException {
+		if( HopRewriteUtils.isReorg(hi, ReOrgOp.SORT)
+			&& hi.getInput().get(1) instanceof LiteralOp //scalar by
+			&& hi.getInput().get(2) instanceof LiteralOp //scalar desc
+			&& HopRewriteUtils.isLiteralOfValue(hi.getInput().get(3), false) ) //not ixret 
+		{ 
+			LiteralOp by = (LiteralOp) hi.getInput().get(1);
+			boolean desc = HopRewriteUtils.getBooleanValue((LiteralOp)hi.getInput().get(2));
+			
+			//find chain of order operations with same desc/ixret configuration and single consumers
+			ArrayList<LiteralOp> byList = new ArrayList<>();
+			byList.add(by);
+			Hop input = hi.getInput().get(0);
+			while( HopRewriteUtils.isReorg(input, ReOrgOp.SORT)
+				&& input.getInput().get(1) instanceof LiteralOp //scalar by
+				&& HopRewriteUtils.isLiteralOfValue(input.getInput().get(2), desc)
+				&& HopRewriteUtils.isLiteralOfValue(input.getInput().get(3), false) //not ixret (of the chain op, not the root)
+				&& input.getParent().size() == 1 ) 
+			{
+				byList.add((LiteralOp)input.getInput().get(1));
+				input = input.getInput().get(0);
+			}
+			
+			//merge order chain if at least two instances
+			if( byList.size() >= 2 ) {
+				//create new order operation (by columns listed from outermost to innermost order)
+				ArrayList<Hop> inputs = new ArrayList<>();
+				inputs.add(input);
+				inputs.add(HopRewriteUtils.createDataGenOpByVal(byList, 1, byList.size()));
+				inputs.add(new LiteralOp(desc));
+				inputs.add(new LiteralOp(false));
+				Hop hnew = HopRewriteUtils.createReorg(inputs, ReOrgOp.SORT);
+				
+				//cleanup references of all replaced order operations along the chain
+				Hop current = hi;
+				while(current != input ) {
+					Hop tmp = current.getInput().get(0);
+					HopRewriteUtils.removeAllChildReferences(current);
+					current = tmp;
+				}
+				
+				//rewire all parents (avoid anomalies with replicated datagen)
+				List<Hop> parents = new ArrayList<>(hi.getParent());
+				for( Hop p : parents )
+					HopRewriteUtils.replaceChildReference(p, hi, hnew);
+				
+				hi = hnew;
+				LOG.debug("Applied fuseOrderOperationChain (line "+hi.getBeginLine()+").");
+			}
+		}
+		
+		return hi;
+	}
+	
 	/**
 	 * Patterns: t(t(A)%*%t(B)+C) -> B%*%A+t(C)
 	 * 

http://git-wip-us.apache.org/repos/asf/systemml/blob/f366c469/src/main/java/org/apache/sysml/runtime/instructions/cp/StringInitCPInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/cp/StringInitCPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/cp/StringInitCPInstruction.java
index 4b89573..93e02b9 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/cp/StringInitCPInstruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/cp/StringInitCPInstruction.java
@@ -30,7 +30,7 @@ import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 import org.apache.sysml.runtime.matrix.operators.Operator;
 
 public class StringInitCPInstruction extends UnaryCPInstruction {
-	private static final String DELIM = " ";
+	public static final String DELIM = " ";
 
 	private final long _rlen;
 	private final long _clen;

http://git-wip-us.apache.org/repos/asf/systemml/blob/f366c469/src/test/java/org/apache/sysml/test/integration/functions/reorg/MultipleOrderByColsTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/reorg/MultipleOrderByColsTest.java b/src/test/java/org/apache/sysml/test/integration/functions/reorg/MultipleOrderByColsTest.java
index 67c6487..10dc1a4 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/reorg/MultipleOrderByColsTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/reorg/MultipleOrderByColsTest.java
@@ -21,6 +21,7 @@ package org.apache.sysml.test.integration.functions.reorg;
 
 import java.util.HashMap;
 
+import org.junit.Assert;
 import org.junit.Test;
 
 import org.apache.sysml.api.DMLScript;
@@ -30,10 +31,12 @@ import org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex;
 import org.apache.sysml.test.integration.AutomatedTestBase;
 import org.apache.sysml.test.integration.TestConfiguration;
 import org.apache.sysml.test.utils.TestUtils;
+import org.apache.sysml.utils.Statistics;
 
 public class MultipleOrderByColsTest extends AutomatedTestBase 
 {
 	private final static String TEST_NAME1 = "OrderMultiBy";
+	private final static String TEST_NAME2 = "OrderMultiBy2";
 	
 	private final static String TEST_DIR = "functions/reorg/";
 	private static final String TEST_CLASS_DIR = TEST_DIR + MultipleOrderByColsTest.class.getSimpleName() + "/";
@@ -48,6 +51,7 @@ public class MultipleOrderByColsTest extends AutomatedTestBase
 	public void setUp() {
 		TestUtils.clearAssertionInformation();
 		addTestConfiguration(TEST_NAME1, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1,new String[]{"B"}));
+		addTestConfiguration(TEST_NAME2, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME2,new String[]{"B"}));
 	}
 	
 	@Test
@@ -90,6 +94,26 @@ public class MultipleOrderByColsTest extends AutomatedTestBase
 		runOrderTest(TEST_NAME1, true, true, true, ExecType.CP);
 	}
 
+	@Test
+	public void testOrder2DenseAscDataCP() {
+		runOrderTest(TEST_NAME2, false, false, false, ExecType.CP);
+	}
+	
+	@Test
+	public void testOrder2DenseDescDataCP() {
+		runOrderTest(TEST_NAME2, false, true, false, ExecType.CP);
+	}
+	
+	@Test
+	public void testOrder2SparseAscDataCP() {
+		runOrderTest(TEST_NAME2, true, false, false, ExecType.CP);
+	}
+	
+	@Test
+	public void testOrder2SparseDescDataCP() {
+		runOrderTest(TEST_NAME2, true, true, false, ExecType.CP);
+	}
+	
 //TODO enable together with additional spark sort runtime
 //	@Test
 //	public void testOrderDenseAscDataSP() {
@@ -152,7 +176,7 @@ public class MultipleOrderByColsTest extends AutomatedTestBase
 			
 			String HOME = SCRIPT_DIR + TEST_DIR;
 			fullDMLScriptName = HOME + TEST_NAME + ".dml";
-			programArgs = new String[]{"-explain","-args", input("A"), 
+			programArgs = new String[]{"-stats","-args", input("A"), 
 				String.valueOf(desc).toUpperCase(), String.valueOf(ixret).toUpperCase(), output("B") };
 			
 			fullRScriptName = HOME + TEST_NAME + ".R";
@@ -170,6 +194,10 @@ public class MultipleOrderByColsTest extends AutomatedTestBase
 			HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("B");
 			HashMap<CellIndex, Double> rfile  = readRMatrixFromFS("B");
 			TestUtils.compareMatrices(dmlfile, rfile, eps, "Stat-DML", "Stat-R");
+			
+			//check for applied rewrite
+			if( testname.equals(TEST_NAME2) && !ixret )
+				Assert.assertTrue(Statistics.getCPHeavyHitterCount("rsort")==1);
 		}
 		finally {
 			rtplatform = platformOld;

http://git-wip-us.apache.org/repos/asf/systemml/blob/f366c469/src/test/scripts/functions/reorg/OrderMultiBy.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/reorg/OrderMultiBy.dml b/src/test/scripts/functions/reorg/OrderMultiBy.dml
index f6d2246..78cf84e 100644
--- a/src/test/scripts/functions/reorg/OrderMultiBy.dml
+++ b/src/test/scripts/functions/reorg/OrderMultiBy.dml
@@ -23,11 +23,6 @@
 A = read($1);
 
 ix = matrix("3 7 14", rows=1, cols=3)
-
-#B = order(target=A, by=14, decreasing=$2, index.return=$3);
-#B = order(target=B, by=7, decreasing=$2, index.return=$3);
-#B = order(target=B, by=3, decreasing=$2, index.return=$3);
-
 B = order(target=A, by=ix, decreasing=$2, index.return=$3);
 
 write(B, $4, format="text");  
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/systemml/blob/f366c469/src/test/scripts/functions/reorg/OrderMultiBy2.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/reorg/OrderMultiBy2.R b/src/test/scripts/functions/reorg/OrderMultiBy2.R
new file mode 100644
index 0000000..374dad0
--- /dev/null
+++ b/src/test/scripts/functions/reorg/OrderMultiBy2.R
@@ -0,0 +1,42 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+A = readMM(paste(args[1], "A.mtx", sep=""))
+desc = as.logical(args[2]);
+ixret = as.logical(args[3]);
+
+col1 = A[,3];
+col2 = A[,7];
+col3 = A[,14];
+
+
+if( ixret ) {
+  B = order(col1, col2, col3, decreasing=desc);
+} else {
+  B = A[order(col1, col2, col3, decreasing=desc),];
+}
+
+writeMM(as(B,"CsparseMatrix"), paste(args[4], "B", sep=""))

http://git-wip-us.apache.org/repos/asf/systemml/blob/f366c469/src/test/scripts/functions/reorg/OrderMultiBy2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/reorg/OrderMultiBy2.dml b/src/test/scripts/functions/reorg/OrderMultiBy2.dml
new file mode 100644
index 0000000..0c301ae
--- /dev/null
+++ b/src/test/scripts/functions/reorg/OrderMultiBy2.dml
@@ -0,0 +1,29 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+A = read($1);
+
+B = order(target=A, by=14, decreasing=$2, index.return=$3);
+B = order(target=B, by=7, decreasing=$2, index.return=$3);
+B = order(target=B, by=3, decreasing=$2, index.return=$3);
+
+write(B, $4, format="text");


[2/2] systemml git commit: [MINOR] Fix robustness empty filename handling for JMLC and MLContext

Posted by mb...@apache.org.
[MINOR] Fix robustness empty filename handling for JMLC and MLContext 

Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/a66126d4
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/a66126d4
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/a66126d4

Branch: refs/heads/master
Commit: a66126d49d6f64136d86074cfb6ec666d7c6375a
Parents: f366c46
Author: Matthias Boehm <mb...@gmail.com>
Authored: Wed Nov 8 19:41:32 2017 -0800
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Wed Nov 8 19:41:32 2017 -0800

----------------------------------------------------------------------
 src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/a66126d4/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java b/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java
index eb2b141..2e66437 100644
--- a/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java
+++ b/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java
@@ -105,7 +105,7 @@ public class MapReduceTool
 
 	public static boolean existsFileOnHDFS(String fname) {
 		//robustness for empty strings (e.g., JMLC, MLContext)
-		if( fname == null || fname.isEmpty() )
+		if( fname == null || fname.isEmpty() || fname.trim().isEmpty() )
 			return false;
 		
 		try {
@@ -121,7 +121,7 @@ public class MapReduceTool
 	
 	public static boolean isDirectory(String fname) {
 		//robustness for empty strings (e.g., JMLC, MLContext)
-		if( fname == null || fname.isEmpty() )
+		if( fname == null || fname.isEmpty() || fname.trim().isEmpty() )
 			return false;
 		
 		try {
@@ -176,7 +176,7 @@ public class MapReduceTool
 
 	public static boolean isHDFSFileEmpty(String dir) throws IOException {
 		//robustness for empty strings (e.g., JMLC, MLContext)
-		if( dir == null || dir.isEmpty() )
+		if( dir == null || dir.isEmpty() || dir.trim().isEmpty())
 			return false;
 		Path path = new Path(dir);
 		FileSystem fs = IOUtilFunctions.getFileSystem(path);