You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by ni...@apache.org on 2017/05/13 15:17:08 UTC
incubator-systemml git commit: [HOTFIX] [SYSTEMML-540] Enabled commandline statistics and disabled native by default

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 9ee569e0f -> 59ff8b3d4


[HOTFIX] [SYSTEMML-540] Enabled commandline statistics and disabled native by default

- Fixed a bug that disabled statistics when SystemML is invoked from
  the command line.
- When native.blas was changed from a boolean to a string, a bug was
  introduced that enabled native BLAS by default. This commit fixes that
  bug.
- Removed the OpenMP dependency for the MKL-based native backend by using
  the DNN primitives in MKL. The performance of these primitives is
  comparable to that of the OpenMP implementation, and they future-proof us
  against hardware enhancements. This will also help us support operating
  systems where gomp is not easily available, such as Mac.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/59ff8b3d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/59ff8b3d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/59ff8b3d

Branch: refs/heads/master
Commit: 59ff8b3d4e38d5f0ea38300df237345c86f5de1a
Parents: 9ee569e
Author: Niketan Pansare <np...@us.ibm.com>
Authored: Sat May 13 08:07:54 2017 -0700
Committer: Niketan Pansare <np...@us.ibm.com>
Committed: Sat May 13 08:07:54 2017 -0700

----------------------------------------------------------------------
 conf/SystemML-config.xml.template               |   2 +-
 src/main/cpp/CMakeLists.txt                     |  16 +--
 .../cpp/lib/libpreload_systemml-Linux-x86_64.so | Bin 7976 -> 0 bytes
 .../cpp/lib/libsystemml_mkl-Linux-x86_64.so     | Bin 31824 -> 27504 bytes
 src/main/cpp/libmatrixdnn.cpp                   | 117 ++++++++++++++++++-
 src/main/cpp/libmatrixdnn.h                     |   8 ++
 src/main/cpp/preload/preload_systemml.cpp       |  35 ------
 src/main/cpp/preload/preload_systemml.h         |  40 -------
 src/main/cpp/systemml.cpp                       |   4 +-
 .../java/org/apache/sysml/api/DMLScript.java    |   2 +-
 .../apache/sysml/api/ScriptExecutorUtils.java   |  18 +--
 .../sysml/api/mlcontext/ScriptExecutor.java     |   2 +-
 .../java/org/apache/sysml/conf/DMLConfig.java   |   2 +-
 .../org/apache/sysml/utils/NativeHelper.java    |  14 ---
 14 files changed, 143 insertions(+), 117 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/59ff8b3d/conf/SystemML-config.xml.template
----------------------------------------------------------------------
diff --git a/conf/SystemML-config.xml.template b/conf/SystemML-config.xml.template
index 8092ca6..e026f8e 100644
--- a/conf/SystemML-config.xml.template
+++ b/conf/SystemML-config.xml.template
@@ -67,7 +67,7 @@
    <codegen.literals>1</codegen.literals>
    
    <!-- enables native blas for matrix multiplication and convolution, experimental feature (options: auto, mkl, openblas, none) -->
-   <native.blas>auto</native.blas>
+   <native.blas>none</native.blas>
 
    <!-- prints extra statistics information for GPU -->
    <systemml.stats.extraGPU>false</systemml.stats.extraGPU>

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/59ff8b3d/src/main/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/main/cpp/CMakeLists.txt b/src/main/cpp/CMakeLists.txt
index c492959..47555bf 100644
--- a/src/main/cpp/CMakeLists.txt
+++ b/src/main/cpp/CMakeLists.txt
@@ -23,27 +23,21 @@ project (systemml)
 # All custom find modules
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/")
 
-# OpenMP is required
-find_package(OpenMP REQUIRED)
-
 # Options to Use OpenBLAS or Intel MKL
 option(USE_OPEN_BLAS "Whether to use OpenBLAS (Defaults to compiling with Intel MKL, if both set, MKL has priority)" OFF)
 option(USE_INTEL_MKL "Whether to use Intel MKL (Defaults to compiling with Intel MKL)" ON)
 
 # Build a shared libraray
 add_library(systemml SHARED libmatrixdnn.cpp  libmatrixmult.cpp  systemml.cpp)
-add_library(preload SHARED preload/preload_systemml.cpp)
 
 set(MATH_LIBRARIES "")
 
 # sets the installation path to src/main/cpp/lib
 set(CMAKE_INSTALL_PREFIX ${CMAKE_SOURCE_DIR})
-install(TARGETS systemml preload LIBRARY DESTINATION lib)
+install(TARGETS systemml LIBRARY DESTINATION lib)
 
 set(CMAKE_BUILD_TYPE Release)
 
-set_target_properties(preload PROPERTIES OUTPUT_NAME "preload_systemml-${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}")
-
 if (USE_OPEN_BLAS)
   find_package(OpenBLAS REQUIRED)
   # sets the name of the output to include the os and the architecture
@@ -73,4 +67,10 @@ include_directories(${CMAKE_BINARY_DIR})
 
 
 # Setting CXX compiler flags
-set_target_properties(systemml PROPERTIES LINK_FLAGS "${OpenMP_CXX_FLAGS} ${MATH_LIBRARIES}")
+if (USE_OPEN_BLAS)
+  # OpenMP is required only for the OpenBLAS backend; its flags are passed to the linker
+  find_package(OpenMP REQUIRED)
+  set_target_properties(systemml PROPERTIES LINK_FLAGS "${OpenMP_CXX_FLAGS} ${MATH_LIBRARIES}")
+elseif(USE_INTEL_MKL)
+  set_target_properties(systemml PROPERTIES LINK_FLAGS "${MATH_LIBRARIES}")
+endif()

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/59ff8b3d/src/main/cpp/lib/libpreload_systemml-Linux-x86_64.so
----------------------------------------------------------------------
diff --git a/src/main/cpp/lib/libpreload_systemml-Linux-x86_64.so b/src/main/cpp/lib/libpreload_systemml-Linux-x86_64.so
deleted file mode 100755
index 07e89be..0000000
Binary files a/src/main/cpp/lib/libpreload_systemml-Linux-x86_64.so and /dev/null differ

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/59ff8b3d/src/main/cpp/lib/libsystemml_mkl-Linux-x86_64.so
----------------------------------------------------------------------
diff --git a/src/main/cpp/lib/libsystemml_mkl-Linux-x86_64.so b/src/main/cpp/lib/libsystemml_mkl-Linux-x86_64.so
index a740930..9793b33 100755
Binary files a/src/main/cpp/lib/libsystemml_mkl-Linux-x86_64.so and b/src/main/cpp/lib/libsystemml_mkl-Linux-x86_64.so differ

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/59ff8b3d/src/main/cpp/libmatrixdnn.cpp
----------------------------------------------------------------------
diff --git a/src/main/cpp/libmatrixdnn.cpp b/src/main/cpp/libmatrixdnn.cpp
index ed02042..d6a09b7 100644
--- a/src/main/cpp/libmatrixdnn.cpp
+++ b/src/main/cpp/libmatrixdnn.cpp
@@ -25,11 +25,17 @@
 #include <cstdio>
 #include <cmath>
 #include <cstring>
-#include "omp.h"
+#ifdef USE_INTEL_MKL
+  #include "mkl_dnn.h"
+#else
+  #include "omp.h"
+#endif
 
 int computeNNZ(double* arr, int limit) {
   int nnz = 0;
+#ifndef USE_INTEL_MKL
   #pragma omp parallel for reduction(+: nnz)
+#endif
   for(int i=0; i<limit; i++)
     nnz += (arr[i]!=0) ? 1 : 0;
   return nnz;
@@ -129,16 +135,52 @@ void im2col(double* inputArray, double* outputArray, int N, int C, int H, int W,
       }
     }
   }
-} 
+}
 
+#ifdef USE_INTEL_MKL
+// Returns true if error
+bool MKL_DNN_ERROR(dnnError_t code) {
+  if(code == E_SUCCESS) return false;
+  else if(code == E_INCORRECT_INPUT_PARAMETER) std::cerr << "ERROR: Incorrect input parameter\n";
+  else if(code == E_MEMORY_ERROR) std::cerr << "ERROR: Memory error\n";
+  else if(code == E_UNSUPPORTED_DIMENSION) std::cerr << "ERROR: Unsupported dimensions\n";
+  else if(code == E_UNIMPLEMENTED) std::cerr << "ERROR: Unimplemented operation\n";
+  return true;
+} 
+#endif
 
 int conv2dBackwardFilterDense(double* inputPtr, double* doutPtr, double* retPtr, int N, int C, int H, int W, int K, int R, int S,
     int stride_h, int stride_w, int pad_h, int pad_w, int P, int Q, int numThreads) {
+  int CRS = C*R*S;
+#ifdef USE_INTEL_MKL
+  setNumThreadsForBLAS(numThreads);
+  // Step 1: Create a description of a DNN operation
+  dnnPrimitive_t pConvolution;
+  size_t dimension = 4;
+  size_t srcSize[4] = {W, H, C, N};
+  size_t dstSize[4] = {Q, P, K, N};
+  size_t filterSize[4] = {S, R, C, K};
+  size_t convolutionStrides[2] = {stride_w, stride_h};
+  int pads[2] = {-pad_w, -pad_h};
+  void* resources[dnnResourceNumber] = {0};
+  resources[dnnResourceDiffDst] = doutPtr;
+  resources[dnnResourceSrc] = inputPtr;
+  resources[dnnResourceDiffFilter] = retPtr;
+  dnnConvolutionCreateBackwardFilter_F64(&pConvolution, NULL, dnnAlgorithmConvolutionDirect, dimension, 
+      srcSize, dstSize, filterSize, convolutionStrides, pads, dnnBorderZeros);
+  
+  // Step 2: Perform the DNN operation and remember whether it failed
+  bool execFailed = MKL_DNN_ERROR(dnnExecute_F64(pConvolution, resources));
+  
+  // Step 3: Destroy the description of the operation, even on failure, so it is not leaked
+  dnnDelete_F64(pConvolution);
+  if(execFailed)
+    return -1; // nnz == -1 indicates error.
+#else
   // First step: Avoids oversubscription and other openmp/internal blas threading issues
   setNumThreadsForBLAS(1);
   
   int CHW = C * H * W;
-  int CRS = C*R*S;
   int PQ = P*Q;
   int KPQ = K*PQ;
   int numRotatedElem = KPQ;
@@ -197,16 +239,42 @@ int conv2dBackwardFilterDense(double* inputPtr, double* doutPtr, double* retPtr,
   }
   
   delete [] temp;
+#endif
   return computeNNZ(retPtr, K*CRS);
 }
 
 int conv2dBackwardDataDense(double* filterPtr, double* doutPtr, double* retPtr, int N, int C, int H, int W, int K, int R, int S,
     int stride_h, int stride_w, int pad_h, int pad_w, int P, int Q, int numThreads) {
+  int CHW = C * H * W;
+#ifdef USE_INTEL_MKL
+  setNumThreadsForBLAS(numThreads);
+  // Step 1: Create a description of a DNN operation
+  dnnPrimitive_t pConvolution;
+  size_t dimension = 4;
+  size_t srcSize[4] = {W, H, C, N};
+  size_t dstSize[4] = {Q, P, K, N};
+  size_t filterSize[4] = {S, R, C, K};
+  size_t convolutionStrides[2] = {stride_w, stride_h};
+  int pads[2] = {-pad_w, -pad_h};
+  void* resources[dnnResourceNumber] = {0};
+  resources[dnnResourceDiffDst] = doutPtr;
+  resources[dnnResourceFilter] = filterPtr;
+  resources[dnnResourceDiffSrc] = retPtr;
+  dnnConvolutionCreateBackwardData_F64(&pConvolution, NULL, dnnAlgorithmConvolutionDirect, dimension, 
+      srcSize, dstSize, filterSize, convolutionStrides, pads, dnnBorderZeros);
+  
+  // Step 2: Perform the DNN operation and remember whether it failed
+  bool execFailed = MKL_DNN_ERROR(dnnExecute_F64(pConvolution, resources));
+  
+  // Step 3: Destroy the description of the operation, even on failure, so it is not leaked
+  dnnDelete_F64(pConvolution);
+  if(execFailed)
+    return -1; // nnz == -1 indicates error.
+#else 
    // First step: Avoids oversubscription and other openmp/internal blas threading issues
   setNumThreadsForBLAS(1);
   
   int CRS = C * R * S;
-  int CHW = C * H * W;
   int PQ = P * Q;
   int KPQ = K * PQ;
   int numRotatedElem = PQ * K;
@@ -240,6 +308,7 @@ int conv2dBackwardDataDense(double* filterPtr, double* doutPtr, double* retPtr,
   
   delete [] rotatedDoutPtrArrays;
   delete [] col2imInputArrays;
+#endif
   return computeNNZ(retPtr, N*CHW);
 }
 
@@ -306,11 +375,45 @@ void conv2dBackwardFilterSparseDense(int apos, int alen, int* aix, double* avals
 
 int conv2dBiasAddDense(double* inputPtr, double* biasPtr, double* filterPtr, double* retPtr, int N, int C, int H, int W, int K, int R, int S,
     int stride_h, int stride_w, int pad_h, int pad_w, int P, int Q, bool addBias, int numThreads) {
+  int KPQ = K * P * Q;
+  
+#ifdef USE_INTEL_MKL
+  setNumThreadsForBLAS(numThreads);
+  // Step 1: Create a description of a DNN operation
+  dnnPrimitive_t pConvolution;
+  size_t dimension = 4;
+  size_t srcSize[4] = {W, H, C, N};
+  size_t dstSize[4] = {Q, P, K, N};
+  size_t filterSize[4] = {S, R, C, K};
+  size_t convolutionStrides[2] = {stride_w, stride_h};
+  int pads[2] = {-pad_w, -pad_h};
+  void* resources[dnnResourceNumber] = {0};
+  resources[dnnResourceSrc] = inputPtr;
+  resources[dnnResourceFilter] = filterPtr;
+  resources[dnnResourceDst] = retPtr;
+  if(addBias) {
+    dnnConvolutionCreateForwardBias_F64(&pConvolution, NULL, dnnAlgorithmConvolutionDirect, dimension, 
+      srcSize, dstSize, filterSize, convolutionStrides, pads, dnnBorderZeros);
+    resources[dnnResourceBias] = biasPtr;
+  }
+  else { 
+    dnnConvolutionCreateForward_F64(&pConvolution, NULL, dnnAlgorithmConvolutionDirect, dimension, 
+      srcSize, dstSize, filterSize, convolutionStrides, pads, dnnBorderZeros);
+  }
+  
+  // Step 2: Perform the DNN operation and remember whether it failed
+  bool execFailed = MKL_DNN_ERROR(dnnExecute_F64(pConvolution, resources));
+  
+  // Step 3: Destroy the description of the operation, even on failure, so it is not leaked
+  dnnDelete_F64(pConvolution);
+  if(execFailed)
+    return -1; // nnz == -1 indicates error.
+#else 
+  // ------------------------------------------------------------------------------------
   // First step:  Avoids oversubscription and other openmp/internal blas threading issues
   setNumThreadsForBLAS(1);
   
   int CHW = C * H * W;
-  int KPQ = K * P * Q;
   int PQ = P * Q;
   int numIm2ColElem = C * R * S * P * Q;
   
@@ -343,7 +446,9 @@ int conv2dBiasAddDense(double* inputPtr, double* biasPtr, double* filterPtr, dou
 		}
     }
   } // end omp parallel for
-  
   delete [] loweredMatArrays;
+  // ------------------------------------------------------------------------------------
+#endif
+  
   return computeNNZ(retPtr, N*KPQ);
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/59ff8b3d/src/main/cpp/libmatrixdnn.h
----------------------------------------------------------------------
diff --git a/src/main/cpp/libmatrixdnn.h b/src/main/cpp/libmatrixdnn.h
index 86e7b24..f7d746f 100644
--- a/src/main/cpp/libmatrixdnn.h
+++ b/src/main/cpp/libmatrixdnn.h
@@ -20,6 +20,14 @@
 #ifndef _libmatrixdnn_h
 #define _libmatrixdnn_h
 
+#ifdef USE_INTEL_MKL
+	#include <mkl.h>
+	#if INTEL_MKL_VERSION < 20170000
+		// Will throw an error at development time in non-standard settings
+		PLEASE DONOT COMPILE SHARED LIBRARIES WITH OLDER MKL VERSIONS
+	#endif
+#endif
+
 int conv2dBackwardFilterDense(double* inputPtr, double* doutPtr, double* retPtr, int N, int C, int H, int W, int K, int R, int S,
     int stride_h, int stride_w, int pad_h, int pad_w, int P, int Q, int numThreads);
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/59ff8b3d/src/main/cpp/preload/preload_systemml.cpp
----------------------------------------------------------------------
diff --git a/src/main/cpp/preload/preload_systemml.cpp b/src/main/cpp/preload/preload_systemml.cpp
deleted file mode 100644
index 6ee20e0..0000000
--- a/src/main/cpp/preload/preload_systemml.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-#include "preload_systemml.h" 
-#include <cstdlib>
- 
-//  g++ -o libpreload_systemml-linux-x86_64.so preload_systemml.cpp  -I$JAVA_HOME/include -I$JAVA_HOME/include/linux -lm -ldl -O3 -shared -fPIC
-JNIEXPORT void JNICALL Java_org_apache_sysml_utils_EnvironmentHelper_setEnv(JNIEnv * env, jclass c, jstring jname, jstring jvalue) {
-	const char* name = (env)->GetStringUTFChars(jname, NULL);
-    	const char* value = (env)->GetStringUTFChars(jvalue,NULL);
-#if defined _WIN32 || defined _WIN64 
-	_putenv_s(name, value);
-#else 
-	setenv(name, value, 1);
-#endif
-	(env)->ReleaseStringUTFChars(jname, name); 
-    	(env)->ReleaseStringUTFChars(jvalue, value);
-}
- 
- 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/59ff8b3d/src/main/cpp/preload/preload_systemml.h
----------------------------------------------------------------------
diff --git a/src/main/cpp/preload/preload_systemml.h b/src/main/cpp/preload/preload_systemml.h
deleted file mode 100644
index 79d58f8..0000000
--- a/src/main/cpp/preload/preload_systemml.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/* DO NOT EDIT THIS FILE - it is machine generated */
-#include <jni.h>
-/* Header for class org_apache_sysml_utils_EnvironmentHelper */
-
-#ifndef _Included_org_apache_sysml_utils_EnvironmentHelper
-#define _Included_org_apache_sysml_utils_EnvironmentHelper
-#ifdef __cplusplus
-extern "C" {
-#endif
-/*
- * Class:     org_apache_sysml_utils_EnvironmentHelper
- * Method:    setEnv
- * Signature: (Ljava/lang/String;Ljava/lang/String;)V
- */
-JNIEXPORT void JNICALL Java_org_apache_sysml_utils_EnvironmentHelper_setEnv
-  (JNIEnv *, jclass, jstring, jstring);
-
-#ifdef __cplusplus
-}
-#endif
-#endif
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/59ff8b3d/src/main/cpp/systemml.cpp
----------------------------------------------------------------------
diff --git a/src/main/cpp/systemml.cpp b/src/main/cpp/systemml.cpp
index 34ae248..0f00afd 100644
--- a/src/main/cpp/systemml.cpp
+++ b/src/main/cpp/systemml.cpp
@@ -23,10 +23,10 @@
 #include "libmatrixdnn.h"
 
 // Linux:
-// g++ -o lib/libsystemml_mkl-Linux-x86_64.so *.cpp  -I$JAVA_HOME/include -I$MKLROOT/include -I$JAVA_HOME/include/linux -lmkl_rt -lpthread  -lm -ldl -DUSE_INTEL_MKL -DUSE_GNU_THREADING -L$MKLROOT/lib/intel64 -m64 -fopenmp -O3 -shared -fPIC
+// g++ -o lib/libsystemml_mkl-Linux-x86_64.so *.cpp  -I$JAVA_HOME/include -I$MKLROOT/include -I$JAVA_HOME/include/linux -lmkl_rt -lpthread  -lm -ldl -DUSE_INTEL_MKL -DUSE_MKL_DNN -L$MKLROOT/lib/intel64 -m64 -O3 -shared -fPIC
 // g++ -o lib/libsystemml_openblas-Linux-x86_64.so *.cpp  -I$JAVA_HOME/include  -I$JAVA_HOME/include/linux -lopenblas -lpthread -lm -ldl -DUSE_OPEN_BLAS -I/opt/OpenBLAS/include/ -L/opt/OpenBLAS/lib/ -fopenmp -O3 -shared -fPIC
 
-// Mac OSX:
+// Mac OSX:	
 // g++ -o libsystemml_mkl-linux-x86_64.dylib *.cpp  -I$JAVA_HOME/include -I$MKLROOT/include -I$JAVA_HOME/include/linux -lmkl_rt -lpthread  -lm -ldl -DUSE_INTEL_MKL -DUSE_GNU_THREADING -L$MKLROOT/lib/intel64 -m64 -fopenmp -O3 -dynamiclib -fPIC -undefined dynamic_lookup
 // g++ -o libsystemml_openblas-linux-x86_64.dylib *.cpp  -I$JAVA_HOME/include  -I$JAVA_HOME/include/linux -lopenblas -lpthread -lm -ldl -DUSE_OPEN_BLAS -L/opt/OpenBLAS/lib/ -fopenmp -O3 -dynamiclib -fPIC -undefined dynamic_lookup
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/59ff8b3d/src/main/java/org/apache/sysml/api/DMLScript.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/DMLScript.java b/src/main/java/org/apache/sysml/api/DMLScript.java
index ea36b12..5d1f7eb 100644
--- a/src/main/java/org/apache/sysml/api/DMLScript.java
+++ b/src/main/java/org/apache/sysml/api/DMLScript.java
@@ -830,7 +830,7 @@ public class DMLScript
 		ExecutionContext ec = null;
 		try {
 			ec = ExecutionContextFactory.createContext(rtprog);
-			ScriptExecutorUtils.executeRuntimeProgram(rtprog, ec, dmlconf);
+			ScriptExecutorUtils.executeRuntimeProgram(rtprog, ec, dmlconf, STATISTICS ? STATISTICS_COUNT : 0);
 		}
 		finally {
 			if(ec != null && ec instanceof SparkExecutionContext)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/59ff8b3d/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
index 0345c62..f582c36 100644
--- a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
+++ b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
@@ -19,7 +19,6 @@
 
 package org.apache.sysml.api;
 
-import org.apache.sysml.api.mlcontext.MLContext;
 import org.apache.sysml.api.mlcontext.ScriptExecutor;
 import org.apache.sysml.conf.DMLConfig;
 import org.apache.sysml.hops.codegen.SpoofCompiler;
@@ -41,14 +40,16 @@ public class ScriptExecutorUtils {
 	 * 
 	 * @param se
 	 *            script executor
+	 * @param statisticsMaxHeavyHitters
+	 *            maximum number of statistics to print
 	 * @throws DMLRuntimeException
 	 *             if exception occurs
 	 */
-	public static void executeRuntimeProgram(ScriptExecutor se) throws DMLRuntimeException {
+	public static void executeRuntimeProgram(ScriptExecutor se, int statisticsMaxHeavyHitters) throws DMLRuntimeException {
 		Program prog = se.getRuntimeProgram();
 		ExecutionContext ec = se.getExecutionContext();
 		DMLConfig config = se.getConfig();
-		executeRuntimeProgram(prog, ec, config);
+		executeRuntimeProgram(prog, ec, config, statisticsMaxHeavyHitters);
 	}
 
 	/**
@@ -62,10 +63,12 @@ public class ScriptExecutorUtils {
 	 *            execution context
 	 * @param dmlconf
 	 *            dml configuration
+	 * @param statisticsMaxHeavyHitters
+	 *            maximum number of statistics to print
 	 * @throws DMLRuntimeException
 	 *             if error occurs
 	 */
-	public static void executeRuntimeProgram(Program rtprog, ExecutionContext ec, DMLConfig dmlconf)
+	public static void executeRuntimeProgram(Program rtprog, ExecutionContext ec, DMLConfig dmlconf, int statisticsMaxHeavyHitters)
 			throws DMLRuntimeException {
 		// Whether extra statistics useful for developers and others interested
 		// in digging into performance problems are recorded and displayed
@@ -99,11 +102,10 @@ public class ScriptExecutorUtils {
 			// display statistics (incl caching stats if enabled)
 			Statistics.stopRunTimer();
 
-			MLContext ml = MLContext.getActiveMLContext();
-			if ((ml != null) && (ml.isStatistics())) {
-				int statisticsMaxHeavyHitters = ml.getStatisticsMaxHeavyHitters();
+			if(statisticsMaxHeavyHitters > 0)
 				System.out.println(Statistics.display(statisticsMaxHeavyHitters));
-			}
+			else
+				System.out.println(Statistics.display());
 		}
 	}
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/59ff8b3d/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java b/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java
index 1451ebb..8fa4345 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java
@@ -405,7 +405,7 @@ public class ScriptExecutor {
 	 */
 	protected void executeRuntimeProgram() {
 		try {
-			ScriptExecutorUtils.executeRuntimeProgram(this);
+			ScriptExecutorUtils.executeRuntimeProgram(this, statistics ? statisticsMaxHeavyHitters : 0);
 		} catch (DMLRuntimeException e) {
 			throw new MLContextException("Exception occurred while executing runtime program", e);
 		}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/59ff8b3d/src/main/java/org/apache/sysml/conf/DMLConfig.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/conf/DMLConfig.java b/src/main/java/org/apache/sysml/conf/DMLConfig.java
index f8577f3..ccd8889 100644
--- a/src/main/java/org/apache/sysml/conf/DMLConfig.java
+++ b/src/main/java/org/apache/sysml/conf/DMLConfig.java
@@ -116,7 +116,7 @@ public class DMLConfig
 		_defaultVals.put(CODEGEN,                "false" );
 		_defaultVals.put(CODEGEN_PLANCACHE,      "true" );
 		_defaultVals.put(CODEGEN_LITERALS,       "1" );
-		_defaultVals.put(NATIVE_BLAS,      			 "auto" );
+		_defaultVals.put(NATIVE_BLAS,      			 "none" );
 
 		_defaultVals.put(EXTRA_GPU_STATS,       "false" );
 		_defaultVals.put(EXTRA_DNN_STATS,       "false" );

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/59ff8b3d/src/main/java/org/apache/sysml/utils/NativeHelper.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/utils/NativeHelper.java b/src/main/java/org/apache/sysml/utils/NativeHelper.java
index fe5e085..883b2a7 100644
--- a/src/main/java/org/apache/sysml/utils/NativeHelper.java
+++ b/src/main/java/org/apache/sysml/utils/NativeHelper.java
@@ -173,20 +173,6 @@ public class NativeHelper {
 	
 	
 	private static boolean isMKLAvailable() {
-		// ------------------------------------------------------------
-		// Set environment variable MKL_THREADING_LAYER to GNU on Linux for performance
-		if(!loadLibraryHelper("libpreload_systemml-Linux-x86_64.so")) {
-			LOG.debug("Unable to load preload_systemml (required for loading MKL-enabled SystemML library)");
-			hintOnFailures = hintOnFailures + " libpreload_systemml-Linux-x86_64.so";
-			return false;
-		}
-		// The most reliable way in my investigation to ensure that MKL runs smoothly with OpenMP (used by conv2d*)
-		// is setting the environment variable MKL_THREADING_LAYER to GNU
-		EnvironmentHelper.setEnv("MKL_THREADING_LAYER", "GNU");
-		if(!loadBLAS("gomp", "gomp required for loading MKL-enabled SystemML library")) 
-			return false;
-		
-		// ------------------------------------------------------------
 		return loadBLAS("mkl_rt", null);
 	}