Posted to commits@tvm.apache.org by GitBox <gi...@apache.org> on 2021/05/25 07:57:27 UTC

[GitHub] [tvm] cyyfighting12 opened a new issue #8123: GPU result is different from CPU

cyyfighting12 opened a new issue #8123:
URL: https://github.com/apache/tvm/issues/8123


    Windows 10, CUDA 11.1, model converted from MXNet. The CPU result is correct, but the GPU result is wrong.
    CPU:
    device = 'x86.cpu'
    ctx = tvm.cpu(0)
    GPU:
    device = 'x86.cuda'
    ctx = tvm.gpu(0)
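
    Before digging into the numbers, it is worth confirming that the CUDA context is actually usable. A minimal check, assuming a TVM build of this vintage where TVMContext exposes exist and compute_version:

    import tvm

    ctx = tvm.gpu(0)
    # TVMContext.exist is False when device 0 cannot be reached
    if not ctx.exist:
        raise RuntimeError("CUDA device 0 is not available to TVM")
    print("compute capability:", ctx.compute_version)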


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [tvm] vinx13 closed issue #8123: GPU result is different from CPU

Posted by GitBox <gi...@apache.org>.
vinx13 closed issue #8123:
URL: https://github.com/apache/tvm/issues/8123


   





[GitHub] [tvm] cyyfighting12 edited a comment on issue #8123: GPU result is different from CPU

Posted by GitBox <gi...@apache.org>.
cyyfighting12 edited a comment on issue #8123:
URL: https://github.com/apache/tvm/issues/8123#issuecomment-847648523


    **the whole .py:**
   
   import mxnet as mx
   import tvm
   import tvm.relay as relay
   import numpy as np
   from tvm.contrib import util
   import os
   
   dtype = 'float32'
   use_arm64 = False
   use_android = False
   
   network = 'IrisAttackCCL'
   
    device = 'x86.cuda'   # device = 'x86.cpu'
    ctx = tvm.gpu(0)      # ctx = tvm.cpu(0)
   
   model_path = './'
   path = model_path +  (network)
   
   #set the input shape/layer
   input_layer = 'data'
   batch_size = 1
   image_shape = (1, 240,320)  
   input_shape = (batch_size,) + image_shape
   
   ######################################################################
   if device == 'cpu':
       if use_arm64:
           target = tvm.target.create('llvm -device=arm_cpu -target=arm64-linux-android -mattr=+neon')
       else:
           target = tvm.target.create('llvm -device=arm_cpu -target=arm-linux-androideabi -mattr=+neon -mfloat-abi=soft')
       target_host = None
   
   elif device == 'gpu':
       #target = tvm.target.create('opencl -device=mali')
       #target_host = 'llvm -target=aarch64-linux-gnu -mattr=+neon'
       target = tvm.target.create('opencl -device=mali')
   
       if use_arm64:
           target_host = 'llvm -target=arm64-linux-android -mattr=+neon'
       else:
           target_host = 'llvm -target=arm-linux-androideabi -mattr=+neon -mfloat-abi=soft'
   
   elif device == 'x86.cpu':
       target = 'llvm'
       target_host = None
   
   elif device == 'x86.cuda':
       target = 'cuda'
       #target = tvm.target.cuda(model='1080ti',options="-libs=cudnn, cublas")
       target = tvm.target.cuda(model='3060ti')
       target_host = 'llvm'
   
   else:
       target = tvm.target.create('llvm -target=arm64-linux-android')
       target_host = None
   
   
   ######################################################################
   # input the mxnet model
   mx_sym, args, auxs = mx.model.load_checkpoint(path, 0)
   
   import pdb
   pdb.set_trace()
   ######################################################################
   shape_dict = {'data': input_shape}
   func, params = relay.frontend.from_mxnet(mx_sym, shape_dict, dtype, args, auxs)
   
   
   ######################################################################
   # now compile the graph
   with relay.build_config(opt_level=3):
       graph, lib, params = relay.build(func, target, params=params)
   
   ######################################################################
   print("Compile...")
   
   ######################################################################
   #save the relay model
   temp = util.tempdir()
   path_lib = temp.relpath("%s.%s.dll" % (path, device))
   
   if use_android:
       from tvm.contrib import ndk
       if use_arm64:
           lib.export_library(path_lib, ndk.create_shared)
       else:
           lib.export_library(path_lib, ndk.create_shared, options=["-shared", "-fPIC", "-mfloat-abi=softfp", "-mfpu=neon"])
   
   else:
       lib.export_library("%s.%s.dll" % (path, device))
   
   with open("%s.%s.json" % (path, device), "w") as fo:
       fo.write(graph)
   with open("%s.%s.params" % (path, device), "wb") as fo:
       fo.write(relay.save_param_dict(params))
   
   
   print("------convert done!!!------")
   import numpy as np
   img = np.ones(input_shape) #NCHW(batch_size,1,240,320)
   x = np.array(img)
   
   ######################################################################
   from tvm.contrib import graph_runtime
   import time
   dtype = 'float32'
   
   m = graph_runtime.create(graph, lib, ctx)
   m.set_input('data', tvm.nd.array(x.astype(dtype)))
   m.set_input(**params)
   
   start = time.time()
   count = 1
   for i in range(count):
       m.run()
   
   end = (time.time()- start)/count
   print ("the cost time is ", end)
   
   #evaluate
   print("Evaluate inference time cost...")
   ftimer = m.module.time_evaluator("run", ctx, number=1, repeat=10)
   prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
   print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
         (np.mean(prof_res), np.std(prof_res)))
   tvm_output0 = m.get_output(0)
   print('- tvm_output 0 shape : ', tvm_output0.shape)
   
   
   # ######################################################################
   # test images
   data_shape = input_shape
   import cv2 
   def preprocess_img_single(img_path,data_shape):
       img = cv2.imread(img_path,0)
       img = cv2.resize(img,(data_shape[3],data_shape[2]))-128.0    #img.shape(240,320)
        img = np.reshape(img,(data_shape[2],data_shape[3],1))        #single channel, img.shape(240,320,1)
       img_data = np.transpose(np.array(img), (2, 0, 1))            #img.shape(1,240,320)
       img_data = np.expand_dims(img_data, axis=0)                  #img.shape(1,1,240,320)
       return img_data
   
   # Set inputs
   img_path= model_path+"./00001.jpg"
   img_data = preprocess_img_single(img_path,data_shape) 
   
   m = graph_runtime.create(graph, lib, ctx)
   m.set_input('data', tvm.nd.array(img_data.astype(dtype)))
   m.set_input(**params)
   m.run()
   tvm_output = m.get_output(0).asnumpy() #, tvm.nd.empty(tuple(oshape[0]), dtype)
   result=tvm_output[0,:]
   resultfinal = result[result[:,0]!=-1].tolist()
   print(resultfinal)
   
    **CPU result:**
    ------convert done!!!------
    the cost time is  0.008001565933227539
    Evaluate inference time cost...
    Mean inference time (std dev): 8.99 ms (0.39 ms)
    - tvm_output 0 shape :  (1, 5228, 6)
    resultfinal: [[3.0, 0.9999833106994629, 0.266605406999588, 0.13155204057693481, 0.7422218322753906, 0.7783907651901245], [0.0, 0.9999711513519287, 0.424368679523468, 0.3192351758480072, 0.590116560459137, 0.5846942663192749]]

    **GPU result:**
    ------convert done!!!------
    the cost time is  0.6151375770568848
    Evaluate inference time cost...
    Mean inference time (std dev): 6.12 ms (0.42 ms)
    - tvm_output 0 shape :  (1, 5228, 6)
    resultfinal: [[3.495429754257202, 8.788818359375, 0.696427583694458, 3.863145589828491, 2.051990509033203, 9.19469928741455], [1.7574224472045898, 4.362786769866943, 1.1608469486236572, -0.23158644139766693, 1.5107040405273438, 2.060492515563965], [0.342197448015213, 2.6218209266662598, -2.7281951904296875, -1.9947190284729004, -1.5453457832336426, -0.49175599217414856], [-1.8521333932876587, 1.7014127969741821, -0.8565990924835205, -2.5641982555389404, -0.38735270500183105, 3.0310404300689697], [0.07879126071929932, 1.0, 0.0, 0.8949449062347412, 0.04564562812447548, 1.0], **[3.0, 0.9999833106994629, 0.2666054368019104, 0.1315521001815796, 0.7422217726707458, 0.7783908247947693], [0.0, 0.9999711513519287, 0.424368679523468, 0.3192351460456848, 0.590116560459137, 0.5846942067146301]]**

    The last two entries of the GPU result (in bold) equal the CPU result.
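
    Since only the last two GPU entries match, a direct element-wise comparison of the two builds makes it easier to see where the outputs diverge. A minimal sketch reusing func and params from relay.frontend.from_mxnet above (the build_and_run helper and the tolerances are illustrative, not part of the original script):

    import numpy as np
    import tvm
    import tvm.relay as relay
    from tvm.contrib import graph_runtime

    def build_and_run(func, params, target, ctx, x, dtype='float32'):
        # compile the same Relay function for the given target and run it once
        with relay.build_config(opt_level=3):
            graph, lib, built_params = relay.build(func, target, params=params)
        m = graph_runtime.create(graph, lib, ctx)
        m.set_input('data', tvm.nd.array(x.astype(dtype)))
        m.set_input(**built_params)
        m.run()
        return m.get_output(0).asnumpy()

    x = np.random.uniform(-1.0, 1.0, size=(1, 1, 240, 320)).astype('float32')
    out_cpu = build_and_run(func, params, 'llvm', tvm.cpu(0), x)
    out_gpu = build_and_run(func, params, tvm.target.cuda(), tvm.gpu(0), x)
    # raises and reports the max absolute/relative error if the outputs differ
    np.testing.assert_allclose(out_cpu, out_gpu, rtol=1e-3, atol=1e-5)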








[GitHub] [tvm] vinx13 commented on issue #8123: GPU result is different from CPU

Posted by GitBox <gi...@apache.org>.
vinx13 commented on issue #8123:
URL: https://github.com/apache/tvm/issues/8123#issuecomment-855507920


   Thanks for asking the question. Please open a new thread on https://discuss.tvm.apache.org/ as we use the forum for related discussions. Could you try to identify which operator goes wrong?
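
   One way to narrow that down is the debug graph runtime already hinted at (commented out) in the script above: it prints a per-operator profile and dumps every node's output under dump_root, so CPU and GPU dumps can be diffed offline. A sketch, assuming the graph, lib, params, ctx, and input x from the script (the dump_root path is illustrative):

   from tvm.contrib.debugger import debug_runtime

   m = debug_runtime.create(graph, lib, ctx, dump_root='/tmp/tvmdbg')
   m.set_input('data', tvm.nd.array(x.astype('float32')))
   m.set_input(**params)
   m.run()  # prints the per-operator table and writes per-node outputs under dump_root
   out = m.get_output(0).asnumpy()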





[GitHub] [tvm] cyyfighting12 closed issue #8123: GPU result is different from CPU

Posted by GitBox <gi...@apache.org>.
cyyfighting12 closed issue #8123:
URL: https://github.com/apache/tvm/issues/8123


   





[GitHub] [tvm] cyyfighting12 edited a comment on issue #8123: GPU result is different from CPU

Posted by GitBox <gi...@apache.org>.
cyyfighting12 edited a comment on issue #8123:
URL: https://github.com/apache/tvm/issues/8123#issuecomment-847648523


   # **the whole .py:**
   
   import mxnet as mx
   import tvm
   import tvm.relay as relay
   import numpy as np
   from tvm.contrib import util
   import os
   
   dtype = 'float32'
   use_arm64 = False
   use_android = False
   
   network = 'IrisAttackCCL'
   
   device = 'x86.cuda'  //device = 'x86.cpu'
   ctx=tvm.gpu(0)        //ctx = tvm.cpu(0)
   
   model_path = './'
   path = model_path +  (network)
   
   #set the input shape/layer
   input_layer = 'data'
   batch_size = 1
   image_shape = (1, 240,320)  
   input_shape = (batch_size,) + image_shape
   
   ######################################################################
   if device == 'cpu':
       if use_arm64:
           target = tvm.target.create('llvm -device=arm_cpu -target=arm64-linux-android -mattr=+neon')
       else:
           target = tvm.target.create('llvm -device=arm_cpu -target=arm-linux-androideabi -mattr=+neon -mfloat-abi=soft')
       target_host = None
   
   elif device == 'gpu':
       #target = tvm.target.create('opencl -device=mali')
       #target_host = 'llvm -target=aarch64-linux-gnu -mattr=+neon'
       target = tvm.target.create('opencl -device=mali')
   
       if use_arm64:
           target_host = 'llvm -target=arm64-linux-android -mattr=+neon'
       else:
           target_host = 'llvm -target=arm-linux-androideabi -mattr=+neon -mfloat-abi=soft'
   
   elif device == 'x86.cpu':
       target = 'llvm'
       target_host = None
   
   elif device == 'x86.cuda':
       target = 'cuda'
       #target = tvm.target.cuda(model='1080ti',options="-libs=cudnn, cublas")
       target = tvm.target.cuda(model='3060ti')
       target_host = 'llvm'
   
   else:
       target = tvm.target.create('llvm -target=arm64-linux-android')
       target_host = None
   
   
   ######################################################################
   # input the mxnet model
   mx_sym, args, auxs = mx.model.load_checkpoint(path, 0)
   
   import pdb
   pdb.set_trace()
   ######################################################################
   shape_dict = {'data': input_shape}
   func, params = relay.frontend.from_mxnet(mx_sym, shape_dict, dtype, args, auxs)
   
   
   ######################################################################
   # now compile the graph
   with relay.build_config(opt_level=3):
       graph, lib, params = relay.build(func, target, params=params)
   
   ######################################################################
   print("Compile...")
   
   ######################################################################
   #save the relay model
   temp = util.tempdir()
   path_lib = temp.relpath("%s.%s.dll" % (path, device))
   
   if use_android:
       from tvm.contrib import ndk
       if use_arm64:
           lib.export_library(path_lib, ndk.create_shared)
       else:
           lib.export_library(path_lib, ndk.create_shared, options=["-shared", "-fPIC", "-mfloat-abi=softfp", "-mfpu=neon"])
   
   else:
       lib.export_library("%s.%s.dll" % (path, device))
   
   with open("%s.%s.json" % (path, device), "w") as fo:
       fo.write(graph)
   with open("%s.%s.params" % (path, device), "wb") as fo:
       fo.write(relay.save_param_dict(params))
   
   
   print("------convert done!!!------")
   import numpy as np
   img = np.ones(input_shape) #NCHW(batch_size,1,240,320)
   x = np.array(img)
   
   ######################################################################
   from tvm.contrib import graph_runtime
   import time
   dtype = 'float32'
   
   m = graph_runtime.create(graph, lib, ctx)
   m.set_input('data', tvm.nd.array(x.astype(dtype)))
   m.set_input(**params)
   
   start = time.time()
   count = 1
   for i in range(count):
       m.run()
   
   end = (time.time()- start)/count
   print ("the cost time is ", end)
   
   #evaluate
   print("Evaluate inference time cost...")
   ftimer = m.module.time_evaluator("run", ctx, number=1, repeat=10)
   prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
   print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
         (np.mean(prof_res), np.std(prof_res)))
   tvm_output0 = m.get_output(0)
   print('- tvm_output 0 shape : ', tvm_output0.shape)
   
   
   # ######################################################################
   # test images
   data_shape = input_shape
   import cv2 
   def preprocess_img_single(img_path,data_shape):
       img = cv2.imread(img_path,0)
       img = cv2.resize(img,(data_shape[3],data_shape[2]))-128.0    #img.shape(240,320)
       img = np.reshape(img,(data_shape[2],data_shape[3],1))        #单通道 img.shape(240,320,1)
       img_data = np.transpose(np.array(img), (2, 0, 1))            #img.shape(1,240,320)
       img_data = np.expand_dims(img_data, axis=0)                  #img.shape(1,1,240,320)
       return img_data
   
   # Set inputs
   img_path= model_path+"./00001.jpg"
   img_data = preprocess_img_single(img_path,data_shape) 
   
   m = graph_runtime.create(graph, lib, ctx)
   m.set_input('data', tvm.nd.array(img_data.astype(dtype)))
   m.set_input(**params)
   m.run()
   tvm_output = m.get_output(0).asnumpy() #, tvm.nd.empty(tuple(oshape[0]), dtype)
   result=tvm_output[0,:]
   resultfinal = result[result[:,0]!=-1].tolist()
   print(resultfinal)
   
   # **CPU result:**
   ------convert done!!!------
   the cost time is  0.008001565933227539
   Evaluate inference time cost...
   Mean inference time (std dev): 8.99 ms (0.39 ms)
   - tvm_output 0 shape :  (1, 5228, 6)
   # resultfinal:[[3.0, 0.9999833106994629, 0.266605406999588, 0.13155204057693481, 0.7422218322753906, 0.7783907651901245], [0.0, 0.9999711513519287, 0.424368679523468, 0.3192351758480072, 0.590116560459137, 0.5846942663192749]]
   
   # **GPU result:**
   ------convert done!!!------
   the cost time is  0.6151375770568848
   Evaluate inference time cost...
   Mean inference time (std dev): 6.12 ms (0.42 ms)
   - tvm_output 0 shape :  (1, 5228, 6)
   resultfinal:[[3.495429754257202, 8.788818359375, 0.696427583694458, 3.863145589828491, 2.051990509033203, 9.19469928741455], [1.7574224472045898, 4.362786769866943, 1.1608469486236572, -0.23158644139766693, 1.5107040405273438, 2.060492515563965], [0.342197448015213, 2.6218209266662598, -2.7281951904296875, -1.9947190284729004, -1.5453457832336426, -0.49175599217414856], [-1.8521333932876587, 1.7014127969741821, -0.8565990924835205, -2.5641982555389404, -0.38735270500183105, 3.0310404300689697], [0.07879126071929932, 1.0, 0.0, 0.8949449062347412, 0.04564562812447548, 1.0], # **[3.0, 0.9999833106994629, 0.2666054368019104, 0.1315521001815796, 0.7422217726707458, 0.7783908247947693], [0.0, 0.9999711513519287, 0.424368679523468, 0.3192351460456848, 0.590116560459137, 0.5846942067146301]]**


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [tvm] cyyfighting12 edited a comment on issue #8123: GPU result is different from CPU

Posted by GitBox <gi...@apache.org>.
cyyfighting12 edited a comment on issue #8123:
URL: https://github.com/apache/tvm/issues/8123#issuecomment-847648523


   # **the whole .py:**
   
   import mxnet as mx
   import tvm
   import tvm.relay as relay
   import numpy as np
   from tvm.contrib import util
   import os
   
   dtype = 'float32'
   use_arm64 = False
   use_android = False
   
   network = 'IrisAttackCCL'
   
   device = 'x86.cuda'  //device = 'x86.cpu'
   ctx=tvm.gpu(0)        //ctx = tvm.cpu(0)
   
   model_path = './'
   path = model_path +  (network)
   
   #set the input shape/layer
   input_layer = 'data'
   batch_size = 1
   image_shape = (1, 240,320)  
   input_shape = (batch_size,) + image_shape
   
   ######################################################################
   if device == 'cpu':
       if use_arm64:
           target = tvm.target.create('llvm -device=arm_cpu -target=arm64-linux-android -mattr=+neon')
       else:
           target = tvm.target.create('llvm -device=arm_cpu -target=arm-linux-androideabi -mattr=+neon -mfloat-abi=soft')
       target_host = None
   
   elif device == 'gpu':
       #target = tvm.target.create('opencl -device=mali')
       #target_host = 'llvm -target=aarch64-linux-gnu -mattr=+neon'
       target = tvm.target.create('opencl -device=mali')
   
       if use_arm64:
           target_host = 'llvm -target=arm64-linux-android -mattr=+neon'
       else:
           target_host = 'llvm -target=arm-linux-androideabi -mattr=+neon -mfloat-abi=soft'
   
   elif device == 'x86.cpu':
       target = 'llvm'
       target_host = None
   
   elif device == 'x86.cuda':
       target = 'cuda'
       #target = tvm.target.cuda(model='1080ti',options="-libs=cudnn, cublas")
       target = tvm.target.cuda(model='3060ti')
       target_host = 'llvm'
   
   else:
       target = tvm.target.create('llvm -target=arm64-linux-android')
       target_host = None
   
   
   ######################################################################
   # input the mxnet model
   mx_sym, args, auxs = mx.model.load_checkpoint(path, 0)
   
   import pdb
   pdb.set_trace()
   ######################################################################
   shape_dict = {'data': input_shape}
   func, params = relay.frontend.from_mxnet(mx_sym, shape_dict, dtype, args, auxs)
   
   
   ######################################################################
   # now compile the graph
   with relay.build_config(opt_level=3):
       graph, lib, params = relay.build(func, target, params=params)
   
   ######################################################################
   print("Compile...")
   
   ######################################################################
   #save the relay model
   temp = util.tempdir()
   path_lib = temp.relpath("%s.%s.dll" % (path, device))
   
   if use_android:
       from tvm.contrib import ndk
       if use_arm64:
           lib.export_library(path_lib, ndk.create_shared)
       else:
           lib.export_library(path_lib, ndk.create_shared, options=["-shared", "-fPIC", "-mfloat-abi=softfp", "-mfpu=neon"])
   
   else:
       lib.export_library("%s.%s.dll" % (path, device))
   
   with open("%s.%s.json" % (path, device), "w") as fo:
       fo.write(graph)
   with open("%s.%s.params" % (path, device), "wb") as fo:
       fo.write(relay.save_param_dict(params))
   
   
   print("------convert done!!!------")
   import numpy as np
   img = np.ones(input_shape) #NCHW(batch_size,1,240,320)
   x = np.array(img)
   
   ######################################################################
   from tvm.contrib import graph_runtime
   import time
   dtype = 'float32'
   
   m = graph_runtime.create(graph, lib, ctx)
   m.set_input('data', tvm.nd.array(x.astype(dtype)))
   m.set_input(**params)
   
   start = time.time()
   count = 1
   for i in range(count):
       m.run()
   
   end = (time.time()- start)/count
   print ("the cost time is ", end)
   
   #evaluate
   print("Evaluate inference time cost...")
   ftimer = m.module.time_evaluator("run", ctx, number=1, repeat=10)
   prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
   print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
         (np.mean(prof_res), np.std(prof_res)))
   tvm_output0 = m.get_output(0)
   print('- tvm_output 0 shape : ', tvm_output0.shape)
   
   
   # ######################################################################
   # test images
   data_shape = input_shape
   import cv2 
   def preprocess_img_single(img_path,data_shape):
       img = cv2.imread(img_path,0)
       img = cv2.resize(img,(data_shape[3],data_shape[2]))-128.0    #img.shape(240,320)
       img = np.reshape(img,(data_shape[2],data_shape[3],1))        #单通道 img.shape(240,320,1)
       img_data = np.transpose(np.array(img), (2, 0, 1))            #img.shape(1,240,320)
       img_data = np.expand_dims(img_data, axis=0)                  #img.shape(1,1,240,320)
       return img_data
   
   # Set inputs
   img_path= model_path+"./00001.jpg"
   img_data = preprocess_img_single(img_path,data_shape) 
   
   m = graph_runtime.create(graph, lib, ctx)
   m.set_input('data', tvm.nd.array(img_data.astype(dtype)))
   m.set_input(**params)
   m.run()
   tvm_output = m.get_output(0).asnumpy() #, tvm.nd.empty(tuple(oshape[0]), dtype)
   result=tvm_output[0,:]
   resultfinal = result[result[:,0]!=-1].tolist()
   print(resultfinal)
   
   # **CPU result:**
   ------convert done!!!------
   the cost time is  0.008001565933227539
   Evaluate inference time cost...
   Mean inference time (std dev): 8.99 ms (0.39 ms)
   - tvm_output 0 shape :  (1, 5228, 6)
   # resultfinal**[[3.0, 0.9999833106994629, 0.266605406999588, 0.13155204057693481, 0.7422218322753906, 0.7783907651901245], [0.0, 0.9999711513519287, 0.424368679523468, 0.3192351758480072, 0.590116560459137, 0.5846942663192749]]**
   
   # **GPU result:**
   ------convert done!!!------
   the cost time is  0.6151375770568848
   Evaluate inference time cost...
   Mean inference time (std dev): 6.12 ms (0.42 ms)
   - tvm_output 0 shape :  (1, 5228, 6)
   # resultfinal[[3.495429754257202, 8.788818359375, 0.696427583694458, 3.863145589828491, 2.051990509033203, 9.19469928741455], [1.7574224472045898, 4.362786769866943, 1.1608469486236572, -0.23158644139766693, 1.5107040405273438, 2.060492515563965], [0.342197448015213, 2.6218209266662598, -2.7281951904296875, -1.9947190284729004, -1.5453457832336426, -0.49175599217414856], [-1.8521333932876587, 1.7014127969741821, -0.8565990924835205, -2.5641982555389404, -0.38735270500183105, 3.0310404300689697], [0.07879126071929932, 1.0, 0.0, 0.8949449062347412, 0.04564562812447548, 1.0], # **[3.0, 0.9999833106994629, 0.2666054368019104, 0.1315521001815796, 0.7422217726707458, 0.7783908247947693], [0.0, 0.9999711513519287, 0.424368679523468, 0.3192351460456848, 0.590116560459137, 0.5846942067146301]]**


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [tvm] cyyfighting12 edited a comment on issue #8123: GPU result is different from CPU

Posted by GitBox <gi...@apache.org>.
cyyfighting12 edited a comment on issue #8123:
URL: https://github.com/apache/tvm/issues/8123#issuecomment-847648523


   # **the whole .py:**
   
   import mxnet as mx
   import tvm
   import tvm.relay as relay
   import numpy as np
   from tvm.contrib import util
   import os
   
   dtype = 'float32'
   use_arm64 = False
   use_android = False
   
   network = 'IrisAttackCCL'
   
   device = 'x86.cuda'  //device = 'x86.cpu'
   ctx=tvm.gpu(0)        //ctx = tvm.cpu(0)
   
   model_path = './'
   path = model_path +  (network)
   
   #set the input shape/layer
   input_layer = 'data'
   batch_size = 1
   image_shape = (1, 240,320)  
   input_shape = (batch_size,) + image_shape
   
   ######################################################################
   if device == 'cpu':
       if use_arm64:
           target = tvm.target.create('llvm -device=arm_cpu -target=arm64-linux-android -mattr=+neon')
       else:
           target = tvm.target.create('llvm -device=arm_cpu -target=arm-linux-androideabi -mattr=+neon -mfloat-abi=soft')
       target_host = None
   
   elif device == 'gpu':
       #target = tvm.target.create('opencl -device=mali')
       #target_host = 'llvm -target=aarch64-linux-gnu -mattr=+neon'
       target = tvm.target.create('opencl -device=mali')
   
       if use_arm64:
           target_host = 'llvm -target=arm64-linux-android -mattr=+neon'
       else:
           target_host = 'llvm -target=arm-linux-androideabi -mattr=+neon -mfloat-abi=soft'
   
   elif device == 'x86.cpu':
       target = 'llvm'
       target_host = None
   
   elif device == 'x86.cuda':
       target = 'cuda'
       #target = tvm.target.cuda(model='1080ti',options="-libs=cudnn, cublas")
       target = tvm.target.cuda(model='3060ti')
       target_host = 'llvm'
   
   else:
       target = tvm.target.create('llvm -target=arm64-linux-android')
       target_host = None
   
   
   ######################################################################
   # input the mxnet model
   mx_sym, args, auxs = mx.model.load_checkpoint(path, 0)
   
   import pdb
   pdb.set_trace()
   ######################################################################
   shape_dict = {'data': input_shape}
   func, params = relay.frontend.from_mxnet(mx_sym, shape_dict, dtype, args, auxs)
   
   
   ######################################################################
   # now compile the graph
   with relay.build_config(opt_level=3):
       graph, lib, params = relay.build(func, target, params=params)
   
   ######################################################################
   print("Compile...")
   
   ######################################################################
   #save the relay model
   temp = util.tempdir()
   path_lib = temp.relpath("%s.%s.dll" % (path, device))
   
   if use_android:
       from tvm.contrib import ndk
       if use_arm64:
           lib.export_library(path_lib, ndk.create_shared)
       else:
           lib.export_library(path_lib, ndk.create_shared, options=["-shared", "-fPIC", "-mfloat-abi=softfp", "-mfpu=neon"])
   
   else:
       lib.export_library("%s.%s.dll" % (path, device))
   
   with open("%s.%s.json" % (path, device), "w") as fo:
       fo.write(graph)
   with open("%s.%s.params" % (path, device), "wb") as fo:
       fo.write(relay.save_param_dict(params))
   
   
   print("------convert done!!!------")
   import numpy as np
   img = np.ones(input_shape) #NCHW(batch_size,1,240,320)
   x = np.array(img)
   
   ######################################################################
   from tvm.contrib import graph_runtime
   import time
   dtype = 'float32'
   
   m = graph_runtime.create(graph, lib, ctx)
   m.set_input('data', tvm.nd.array(x.astype(dtype)))
   m.set_input(**params)
   
   start = time.time()
   count = 1
   for i in range(count):
       m.run()
   
   end = (time.time()- start)/count
   print ("the cost time is ", end)
   
   #evaluate
   print("Evaluate inference time cost...")
   ftimer = m.module.time_evaluator("run", ctx, number=1, repeat=10)
   prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
   print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
         (np.mean(prof_res), np.std(prof_res)))
   tvm_output0 = m.get_output(0)
   print('- tvm_output 0 shape : ', tvm_output0.shape)
   
   
   # ######################################################################
   # test images
   data_shape = input_shape
   import cv2 
   def preprocess_img_single(img_path,data_shape):
       img = cv2.imread(img_path,0)
       img = cv2.resize(img,(data_shape[3],data_shape[2]))-128.0    #img.shape(240,320)
       img = np.reshape(img,(data_shape[2],data_shape[3],1))        #单通道 img.shape(240,320,1)
       img_data = np.transpose(np.array(img), (2, 0, 1))            #img.shape(1,240,320)
       img_data = np.expand_dims(img_data, axis=0)                  #img.shape(1,1,240,320)
       return img_data
   
   # Set inputs
   img_path= model_path+"./00001.jpg"
   img_data = preprocess_img_single(img_path,data_shape) 
   
   m = graph_runtime.create(graph, lib, ctx)
   m.set_input('data', tvm.nd.array(img_data.astype(dtype)))
   m.set_input(**params)
   m.run()
   tvm_output = m.get_output(0).asnumpy() #, tvm.nd.empty(tuple(oshape[0]), dtype)
   result=tvm_output[0,:]
   resultfinal = result[result[:,0]!=-1].tolist()
   print(resultfinal)
   
   # **CPU result:**
   ------convert done!!!------
   the cost time is  0.008001565933227539
   Evaluate inference time cost...
   Mean inference time (std dev): 8.99 ms (0.39 ms)
   - tvm_output 0 shape :  (1, 5228, 6)
   resultfinal:**_[[3.0, 0.9999833106994629, 0.266605406999588, 0.13155204057693481, 0.7422218322753906, 0.7783907651901245], [0.0, 0.9999711513519287, 0.424368679523468, 0.3192351758480072, 0.590116560459137, 0.5846942663192749]]_**
   
   # **GPU result:**
   ------convert done!!!------
   the cost time is  0.6151375770568848
   Evaluate inference time cost...
   Mean inference time (std dev): 6.12 ms (0.42 ms)
   - tvm_output 0 shape :  (1, 5228, 6)
   resultfinal:[[3.495429754257202, 8.788818359375, 0.696427583694458, 3.863145589828491, 2.051990509033203, 9.19469928741455], [1.7574224472045898, 4.362786769866943, 1.1608469486236572, -0.23158644139766693, 1.5107040405273438, 2.060492515563965], [0.342197448015213, 2.6218209266662598, -2.7281951904296875, -1.9947190284729004, -1.5453457832336426, -0.49175599217414856], [-1.8521333932876587, 1.7014127969741821, -0.8565990924835205, -2.5641982555389404, -0.38735270500183105, 3.0310404300689697], [0.07879126071929932, 1.0, 0.0, 0.8949449062347412, 0.04564562812447548, 1.0], _**[3.0, 0.9999833106994629, 0.2666054368019104, 0.1315521001815796, 0.7422217726707458, 0.7783908247947693], [0.0, 0.9999711513519287, 0.424368679523468, 0.3192351460456848, 0.590116560459137, 0.5846942067146301]]**_
   
   the last two GPU result equal to CPU result.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [tvm] cyyfighting12 removed a comment on issue #8123: GPU result is different from CPU

Posted by GitBox <gi...@apache.org>.
cyyfighting12 removed a comment on issue #8123:
URL: https://github.com/apache/tvm/issues/8123#issuecomment-847648523


   vv


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [tvm] cyyfighting12 edited a comment on issue #8123: GPU result is different from CPU

Posted by GitBox <gi...@apache.org>.
cyyfighting12 edited a comment on issue #8123:
URL: https://github.com/apache/tvm/issues/8123#issuecomment-847648523


   # **the whole .py:**
   
   import mxnet as mx
   import tvm
   import tvm.relay as relay
   import numpy as np
   from tvm.contrib import util
   import os
   
   dtype = 'float32'
   use_arm64 = False
   use_android = False
   
   network = 'IrisAttackCCL'
   
   device = 'x86.cuda'  //device = 'x86.cpu'
   ctx=tvm.gpu(0)        //ctx = tvm.cpu(0)
   
   model_path = './'
   path = model_path +  (network)
   
   #set the input shape/layer
   input_layer = 'data'
   batch_size = 1
   image_shape = (1, 240,320)  
   input_shape = (batch_size,) + image_shape
   
   ######################################################################
   if device == 'cpu':
       if use_arm64:
           target = tvm.target.create('llvm -device=arm_cpu -target=arm64-linux-android -mattr=+neon')
       else:
           target = tvm.target.create('llvm -device=arm_cpu -target=arm-linux-androideabi -mattr=+neon -mfloat-abi=soft')
       target_host = None
   
   elif device == 'gpu':
       #target = tvm.target.create('opencl -device=mali')
       #target_host = 'llvm -target=aarch64-linux-gnu -mattr=+neon'
       target = tvm.target.create('opencl -device=mali')
   
       if use_arm64:
           target_host = 'llvm -target=arm64-linux-android -mattr=+neon'
       else:
           target_host = 'llvm -target=arm-linux-androideabi -mattr=+neon -mfloat-abi=soft'
   
   elif device == 'x86.cpu':
       target = 'llvm'
       target_host = None
   
   elif device == 'x86.cuda':
       target = 'cuda'
       #target = tvm.target.cuda(model='1080ti',options="-libs=cudnn, cublas")
       target = tvm.target.cuda(model='3060ti')
       target_host = 'llvm'
   
   else:
       target = tvm.target.create('llvm -target=arm64-linux-android')
       target_host = None
   
   
   ######################################################################
   # input the mxnet model
   mx_sym, args, auxs = mx.model.load_checkpoint(path, 0)
   
   import pdb
   pdb.set_trace()
   ######################################################################
   shape_dict = {'data': input_shape}
   func, params = relay.frontend.from_mxnet(mx_sym, shape_dict, dtype, args, auxs)
   
   
   ######################################################################
   # now compile the graph
   with relay.build_config(opt_level=3):
       graph, lib, params = relay.build(func, target, params=params)
   
   ######################################################################
   print("Compile...")
   
   ######################################################################
   #save the relay model
   temp = util.tempdir()
   path_lib = temp.relpath("%s.%s.dll" % (path, device))
   
   if use_android:
       from tvm.contrib import ndk
       if use_arm64:
           lib.export_library(path_lib, ndk.create_shared)
       else:
           lib.export_library(path_lib, ndk.create_shared, options=["-shared", "-fPIC", "-mfloat-abi=softfp", "-mfpu=neon"])
   
   else:
       lib.export_library("%s.%s.dll" % (path, device))
   
   with open("%s.%s.json" % (path, device), "w") as fo:
       fo.write(graph)
   with open("%s.%s.params" % (path, device), "wb") as fo:
       fo.write(relay.save_param_dict(params))
   
   
   print("------convert done!!!------")
   import numpy as np
   img = np.ones(input_shape) #NCHW(batch_size,1,240,320)
   x = np.array(img)
   
   ######################################################################
   from tvm.contrib import graph_runtime
   import time
   dtype = 'float32'
   
   m = graph_runtime.create(graph, lib, ctx)
   m.set_input('data', tvm.nd.array(x.astype(dtype)))
   m.set_input(**params)
   
   start = time.time()
   count = 1
   for i in range(count):
       m.run()
   
   end = (time.time()- start)/count
   print ("the cost time is ", end)
   
   #evaluate
   print("Evaluate inference time cost...")
   ftimer = m.module.time_evaluator("run", ctx, number=1, repeat=10)
   prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
   print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
         (np.mean(prof_res), np.std(prof_res)))
   tvm_output0 = m.get_output(0)
   print('- tvm_output 0 shape : ', tvm_output0.shape)
   
   
   # ######################################################################
   # test images
   data_shape = input_shape
   import cv2 
   def preprocess_img_single(img_path,data_shape):
       img = cv2.imread(img_path,0)
       img = cv2.resize(img,(data_shape[3],data_shape[2]))-128.0    #img.shape(240,320)
       img = np.reshape(img,(data_shape[2],data_shape[3],1))        #单通道 img.shape(240,320,1)
       img_data = np.transpose(np.array(img), (2, 0, 1))            #img.shape(1,240,320)
       img_data = np.expand_dims(img_data, axis=0)                  #img.shape(1,1,240,320)
       return img_data
   
   # Set inputs
   img_path= model_path+"./00001.jpg"
   img_data = preprocess_img_single(img_path,data_shape) 
   
   m = graph_runtime.create(graph, lib, ctx)
   m.set_input('data', tvm.nd.array(img_data.astype(dtype)))
   m.set_input(**params)
   m.run()
   tvm_output = m.get_output(0).asnumpy() #, tvm.nd.empty(tuple(oshape[0]), dtype)
   result=tvm_output[0,:]
   resultfinal = result[result[:,0]!=-1].tolist()
   print(resultfinal)
   
   # **CPU result:**
   ------convert done!!!------
   the cost time is  0.008001565933227539
   Evaluate inference time cost...
   Mean inference time (std dev): 8.99 ms (0.39 ms)
   - tvm_output 0 shape :  (1, 5228, 6)
   # resultfinal:[[3.0, 0.9999833106994629, 0.266605406999588, 0.13155204057693481, 0.7422218322753906, 0.7783907651901245], [0.0, 0.9999711513519287, 0.424368679523468, 0.3192351758480072, 0.590116560459137, 0.5846942663192749]]
   
   # **GPU result:**
   ------convert done!!!------
   the cost time is  0.6151375770568848
   Evaluate inference time cost...
   Mean inference time (std dev): 6.12 ms (0.42 ms)
   - tvm_output 0 shape :  (1, 5228, 6)
   resultfinal:[[3.495429754257202, 8.788818359375, 0.696427583694458, 3.863145589828491, 2.051990509033203, 9.19469928741455], [1.7574224472045898, 4.362786769866943, 1.1608469486236572, -0.23158644139766693, 1.5107040405273438, 2.060492515563965], [0.342197448015213, 2.6218209266662598, -2.7281951904296875, -1.9947190284729004, -1.5453457832336426, -0.49175599217414856], [-1.8521333932876587, 1.7014127969741821, -0.8565990924835205, -2.5641982555389404, -0.38735270500183105, 3.0310404300689697], [0.07879126071929932, 1.0, 0.0, 0.8949449062347412, 0.04564562812447548, 1.0], _**[3.0, 0.9999833106994629, 0.2666054368019104, 0.1315521001815796, 0.7422217726707458, 0.7783908247947693], [0.0, 0.9999711513519287, 0.424368679523468, 0.3192351460456848, 0.590116560459137, 0.5846942067146301]]**_
   
   the last two GPU result equal to CPU result.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [tvm] cyyfighting12 edited a comment on issue #8123: GPU result is different from CPU

Posted by GitBox <gi...@apache.org>.
cyyfighting12 edited a comment on issue #8123:
URL: https://github.com/apache/tvm/issues/8123#issuecomment-847648523


   **the whole .py:**
   
   import mxnet as mx
   import tvm
   import tvm.relay as relay
   import numpy as np
   from tvm.contrib import util
   import os
   
   dtype = 'float32'
   use_arm64 = False
   use_android = False
   
   network = 'IrisAttackCCL'
   ## device = 'x86.cpu'
   ## ctx = tvm.cpu(0)
   device = 'x86.cuda'
   ctx=tvm.gpu(0)
   
   model_path = './'
   path = model_path +  (network)
   
   #set the input shape/layer
   input_layer = 'data'
   batch_size = 1
   image_shape = (1, 240,320)  
   #(c,h,w)
   input_shape = (batch_size,) + image_shape
   
   ######################################################################
   if device == 'cpu':
       if use_arm64:
           target = tvm.target.create('llvm -device=arm_cpu -target=arm64-linux-android -mattr=+neon')
       else:
           target = tvm.target.create('llvm -device=arm_cpu -target=arm-linux-androideabi -mattr=+neon -mfloat-abi=soft')
       target_host = None
   
   elif device == 'gpu':
       #target = tvm.target.create('opencl -device=mali')
       #target_host = 'llvm -target=aarch64-linux-gnu -mattr=+neon'
       target = tvm.target.create('opencl -device=mali')
   
       if use_arm64:
           target_host = 'llvm -target=arm64-linux-android -mattr=+neon'
       else:
           target_host = 'llvm -target=arm-linux-androideabi -mattr=+neon -mfloat-abi=soft'
   
   elif device == 'x86.cpu':
       target = 'llvm'
       target_host = None
   
   elif device == 'x86.cuda':
       target = 'cuda'
       #target = tvm.target.cuda(model='1080ti',options="-libs=cudnn, cublas")
       target = tvm.target.cuda(model='3060ti')
       target_host = 'llvm'
   
   else:
       target = tvm.target.create('llvm -target=arm64-linux-android')
       target_host = None
   
   
   ######################################################################
   # input the mxnet model
   mx_sym, args, auxs = mx.model.load_checkpoint(path, 0)
   
   import pdb
   pdb.set_trace()
   ######################################################################
   shape_dict = {'data': input_shape}
   func, params = relay.frontend.from_mxnet(mx_sym, shape_dict, dtype, args, auxs)
   
   
   ######################################################################
   # now compile the graph
   with relay.build_config(opt_level=3):
       graph, lib, params = relay.build(func, target, params=params)
   
   ######################################################################
   print("Compile...")
   
   ######################################################################
   #save the relay model
   temp = util.tempdir()
   path_lib = temp.relpath("%s.%s.dll" % (path, device))
   
   if use_android:
       from tvm.contrib import ndk
       if use_arm64:
           lib.export_library(path_lib, ndk.create_shared)
       else:
           lib.export_library(path_lib, ndk.create_shared, options=["-shared", "-fPIC", "-mfloat-abi=softfp", "-mfpu=neon"])
   
   else:
       lib.export_library("%s.%s.dll" % (path, device))
       #lib.export_library(path_lib, tvm.contrib.cc.create_shared, cc="aarch64-linux-gnu-g++")
   
   with open("%s.%s.json" % (path, device), "w") as fo:
       fo.write(graph)
   with open("%s.%s.params" % (path, device), "wb") as fo:
       fo.write(relay.save_param_dict(params))
   
   
   print("------convert done!!!------")
   import numpy as np
   img = np.ones(input_shape) #NCHW(batch_size,1,240,320)
   x = np.array(img)
   
   ######################################################################
   # Execute the portable graph on TVM
   # ---------------------------------
   # Now we can try deploying the compiled model on target.
   
   from tvm.contrib import graph_runtime
   #from tvm.contrib.debugger import debug_runtime as  graph_runtime
   import time
   # from tvm.contrib.debugger import debug_runtime as graph_runtime
   dtype = 'float32'
   
   m = graph_runtime.create(graph, lib, ctx)
   # m = graph_runtime.create(graph, lib, ctx, dump_root="/home/kai/tmp/tvmdbg")
   # set inputs
   m.set_input('data', tvm.nd.array(x.astype(dtype)))
   m.set_input(**params)
   # execute
   start = time.time()
   count = 1
   for i in range(count):
       m.run()
   # tvm.gpu(0).sync()
   
   end = (time.time()- start)/count
   # print (tvm_output_confidence)
   print ("the cost time is ", end)
   
   # evaluate
   print("Evaluate inference time cost...")
   ftimer = m.module.time_evaluator("run", ctx, number=1, repeat=10)
   prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
   print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
         (np.mean(prof_res), np.std(prof_res)))
   tvm_output0 = m.get_output(0)
   print('- tvm_output 0 shape : ', tvm_output0.shape)
   
   
   # ######################################################################
   # test images
   data_shape = input_shape
   import cv2 
   def preprocess_img_single(img_path,data_shape):
       img = cv2.imread(img_path,0)
       img = cv2.resize(img,(data_shape[3],data_shape[2]))-128.0    #img.shape(240,320)
       img = np.reshape(img,(data_shape[2],data_shape[3],1))        #单通道 img.shape(240,320,1)
       img_data = np.transpose(np.array(img), (2, 0, 1))            #img.shape(1,240,320)
       img_data = np.expand_dims(img_data, axis=0)                  #img.shape(1,1,240,320)
       return img_data
   
   
   # Set inputs
   img_path= model_path+"./00001.jpg" #0002_AN_L_0001.jpg 007096_attrack.jpg 2-1.bmp
   img_data = preprocess_img_single(img_path,data_shape) 
   
   m = graph_runtime.create(graph, lib, ctx)
   m.set_input('data', tvm.nd.array(img_data.astype(dtype)))
   m.set_input(**params)
   m.run()
   tvm_output = m.get_output(0).asnumpy() #, tvm.nd.empty(tuple(oshape[0]), dtype)
   result=tvm_output[0,:]
   resultfinal = result[result[:,0]!=-1].tolist()
   print(resultfinal)
   
   **CPU result:**
   ------convert done!!!------
   the cost time is  0.008001565933227539
   Evaluate inference time cost...
   Mean inference time (std dev): 8.99 ms (0.39 ms)
   - tvm_output 0 shape :  (1, 5228, 6)
   [[3.0, 0.9999833106994629, 0.266605406999588, 0.13155204057693481, 0.7422218322753906, 0.7783907651901245], [0.0, 0.9999711513519287, 0.424368679523468, 0.3192351758480072, 0.590116560459137, 0.5846942663192749]]
   
   **GPU result:**
   ------convert done!!!------
   the cost time is  0.6151375770568848
   Evaluate inference time cost...
   Mean inference time (std dev): 6.12 ms (0.42 ms)
   - tvm_output 0 shape :  (1, 5228, 6)
   [[3.495429754257202, 8.788818359375, 0.696427583694458, 3.863145589828491, 2.051990509033203, 9.19469928741455], [1.7574224472045898, 4.362786769866943, 1.1608469486236572, -0.23158644139766693, 1.5107040405273438, 2.060492515563965], [0.342197448015213, 2.6218209266662598, -2.7281951904296875, -1.9947190284729004, -1.5453457832336426, -0.49175599217414856], [-1.8521333932876587, 1.7014127969741821, -0.8565990924835205, -2.5641982555389404, -0.38735270500183105, 3.0310404300689697], [0.07879126071929932, 1.0, 0.0, 0.8949449062347412, 0.04564562812447548, 1.0], **[3.0, 0.9999833106994629, 0.2666054368019104, 0.1315521001815796, 0.7422217726707458, 0.7783908247947693], [0.0, 0.9999711513519287, 0.424368679523468, 0.3192351460456848, 0.590116560459137, 0.5846942067146301]]**


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [tvm] cyyfighting12 commented on issue #8123: GPU result is different from CPU

Posted by GitBox <gi...@apache.org>.
cyyfighting12 commented on issue #8123:
URL: https://github.com/apache/tvm/issues/8123#issuecomment-847648523


   elif device == 'x86.cpu':
       target = 'llvm'
       target_host = None
   
   elif device == 'x86.cuda':
       target = 'cuda'
       target = tvm.target.cuda()
       target_host = 'llvm'
   
   **the whole .py:**
   # some standard imports
   import mxnet as mx
   import tvm
   import tvm.relay as relay
   import numpy as np
   from tvm.contrib import util
   import os
   
   dtype = 'float32'
   use_arm64 = False
   use_android = False
   
   network = 'IrisAttackCCL'
   # device = 'x86.cpu'
   # ctx = tvm.cpu(0)
   device = 'x86.cuda'
   ctx=tvm.gpu(0)
   
   model_path = './'
   path = model_path +  (network)
   
   #set the input shape/layer
   input_layer = 'data'
   batch_size = 1
   image_shape = (1, 240,320)  
   #(c,h,w)
   input_shape = (batch_size,) + image_shape
   
   ######################################################################
   #set the target/target_host
   if device == 'cpu':
       if use_arm64:
           target = tvm.target.create('llvm -device=arm_cpu -target=arm64-linux-android -mattr=+neon')
       else:
           target = tvm.target.create('llvm -device=arm_cpu -target=arm-linux-androideabi -mattr=+neon -mfloat-abi=soft')
   
       #target = tvm.target.create('llvm -device=arm_cpu -target=aarch64-linux-gnu -mattr=+neon')
       target_host = None
   
   elif device == 'gpu':
       #target = tvm.target.create('opencl -device=mali')
       #target_host = 'llvm -target=aarch64-linux-gnu -mattr=+neon'
       target = tvm.target.create('opencl -device=mali')
   
       if use_arm64:
           target_host = 'llvm -target=arm64-linux-android -mattr=+neon'
       else:
           target_host = 'llvm -target=arm-linux-androideabi -mattr=+neon -mfloat-abi=soft'
   
   elif device == 'x86.cpu':
       target = 'llvm'
       target_host = None
   
   elif device == 'x86.cuda':
       #target = 'cuda'  # dead assignment, immediately overwritten below
       #target = tvm.target.cuda(model='1080ti', options="-libs=cudnn,cublas")  # no space after the comma in -libs
       target = tvm.target.cuda(model='3060ti')
       target_host = 'llvm'
   
   else:
       target = tvm.target.create('llvm -target=arm64-linux-android')
       target_host = None
   
   
   ######################################################################
   # input the mxnet model
   mx_sym, args, auxs = mx.model.load_checkpoint(path, 0)
   
   # import pdb
   # pdb.set_trace()  # debugging breakpoint; keep commented out so the script runs unattended
   ######################################################################
   # Compile the Graph
   # -----------------
   # Now we would like to port the Gluon model to a portable computational graph.
   # It's as easy as several lines.
   # We support MXNet static graph(symbol) and HybridBlock in mxnet.gluon
   shape_dict = {'data': input_shape}
   func, params = relay.frontend.from_mxnet(mx_sym, shape_dict, dtype, args, auxs)
   
   
   ######################################################################
   # now compile the graph
   with relay.build_config(opt_level=3):
       graph, lib, params = relay.build(func, target, target_host=target_host, params=params)
   
   ######################################################################
   #build the relay model
   # compile kernels with history best records
   print("Compile...")
   
   ######################################################################
   #save the relay model
   temp = util.tempdir()
   path_lib = temp.relpath("%s.%s.dll" % (path, device))
   
   if use_android:
       from tvm.contrib import ndk
       if use_arm64:
           lib.export_library(path_lib, ndk.create_shared)
       else:
           lib.export_library(path_lib, ndk.create_shared, options=["-shared", "-fPIC", "-mfloat-abi=softfp", "-mfpu=neon"])
   
   else:
       lib.export_library("%s.%s.dll" % (path, device))
       #lib.export_library(path_lib, tvm.contrib.cc.create_shared, cc="aarch64-linux-gnu-g++")
   
   with open("%s.%s.json" % (path, device), "w") as fo:
       fo.write(graph)
   with open("%s.%s.params" % (path, device), "wb") as fo:
       fo.write(relay.save_param_dict(params))
   
   
   print("------convert done!!!------")
   import numpy as np
   img = np.ones(input_shape) #NCHW(batch_size,1,240,320)
   x = np.array(img)
   
   ######################################################################
   # Execute the portable graph on TVM
   # ---------------------------------
   # Now we can try deploying the compiled model on target.
   
   from tvm.contrib import graph_runtime
   # from tvm.contrib.debugger import debug_runtime as graph_runtime
   import time
   dtype = 'float32'
   
   m = graph_runtime.create(graph, lib, ctx)
   # m = graph_runtime.create(graph, lib, ctx, dump_root="/home/kai/tmp/tvmdbg")
   # set inputs
   m.set_input('data', tvm.nd.array(x.astype(dtype)))
   m.set_input(**params)
   # execute
   start = time.time()
   count = 1
   for i in range(count):
       m.run()
   # tvm.gpu(0).sync()
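   # note: without a ctx.sync() here the CUDA kernels may still be in flight,
   # so the wall-clock "cost time" below measures launch overhead more than execution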
   
   end = (time.time() - start) / count
   # print(tvm_output_confidence)
   print("the cost time is ", end)
   
   # evaluate
   print("Evaluate inference time cost...")
   ftimer = m.module.time_evaluator("run", ctx, number=1, repeat=10)
   prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
   print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
         (np.mean(prof_res), np.std(prof_res)))
   tvm_output0 = m.get_output(0)
   print('- tvm_output 0 shape : ', tvm_output0.shape)
   
   
   # ######################################################################
   # ##test images
   data_shape = input_shape
   import cv2 
   def preprocess_img_single(img_path, data_shape):
       img = cv2.imread(img_path, 0)                                  # read as grayscale
       img = cv2.resize(img, (data_shape[3], data_shape[2])) - 128.0  # img.shape (240, 320)
       img = np.reshape(img, (data_shape[2], data_shape[3], 1))       # single channel: img.shape (240, 320, 1)
       img_data = np.transpose(np.array(img), (2, 0, 1))              # img.shape (1, 240, 320)
       img_data = np.expand_dims(img_data, axis=0)                    # img.shape (1, 1, 240, 320)
       return img_data
   
   
   # Set inputs
   img_path = model_path + "00001.jpg"  # other test images: 0002_AN_L_0001.jpg, 007096_attrack.jpg, 2-1.bmp
   img_data = preprocess_img_single(img_path, data_shape)
   
   # loaded_json = open("%s.%s.json" % (path, device)).read()
   # loaded_lib = tvm.runtime.load_module("%s.%s.dll" % (path, device))
   # loaded_params = bytearray(open("%s.%s.params" % (path, device), "rb").read())
   # m = graph_runtime.create(loaded_json, loaded_lib, ctx)
   # m.load_params(loaded_params)
   
   m = graph_runtime.create(graph, lib, ctx)
   m.set_input('data', tvm.nd.array(img_data.astype(dtype)))
   m.set_input(**params)
   m.run()
   tvm_output = m.get_output(0).asnumpy()
   result = tvm_output[0, :]
   resultfinal = result[result[:, 0] != -1].tolist()
   print(resultfinal)
   
   **CPU result:**
   ------convert done!!!------
   the cost time is  0.008001565933227539
   Evaluate inference time cost...
   Mean inference time (std dev): 8.99 ms (0.39 ms)
   - tvm_output 0 shape :  (1, 5228, 6)
   [[3.0, 0.9999833106994629, 0.266605406999588, 0.13155204057693481, 0.7422218322753906, 0.7783907651901245], [0.0, 0.9999711513519287, 0.424368679523468, 0.3192351758480072, 0.590116560459137, 0.5846942663192749]]
   
   **GPU result:**
   ------convert done!!!------
   the cost time is  0.6151375770568848
   Evaluate inference time cost...
   Mean inference time (std dev): 6.12 ms (0.42 ms)
   - tvm_output 0 shape :  (1, 5228, 6)
   [[3.495429754257202, 8.788818359375, 0.696427583694458, 3.863145589828491, 2.051990509033203, 9.19469928741455], [1.7574224472045898, 4.362786769866943, 1.1608469486236572, -0.23158644139766693, 1.5107040405273438, 2.060492515563965], [0.342197448015213, 2.6218209266662598, -2.7281951904296875, -1.9947190284729004, -1.5453457832336426, -0.49175599217414856], [-1.8521333932876587, 1.7014127969741821, -0.8565990924835205, -2.5641982555389404, -0.38735270500183105, 3.0310404300689697], [0.07879126071929932, 1.0, 0.0, 0.8949449062347412, 0.04564562812447548, 1.0], **[3.0, 0.9999833106994629, 0.2666054368019104, 0.1315521001815796, 0.7422217726707458, 0.7783908247947693], [0.0, 0.9999711513519287, 0.424368679523468, 0.3192351460456848, 0.590116560459137, 0.5846942067146301]]**
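   
   The bolded rows at the end of the GPU output match the CPU result, so the extra leading rows look like slots that the `result[:, 0] != -1` filter failed to screen out on CUDA. A minimal way to quantify the divergence (a sketch, assuming `func` and `params` as returned by `relay.frontend.from_mxnet` above, plus `img_data` and `dtype` from the script; the tolerances are only illustrative):
   
   import numpy as np
   import tvm
   import tvm.relay as relay
   from tvm.contrib import graph_runtime
   
   outs = []
   for tgt, dev in [('llvm', tvm.cpu(0)), (tvm.target.cuda(), tvm.gpu(0))]:
       # build the same Relay function once per target
       with relay.build_config(opt_level=3):
           g, l, p = relay.build(func, tgt, params=params)
       mod = graph_runtime.create(g, l, dev)
       mod.set_input('data', tvm.nd.array(img_data.astype(dtype)))
       mod.set_input(**p)
       mod.run()
       outs.append(mod.get_output(0).asnumpy())
   
   cpu_out, gpu_out = outs
   print("max abs diff:", np.max(np.abs(cpu_out - gpu_out)))           # size of the worst mismatch
   np.testing.assert_allclose(cpu_out, gpu_out, rtol=1e-3, atol=1e-3)  # raises with a mismatch summary
   
   Note that the extra GPU rows carry scores outside [0, 1] in the second column (e.g. 8.78), which is consistent with output slots that were never written rather than real detections.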


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


