You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2019/07/17 08:58:25 UTC
[GitHub] [incubator-mxnet] kostayScr edited a comment on issue #15555: Bug
or highly unintuitive and undocumented behaviour corrupting NDArray
data(possibly race condition)[MKLDNN][C++]
kostayScr edited a comment on issue #15555: Bug or highly unintuitive and undocumented behaviour corrupting NDArray data(possibly race condition)[MKLDNN][C++]
URL: https://github.com/apache/incubator-mxnet/issues/15555#issuecomment-512166732
Hello, thanks for looking into it.
Here is the complete code:
```
// Reproduction case for NDArray data appearing corrupted after a round trip of
// NDArray::CopyTo calls between two maps (args -> bestModelArgs -> args) when
// no explicit wait is performed on the arrays.
//
// Prints five lines, each "true" or "false":
//   1. args holds all ones            (initial state)
//   2. bestModelArgs holds all zeroes (initial state)
//   3. bestModelArgs holds all ones   (after the round trip)
//   4. args holds all ones            (after the round trip)
//   5. bestModelArgs holds all zeroes (after the round trip)
void TestWaitMapMXNetBug()
{
    using namespace mxnet::cpp;
    using ArgMap = map<string, NDArray>;

    const size_t testDataSz = 10 * 1024 * 1024;

    // Host-side staging buffers; released at the end of the function.
    float *testDataOnes = new float[testDataSz];
    float *testDataZeroes = new float[testDataSz];
    for ( size_t i = 0; i < testDataSz; i++ )
    {
        testDataOnes[i] = 1.0f;
        testDataZeroes[i] = 0.0f;
    }

    //NDArray testOnes( testDataOnes, testDataSz ); //crash
    //NDArray testZeroes( testDataZeroes, testDataSz );
    NDArray testOnes( testDataOnes, Shape( testDataSz ), Context::cpu() );
    NDArray testZeroes( testDataZeroes, Shape( testDataSz ), Context::cpu() );

    // Returns true when every element of arr equals val.
    // Reads the buffer returned by GetData() directly, without waiting on arr first.
    auto CheckNDArrayValues = []( const NDArray &arr, const float val )
    {
        auto arrData = arr.GetData();
        for ( size_t i = 0; i < arr.Size(); i++ )
        {
            if ( arrData[i] != val )
                return false;
        }
        return true;
    };

    // Prints one check result per line as "true"/"false". Streaming the bool
    // directly would emit 1/0 with no separator between results.
    auto PrintCheck = []( const bool ok )
    {
        std::cout << ( ok ? "true" : "false" ) << '\n';
    };

    ArgMap args;
    ArgMap auxiliaryState;
    ArgMap bestModelArgs;
    ArgMap bestModelAux;

    auto testName = "testName"s;
    args[testName] = testOnes;
    bestModelArgs[testName] = testZeroes;

    //initial test
    PrintCheck( CheckNDArrayValues( args[testName], 1.0f ) );
    PrintCheck( CheckNDArrayValues( bestModelArgs[testName], 0.0f ) );

    // Copies every array in rhs into the entry of lhs that has the same key.
    // lhs.at() throws std::out_of_range if lhs lacks a key that rhs contains.
    // NOTE(review): CopyTo is presumably scheduled asynchronously by the MXNet
    // engine - confirm against the NDArray documentation.
    auto AssignNDArrayMap = []( decltype( args ) &lhs, const decltype( args ) &rhs )
    {
        for ( auto &[k, arr] : rhs )
            arr.CopyTo( &lhs.at( k ) );
    };

    // Waits on every array in the map.
    auto WaitMap = []( auto &map )
    {
        // The early return below deliberately DISABLES the waits so that the
        // corruption reproduces. Comment it out to enable the waits, which is
        // the workaround that makes the checks pass.
        return;
        for ( auto &[k, v] : map )
        {
            v.WaitAll();
            v.WaitToRead();
            v.WaitToWrite();
        }
    };

    // args/auxiliaryState -> bestModelArgs/bestModelAux
    auto SaveAsBestModel = [ &WaitMap, &bestModelArgs, &bestModelAux, &AssignNDArrayMap, &args, &auxiliaryState ]()
    {
        AssignNDArrayMap( bestModelArgs, args );
        AssignNDArrayMap( bestModelAux, auxiliaryState );
        WaitMap( bestModelArgs );
        WaitMap( bestModelAux );
    };

    // bestModelArgs/bestModelAux -> args/auxiliaryState
    auto LoadBestModel = [ &WaitMap, &bestModelArgs, &bestModelAux, &AssignNDArrayMap, &args, &auxiliaryState ]()
    {
        AssignNDArrayMap( args, bestModelArgs );
        AssignNDArrayMap( auxiliaryState, bestModelAux );
        WaitMap( args );
        WaitMap( auxiliaryState );
    };

    //copy
    SaveAsBestModel(); //a -> b
    LoadBestModel(); //b -> a
    //Data in args/auxiliaryState is now Corrupt!

    //test for corruption, now both must contain ones
    PrintCheck( CheckNDArrayValues( bestModelArgs[testName], 1.0f ) );
    PrintCheck( CheckNDArrayValues( args[testName], 1.0f ) );
    //bestModelArgs got corrupt, maybe it's all zeroes?
    PrintCheck( CheckNDArrayValues( bestModelArgs[testName], 0.0f ) );

    // Release the host staging buffers, which the original snippet leaked.
    // NOTE(review): assumes the NDArray constructor copied the host data rather
    // than keeping the pointer; every check above reads through the NDArrays.
    delete[] testDataOnes;
    delete[] testDataZeroes;
}
```
The bug reproduces (i.e. the third check fails) only in Release build mode for me.
Expected output:
```
true
true
true
true
false
```
Actual output:
```
true
true
false
true
false
```
Enabling the waits on the NDArrays (i.e. removing the early `return` in `WaitMap`) fixes it.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services