You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2019/05/02 21:15:17 UTC

[GitHub] [incubator-mxnet] scotty3005 opened a new issue #14863: mxnet csv iterator error

scotty3005 opened a new issue #14863: mxnet csv iterator error
URL: https://github.com/apache/incubator-mxnet/issues/14863
 
 
   ## Description
   When running mxnet in R, I get the error message posted below after 50-60 iterations. I am using the custom iterator posted in the code section. I encountered other discussions about this error, but I could not figure out whether a solution has been found.
   
   R `sessionInfo()`:
   R version 3.5.2 (2018-12-20)
   Platform: x86_64-pc-linux-gnu (64-bit)
   Running under: Debian GNU/Linux 9 (stretch)
   
   Matrix products: default
   BLAS: /usr/lib/openblas-base/libblas.so.3
   LAPACK: /usr/lib/libopenblasp-r0.2.19.so
   
   locale:
    [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
    [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
    [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
    [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
    [9] LC_ADDRESS=C               LC_TELEPHONE=C            
   [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
   
   attached base packages:
   [1] stats     graphics  grDevices utils     datasets  methods   base     
   
   other attached packages:
   [1] mxnet_1.5.0     bigrquery_1.1.0.9000
   [4] data.table_1.12.2    xgboost_0.81.0.1    
   
   loaded via a namespace (and not attached):
    [1] tidyselect_0.2.5   Rook_1.1-1         purrr_0.3.2        lattice_0.20-38   
    [5] colorspace_1.4-1   htmltools_0.3.6    viridisLite_0.3.0  XML_3.98-1.19     
    [9] rlang_0.3.4        pillar_1.3.1       glue_1.3.1         DBI_1.0.0         
   [13] bit64_0.9-7        RColorBrewer_1.1-2 plyr_1.8.4         stringr_1.4.0     
   [17] munsell_0.5.0      gtable_0.3.0       visNetwork_2.0.6   htmlwidgets_1.3   
   [21] codetools_0.2-15   DiagrammeR_1.0.0   Rcpp_1.0.1         readr_1.3.1       
   [25] scales_1.0.0       jsonlite_1.6       rgexf_0.15.3       bit_1.1-14        
   [29] gridExtra_2.3      brew_1.0-6         ggplot2_3.1.1      hms_0.4.2         
   [33] digest_0.6.18      stringi_1.4.3      dplyr_0.8.0.1      grid_3.5.2        
   [37] influenceR_0.1.0   tools_3.5.2        magrittr_1.5       lazyeval_0.2.2    
   [41] tibble_2.1.1       crayon_1.3.4       tidyr_0.8.3        pkgconfig_2.0.2   
   [45] Matrix_1.2-15      downloader_0.4     assertthat_0.2.1   httr_1.4.0        
   [49] rstudioapi_0.10    viridis_0.5.1      R6_2.4.0           igraph_1.2.4      
   [53] compiler_3.5.2    
   
   ## Build info (Required if built from source)
   
   Compiler (gcc/clang/mingw/visual studio):
   gcc
   
   MXNet commit hash:
   2c5d7f768bdd1599c35f1a3cd1266efd051a9986
   
   Build config:
   (Paste the content of config.mk, or the build command.)
   
   ## Error Message:
   Error in .self$iter.pos$iter.next() : 
     [16:07:41] src/io/iter_csv.cc:137: Check failed: label_parser_->Next() Data CSV's row is smaller than the number of rows in label_csv
   
   Stack trace returned 9 entries:
   [bt] (0) /usr/local/lib/R/site-library/mxnet/libs/libmxnet.so(dmlc::StackTrace[abi:cxx11]()+0x17b) [0x7faced51acbb]
   [bt] (1) /usr/local/lib/R/site-library/mxnet/libs/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x18) [0x7faced51c238]
   [bt] (2) /usr/local/lib/R/site-library/mxnet/libs/libmxnet.so(mxnet::io::CSVIterTyped<float>::Next()+0x28d) [0x7facf022573d]
   [bt] (3) /usr/local/lib/R/site-library/mxnet/libs/libmxnet.so(mxnet::io::BatchLoader::Next()+0x6e) [0x7facf021009e]
   [bt] (4) /usr/local/lib/R/site-library/mxnet/libs/libmxnet.so(mxnet::io::PrefetcherIter::Init(std::vector<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, s
   Calls: mx.model.FeedForward.create ... mx.model.train -> <Anonymous> -> <Anonymous> -> .External
   Execution halted
   
   
   ## Minimum reproducible example
   # Reference-class iterator that draws one batch from a "positive" CSV
   # iterator and one from a "negative" CSV iterator and concatenates them
   # into a single batch (bz.pos + bz.neg samples) for mx.model training.
   #
   # Constructor arguments:
   #   data.pos, lab.pos  paths of the data / label CSV files passed to
   #                      mx.io.CSVIter for the positive stream
   #   data.neg, lab.neg  same, for the negative stream
   #   bz.pos, bz.neg     batch sizes of the two streams
   #   epoch.size         iter.next() returns FALSE on every call whose
   #                      running count is a multiple of this value
   #   active_features    feature names to keep, or NULL to keep all
   #
   # NOTE(review): relies on the global `.ODR$WRK_PATH`, which is defined
   # outside this snippet, to locate 'feature_names.csv'.
   trainIter <- setRefClass(
       'trainIterator',
       fields = c('iter.pos', 'iter.neg', 'data.pos', 'label.pos', 'data.neg', 'label.neg',
                  'bz.pos', 'bz.neg', 'data.shape', 'pos.next', 'neg.next', 'epoch.size', 'epc',
                  'features', 'active_features', 'actFeatureIndexes'),
       contains = 'Rcpp_MXArrayDataIter',
       methods = list(
           # Store the configuration, resolve which feature rows are active
           # and create the two underlying CSV iterators.
           initialize = function (data.pos, lab.pos, data.neg, lab.neg, bz.pos, bz.neg, epoch.size, active_features) {
               .self$data.pos <- data.pos
               .self$label.pos <- lab.pos
               .self$data.neg <- data.neg
               .self$label.neg <- lab.neg
               .self$bz.pos <- bz.pos
               .self$bz.neg <- bz.neg
               .self$epoch.size <- epoch.size
               .self$epc <- 0
               .self$pos.next <- FALSE
               .self$neg.next <- FALSE
               .self$features <- unlist(read.csv(file.path(.ODR$WRK_PATH, 'feature_names.csv'), header = FALSE))[-1] #1 is the label
               .self$active_features <- active_features
               if (is.null(active_features)) {
                   # Fixed: the original assigned to the undeclared field
                   # 'actFeaturesIndexes', which a reference class rejects.
                   # seq_along() also stays empty for an empty feature list.
                   .self$actFeatureIndexes <- seq_along(.self$features)
               } else {
                   if (!all(active_features %in% .self$features)) stop('wrong active_features')
                   # match() always yields a plain integer vector (position
                   # of the first occurrence of each requested feature).
                   .self$actFeatureIndexes <- match(active_features, .self$features)
               }
               .self$data.shape <- length(.self$actFeatureIndexes)
               # The CSV files hold every feature; the active subset is
               # selected later in value().
               .self$iter.pos <- mx.io.CSVIter(data.csv=data.pos,
                                               label.csv=lab.pos,
                                               batch.size=bz.pos,
                                               data.shape=c(length(.self$features)),
                                               dtype='float32')
               .self$iter.neg <- mx.io.CSVIter(data.csv=data.neg,
                                               label.csv=lab.neg,
                                               batch.size=bz.neg,
                                               data.shape=c(length(.self$features)),
                                               dtype='float32')
           },
           # Return the current combined batch as list(data=, label=):
           # active feature rows of both streams bound column-wise, and
           # the labels of both streams concatenated (positives first).
           value = function () {
               POS <- .self$iter.pos$value()
               NEG <- .self$iter.neg$value()
               pos.data <- as.matrix(POS$data)
               pos.data <- pos.data[.self$actFeatureIndexes,,drop=FALSE]
               neg.data <- as.matrix(NEG$data)
               neg.data <- neg.data[.self$actFeatureIndexes,,drop=FALSE]
               data <- mx.nd.array(cbind(pos.data, neg.data))
               lab <- c(as.array(POS$label), as.array(NEG$label))
               dim(lab) <- c(.self$bz.pos + .self$bz.neg)
               lab <- mx.nd.array(lab)
               list(
                   data = data,
                   label = lab
               )
           },
           # Advance both streams by one batch. Returns FALSE (ending the
           # epoch) whenever the call counter hits a multiple of
           # epoch.size; otherwise TRUE. A stream that is exhausted is
           # reset and advanced again so it wraps around.
           iter.next = function () {
               .self$epc <- .self$epc + 1
               if (.self$epc %% .self$epoch.size == 0) return(FALSE)
               .self$pos.next <- .self$iter.pos$iter.next()
               .self$neg.next <- .self$iter.neg$iter.next()
               if (!.self$pos.next) {
                   cat('[*] reset pos 1', sep='\n')
                   .self$iter.pos$reset()
                   .self$pos.next <- .self$iter.pos$iter.next()
               }
               if (!.self$neg.next) {
                   cat('[*] reset neg 1', sep='\n')
                   .self$iter.neg$reset()
                   .self$neg.next <- .self$iter.neg$iter.next()
               }
               return(TRUE)
           },
           # Intentionally a no-op: the underlying iterators are only
           # reset from iter.next() when they run out of data.
           reset = function () {
                ## if (runif(1) > 0.5) {
                ## 	cat('[*] reset pos 2', sep='\n')
                ## 	.self$iter.pos$reset()
                ## }
                ## if (runif(1) > 0.5) {
                ##    cat('[*] reset neg 2', sep='\n')
                ##    .self$iter.neg$reset()
                ## }
           },
           # Queries both streams; only the negative stream's padding
           # count is returned (value of the last expression).
           num.pad = function () {
               .self$iter.pos$num.pad()
               .self$iter.neg$num.pad()
           },
           # Finalize both underlying CSV iterators.
           finalize = function () {
               .self$iter.pos$finalize()
               .self$iter.neg$finalize()
           }
       )
   )

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services