You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by jx...@apache.org on 2017/08/08 23:36:26 UTC
[incubator-mxnet] branch master updated: Add license header (#7379)

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new 251ae71  Add license header (#7379)
251ae71 is described below

commit 251ae71a20d8318ab20f4c19520f74b881fdf3ff
Author: Mu Li <mu...@cs.cmu.edu>
AuthorDate: Tue Aug 8 16:36:23 2017 -0700

    Add license header (#7379)
    
    * add
    
    * add .py and ci
    
    * fix pylint
    
    * update
---
 Jenkinsfile                                        |    1 +
 amalgamation/amalgamation.py                       |   19 +-
 amalgamation/dmlc-minimum0.cc                      |   20 +-
 amalgamation/jni/org_dmlc_mxnet_Predictor.h        |   19 +
 amalgamation/jni/predictor.cc                      |   19 +
 amalgamation/mxnet_predict0.cc                     |   19 +
 amalgamation/python/mxnet_predict.py               |   17 +
 cmake/Modules/FindAccelerate.cmake                 |   17 +
 cmake/Modules/FindAtlas.cmake                      |   17 +
 cmake/Modules/FindJeMalloc.cmake                   |   17 +
 cmake/Modules/FindMKL.cmake                        |   17 +
 cmake/Modules/FindOpenBLAS.cmake                   |   17 +
 cmake/Utils.cmake                                  |   17 +
 cpp-package/example/feature_extract/run.sh         |   17 +
 cpp-package/example/get_mnist.sh                   |   17 +
 cpp-package/example/run_lenet_with_mxdataiter.sh   |   17 +
 cpp-package/include/mxnet-cpp/MxNetCpp.h           |   20 +-
 cpp-package/include/mxnet-cpp/base.h               |   20 +-
 cpp-package/include/mxnet-cpp/executor.h           |   20 +-
 cpp-package/include/mxnet-cpp/initializer.h        |   20 +-
 cpp-package/include/mxnet-cpp/io.h                 |   20 +-
 cpp-package/include/mxnet-cpp/kvstore.h            |   20 +-
 cpp-package/include/mxnet-cpp/lr_scheduler.h       |   20 +-
 cpp-package/include/mxnet-cpp/metric.h             |   20 +-
 cpp-package/include/mxnet-cpp/model.h              |   20 +-
 cpp-package/include/mxnet-cpp/monitor.h            |   20 +-
 cpp-package/include/mxnet-cpp/ndarray.h            |   20 +-
 cpp-package/include/mxnet-cpp/op_map.h             |   20 +-
 cpp-package/include/mxnet-cpp/op_suppl.h           |   20 +-
 cpp-package/include/mxnet-cpp/op_util.h            |   20 +-
 cpp-package/include/mxnet-cpp/operator.h           |   20 +-
 cpp-package/include/mxnet-cpp/optimizer.h          |   20 +-
 cpp-package/include/mxnet-cpp/shape.h              |   20 +-
 cpp-package/include/mxnet-cpp/symbol.h             |   20 +-
 cpp-package/scripts/OpWrapperGenerator.py          |   19 +-
 cpp-package/scripts/lint.py                        |   18 +
 cpp-package/tests/ci_test.sh                       |   17 +
 cpp-package/tests/travis/run_test.sh               |   18 +
 cpp-package/tests/travis/setup.sh                  |   18 +
 docker/Dockerfiles/Dockerfile.in.scala             |   19 +
 docker/install/cpp.sh                              |   18 +
 docker/install/julia.sh                            |   18 +
 docker/install/perl.sh                             |   18 +
 docker/install/python.sh                           |   18 +
 docker/install/r.sh                                |   18 +
 docker/install/scala.sh                            |   18 +
 docker/run.sh                                      |   18 +
 docker/tool.sh                                     |   18 +
 docs/build_version_doc/AddVersion.py               |   19 +-
 docs/build_version_doc/build_doc.sh                |   22 +-
 docs/conf.py                                       |   17 +
 docs/mxdoc.py                                      |   17 +
 example/adversary/data.py                          |   17 +
 example/autoencoder/autoencoder.py                 |   17 +
 example/autoencoder/data.py                        |   17 +
 example/autoencoder/mnist_sae.py                   |   71 +-
 example/autoencoder/model.py                       |   17 +
 example/autoencoder/solver.py                      |   17 +
 example/bayesian-methods/algos.py                  |   17 +
 example/bayesian-methods/bdk_demo.py               |   17 +
 example/bayesian-methods/data_loader.py            |   17 +
 example/bayesian-methods/utils.py                  |   19 +-
 example/bi-lstm-sort/infer_sort.py                 |   25 +-
 example/bi-lstm-sort/lstm.py                       |   25 +-
 example/bi-lstm-sort/lstm_sort.py                  |   17 +
 example/bi-lstm-sort/rnn_model.py                  |   19 +-
 example/bi-lstm-sort/sort_io.py                    |   17 +
 example/caffe/caffe_net.py                         |   19 +-
 example/caffe/data.py                              |   17 +
 example/caffe/train_model.py                       |   17 +
 example/cnn_text_classification/data_helpers.py    |   17 +
 example/cnn_text_classification/old/text_cnn.py    |   20 +-
 example/cnn_text_classification/text_cnn.py        |   22 +-
 example/ctc/lstm.py                                |   17 +
 example/ctc/lstm_ocr.py                            |   17 +
 example/ctc/ocr_predict.py                         |   18 +
 example/dec/dec.py                                 |   19 +-
 example/dsd/mlp.py                                 |   17 +
 example/dsd/sparse_sgd.py                          |   17 +
 example/fcn-xs/data.py                             |   17 +
 example/fcn-xs/fcn_xs.py                           |   17 +
 example/fcn-xs/image_segmentaion.py                |  133 ++-
 example/fcn-xs/init_fcnxs.py                       |  195 +--
 example/fcn-xs/run_fcnxs.sh                        |   17 +
 example/fcn-xs/solver.py                           |  269 +++--
 example/fcn-xs/symbol_fcnxs.py                     |  395 ++++---
 example/gan/dcgan.py                               |   17 +
 example/gluon/actor_critic.py                      |   17 +
 example/gluon/data.py                              |   17 +
 example/gluon/dcgan.py                             |   17 +
 example/gluon/image_classification.py              |   17 +
 example/gluon/lstm_crf.py                          |   17 +
 example/gluon/mnist.py                             |   17 +
 example/gluon/super_resolution.py                  |   17 +
 example/gluon/tree_lstm/dataset.py                 |   17 +
 example/gluon/tree_lstm/fetch_and_preprocess.sh    |   20 +-
 example/gluon/tree_lstm/main.py                    |   17 +
 example/gluon/tree_lstm/scripts/download.py        |   17 +
 example/gluon/tree_lstm/scripts/preprocess-sick.py |   17 +
 example/gluon/tree_lstm/tree_lstm.py               |   17 +
 example/gluon/word_language_model/data.py          |   17 +
 example/gluon/word_language_model/get_ptb_data.sh  |   18 +
 example/gluon/word_language_model/model.py         |   17 +
 example/gluon/word_language_model/train.py         |   17 +
 example/image-classification/benchmark.py          |   17 +
 example/image-classification/benchmark_score.py    |   17 +
 example/image-classification/common/data.py        |   17 +
 example/image-classification/common/find_mxnet.py  |   17 +
 example/image-classification/common/fit.py         |   17 +
 example/image-classification/common/modelzoo.py    |   17 +
 example/image-classification/common/util.py        |   17 +
 example/image-classification/data/caltech256.sh    |   18 +
 .../image-classification/data/imagenet1k-val.sh    |   18 +
 example/image-classification/fine-tune.py          |   17 +
 .../predict-cpp/image-classification-predict.cc    |   21 +-
 example/image-classification/score.py              |   17 +
 example/image-classification/symbols/alexnet.py    |   17 +
 example/image-classification/symbols/googlenet.py  |   17 +
 .../image-classification/symbols/inception-bn.py   |   17 +
 .../symbols/inception-resnet-v2.py                 |   27 +-
 .../image-classification/symbols/inception-v3.py   |   17 +
 .../image-classification/symbols/inception-v4.py   |   17 +
 example/image-classification/symbols/lenet.py      |   17 +
 example/image-classification/symbols/mlp.py        |   17 +
 example/image-classification/symbols/mobilenet.py  |   17 +
 example/image-classification/symbols/resnet-v1.py  |   17 +
 example/image-classification/symbols/resnet.py     |   17 +
 example/image-classification/symbols/resnext.py    |   33 +-
 example/image-classification/symbols/vgg.py        |   17 +
 example/image-classification/test_score.py         |   17 +
 example/image-classification/train_cifar10.py      |   17 +
 example/image-classification/train_imagenet.py     |   17 +
 example/image-classification/train_mnist.py        |   21 +-
 example/kaggle-ndsb1/gen_img_list.py               |   21 +-
 example/kaggle-ndsb1/predict_dsb.py                |   17 +
 example/kaggle-ndsb1/submission_dsb.py             |   29 +-
 example/kaggle-ndsb1/symbol_dsb.py                 |   17 +
 example/kaggle-ndsb1/train_dsb.py                  |   23 +-
 example/kaggle-ndsb1/training_curves.py            |   17 +
 example/kaggle-ndsb2/Preprocessing.py              |   17 +
 example/kaggle-ndsb2/Train.py                      |   17 +
 example/memcost/inception_memcost.py               |   17 +
 example/model-parallel-lstm/get_ptb_data.sh        |   18 +
 example/model-parallel-lstm/lstm.py                |   37 +-
 example/model-parallel-lstm/lstm_ptb.py            |   19 +-
 example/module/lstm_bucketing.py                   |   17 +
 example/module/mnist_mlp.py                        |   17 +
 example/module/python_loss.py                      |   17 +
 example/module/sequential_module.py                |   17 +
 example/module/train_cifar10.py                    |   19 +-
 example/multi-task/data.py                         |   17 +
 example/multi-task/example_multi_task.py           |   17 +
 example/nce-loss/get_text8.sh                      |   17 +
 example/nce-loss/lstm_word.py                      |   29 +-
 example/nce-loss/nce.py                            |   17 +
 example/nce-loss/toy_nce.py                        |   25 +-
 example/nce-loss/toy_softmax.py                    |   21 +-
 example/nce-loss/wordvec.py                        |   29 +-
 example/nce-loss/wordvec_subwords.py               |   17 +
 example/neural-style/download.sh                   |   18 +
 example/neural-style/end_to_end/basic.py           |   17 +
 example/neural-style/end_to_end/boost_inference.py |   17 +
 example/neural-style/end_to_end/boost_train.py     |   17 +
 example/neural-style/end_to_end/data_processing.py |   17 +
 example/neural-style/end_to_end/gen_v3.py          |   17 +
 example/neural-style/end_to_end/gen_v4.py          |   17 +
 example/neural-style/end_to_end/model_vgg19.py     |   17 +
 example/neural-style/find_mxnet.py                 |   17 +
 example/neural-style/model_vgg19.py                |   17 +
 example/neural-style/nstyle.py                     |   19 +-
 example/numpy-ops/custom_softmax.py                |   19 +-
 example/numpy-ops/data.py                          |   17 +
 example/numpy-ops/ndarray_softmax.py               |   19 +-
 example/numpy-ops/numpy_softmax.py                 |   19 +-
 example/numpy-ops/weighted_logistic_regression.py  |   17 +
 example/profiler/profiler_executor.py              |   17 +
 example/profiler/profiler_imageiter.py             |   21 +-
 example/profiler/profiler_matmul.py                |   17 +
 example/profiler/profiler_ndarray.py               |   17 +
 example/python-howto/data.py                       |   17 +
 example/python-howto/data_iter.py                  |   17 +
 example/python-howto/debug_conv.py                 |   19 +-
 example/python-howto/monitor_weights.py            |   19 +-
 example/python-howto/multiple_outputs.py           |   17 +
 example/rcnn/demo.py                               |   17 +
 example/rcnn/rcnn/config.py                        |   17 +
 example/rcnn/rcnn/core/callback.py                 |   17 +
 example/rcnn/rcnn/core/loader.py                   |   17 +
 example/rcnn/rcnn/core/metric.py                   |   17 +
 example/rcnn/rcnn/core/module.py                   |   17 +
 example/rcnn/rcnn/core/tester.py                   |   17 +
 example/rcnn/rcnn/cython/nms_kernel.cu             |   19 +
 example/rcnn/rcnn/cython/setup.py                  |   17 +
 example/rcnn/rcnn/dataset/__init__.py              |   17 +
 example/rcnn/rcnn/dataset/coco.py                  |   17 +
 example/rcnn/rcnn/dataset/ds_utils.py              |   19 +-
 example/rcnn/rcnn/dataset/imdb.py                  |   17 +
 example/rcnn/rcnn/dataset/pascal_voc.py            |   17 +
 example/rcnn/rcnn/dataset/pascal_voc_eval.py       |   17 +
 example/rcnn/rcnn/io/image.py                      |   17 +
 example/rcnn/rcnn/io/rcnn.py                       |   17 +
 example/rcnn/rcnn/io/rpn.py                        |   17 +
 example/rcnn/rcnn/logger.py                        |   17 +
 example/rcnn/rcnn/processing/bbox_regression.py    |   17 +
 example/rcnn/rcnn/processing/bbox_transform.py     |   17 +
 example/rcnn/rcnn/processing/generate_anchor.py    |   17 +
 example/rcnn/rcnn/processing/nms.py                |   17 +
 example/rcnn/rcnn/pycocotools/__init__.py          |   17 +
 example/rcnn/rcnn/pycocotools/coco.py              |   19 +-
 example/rcnn/rcnn/pycocotools/cocoeval.py          |   19 +-
 example/rcnn/rcnn/pycocotools/mask.py              |   19 +-
 example/rcnn/rcnn/pycocotools/maskApi.h            |   19 +
 example/rcnn/rcnn/pycocotools/setup.py             |   17 +
 example/rcnn/rcnn/symbol/__init__.py               |   17 +
 example/rcnn/rcnn/symbol/proposal.py               |   17 +
 example/rcnn/rcnn/symbol/proposal_target.py        |   17 +
 example/rcnn/rcnn/symbol/symbol_resnet.py          |   17 +
 example/rcnn/rcnn/symbol/symbol_vgg.py             |   19 +-
 example/rcnn/rcnn/tools/reeval.py                  |   17 +
 example/rcnn/rcnn/tools/test_rcnn.py               |   17 +
 example/rcnn/rcnn/tools/test_rpn.py                |   17 +
 example/rcnn/rcnn/tools/train_rcnn.py              |   17 +
 example/rcnn/rcnn/tools/train_rpn.py               |   17 +
 example/rcnn/rcnn/utils/combine_model.py           |   17 +
 example/rcnn/rcnn/utils/load_data.py               |   17 +
 example/rcnn/rcnn/utils/load_model.py              |   17 +
 example/rcnn/rcnn/utils/save_model.py              |   17 +
 example/rcnn/script/additional_deps.sh             |   18 +
 example/rcnn/script/get_coco.sh                    |   18 +
 example/rcnn/script/get_pretrained_model.sh        |   18 +
 example/rcnn/script/get_selective_search.sh        |   18 +
 example/rcnn/script/get_voc.sh                     |   18 +
 example/rcnn/script/resnet_voc07.sh                |   18 +
 example/rcnn/script/resnet_voc0712.sh              |   18 +
 example/rcnn/script/vgg_alter_voc07.sh             |   18 +
 example/rcnn/script/vgg_fast_rcnn.sh               |   18 +
 example/rcnn/script/vgg_voc07.sh                   |   18 +
 example/rcnn/script/vgg_voc0712.sh                 |   18 +
 example/rcnn/test.py                               |   17 +
 example/rcnn/train_alternate.py                    |   17 +
 example/rcnn/train_end2end.py                      |   17 +
 example/recommenders/crossentropy.py               |   24 +-
 example/recommenders/matrix_fact.py                |   17 +
 example/recommenders/movielens_data.py             |   23 +-
 example/recommenders/negativesample.py             |   21 +-
 example/recommenders/randomproj.py                 |   25 +-
 example/recommenders/recotools.py                  |   19 +-
 example/recommenders/symbol_alexnet.py             |   17 +
 example/reinforcement-learning/a3c/a3c.py          |   21 +-
 example/reinforcement-learning/a3c/launcher.py     |   17 +
 example/reinforcement-learning/a3c/rl_data.py      |   17 +
 example/reinforcement-learning/a3c/sym.py          |   17 +
 example/reinforcement-learning/ddpg/ddpg.py        |   17 +
 example/reinforcement-learning/ddpg/policies.py    |   25 +-
 example/reinforcement-learning/ddpg/qfuncs.py      |   23 +-
 example/reinforcement-learning/ddpg/replay_mem.py  |   19 +-
 example/reinforcement-learning/ddpg/run.py         |   19 +-
 example/reinforcement-learning/ddpg/strategies.py  |   25 +-
 example/reinforcement-learning/ddpg/utils.py       |   17 +
 example/reinforcement-learning/dqn/atari_game.py   |   17 +
 example/reinforcement-learning/dqn/base.py         |   17 +
 example/reinforcement-learning/dqn/dqn_demo.py     |   17 +
 example/reinforcement-learning/dqn/dqn_run_test.py |   17 +
 example/reinforcement-learning/dqn/game.py         |   19 +-
 example/reinforcement-learning/dqn/operators.py    |   17 +
 .../reinforcement-learning/dqn/replay_memory.py    |   17 +
 example/reinforcement-learning/dqn/utils.py        |   17 +
 .../parallel_actor_critic/config.py                |   17 +
 .../parallel_actor_critic/envs.py                  |   17 +
 .../parallel_actor_critic/model.py                 |   17 +
 .../parallel_actor_critic/train.py                 |   17 +
 example/rnn-time-major/bucket_io.py                |   21 +-
 example/rnn-time-major/get_ptb_data.sh             |   18 +
 example/rnn-time-major/rnn_cell_demo.py            |   21 +-
 example/rnn/cudnn_lstm_bucketing.py                |   17 +
 example/rnn/get_ptb_data.sh                        |   18 +
 example/rnn/lstm_bucketing.py                      |   17 +
 example/rnn/old/bucket_io.py                       |   31 +-
 example/rnn/old/get_ptb_data.sh                    |   18 +
 example/rnn/old/gru.py                             |   17 +
 example/rnn/old/gru_bucketing.py                   |   17 +
 example/rnn/old/lstm.py                            |   17 +
 example/rnn/old/lstm_bucketing.py                  |   17 +
 example/rnn/old/rnn.py                             |   17 +
 example/rnn/old/rnn_cell_demo.py                   |   21 +-
 example/rnn/old/rnn_model.py                       |   17 +
 example/speech-demo/config_util.py                 |   17 +
 example/speech-demo/decode_mxnet.py                |   25 +-
 example/speech-demo/decode_mxnet.sh                |   24 +-
 example/speech-demo/io_func/convert2kaldi.py       |   23 +-
 example/speech-demo/io_func/feat_io.py             |   17 +
 example/speech-demo/io_func/feat_readers/common.py |   19 +-
 .../io_func/feat_readers/reader_atrack.py          |   23 +-
 .../io_func/feat_readers/reader_bvec.py            |   19 +-
 .../speech-demo/io_func/feat_readers/reader_htk.py |   21 +-
 .../io_func/feat_readers/reader_kaldi.py           |   23 +-
 example/speech-demo/io_func/feat_readers/stats.py  |   17 +
 .../io_func/feat_readers/writer_kaldi.py           |   17 +
 example/speech-demo/io_func/info.py                |   17 +
 example/speech-demo/io_func/kaldi_parser.py        |   19 +-
 example/speech-demo/io_func/model_io.py            |   57 +-
 example/speech-demo/io_func/regr_feat_io.py        |   25 +-
 example/speech-demo/io_func/utils.py               |   17 +
 example/speech-demo/io_util.py                     |   33 +-
 example/speech-demo/lstm_proj.py                   |   21 +-
 example/speech-demo/make_stats.py                  |   17 +
 example/speech-demo/python_wrap/ctypes.cc          |   23 +-
 .../python_wrap/example_usage/example.py           |   21 +-
 example/speech-demo/run_ami.sh                     |   18 +
 example/speech-demo/run_timit.sh                   |   18 +
 example/speech-demo/speechSGD.py                   |   19 +-
 example/speech-demo/tests/test_nothing.py          |   19 +-
 example/speech-demo/tests/test_system.py           |   23 +-
 example/speech-demo/train_lstm_proj.py             |   19 +-
 example/speech_recognition/arch_deepspeech.py      |   17 +
 example/speech_recognition/config_util.py          |   17 +
 example/speech_recognition/flac_to_wav.sh          |   33 +-
 example/speech_recognition/label_util.py           |   17 +
 example/speech_recognition/log_util.py             |   17 +
 example/speech_recognition/main.py                 |  719 ++++++------
 example/speech_recognition/singleton.py            |   17 +
 .../speech_recognition/stt_bi_graphemes_util.py    |   17 +
 example/speech_recognition/stt_bucketing_module.py |   43 +-
 example/speech_recognition/stt_datagenerator.py    |   19 +-
 example/speech_recognition/stt_io_bucketingiter.py |  313 ++---
 example/speech_recognition/stt_io_iter.py          |   17 +
 example/speech_recognition/stt_layer_batchnorm.py  |   17 +
 example/speech_recognition/stt_layer_conv.py       |   17 +
 example/speech_recognition/stt_layer_fc.py         |   17 +
 example/speech_recognition/stt_layer_gru.py        |   17 +
 example/speech_recognition/stt_layer_lstm.py       |   17 +
 example/speech_recognition/stt_layer_slice.py      |   17 +
 example/speech_recognition/stt_layer_warpctc.py    |   17 +
 example/speech_recognition/stt_metric.py           |   17 +
 example/speech_recognition/stt_utils.py            |   19 +-
 example/speech_recognition/train.py                |   17 +
 example/ssd/config/config.py                       |   17 +
 example/ssd/config/utils.py                        |   17 +
 example/ssd/data/demo/download_demo_images.py      |   17 +
 example/ssd/dataset/concat_db.py                   |   17 +
 example/ssd/dataset/imdb.py                        |   17 +
 example/ssd/dataset/iterator.py                    |   17 +
 example/ssd/dataset/mscoco.py                      |   17 +
 example/ssd/dataset/pascal_voc.py                  |   17 +
 example/ssd/dataset/pycocotools/__init__.py        |   17 +
 example/ssd/dataset/pycocotools/coco.py            |   17 +
 example/ssd/dataset/testdb.py                      |   17 +
 example/ssd/dataset/yolo_format.py                 |   17 +
 example/ssd/demo.py                                |   17 +
 example/ssd/deploy.py                              |   17 +
 example/ssd/detect/detector.py                     |   17 +
 example/ssd/evaluate.py                            |   17 +
 example/ssd/evaluate/eval_metric.py                |   17 +
 example/ssd/evaluate/eval_voc.py                   |   17 +
 example/ssd/evaluate/evaluate_net.py               |   17 +
 example/ssd/symbol/common.py                       |   17 +
 example/ssd/symbol/inceptionv3.py                  |   17 +
 example/ssd/symbol/legacy_vgg16_ssd_300.py         |   17 +
 example/ssd/symbol/legacy_vgg16_ssd_512.py         |   17 +
 example/ssd/symbol/resnet.py                       |   17 +
 example/ssd/symbol/symbol_builder.py               |   17 +
 example/ssd/symbol/symbol_factory.py               |   17 +
 example/ssd/symbol/vgg16_reduced.py                |   17 +
 .../caffe_parse/parse_from_protobuf.py             |   17 +
 example/ssd/tools/caffe_converter/convert_model.py |   17 +
 .../ssd/tools/caffe_converter/convert_symbol.py    |   17 +
 example/ssd/tools/caffe_converter/mean_image.py    |   17 +
 example/ssd/tools/find_mxnet.py                    |   17 +
 example/ssd/tools/prepare_coco.sh                  |   18 +
 example/ssd/tools/prepare_dataset.py               |   17 +
 example/ssd/tools/prepare_pascal.sh                |   18 +
 example/ssd/tools/rand_sampler.py                  |   17 +
 example/ssd/tools/visualize_net.py                 |   17 +
 example/ssd/train.py                               |   17 +
 example/ssd/train/metric.py                        |   17 +
 example/ssd/train/train_net.py                     |  Bin 9651 -> 10437 bytes
 example/stochastic-depth/sd_cifar10.py             |   17 +
 example/stochastic-depth/sd_mnist.py               |   17 +
 example/stochastic-depth/sd_module.py              |   17 +
 example/svm_mnist/svm_mnist.py                     |   17 +
 example/torch/data.py                              |   17 +
 example/torch/torch_function.py                    |   17 +
 example/torch/torch_module.py                      |   17 +
 example/utils/get_data.py                          |   17 +
 example/warpctc/infer_ocr.py                       |   17 +
 example/warpctc/lstm.py                            |   17 +
 example/warpctc/lstm_model.py                      |   19 +-
 example/warpctc/lstm_ocr.py                        |   25 +-
 example/warpctc/ocr_predict.py                     |   18 +
 example/warpctc/toy_ctc.py                         |   27 +-
 include/mxnet/base.h                               |   20 +-
 include/mxnet/c_api.h                              |   20 +-
 include/mxnet/c_lapack_api.h                       |   20 +-
 include/mxnet/c_predict_api.h                      |   20 +-
 include/mxnet/engine.h                             |   20 +-
 include/mxnet/executor.h                           |   20 +-
 include/mxnet/io.h                                 |   20 +-
 include/mxnet/kvstore.h                            |   20 +-
 include/mxnet/mxrtc.h                              |   20 +-
 include/mxnet/ndarray.h                            |   20 +-
 include/mxnet/op_attr_types.h                      |   20 +-
 include/mxnet/operator.h                           |   20 +-
 include/mxnet/operator_util.h                      |   20 +-
 include/mxnet/resource.h                           |   20 +-
 include/mxnet/storage.h                            |   20 +-
 include/mxnet/tensor_blob.h                        |   20 +-
 matlab/get_inception_model.sh                      |   18 +
 perl-package/AI-MXNet/examples/get_ptb_data.sh     |   18 +
 perl-package/AI-MXNet/lib/AI/MXNet.pm              |   17 +
 perl-package/AI-MXNet/lib/AI/MXNet/Base.pm         |   23 +-
 perl-package/AI-MXNet/lib/AI/MXNet/CachedOp.pm     |   17 +
 perl-package/AI-MXNet/lib/AI/MXNet/Callback.pm     |   21 +-
 perl-package/AI-MXNet/lib/AI/MXNet/Context.pm      |   17 +
 perl-package/AI-MXNet/lib/AI/MXNet/Contrib.pm      |   19 +-
 .../AI-MXNet/lib/AI/MXNet/Contrib/AutoGrad.pm      |   19 +-
 .../AI-MXNet/lib/AI/MXNet/Contrib/NDArray.pm       |   19 +-
 .../AI-MXNet/lib/AI/MXNet/Contrib/Symbol.pm        |   19 +-
 perl-package/AI-MXNet/lib/AI/MXNet/Executor.pm     |   21 +-
 .../AI-MXNet/lib/AI/MXNet/Executor/Group.pm        |   17 +
 .../AI-MXNet/lib/AI/MXNet/Function/Parameters.pm   |   19 +-
 perl-package/AI-MXNet/lib/AI/MXNet/IO.pm           |   21 +-
 perl-package/AI-MXNet/lib/AI/MXNet/Image.pm        |   17 +
 perl-package/AI-MXNet/lib/AI/MXNet/Initializer.pm  |   19 +-
 perl-package/AI-MXNet/lib/AI/MXNet/KVStore.pm      |   19 +-
 .../AI-MXNet/lib/AI/MXNet/KVStoreServer.pm         |   19 +-
 perl-package/AI-MXNet/lib/AI/MXNet/LRScheduler.pm  |   19 +-
 perl-package/AI-MXNet/lib/AI/MXNet/Logging.pm      |   17 +
 perl-package/AI-MXNet/lib/AI/MXNet/Metric.pm       |   21 +-
 perl-package/AI-MXNet/lib/AI/MXNet/Module.pm       |   21 +-
 perl-package/AI-MXNet/lib/AI/MXNet/Module/Base.pm  |   17 +
 .../AI-MXNet/lib/AI/MXNet/Module/Bucketing.pm      |   17 +
 perl-package/AI-MXNet/lib/AI/MXNet/Monitor.pm      |   19 +-
 perl-package/AI-MXNet/lib/AI/MXNet/NDArray.pm      |   57 +-
 perl-package/AI-MXNet/lib/AI/MXNet/NDArray/Base.pm |   19 +-
 perl-package/AI-MXNet/lib/AI/MXNet/NDArray/Doc.pm  |   17 +
 .../AI-MXNet/lib/AI/MXNet/NDArray/Slice.pm         |   33 +-
 perl-package/AI-MXNet/lib/AI/MXNet/Optimizer.pm    |   29 +-
 perl-package/AI-MXNet/lib/AI/MXNet/Profiler.pm     |   17 +
 perl-package/AI-MXNet/lib/AI/MXNet/RNN.pm          |   17 +
 perl-package/AI-MXNet/lib/AI/MXNet/RNN/Cell.pm     |   19 +-
 perl-package/AI-MXNet/lib/AI/MXNet/RNN/IO.pm       |   19 +-
 perl-package/AI-MXNet/lib/AI/MXNet/Random.pm       |   19 +-
 perl-package/AI-MXNet/lib/AI/MXNet/RecordIO.pm     |   21 +-
 perl-package/AI-MXNet/lib/AI/MXNet/Rtc.pm          |   19 +-
 perl-package/AI-MXNet/lib/AI/MXNet/Symbol.pm       |   23 +-
 .../AI-MXNet/lib/AI/MXNet/Symbol/AttrScope.pm      |   17 +
 perl-package/AI-MXNet/lib/AI/MXNet/Symbol/Base.pm  |   23 +-
 perl-package/AI-MXNet/lib/AI/MXNet/Symbol/Doc.pm   |   17 +
 .../AI-MXNet/lib/AI/MXNet/Symbol/NameManager.pm    |   17 +
 perl-package/AI-MXNet/lib/AI/MXNet/TestUtils.pm    |   19 +-
 perl-package/AI-MXNet/lib/AI/MXNet/Types.pm        |   17 +
 .../AI-MXNet/lib/AI/MXNet/Util/Printable.pm        |   19 +-
 .../AI-MXNet/lib/AI/MXNet/Visualization.pm         |   21 +-
 perl-package/AI-MXNetCAPI/lib/AI/MXNetCAPI.pm      |   17 +
 perl-package/AI-NNVMCAPI/lib/AI/NNVMCAPI.pm        |   17 +
 perl-package/test.sh                               |   17 +
 plugin/caffe/caffe_blob.cc                         |   20 +-
 plugin/caffe/caffe_blob.h                          |   20 +-
 plugin/caffe/caffe_common.cc                       |   22 +-
 plugin/caffe/caffe_common.h                        |   20 +-
 plugin/caffe/caffe_data_iter.cc                    |   20 +-
 plugin/caffe/caffe_fieldentry.h                    |   20 +-
 plugin/caffe/caffe_loss-inl.h                      |   20 +-
 plugin/caffe/caffe_loss.cc                         |   24 +-
 plugin/caffe/caffe_loss.cu                         |   24 +-
 plugin/caffe/caffe_op-inl.h                        |   20 +-
 plugin/caffe/caffe_op.cc                           |   22 +-
 plugin/caffe/caffe_op.cu                           |   20 +-
 plugin/caffe/caffe_stream.cc                       |   22 +-
 plugin/caffe/caffe_stream.h                        |   22 +-
 plugin/opencv/__init__.py                          |   17 +
 plugin/opencv/cv_api.cc                            |   20 +-
 plugin/opencv/cv_api.h                             |   20 +-
 plugin/opencv/opencv.py                            |   17 +
 plugin/sframe/iter_sframe.cc                       |   20 +-
 plugin/torch/torch_base.cc                         |   20 +-
 plugin/torch/torch_base.h                          |   20 +-
 plugin/torch/torch_criterion-inl.h                 |   20 +-
 plugin/torch/torch_criterion.cc                    |   20 +-
 plugin/torch/torch_criterion.cu                    |   20 +-
 plugin/torch/torch_function.cc                     |   20 +-
 plugin/torch/torch_function.h                      |   20 +-
 plugin/torch/torch_module-inl.h                    |   20 +-
 plugin/torch/torch_module.cc                       |   20 +-
 plugin/torch/torch_module.cu                       |   20 +-
 plugin/warpctc/warpctc-inl.h                       |   20 +-
 plugin/warpctc/warpctc.cc                          |   20 +-
 plugin/warpctc/warpctc.cu                          |   20 +-
 prepare_mkl.sh                                     |   18 +
 python/mxnet/__init__.py                           |   18 +
 python/mxnet/_ctypes/__init__.py                   |   17 +
 python/mxnet/_ctypes/ndarray.py                    |   17 +
 python/mxnet/_ctypes/symbol.py                     |   17 +
 python/mxnet/_cy2/__init__.py                      |   17 +
 python/mxnet/_cy3/__init__.py                      |   17 +
 python/mxnet/_ndarray_internal.py                  |   17 +
 python/mxnet/_symbol_internal.py                   |   17 +
 python/mxnet/attribute.py                          |   17 +
 python/mxnet/autograd.py                           |   17 +
 python/mxnet/base.py                               |   17 +
 python/mxnet/callback.py                           |   17 +
 python/mxnet/context.py                            |   17 +
 python/mxnet/contrib/__init__.py                   |   17 +
 python/mxnet/contrib/autograd.py                   |   17 +
 python/mxnet/contrib/ndarray.py                    |   17 +
 python/mxnet/contrib/symbol.py                     |   17 +
 python/mxnet/contrib/tensorboard.py                |   17 +
 python/mxnet/executor.py                           |   17 +
 python/mxnet/executor_manager.py                   |   17 +
 python/mxnet/gluon/__init__.py                     |   17 +
 python/mxnet/gluon/block.py                        |   17 +
 python/mxnet/gluon/data/__init__.py                |   17 +
 python/mxnet/gluon/data/dataloader.py              |   17 +
 python/mxnet/gluon/data/dataset.py                 |   17 +
 python/mxnet/gluon/data/sampler.py                 |   17 +
 python/mxnet/gluon/data/vision.py                  |   17 +
 python/mxnet/gluon/loss.py                         |   17 +
 python/mxnet/gluon/model_zoo/__init__.py           |   17 +
 python/mxnet/gluon/model_zoo/custom_layers.py      |   17 +
 python/mxnet/gluon/model_zoo/model_store.py        |   17 +
 python/mxnet/gluon/model_zoo/vision/__init__.py    |   17 +
 python/mxnet/gluon/model_zoo/vision/alexnet.py     |   17 +
 python/mxnet/gluon/model_zoo/vision/densenet.py    |   17 +
 python/mxnet/gluon/model_zoo/vision/inception.py   |   17 +
 python/mxnet/gluon/model_zoo/vision/resnet.py      |   17 +
 python/mxnet/gluon/model_zoo/vision/squeezenet.py  |   17 +
 python/mxnet/gluon/model_zoo/vision/vgg.py         |   17 +
 python/mxnet/gluon/nn/__init__.py                  |   17 +
 python/mxnet/gluon/nn/basic_layers.py              |   17 +
 python/mxnet/gluon/nn/conv_layers.py               |   19 +-
 python/mxnet/gluon/parameter.py                    |   17 +
 python/mxnet/gluon/rnn/__init__.py                 |   17 +
 python/mxnet/gluon/rnn/rnn_cell.py                 |   17 +
 python/mxnet/gluon/rnn/rnn_layer.py                |   17 +
 python/mxnet/gluon/trainer.py                      |   17 +
 python/mxnet/gluon/utils.py                        |   17 +
 python/mxnet/image/__init__.py                     |   17 +
 python/mxnet/image/detection.py                    |   17 +
 python/mxnet/image/image.py                        |   17 +
 python/mxnet/initializer.py                        |   17 +
 python/mxnet/io.py                                 |   17 +
 python/mxnet/kvstore.py                            |   17 +
 python/mxnet/kvstore_server.py                     |   17 +
 python/mxnet/libinfo.py                            |   17 +
 python/mxnet/log.py                                |   18 +
 python/mxnet/lr_scheduler.py                       |   17 +
 python/mxnet/metric.py                             |   17 +
 python/mxnet/misc.py                               |   17 +
 python/mxnet/model.py                              |   17 +
 python/mxnet/module/__init__.py                    |   17 +
 python/mxnet/module/base_module.py                 |   17 +
 python/mxnet/module/bucketing_module.py            |   17 +
 python/mxnet/module/executor_group.py              |   17 +
 python/mxnet/module/module.py                      |   17 +
 python/mxnet/module/python_module.py               |   17 +
 python/mxnet/module/sequential_module.py           |   17 +
 python/mxnet/monitor.py                            |   17 +
 python/mxnet/name.py                               |   17 +
 python/mxnet/ndarray.py                            |   21 +-
 python/mxnet/ndarray_doc.py                        |   17 +
 python/mxnet/notebook/__init__.py                  |   17 +
 python/mxnet/notebook/callback.py                  |   17 +
 python/mxnet/operator.py                           |   17 +
 python/mxnet/optimizer.py                          |   17 +
 python/mxnet/profiler.py                           |   17 +
 python/mxnet/random.py                             |   17 +
 python/mxnet/recordio.py                           |   17 +
 python/mxnet/registry.py                           |   17 +
 python/mxnet/rnn/__init__.py                       |   17 +
 python/mxnet/rnn/io.py                             |   17 +
 python/mxnet/rnn/rnn.py                            |   17 +
 python/mxnet/rnn/rnn_cell.py                       |   17 +
 python/mxnet/rtc.py                                |   17 +
 python/mxnet/symbol.py                             |   17 +
 python/mxnet/symbol_doc.py                         |   17 +
 python/mxnet/test_utils.py                         |   17 +
 python/mxnet/torch.py                              |   17 +
 python/mxnet/visualization.py                      |   17 +
 python/setup.py                                    |   17 +
 scala-package/core/scripts/get_cifar_data.sh       |   18 +
 scala-package/core/scripts/get_mnist_data.sh       |   18 +
 .../examples/scripts/customop/run_customop.sh      |   18 +
 .../scripts/customop/run_customopwithrtc.sh        |   18 +
 scala-package/examples/scripts/module/mnist_mlp.sh |   18 +
 .../scripts/module/run_sequential_module.sh        |   18 +
 .../neuralstyle_end2end/run_test_end2end.sh        |   22 +-
 .../neuralstyle_end2end/run_train_end2end.sh       |   24 +-
 .../scripts/profiler/run_profiler_matmul.sh        |   18 +
 .../scripts/profiler/run_profiler_ndarray.sh       |   18 +
 .../examples/scripts/rnn/run_test_charrnn.sh       |   18 +
 .../examples/scripts/rnn/run_train_charrnn.sh      |   18 +
 .../examples/scripts/run_cnntextclassification.sh  |   24 +-
 scala-package/examples/scripts/run_gan_mnist.sh    |   20 +-
 scala-package/examples/scripts/run_multitask.sh    |   18 +
 scala-package/examples/scripts/run_neuralstyle.sh  |   24 +-
 .../examples/scripts/run_visualization.sh          |   20 +-
 .../main/native/ml_dmlc_mxnet_init_native_c_api.cc |   20 +-
 .../native/src/main/native/jni_helper_func.h       |   20 +-
 .../src/main/native/ml_dmlc_mxnet_native_c_api.cc  |   20 +-
 scala-package/spark/bin/run-mnist-example.sh       |   18 +
 setup-utils/install-mxnet-amz-linux.sh             |   18 +
 setup-utils/install-mxnet-fedora-python.sh         |   22 +-
 setup-utils/install-mxnet-osx-python.sh            |   40 +-
 setup-utils/install-mxnet-ubuntu-python.sh         |   18 +
 setup-utils/install-mxnet-ubuntu-r.sh              |   20 +-
 src/c_api/c_api.cc                                 |   20 +-
 src/c_api/c_api_common.h                           |   20 +-
 src/c_api/c_api_error.cc                           |   20 +-
 src/c_api/c_api_executor.cc                        |   20 +-
 src/c_api/c_api_ndarray.cc                         |   20 +-
 src/c_api/c_api_symbolic.cc                        |   20 +-
 src/c_api/c_predict_api.cc                         |   20 +-
 src/common/cuda_utils.h                            |   20 +-
 src/common/lazy_alloc_array.h                      |   20 +-
 src/common/mxrtc.cc                                |   20 +-
 src/common/object_pool.h                           |   20 +-
 src/common/utils.h                                 |   20 +-
 src/engine/engine.cc                               |   20 +-
 src/engine/engine_impl.h                           |   20 +-
 src/engine/naive_engine.cc                         |   20 +-
 src/engine/profiler.cc                             |   20 +-
 src/engine/profiler.h                              |   20 +-
 src/engine/stream_manager.h                        |   20 +-
 src/engine/thread_pool.h                           |   20 +-
 src/engine/threaded_engine.cc                      |   20 +-
 src/engine/threaded_engine.h                       |   20 +-
 src/engine/threaded_engine_perdevice.cc            |   20 +-
 src/engine/threaded_engine_pooled.cc               |   20 +-
 src/executor/attach_op_execs_pass.cc               |   20 +-
 src/executor/attach_op_resource_pass.cc            |   20 +-
 src/executor/exec_pass.h                           |   20 +-
 src/executor/graph_executor.cc                     |   20 +-
 src/executor/graph_executor.h                      |   20 +-
 src/executor/inplace_addto_detect_pass.cc          |   20 +-
 src/initialize.cc                                  |   20 +-
 src/io/image_aug_default.cc                        |   20 +-
 src/io/image_augmenter.h                           |   20 +-
 src/io/image_det_aug_default.cc                    |   20 +-
 src/io/image_io.cc                                 |   20 +-
 src/io/image_iter_common.h                         |   20 +-
 src/io/image_recordio.h                            |   26 +-
 src/io/inst_vector.h                               |   20 +-
 src/io/io.cc                                       |   20 +-
 src/io/iter_batchloader.h                          |   20 +-
 src/io/iter_csv.cc                                 |   20 +-
 src/io/iter_image_det_recordio.cc                  |   20 +-
 src/io/iter_image_recordio.cc                      |   20 +-
 src/io/iter_image_recordio_2.cc                    |   20 +-
 src/io/iter_mnist.cc                               |   20 +-
 src/io/iter_normalize.h                            |   20 +-
 src/io/iter_prefetcher.h                           |   20 +-
 src/kvstore/comm.h                                 |   20 +-
 src/kvstore/kvstore.cc                             |   20 +-
 src/kvstore/kvstore_dist.h                         |   20 +-
 src/kvstore/kvstore_dist_server.h                  |   20 +-
 src/kvstore/kvstore_local.h                        |   20 +-
 src/ndarray/autograd.cc                            |   20 +-
 src/ndarray/autograd.h                             |   20 +-
 src/ndarray/ndarray.cc                             |   20 +-
 src/ndarray/ndarray_function-inl.h                 |   20 +-
 src/ndarray/ndarray_function.cc                    |   20 +-
 src/ndarray/ndarray_function.cu                    |   20 +-
 src/ndarray/ndarray_function.h                     |   20 +-
 src/operator/activation-inl.h                      |   20 +-
 src/operator/activation.cc                         |   20 +-
 src/operator/activation.cu                         |   20 +-
 src/operator/batch_norm-inl.h                      |   20 +-
 src/operator/batch_norm.cc                         |   20 +-
 src/operator/batch_norm.cu                         |   20 +-
 src/operator/batch_norm_v1-inl.h                   |   20 +-
 src/operator/batch_norm_v1.cc                      |   20 +-
 src/operator/batch_norm_v1.cu                      |   20 +-
 src/operator/bilinear_sampler-inl.h                |  456 ++++----
 src/operator/bilinear_sampler.cc                   |  474 ++++----
 src/operator/bilinear_sampler.cu                   |  432 +++----
 src/operator/channel_op_common.h                   |   20 +-
 src/operator/concat-inl.h                          |   20 +-
 src/operator/concat.cc                             |   20 +-
 src/operator/concat.cu                             |   20 +-
 src/operator/contrib/count_sketch-inl.h            |   20 +-
 src/operator/contrib/count_sketch.cc               |   22 +-
 src/operator/contrib/count_sketch.cu               |   20 +-
 src/operator/contrib/ctc_loss-inl.h                |   20 +-
 src/operator/contrib/ctc_loss.cc                   |   40 +-
 src/operator/contrib/ctc_loss.cu                   |   20 +-
 src/operator/contrib/deformable_convolution-inl.h  |   19 +
 src/operator/contrib/deformable_convolution.cc     |   21 +-
 src/operator/contrib/deformable_convolution.cu     |   19 +
 .../contrib/deformable_psroi_pooling-inl.h         |   19 +
 src/operator/contrib/deformable_psroi_pooling.cc   |   21 +-
 src/operator/contrib/deformable_psroi_pooling.cu   |   19 +
 src/operator/contrib/dequantize-inl.h              |   20 +-
 src/operator/contrib/dequantize.cc                 |   20 +-
 src/operator/contrib/dequantize.cu                 |   20 +-
 src/operator/contrib/fft-inl.h                     |   20 +-
 src/operator/contrib/fft.cc                        |   20 +-
 src/operator/contrib/fft.cu                        |   20 +-
 src/operator/contrib/ifft-inl.h                    |   20 +-
 src/operator/contrib/ifft.cc                       |   20 +-
 src/operator/contrib/ifft.cu                       |   20 +-
 src/operator/contrib/krprod.h                      |   20 +-
 src/operator/contrib/multi_proposal-inl.h          |   20 +-
 src/operator/contrib/multi_proposal.cc             |   19 +
 src/operator/contrib/multi_proposal.cu             |   20 +-
 src/operator/contrib/multibox_detection-inl.h      |   20 +-
 src/operator/contrib/multibox_detection.cc         |   20 +-
 src/operator/contrib/multibox_detection.cu         |   20 +-
 src/operator/contrib/multibox_prior-inl.h          |   20 +-
 src/operator/contrib/multibox_prior.cc             |   20 +-
 src/operator/contrib/multibox_prior.cu             |   20 +-
 src/operator/contrib/multibox_target-inl.h         |   20 +-
 src/operator/contrib/multibox_target.cc            |   20 +-
 src/operator/contrib/multibox_target.cu            |   20 +-
 src/operator/contrib/nn/deformable_im2col.cuh      |   53 +-
 src/operator/contrib/nn/deformable_im2col.h        |   45 +-
 src/operator/contrib/proposal-inl.h                |   20 +-
 src/operator/contrib/proposal.cc                   |   20 +-
 src/operator/contrib/proposal.cu                   |   20 +-
 src/operator/contrib/psroi_pooling-inl.h           |   20 +-
 src/operator/contrib/psroi_pooling.cc              |   20 +-
 src/operator/contrib/psroi_pooling.cu              |   20 +-
 src/operator/contrib/quantize-inl.h                |   20 +-
 src/operator/contrib/quantize.cc                   |   20 +-
 src/operator/contrib/quantize.cu                   |   20 +-
 src/operator/convolution-inl.h                     |   20 +-
 src/operator/convolution.cc                        |   20 +-
 src/operator/convolution.cu                        |   20 +-
 src/operator/convolution_v1-inl.h                  |   20 +-
 src/operator/convolution_v1.cc                     |   20 +-
 src/operator/convolution_v1.cu                     |   20 +-
 src/operator/correlation-inl.h                     |  490 ++++----
 src/operator/correlation.cc                        |  368 +++---
 src/operator/correlation.cu                        | 1237 ++++++++++----------
 src/operator/crop-inl.h                            |  446 +++----
 src/operator/crop.cc                               |   20 +-
 src/operator/crop.cu                               |   20 +-
 src/operator/cross_device_copy.cc                  |   20 +-
 src/operator/cudnn_activation-inl.h                |   20 +-
 src/operator/cudnn_algoreg-inl.h                   |   20 +-
 src/operator/cudnn_algoreg.cc                      |   20 +-
 src/operator/cudnn_batch_norm-inl.h                |   20 +-
 src/operator/cudnn_batch_norm.cc                   |   20 +-
 src/operator/cudnn_batch_norm.cu                   |   20 +-
 src/operator/cudnn_bilinear_sampler-inl.h          |  352 +++---
 src/operator/cudnn_convolution-inl.h               |   20 +-
 src/operator/cudnn_deconvolution-inl.h             |   20 +-
 src/operator/cudnn_lrn-inl.h                       |   20 +-
 src/operator/cudnn_pooling-inl.h                   |   20 +-
 src/operator/cudnn_rnn-inl.h                       |   20 +-
 src/operator/cudnn_softmax_activation-inl.h        |   20 +-
 src/operator/cudnn_spatial_transformer-inl.h       |   20 +-
 src/operator/custom/custom-inl.h                   |   20 +-
 src/operator/custom/custom.cc                      |   20 +-
 src/operator/custom/native_op-inl.h                |   20 +-
 src/operator/custom/native_op.cc                   |   20 +-
 src/operator/custom/native_op.cu                   |   20 +-
 src/operator/custom/ndarray_op-inl.h               |   20 +-
 src/operator/custom/ndarray_op.cc                  |   20 +-
 src/operator/deconvolution-inl.h                   |   20 +-
 src/operator/deconvolution.cc                      |   20 +-
 src/operator/deconvolution.cu                      |   20 +-
 src/operator/dropout-inl.h                         |   20 +-
 src/operator/dropout.cc                            |   20 +-
 src/operator/dropout.cu                            |   20 +-
 src/operator/elemwise_op_common.h                  |   20 +-
 src/operator/fully_connected-inl.h                 |   20 +-
 src/operator/fully_connected.cc                    |   20 +-
 src/operator/fully_connected.cu                    |   20 +-
 src/operator/grid_generator-inl.h                  |  654 ++++++-----
 src/operator/grid_generator.cc                     |   90 +-
 src/operator/grid_generator.cu                     |   60 +-
 src/operator/identity_attach_KL_sparse_reg-inl.h   |  372 +++---
 src/operator/identity_attach_KL_sparse_reg.cc      |   92 +-
 src/operator/identity_attach_KL_sparse_reg.cu      |   50 +-
 src/operator/instance_norm-inl.h                   |   20 +-
 src/operator/instance_norm.cc                      |   20 +-
 src/operator/instance_norm.cu                      |   20 +-
 src/operator/l2_normalization-inl.h                |   20 +-
 src/operator/l2_normalization.cc                   |   20 +-
 src/operator/l2_normalization.cu                   |   20 +-
 src/operator/leaky_relu-inl.h                      |   20 +-
 src/operator/leaky_relu.cc                         |   22 +-
 src/operator/leaky_relu.cu                         |   20 +-
 src/operator/loss_binary_op-inl.h                  |   20 +-
 src/operator/loss_binary_op.cc                     |   20 +-
 src/operator/loss_binary_op.cu                     |   20 +-
 src/operator/lrn-inl.h                             |   20 +-
 src/operator/lrn.cc                                |   20 +-
 src/operator/lrn.cu                                |   20 +-
 src/operator/make_loss-inl.h                       |   20 +-
 src/operator/make_loss.cc                          |   20 +-
 src/operator/make_loss.cu                          |   20 +-
 src/operator/mshadow_op.h                          |   20 +-
 src/operator/mxnet_op.h                            |   20 +-
 src/operator/nn/im2col.cuh                         |   45 +-
 src/operator/nn/im2col.h                           |   20 +-
 src/operator/nn/pool.cuh                           |   19 +
 src/operator/nn/pool.h                             |   20 +-
 src/operator/nn/softmax-inl.h                      |   20 +-
 src/operator/nn/softmax.cc                         |   20 +-
 src/operator/nn/softmax.cu                         |   20 +-
 src/operator/nnpack/nnpack_convolution-inl.h       |   20 +-
 src/operator/nnpack/nnpack_fully_connected-inl.h   |   20 +-
 src/operator/nnpack/nnpack_pooling-inl.h           |   20 +-
 src/operator/nnpack/nnpack_util.cc                 |   20 +-
 src/operator/nnpack/nnpack_util.h                  |   20 +-
 src/operator/operator.cc                           |   20 +-
 src/operator/operator_common.h                     |   20 +-
 src/operator/operator_util.cc                      |   20 +-
 src/operator/optimizer_op-inl.h                    |   20 +-
 src/operator/optimizer_op.cc                       |   20 +-
 src/operator/optimizer_op.cu                       |   20 +-
 src/operator/pad-inl.h                             |   20 +-
 src/operator/pad.cc                                |   20 +-
 src/operator/pad.cu                                |   20 +-
 src/operator/pooling-inl.h                         |   20 +-
 src/operator/pooling.cc                            |   20 +-
 src/operator/pooling.cu                            |   20 +-
 src/operator/pooling_v1-inl.h                      |   20 +-
 src/operator/pooling_v1.cc                         |   20 +-
 src/operator/pooling_v1.cu                         |   20 +-
 src/operator/random/multisample_op.cc              |   20 +-
 src/operator/random/multisample_op.h               |   20 +-
 src/operator/random/sample_multinomial_op.cc       |   20 +-
 src/operator/random/sample_multinomial_op.cu       |   20 +-
 src/operator/random/sample_multinomial_op.h        |   20 +-
 src/operator/random/sample_op.cc                   |   26 +-
 src/operator/random/sample_op.cu                   |   20 +-
 src/operator/random/sample_op.h                    |   20 +-
 src/operator/regression_output-inl.h               |   20 +-
 src/operator/regression_output.cc                  |   20 +-
 src/operator/regression_output.cu                  |   20 +-
 src/operator/rnn-inl.h                             |   20 +-
 src/operator/rnn.cc                                |   20 +-
 src/operator/rnn.cu                                |   20 +-
 src/operator/roi_pooling-inl.h                     |   20 +-
 src/operator/roi_pooling.cc                        |   20 +-
 src/operator/roi_pooling.cu                        |   20 +-
 src/operator/sequence_last-inl.h                   |   20 +-
 src/operator/sequence_last.cc                      |   20 +-
 src/operator/sequence_last.cu                      |   20 +-
 src/operator/sequence_mask-inl.h                   |   20 +-
 src/operator/sequence_mask.cc                      |   20 +-
 src/operator/sequence_mask.cu                      |   20 +-
 src/operator/sequence_op_common.h                  |   20 +-
 src/operator/sequence_reverse-inl.h                |   20 +-
 src/operator/sequence_reverse.cc                   |   20 +-
 src/operator/sequence_reverse.cu                   |   20 +-
 src/operator/slice_channel-inl.h                   |   20 +-
 src/operator/slice_channel.cc                      |   22 +-
 src/operator/slice_channel.cu                      |   20 +-
 src/operator/softmax_activation-inl.h              |   20 +-
 src/operator/softmax_activation.cc                 |   20 +-
 src/operator/softmax_activation.cu                 |   20 +-
 src/operator/softmax_output-inl.h                  |   20 +-
 src/operator/softmax_output.cc                     |   20 +-
 src/operator/softmax_output.cu                     |   20 +-
 src/operator/spatial_transformer-inl.h             |   20 +-
 src/operator/spatial_transformer.cc                |   20 +-
 src/operator/spatial_transformer.cu                |   20 +-
 src/operator/special_functions-inl.h               |   20 +-
 src/operator/svm_output-inl.h                      |   20 +-
 src/operator/svm_output.cc                         |   20 +-
 src/operator/svm_output.cu                         |   20 +-
 src/operator/swapaxis-inl.h                        |   20 +-
 src/operator/swapaxis.cc                           |   20 +-
 src/operator/swapaxis.cu                           |   20 +-
 src/operator/tensor/broadcast_reduce-inl.cuh       | 1229 +++++++++----------
 src/operator/tensor/broadcast_reduce-inl.h         |   20 +-
 src/operator/tensor/broadcast_reduce_op.h          |   20 +-
 src/operator/tensor/broadcast_reduce_op_index.cc   |   20 +-
 src/operator/tensor/broadcast_reduce_op_index.cu   |   20 +-
 src/operator/tensor/broadcast_reduce_op_value.cc   |   20 +-
 src/operator/tensor/broadcast_reduce_op_value.cu   |   20 +-
 src/operator/tensor/control_flow_op.cc             |   20 +-
 src/operator/tensor/control_flow_op.cu             |   20 +-
 src/operator/tensor/control_flow_op.h              |   20 +-
 src/operator/tensor/elemwise_binary_broadcast_op.h |   20 +-
 .../tensor/elemwise_binary_broadcast_op_basic.cc   |   20 +-
 .../tensor/elemwise_binary_broadcast_op_basic.cu   |   20 +-
 .../elemwise_binary_broadcast_op_extended.cc       |   20 +-
 .../elemwise_binary_broadcast_op_extended.cu       |   20 +-
 .../tensor/elemwise_binary_broadcast_op_logic.cc   |   20 +-
 .../tensor/elemwise_binary_broadcast_op_logic.cu   |   20 +-
 src/operator/tensor/elemwise_binary_op.h           |   20 +-
 src/operator/tensor/elemwise_binary_op_basic.cc    |   20 +-
 src/operator/tensor/elemwise_binary_op_basic.cu    |   20 +-
 src/operator/tensor/elemwise_binary_op_extended.cc |   20 +-
 src/operator/tensor/elemwise_binary_op_extended.cu |   20 +-
 src/operator/tensor/elemwise_binary_op_logic.cc    |   20 +-
 src/operator/tensor/elemwise_binary_op_logic.cu    |   20 +-
 src/operator/tensor/elemwise_binary_scalar_op.h    |   20 +-
 .../tensor/elemwise_binary_scalar_op_basic.cc      |   20 +-
 .../tensor/elemwise_binary_scalar_op_basic.cu      |   20 +-
 .../tensor/elemwise_binary_scalar_op_extended.cc   |   20 +-
 .../tensor/elemwise_binary_scalar_op_extended.cu   |   20 +-
 .../tensor/elemwise_binary_scalar_op_logic.cc      |   20 +-
 .../tensor/elemwise_binary_scalar_op_logic.cu      |   20 +-
 src/operator/tensor/elemwise_sum.cc                |   20 +-
 src/operator/tensor/elemwise_sum.cu                |   20 +-
 src/operator/tensor/elemwise_sum.h                 |   20 +-
 src/operator/tensor/elemwise_unary_op.cc           |   20 +-
 src/operator/tensor/elemwise_unary_op.cu           |   20 +-
 src/operator/tensor/elemwise_unary_op.h            |   20 +-
 src/operator/tensor/indexing_op-inl.cuh            |  605 +++++-----
 src/operator/tensor/indexing_op.cc                 |   20 +-
 src/operator/tensor/indexing_op.cu                 |   20 +-
 src/operator/tensor/indexing_op.h                  |   20 +-
 src/operator/tensor/init_op.cc                     |   20 +-
 src/operator/tensor/init_op.cu                     |   20 +-
 src/operator/tensor/init_op.h                      |   20 +-
 src/operator/tensor/la_op.cc                       |   22 +-
 src/operator/tensor/la_op.h                        |   20 +-
 src/operator/tensor/la_op_inline.h                 |   20 +-
 src/operator/tensor/matrix_op-inl.h                |   20 +-
 src/operator/tensor/matrix_op.cc                   |   20 +-
 src/operator/tensor/matrix_op.cu                   |   20 +-
 src/operator/tensor/ordering_op-inl.h              |   20 +-
 src/operator/tensor/ordering_op.cc                 |   20 +-
 src/operator/tensor/ordering_op.cu                 |   20 +-
 src/operator/tensor/sort_op-inl.cuh                |  277 +++--
 src/operator/tensor/sort_op.h                      |  192 +--
 src/operator/upsampling-inl.h                      |   20 +-
 src/operator/upsampling.cc                         |   20 +-
 src/operator/upsampling.cu                         |   20 +-
 src/optimizer/sgd-inl.h                            |   20 +-
 src/resource.cc                                    |   20 +-
 src/storage/cpu_device_storage.h                   |   20 +-
 src/storage/gpu_device_storage.h                   |   20 +-
 src/storage/naive_storage_manager.h                |   20 +-
 src/storage/pinned_memory_storage.h                |   20 +-
 src/storage/pooled_storage_manager.h               |   20 +-
 src/storage/storage.cc                             |   20 +-
 src/storage/storage_manager.h                      |   20 +-
 tests/ci_build/ci_build.sh                         |   18 +
 tests/ci_build/install/install_julia.sh            |   18 +
 tests/ci_build/install/install_library.sh          |   18 +
 tests/ci_build/install/install_maven.sh            |   18 +
 tests/ci_build/install/install_openblas.sh         |   18 +
 tests/ci_build/install/install_opencv.sh           |   18 +
 tests/ci_build/install/install_python2.sh          |   18 +
 tests/ci_build/install/install_python3.sh          |   18 +
 tests/ci_build/install/install_testdeps.sh         |   18 +
 tests/ci_build/install/ubuntu_install_core.sh      |   18 +
 tests/ci_build/install/ubuntu_install_perl.sh      |   18 +
 tests/ci_build/install/ubuntu_install_python.sh    |   18 +
 tests/ci_build/install/ubuntu_install_r.sh         |   18 +
 tests/ci_build/install/ubuntu_install_scala.sh     |   18 +
 tests/cpp/engine/threaded_engine_test.cc           |   20 +-
 tests/cpp/include/test_op.h                        |   20 +-
 tests/cpp/include/test_perf.h                      |   20 +-
 tests/cpp/include/test_util.h                      |   20 +-
 tests/cpp/operator/batchnorm_test.cc               |   20 +-
 tests/cpp/operator/krprod_test.cc                  |   20 +-
 tests/cpp/storage/storage_test.cc                  |   20 +-
 tests/cpp/test_main.cc                             |   20 +-
 tests/jenkins/run_as_user.sh                       |   18 +
 tests/jenkins/run_test.sh                          |   18 +
 tests/jenkins/run_test_amzn_linux_gpu.sh           |   18 +
 tests/jenkins/run_test_installation_docs.sh        |   22 +-
 tests/jenkins/run_test_pip_installations.sh        |   18 +
 tests/jenkins/run_test_ubuntu.sh                   |   18 +
 tests/jenkins/set_user_permissions.sh              |   18 +
 tests/nightly/TestDoc/doc_spell_checker.py         |   17 +
 tests/nightly/TestDoc/doc_spell_grammar.sh         |   18 +
 .../compilation_warnings/compilation_warnings.sh   |   18 +
 .../nightly/compilation_warnings/process_output.py |   17 +
 tests/nightly/dist_lenet.py                        |   18 +
 tests/nightly/dist_sync_kvstore.py                 |   18 +
 tests/nightly/download.sh                          |   18 +
 tests/nightly/multi_lenet.py                       |   18 +
 .../assertion_util.py                              |   17 +
 .../mxnet_keras_integration_tests/model_util.py    |   17 +
 .../mxnet_keras_integration_tests/profiler.py      |   17 +
 .../test_mnist_mlp.py                              |   17 +
 tests/nightly/sh2ju.sh                             |   18 +
 tests/nightly/test_all.sh                          |   18 +
 tests/nightly/test_kvstore.py                      |   18 +
 tests/nightly/test_mxnet_keras_integration_cpu.sh  |   18 +
 tests/nightly/test_mxnet_keras_integration_gpu.sh  |   18 +
 tests/nightly/test_tutorial.py                     |   17 +
 tests/python/common/get_data.py                    |   17 +
 tests/python/common/models.py                      |   17 +
 tests/python/doctest/test_docstring.py             |   17 +
 tests/python/gpu/test_forward.py                   |   17 +
 tests/python/gpu/test_operator_gpu.py              |   17 +
 tests/python/gpu/test_rtc.py                       |   19 +-
 tests/python/predict/mxnet_predict_example.py      |   17 +
 tests/python/train/common.py                       |   17 +
 tests/python/train/test_autograd.py                |   17 +
 tests/python/train/test_bucketing.py               |   17 +
 tests/python/train/test_conv.py                    |   17 +
 tests/python/train/test_dtype.py                   |   19 +-
 tests/python/train/test_mlp.py                     |   17 +
 tests/python/unittest/common.py                    |   17 +
 tests/python/unittest/test_attr.py                 |   17 +
 tests/python/unittest/test_autograd.py             |   17 +
 tests/python/unittest/test_contrib_autograd.py     |   17 +
 tests/python/unittest/test_executor.py             |   17 +
 tests/python/unittest/test_gluon.py                |   17 +
 tests/python/unittest/test_gluon_data.py           |   17 +
 tests/python/unittest/test_gluon_model_zoo.py      |   17 +
 tests/python/unittest/test_gluon_rnn.py            |   17 +
 tests/python/unittest/test_image.py                |   17 +
 tests/python/unittest/test_infer_shape.py          |   17 +
 tests/python/unittest/test_init.py                 |   17 +
 tests/python/unittest/test_io.py                   |   17 +
 tests/python/unittest/test_kvstore.py              |   17 +
 tests/python/unittest/test_loss.py                 |   17 +
 tests/python/unittest/test_metric.py               |   17 +
 tests/python/unittest/test_model_parallel.py       |   17 +
 tests/python/unittest/test_module.py               |   17 +
 tests/python/unittest/test_multi_device_exec.py    |   17 +
 tests/python/unittest/test_ndarray.py              |   17 +
 tests/python/unittest/test_operator.py             |   17 +
 tests/python/unittest/test_optimizer.py            |   17 +
 tests/python/unittest/test_profiler.py             |   17 +
 tests/python/unittest/test_random.py               |   17 +
 tests/python/unittest/test_recordio.py             |   19 +-
 tests/python/unittest/test_rnn.py                  |   17 +
 tests/python/unittest/test_symbol.py               |   17 +
 tests/python/unittest/test_viz.py                  |   17 +
 tests/travis/is_core_changed.sh                    |   18 +
 tests/travis/run_test.sh                           |   18 +
 tests/travis/setup.sh                              |   18 +
 tests/travis/travis_after_failure.sh               |   18 +
 tools/accnn/acc_conv.py                            |  171 +--
 tools/accnn/acc_fc.py                              |  131 ++-
 tools/accnn/accnn.py                               |   93 +-
 tools/accnn/rank_selection.py                      |  191 +--
 tools/accnn/utils.py                               |  219 ++--
 tools/bandwidth/measure.py                         |   17 +
 tools/bandwidth/test_measure.py                    |   17 +
 tools/caffe_converter/caffe_parser.py              |   17 +
 tools/caffe_converter/caffe_proto_utils.py         |   17 +
 tools/caffe_converter/compare_layers.py            |   17 +
 tools/caffe_converter/convert_caffe_modelzoo.py    |   17 +
 tools/caffe_converter/convert_mean.py              |   17 +
 tools/caffe_converter/convert_model.py             |   17 +
 tools/caffe_converter/convert_symbol.py            |   17 +
 tools/caffe_converter/run.sh                       |   18 +
 tools/caffe_converter/test_converter.py            |   17 +
 tools/im2rec.cc                                    |   20 +-
 tools/im2rec.py                                    |  631 +++++-----
 tools/ipynb2md.py                                  |   18 +
 tools/kill-mxnet.py                                |   18 +
 tools/launch.py                                    |   20 +-
 tools/license_header.py                            |  157 +++
 tools/parse_log.py                                 |   18 +
 tools/pip_package/make_pip_package.sh              |   18 +
 tools/pip_package/setup.py                         |   17 +
 1051 files changed, 24743 insertions(+), 6220 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 49633d4..370c2b3 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -105,6 +105,7 @@ try {
         node('mxnetlinux') {
           ws('workspace/sanity') {
             init_git()
+            sh "python tools/license_header.py check"
             make('lint', 'cpplint rcpplint jnilint')
             make('lint', 'pylint')
           }
diff --git a/amalgamation/amalgamation.py b/amalgamation/amalgamation.py
index b33b81c..22b421d 100644
--- a/amalgamation/amalgamation.py
+++ b/amalgamation/amalgamation.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import sys
 import os.path, re, StringIO
 
@@ -8,7 +25,7 @@ blacklist = [
     'kvstore_dist.h', 'mach/clock.h', 'mach/mach.h',
     'malloc.h', 'mkl.h', 'mkl_cblas.h', 'mkl_vsl.h', 'mkl_vsl_functions.h',
     'nvml.h', 'opencv2/opencv.hpp', 'sys/stat.h', 'sys/types.h', 'cuda.h', 'cuda_fp16.h',
-    'omp.h', 'execinfo.h', 'packet/sse-inl.h', 'emmintrin.h', 'thrust/device_vector.h', 
+    'omp.h', 'execinfo.h', 'packet/sse-inl.h', 'emmintrin.h', 'thrust/device_vector.h',
     'cusolverDn.h'
     ]
 
diff --git a/amalgamation/dmlc-minimum0.cc b/amalgamation/dmlc-minimum0.cc
index bce6112..3f7a97b 100644
--- a/amalgamation/dmlc-minimum0.cc
+++ b/amalgamation/dmlc-minimum0.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright 2015 by Contributors.
  * \brief Mininum DMLC library Amalgamation, used for easy plugin of dmlc lib.
  *  Normally this is not needed.
  */
diff --git a/amalgamation/jni/org_dmlc_mxnet_Predictor.h b/amalgamation/jni/org_dmlc_mxnet_Predictor.h
index e5a68ed..1bdf80d 100644
--- a/amalgamation/jni/org_dmlc_mxnet_Predictor.h
+++ b/amalgamation/jni/org_dmlc_mxnet_Predictor.h
@@ -1,3 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /* DO NOT EDIT THIS FILE - it is machine generated */
 #include <jni.h>
 /* Header for class org_dmlc_mxnet_Predictor */
diff --git a/amalgamation/jni/predictor.cc b/amalgamation/jni/predictor.cc
index b6cc937..1936daf 100644
--- a/amalgamation/jni/predictor.cc
+++ b/amalgamation/jni/predictor.cc
@@ -1,3 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 #include <jni.h>
 #include "org_dmlc_mxnet_Predictor.h"
 
diff --git a/amalgamation/mxnet_predict0.cc b/amalgamation/mxnet_predict0.cc
index ca1b581..badf237 100644
--- a/amalgamation/mxnet_predict0.cc
+++ b/amalgamation/mxnet_predict0.cc
@@ -1,3 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 // mxnet.cc
 
 #define MSHADOW_FORCE_STREAM
diff --git a/amalgamation/python/mxnet_predict.py b/amalgamation/python/mxnet_predict.py
index 684f231..3dd6b38 100644
--- a/amalgamation/python/mxnet_predict.py
+++ b/amalgamation/python/mxnet_predict.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=invalid-name, too-many-arguments
 """Lightweight API for mxnet prediction.
diff --git a/cmake/Modules/FindAccelerate.cmake b/cmake/Modules/FindAccelerate.cmake
index 8c99382..695538a 100644
--- a/cmake/Modules/FindAccelerate.cmake
+++ b/cmake/Modules/FindAccelerate.cmake
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # Find the Apple Accelerate framework
 #
 # The following are set after configuration is done:
diff --git a/cmake/Modules/FindAtlas.cmake b/cmake/Modules/FindAtlas.cmake
index 350bbe9..27aaa0e 100644
--- a/cmake/Modules/FindAtlas.cmake
+++ b/cmake/Modules/FindAtlas.cmake
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # Find the Atlas (and Lapack) libraries
 #
 # The following variables are optionally searched for defaults
diff --git a/cmake/Modules/FindJeMalloc.cmake b/cmake/Modules/FindJeMalloc.cmake
index 8b965cf..57f4744 100644
--- a/cmake/Modules/FindJeMalloc.cmake
+++ b/cmake/Modules/FindJeMalloc.cmake
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 
 # Copyright (c)      2014 Thomas Heller
 # Copyright (c) 2007-2012 Hartmut Kaiser
diff --git a/cmake/Modules/FindMKL.cmake b/cmake/Modules/FindMKL.cmake
index 9679f3d..743a871 100644
--- a/cmake/Modules/FindMKL.cmake
+++ b/cmake/Modules/FindMKL.cmake
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # Find the MKL libraries
 #
 # Options:
diff --git a/cmake/Modules/FindOpenBLAS.cmake b/cmake/Modules/FindOpenBLAS.cmake
index b63817a..7c5272b 100644
--- a/cmake/Modules/FindOpenBLAS.cmake
+++ b/cmake/Modules/FindOpenBLAS.cmake
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 if(MKL_FOUND)
   message(ERROR " OpenBLAS is not required since MKL is enabled")
 endif()
diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake
index c367edb..ac6ce39 100644
--- a/cmake/Utils.cmake
+++ b/cmake/Utils.cmake
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # For cmake_parse_arguments
 include(CMakeParseArguments)
 
diff --git a/cpp-package/example/feature_extract/run.sh b/cpp-package/example/feature_extract/run.sh
index afac492..dc66656 100755
--- a/cpp-package/example/feature_extract/run.sh
+++ b/cpp-package/example/feature_extract/run.sh
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 ### To run the this example,
 ###
 ### 1.
diff --git a/cpp-package/example/get_mnist.sh b/cpp-package/example/get_mnist.sh
index 2bbe7a8..4037962 100755
--- a/cpp-package/example/get_mnist.sh
+++ b/cpp-package/example/get_mnist.sh
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 if [ ! -d "./mnist_data" ]; then
   mkdir mnist_data
   (cd mnist_data; wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz)
diff --git a/cpp-package/example/run_lenet_with_mxdataiter.sh b/cpp-package/example/run_lenet_with_mxdataiter.sh
index fffc355..cafad32 100755
--- a/cpp-package/example/run_lenet_with_mxdataiter.sh
+++ b/cpp-package/example/run_lenet_with_mxdataiter.sh
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 if [ ! -f "./mnist.zip" ]; then
   wget http://webdocs.cs.ualberta.ca/~bx3/data/mnist.zip
   unzip -u mnist.zip
diff --git a/cpp-package/include/mxnet-cpp/MxNetCpp.h b/cpp-package/include/mxnet-cpp/MxNetCpp.h
index 5d61b82..882bbea 100644
--- a/cpp-package/include/mxnet-cpp/MxNetCpp.h
+++ b/cpp-package/include/mxnet-cpp/MxNetCpp.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file MxNetCpp.h
  * \brief meta include file for mxnet.cpp
  * \author Chuntao Hong, Zhang Chen
diff --git a/cpp-package/include/mxnet-cpp/base.h b/cpp-package/include/mxnet-cpp/base.h
index b684986..19375c0 100644
--- a/cpp-package/include/mxnet-cpp/base.h
+++ b/cpp-package/include/mxnet-cpp/base.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
-*  Copyright (c) 2016 by Contributors
 * \file base.h
 * \brief base definitions for mxnetcpp
 * \author Chuntao Hong, Zhang Chen
diff --git a/cpp-package/include/mxnet-cpp/executor.h b/cpp-package/include/mxnet-cpp/executor.h
index 67eec01..7e45ef5 100644
--- a/cpp-package/include/mxnet-cpp/executor.h
+++ b/cpp-package/include/mxnet-cpp/executor.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
-*  Copyright (c) 2016 by Contributors
 * \file executor.h
 * \brief executor definition
 * \author Chuntao Hong, Zhang Chen
diff --git a/cpp-package/include/mxnet-cpp/initializer.h b/cpp-package/include/mxnet-cpp/initializer.h
index f286565..e5bfa4d 100644
--- a/cpp-package/include/mxnet-cpp/initializer.h
+++ b/cpp-package/include/mxnet-cpp/initializer.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file initializer.h
  * \brief random initializer
  * \author Zhang Chen
diff --git a/cpp-package/include/mxnet-cpp/io.h b/cpp-package/include/mxnet-cpp/io.h
index 727a964..7281416 100644
--- a/cpp-package/include/mxnet-cpp/io.h
+++ b/cpp-package/include/mxnet-cpp/io.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
-*  Copyright (c) 2016 by Contributors
 * \file operator.h
 * \brief definition of io, such as DataIter
 * \author Zhang Chen
diff --git a/cpp-package/include/mxnet-cpp/kvstore.h b/cpp-package/include/mxnet-cpp/kvstore.h
index 9bb33a4..9c3c81f 100644
--- a/cpp-package/include/mxnet-cpp/kvstore.h
+++ b/cpp-package/include/mxnet-cpp/kvstore.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
-*  Copyright (c) 2016 by Contributors
 * \file kvstore.h
 * \brief definition of kvstore
 * \author Chuntao Hong
diff --git a/cpp-package/include/mxnet-cpp/lr_scheduler.h b/cpp-package/include/mxnet-cpp/lr_scheduler.h
index 4c56b7a..b9381a8 100644
--- a/cpp-package/include/mxnet-cpp/lr_scheduler.h
+++ b/cpp-package/include/mxnet-cpp/lr_scheduler.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
-*  Copyright (c) 2017 by Contributors
 * \file lr_scheduler.h
 * \brief Scheduling learning rate
 */
diff --git a/cpp-package/include/mxnet-cpp/metric.h b/cpp-package/include/mxnet-cpp/metric.h
index eda9271..6dbb197 100644
--- a/cpp-package/include/mxnet-cpp/metric.h
+++ b/cpp-package/include/mxnet-cpp/metric.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
-*  Copyright (c) 2016 by Contributors
 * \file base.h
 * \brief metrics defined
 * \author Zhang Chen
diff --git a/cpp-package/include/mxnet-cpp/model.h b/cpp-package/include/mxnet-cpp/model.h
index e4cb1a9..c8af6a4 100644
--- a/cpp-package/include/mxnet-cpp/model.h
+++ b/cpp-package/include/mxnet-cpp/model.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
-*  Copyright (c) 2016 by Contributors
 * \file model.h
 * \brief MXNET.cpp model module
 * \author Zhang Chen
diff --git a/cpp-package/include/mxnet-cpp/monitor.h b/cpp-package/include/mxnet-cpp/monitor.h
index afe030c..33ef485 100644
--- a/cpp-package/include/mxnet-cpp/monitor.h
+++ b/cpp-package/include/mxnet-cpp/monitor.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
-*  Copyright (c) 2017 by Contributors
 * \file monitor.h
 * \brief monitor definition
 * \author Xin Li
diff --git a/cpp-package/include/mxnet-cpp/ndarray.h b/cpp-package/include/mxnet-cpp/ndarray.h
index 52451fa..9e196d0 100644
--- a/cpp-package/include/mxnet-cpp/ndarray.h
+++ b/cpp-package/include/mxnet-cpp/ndarray.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
-*  Copyright (c) 2016 by Contributors
 * \file ndarray.h
 * \brief definition of ndarray
 * \author Chuntao Hong, Zhang Chen
diff --git a/cpp-package/include/mxnet-cpp/op_map.h b/cpp-package/include/mxnet-cpp/op_map.h
index ea75a8c..b54cc0a 100644
--- a/cpp-package/include/mxnet-cpp/op_map.h
+++ b/cpp-package/include/mxnet-cpp/op_map.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
-*  Copyright (c) 2016 by Contributors
 * \file op_map.h
 * \brief definition of OpMap
 * \author Chuntao Hong
diff --git a/cpp-package/include/mxnet-cpp/op_suppl.h b/cpp-package/include/mxnet-cpp/op_suppl.h
index b66521b..52cdae7 100644
--- a/cpp-package/include/mxnet-cpp/op_suppl.h
+++ b/cpp-package/include/mxnet-cpp/op_suppl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
-*  Copyright (c) 2016 by Contributors
 * \file op_suppl.h
 * \brief A supplement and amendment of the operators from op.h
 * \author Zhang Chen, zhubuntu, Xin Li
diff --git a/cpp-package/include/mxnet-cpp/op_util.h b/cpp-package/include/mxnet-cpp/op_util.h
index 5a73748..20e06a8 100644
--- a/cpp-package/include/mxnet-cpp/op_util.h
+++ b/cpp-package/include/mxnet-cpp/op_util.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
-*  Copyright (c) 2017 by Contributors
 * \file op_util.h
 * \brief operator helper functions
 * \author Chris Olivier
diff --git a/cpp-package/include/mxnet-cpp/operator.h b/cpp-package/include/mxnet-cpp/operator.h
index 4fc45bb..02bd21e 100644
--- a/cpp-package/include/mxnet-cpp/operator.h
+++ b/cpp-package/include/mxnet-cpp/operator.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
-*  Copyright (c) 2016 by Contributors
 * \file operator.h
 * \brief definition of operator
 * \author Chuntao Hong, Zhang Chen
diff --git a/cpp-package/include/mxnet-cpp/optimizer.h b/cpp-package/include/mxnet-cpp/optimizer.h
index 1bc36d5..e57da5d 100644
--- a/cpp-package/include/mxnet-cpp/optimizer.h
+++ b/cpp-package/include/mxnet-cpp/optimizer.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
-*  Copyright (c) 2016 by Contributors
 * \file optimizer.h
 * \brief definition of optimizer
 * \author Chuntao Hong, Zhang Chen
diff --git a/cpp-package/include/mxnet-cpp/shape.h b/cpp-package/include/mxnet-cpp/shape.h
index d30ea9d..2793e43 100644
--- a/cpp-package/include/mxnet-cpp/shape.h
+++ b/cpp-package/include/mxnet-cpp/shape.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
-*  Copyright (c) 2016 by Contributors
 * \file shape.h
 * \brief definition of shape
 * \author Chuntao Hong, Zhang Chen
diff --git a/cpp-package/include/mxnet-cpp/symbol.h b/cpp-package/include/mxnet-cpp/symbol.h
index c04ae2a..888aebd 100644
--- a/cpp-package/include/mxnet-cpp/symbol.h
+++ b/cpp-package/include/mxnet-cpp/symbol.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
-*  Copyright (c) 2016 by Contributors
 * \file symbol.h
 * \brief definition of symbol
 * \author Chuntao Hong, Zhang Chen
diff --git a/cpp-package/scripts/OpWrapperGenerator.py b/cpp-package/scripts/OpWrapperGenerator.py
index 8f76236..83495fe 100644
--- a/cpp-package/scripts/OpWrapperGenerator.py
+++ b/cpp-package/scripts/OpWrapperGenerator.py
@@ -1,4 +1,21 @@
-# -*- coding: utf-8 -*-
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# -*- coding: utf-8 -*-
 # This is a python script that generates operator wrappers such as FullyConnected,
 # based on current libmxnet.dll. This script is written so that we don't need to
 # write new operator wrappers when new ones are added to the library.
diff --git a/cpp-package/scripts/lint.py b/cpp-package/scripts/lint.py
index 89492ed..f9f284f 100644
--- a/cpp-package/scripts/lint.py
+++ b/cpp-package/scripts/lint.py
@@ -1,4 +1,22 @@
 #!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: disable=protected-access, unused-variable, locally-disabled, redefined-variable-type
 """Lint helper to generate lint summary of source.
 Copyright by Contributors
diff --git a/cpp-package/tests/ci_test.sh b/cpp-package/tests/ci_test.sh
index 29d0a9d..3b2af35 100755
--- a/cpp-package/tests/ci_test.sh
+++ b/cpp-package/tests/ci_test.sh
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 set -e # exit on the first error
 cd $(dirname $(readlink -f $0))/../example
 echo $PWD
diff --git a/cpp-package/tests/travis/run_test.sh b/cpp-package/tests/travis/run_test.sh
index 2750658..4925b35 100755
--- a/cpp-package/tests/travis/run_test.sh
+++ b/cpp-package/tests/travis/run_test.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 if [ ${TASK} == "lint" ]; then
     make lint || exit -1
     echo "Check documentations of c++ code..."
diff --git a/cpp-package/tests/travis/setup.sh b/cpp-package/tests/travis/setup.sh
index 4238c76..5a3813e 100755
--- a/cpp-package/tests/travis/setup.sh
+++ b/cpp-package/tests/travis/setup.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 if [ ${TASK} == "lint" ]; then
     pip install cpplint 'pylint==1.4.4' 'astroid==1.3.6' --user
 fi
diff --git a/docker/Dockerfiles/Dockerfile.in.scala b/docker/Dockerfiles/Dockerfile.in.scala
index 6898126..1fe9365 100644
--- a/docker/Dockerfiles/Dockerfile.in.scala
+++ b/docker/Dockerfiles/Dockerfile.in.scala
@@ -1,3 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 # -*- mode: dockerfile -*-
 # part of the dockerfile to install the scala binding
 
diff --git a/docker/install/cpp.sh b/docker/install/cpp.sh
index f30ab52..1aa55ac 100755
--- a/docker/install/cpp.sh
+++ b/docker/install/cpp.sh
@@ -1,4 +1,22 @@
 #!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # libraries for building mxnet c++ core on ubuntu
 
 apt-get update && apt-get install -y \
diff --git a/docker/install/julia.sh b/docker/install/julia.sh
index 604a1bc..e6fe49b 100755
--- a/docker/install/julia.sh
+++ b/docker/install/julia.sh
@@ -1,4 +1,22 @@
 #!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # install libraries for mxnet's julia package on ubuntu
 
 # the julia version shipped with ubuntu (version 0.4) is too low. so download a
diff --git a/docker/install/perl.sh b/docker/install/perl.sh
index da4df67..a981746 100755
--- a/docker/install/perl.sh
+++ b/docker/install/perl.sh
@@ -1,4 +1,22 @@
 #!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # install libraries for mxnet's perl package on ubuntu
 apt-get update && apt-get install -y libmouse-perl pdl cpanminus swig libgraphviz-perl
 cpanm -q Function::Parameters
diff --git a/docker/install/python.sh b/docker/install/python.sh
index 0459bb9..763f27b 100755
--- a/docker/install/python.sh
+++ b/docker/install/python.sh
@@ -1,4 +1,22 @@
 #!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # install libraries for mxnet's python package on ubuntu
 
 apt-get update && apt-get install -y python-dev python3-dev
diff --git a/docker/install/r.sh b/docker/install/r.sh
index 9351763..a0fa273 100755
--- a/docker/install/r.sh
+++ b/docker/install/r.sh
@@ -1,4 +1,22 @@
 #!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # install libraries for mxnet's r package on ubuntu
 
 echo "deb http://cran.rstudio.com/bin/linux/ubuntu trusty/" >> /etc/apt/sources.list
diff --git a/docker/install/scala.sh b/docker/install/scala.sh
index 8cbe911..bb0bb9c 100755
--- a/docker/install/scala.sh
+++ b/docker/install/scala.sh
@@ -1,4 +1,22 @@
 #!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # install libraries for mxnet's scala package on ubuntu
 
 apt-get install -y maven default-jdk
diff --git a/docker/run.sh b/docker/run.sh
index b13e13c..f570f70 100644
--- a/docker/run.sh
+++ b/docker/run.sh
@@ -1,4 +1,22 @@
 #!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # Build and push all docker containers
 
 DEVICES=('cpu' 'gpu')
diff --git a/docker/tool.sh b/docker/tool.sh
index 222d428..d8ab9db 100755
--- a/docker/tool.sh
+++ b/docker/tool.sh
@@ -1,4 +1,22 @@
 #!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 #
 # Script to build, test and push a docker container
 #
diff --git a/docs/build_version_doc/AddVersion.py b/docs/build_version_doc/AddVersion.py
index ee46ef5..38ce48f 100644
--- a/docs/build_version_doc/AddVersion.py
+++ b/docs/build_version_doc/AddVersion.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os
 import argparse
 from bs4 import BeautifulSoup as bs
@@ -55,4 +72,4 @@ if __name__ == '__main__':
                 navbar_mobile.append(version_str_mobile)
                 outstr = str(content).replace('&lt;', '<').replace('&gt;', '>')
                 with open(os.path.join(path, name), "w") as outf:
-                    outf.write(outstr)
\ No newline at end of file
+                    outf.write(outstr)
diff --git a/docs/build_version_doc/build_doc.sh b/docs/build_version_doc/build_doc.sh
index 99b6bd8..c5b59ba 100755
--- a/docs/build_version_doc/build_doc.sh
+++ b/docs/build_version_doc/build_doc.sh
@@ -1,12 +1,30 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 web_url="$1"
 web_folder="VersionedWeb"
 local_build="latest"
 web_branch="$2"
 git clone $web_url $web_folder
 cd $web_folder
-git checkout $web_branch 
+git checkout $web_branch
 cd ..
 mkdir "$local_build"
 
@@ -14,7 +32,7 @@ mkdir "$local_build"
 tag_list_file="tag_list.txt"
 cp "$web_folder/tag.txt" "$tag_list_file"
 tag_list=()
-while read -r line 
+while read -r line
 do
     tag_list+=("$line")
 done < "$tag_list_file"
diff --git a/docs/conf.py b/docs/conf.py
index 7a1059e..ad51323 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # -*- coding: utf-8 -*-
 import sys, os, re, subprocess
 import mock
diff --git a/docs/mxdoc.py b/docs/mxdoc.py
index 25f6af7..2726a1c 100644
--- a/docs/mxdoc.py
+++ b/docs/mxdoc.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """A sphnix-doc plugin to build mxnet docs"""
 import subprocess
 import re
diff --git a/example/adversary/data.py b/example/adversary/data.py
index d39821f..0ca8e1f 100644
--- a/example/adversary/data.py
+++ b/example/adversary/data.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 """ data iterator for mnist """
 import sys
diff --git a/example/autoencoder/autoencoder.py b/example/autoencoder/autoencoder.py
index ca8db7a..a84b271 100644
--- a/example/autoencoder/autoencoder.py
+++ b/example/autoencoder/autoencoder.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 import mxnet as mx
 from mxnet import misc
diff --git a/example/autoencoder/data.py b/example/autoencoder/data.py
index ecd117d..d6a25ed 100644
--- a/example/autoencoder/data.py
+++ b/example/autoencoder/data.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os
 import numpy as np
 from sklearn.datasets import fetch_mldata
diff --git a/example/autoencoder/mnist_sae.py b/example/autoencoder/mnist_sae.py
index 538d8b9..5525948 100644
--- a/example/autoencoder/mnist_sae.py
+++ b/example/autoencoder/mnist_sae.py
@@ -1,27 +1,44 @@
-# pylint: skip-file
-from __future__ import print_function
-import mxnet as mx
-import numpy as np
-import logging
-import data
-from autoencoder import AutoEncoderModel
-
-
-if __name__ == '__main__':
-    # set to INFO to see less information during training
-    logging.basicConfig(level=logging.DEBUG)
-    ae_model = AutoEncoderModel(mx.gpu(0), [784,500,500,2000,10], pt_dropout=0.2,
-        internal_act='relu', output_act='relu')
-
-    X, _ = data.get_mnist()
-    train_X = X[:60000]
-    val_X = X[60000:]
-
-    ae_model.layerwise_pretrain(train_X, 256, 50000, 'sgd', l_rate=0.1, decay=0.0,
-                             lr_scheduler=mx.misc.FactorScheduler(20000,0.1))
-    ae_model.finetune(train_X, 256, 100000, 'sgd', l_rate=0.1, decay=0.0,
-                   lr_scheduler=mx.misc.FactorScheduler(20000,0.1))
-    ae_model.save('mnist_pt.arg')
-    ae_model.load('mnist_pt.arg')
-    print("Training error:", ae_model.eval(train_X))
-    print("Validation error:", ae_model.eval(val_X))
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# pylint: skip-file
+from __future__ import print_function
+import mxnet as mx
+import numpy as np
+import logging
+import data
+from autoencoder import AutoEncoderModel
+
+
+if __name__ == '__main__':
+    # set to INFO to see less information during training
+    logging.basicConfig(level=logging.DEBUG)
+    ae_model = AutoEncoderModel(mx.gpu(0), [784,500,500,2000,10], pt_dropout=0.2,
+        internal_act='relu', output_act='relu')
+
+    X, _ = data.get_mnist()
+    train_X = X[:60000]
+    val_X = X[60000:]
+
+    ae_model.layerwise_pretrain(train_X, 256, 50000, 'sgd', l_rate=0.1, decay=0.0,
+                             lr_scheduler=mx.misc.FactorScheduler(20000,0.1))
+    ae_model.finetune(train_X, 256, 100000, 'sgd', l_rate=0.1, decay=0.0,
+                   lr_scheduler=mx.misc.FactorScheduler(20000,0.1))
+    ae_model.save('mnist_pt.arg')
+    ae_model.load('mnist_pt.arg')
+    print("Training error:", ae_model.eval(train_X))
+    print("Validation error:", ae_model.eval(val_X))
diff --git a/example/autoencoder/model.py b/example/autoencoder/model.py
index 85fb48c..1aaae1b 100644
--- a/example/autoencoder/model.py
+++ b/example/autoencoder/model.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 import mxnet as mx
 import numpy as np
diff --git a/example/autoencoder/solver.py b/example/autoencoder/solver.py
index 21c5da2..5589c5a 100644
--- a/example/autoencoder/solver.py
+++ b/example/autoencoder/solver.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 import mxnet as mx
 import numpy as np
diff --git a/example/bayesian-methods/algos.py b/example/bayesian-methods/algos.py
index 81df9c5..e47a18f 100644
--- a/example/bayesian-methods/algos.py
+++ b/example/bayesian-methods/algos.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 import mxnet as mx
 import mxnet.ndarray as nd
diff --git a/example/bayesian-methods/bdk_demo.py b/example/bayesian-methods/bdk_demo.py
index 77e9b94..145dac1 100644
--- a/example/bayesian-methods/bdk_demo.py
+++ b/example/bayesian-methods/bdk_demo.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 import mxnet as mx
 import mxnet.ndarray as nd
diff --git a/example/bayesian-methods/data_loader.py b/example/bayesian-methods/data_loader.py
index 90b01e0..2649eb5 100644
--- a/example/bayesian-methods/data_loader.py
+++ b/example/bayesian-methods/data_loader.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 import numpy
 import os
diff --git a/example/bayesian-methods/utils.py b/example/bayesian-methods/utils.py
index 4a2f41d..a274437 100644
--- a/example/bayesian-methods/utils.py
+++ b/example/bayesian-methods/utils.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 import mxnet.ndarray as nd
 import numpy
@@ -169,4 +186,4 @@ def pred_test(testing_data, exe, param_list=None, save_path=""):
             ret[i, 1] = pred.std()**2
         numpy.savetxt(save_path, ret)
     mse = numpy.square(ret[:, 0] - testing_data[:, 0] **3).mean()
-    return mse, ret
\ No newline at end of file
+    return mse, ret
diff --git a/example/bi-lstm-sort/infer_sort.py b/example/bi-lstm-sort/infer_sort.py
index 0f5ef07..b074c03 100644
--- a/example/bi-lstm-sort/infer_sort.py
+++ b/example/bi-lstm-sort/infer_sort.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: disable=C0111,too-many-arguments,too-many-instance-attributes,too-many-locals,redefined-outer-name,fixme
 # pylint: disable=superfluous-parens, no-member, invalid-name
 import sys
@@ -31,7 +48,7 @@ if __name__ == '__main__':
     rvocab = {}
     for k, v in vocab.items():
         rvocab[v] = k
-    
+
     _, arg_params, __ = mx.model.load_checkpoint("sort", 1)
 
     model = BiLSTMInferenceModel(5, len(vocab),
@@ -42,9 +59,9 @@ if __name__ == '__main__':
     data = np.zeros((1, len(tks)))
     for k in range(len(tks)):
         data[0][k] = vocab[tks[k]]
-    
+
     data = mx.nd.array(data)
     prob = model.forward(data)
-    for k in range(len(tks)):        
+    for k in range(len(tks)):
         print(rvocab[np.argmax(prob, axis = 1)[k]])
-    
+
diff --git a/example/bi-lstm-sort/lstm.py b/example/bi-lstm-sort/lstm.py
index 6168712..a082092 100644
--- a/example/bi-lstm-sort/lstm.py
+++ b/example/bi-lstm-sort/lstm.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint:skip-file
 import sys
 sys.path.insert(0, "../../python")
@@ -64,7 +81,7 @@ def bi_lstm_unroll(seq_len, input_size,
     embed = mx.sym.Embedding(data=data, input_dim=input_size,
                              weight=embed_weight, output_dim=num_embed, name='embed')
     wordvec = mx.sym.SliceChannel(data=embed, num_outputs=seq_len, squeeze_axis=1)
-    
+
     forward_hidden = []
     for seqidx in range(seq_len):
         hidden = wordvec[seqidx]
@@ -87,7 +104,7 @@ def bi_lstm_unroll(seq_len, input_size,
         hidden = next_state.h
         last_states[1] = next_state
         backward_hidden.insert(0, hidden)
-        
+
     hidden_all = []
     for i in range(seq_len):
         hidden_all.append(mx.sym.Concat(*[forward_hidden[i], backward_hidden[i]], dim=1))
@@ -109,7 +126,7 @@ def bi_lstm_inference_symbol(input_size, seq_len,
     embed_weight=mx.sym.Variable("embed_weight")
     cls_weight = mx.sym.Variable("cls_weight")
     cls_bias = mx.sym.Variable("cls_bias")
-    last_states = [LSTMState(c = mx.sym.Variable("l0_init_c"), h = mx.sym.Variable("l0_init_h")), 
+    last_states = [LSTMState(c = mx.sym.Variable("l0_init_c"), h = mx.sym.Variable("l0_init_h")),
                    LSTMState(c = mx.sym.Variable("l1_init_c"), h = mx.sym.Variable("l1_init_h"))]
     forward_param = LSTMParam(i2h_weight=mx.sym.Variable("l0_i2h_weight"),
                               i2h_bias=mx.sym.Variable("l0_i2h_bias"),
@@ -143,7 +160,7 @@ def bi_lstm_inference_symbol(input_size, seq_len,
         hidden = next_state.h
         last_states[1] = next_state
         backward_hidden.insert(0, hidden)
-        
+
     hidden_all = []
     for i in range(seq_len):
         hidden_all.append(mx.sym.Concat(*[forward_hidden[i], backward_hidden[i]], dim=1))
diff --git a/example/bi-lstm-sort/lstm_sort.py b/example/bi-lstm-sort/lstm_sort.py
index fe8c38b..aef88b8 100644
--- a/example/bi-lstm-sort/lstm_sort.py
+++ b/example/bi-lstm-sort/lstm_sort.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: disable=C0111,too-many-arguments,too-many-instance-attributes,too-many-locals,redefined-outer-name,fixme
 # pylint: disable=superfluous-parens, no-member, invalid-name
 import sys
diff --git a/example/bi-lstm-sort/rnn_model.py b/example/bi-lstm-sort/rnn_model.py
index a253e86..202aae6 100644
--- a/example/bi-lstm-sort/rnn_model.py
+++ b/example/bi-lstm-sort/rnn_model.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: disable=C0111,too-many-arguments,too-many-instance-attributes,too-many-locals,redefined-outer-name,fixme
 # pylint: disable=superfluous-parens, no-member, invalid-name
 import sys
@@ -25,7 +42,7 @@ class BiLSTMInferenceModel(object):
         batch_size = 1
         init_c = [('l%d_init_c'%l, (batch_size, num_hidden)) for l in range(2)]
         init_h = [('l%d_init_h'%l, (batch_size, num_hidden)) for l in range(2)]
-        
+
         data_shape = [("data", (batch_size, seq_len, ))]
 
         input_shapes = dict(init_c + init_h + data_shape)
diff --git a/example/bi-lstm-sort/sort_io.py b/example/bi-lstm-sort/sort_io.py
index 8e11521..8cb44c6 100644
--- a/example/bi-lstm-sort/sort_io.py
+++ b/example/bi-lstm-sort/sort_io.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: disable=C0111,too-many-arguments,too-many-instance-attributes,too-many-locals,redefined-outer-name,fixme
 # pylint: disable=superfluous-parens, no-member, invalid-name
 from __future__ import print_function
diff --git a/example/caffe/caffe_net.py b/example/caffe/caffe_net.py
index c91d37b..0dc4770 100644
--- a/example/caffe/caffe_net.py
+++ b/example/caffe/caffe_net.py
@@ -1,5 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
-from data import get_iterator 
+from data import get_iterator
 import argparse
 import train_model
 
diff --git a/example/caffe/data.py b/example/caffe/data.py
index 723e7da..fac8e11 100644
--- a/example/caffe/data.py
+++ b/example/caffe/data.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import sys
 import os
 # code to automatically download dataset
diff --git a/example/caffe/train_model.py b/example/caffe/train_model.py
index 9a51f07..2eadd86 100644
--- a/example/caffe/train_model.py
+++ b/example/caffe/train_model.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 import logging
 import os
diff --git a/example/cnn_text_classification/data_helpers.py b/example/cnn_text_classification/data_helpers.py
index b3ece2d..3812683 100644
--- a/example/cnn_text_classification/data_helpers.py
+++ b/example/cnn_text_classification/data_helpers.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import numpy as np
 import re
 import itertools
diff --git a/example/cnn_text_classification/old/text_cnn.py b/example/cnn_text_classification/old/text_cnn.py
index e41af36..8d82d6e 100644
--- a/example/cnn_text_classification/old/text_cnn.py
+++ b/example/cnn_text_classification/old/text_cnn.py
@@ -1,4 +1,22 @@
 #!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # -*- coding: utf-8 -*-
 from __future__ import print_function
 import sys,os
@@ -242,7 +260,7 @@ def train_without_pretrained_embedding():
     print('train shape:', x_train.shape)
     print('dev shape:', x_dev.shape)
     print('vocab_size', vocab_size)
-   
+
     batch_size = 50
     num_embed = 300
     sentence_size = x_train.shape[1]
diff --git a/example/cnn_text_classification/text_cnn.py b/example/cnn_text_classification/text_cnn.py
index 16d3dca..d88a8e6 100644
--- a/example/cnn_text_classification/text_cnn.py
+++ b/example/cnn_text_classification/text_cnn.py
@@ -1,4 +1,22 @@
 #!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # -*- coding: utf-8 -*-
 
 import sys
@@ -77,7 +95,7 @@ def data_iter(batch_size, num_embed, pre_trained_word2vec=False):
         x_train, y_train, batch_size, shuffle=True)
     valid = mx.io.NDArrayIter(
         x_dev, y_dev, batch_size)
-    
+
     return (train, valid, sentence_size, embed_size, vocab_size)
 
 def sym_gen(batch_size, sentence_size, num_embed, vocab_size,
@@ -121,7 +139,7 @@ def sym_gen(batch_size, sentence_size, num_embed, vocab_size,
     # softmax output
     sm = mx.sym.SoftmaxOutput(data=fc, label=input_y, name='softmax')
 
-    return sm, ('data',), ('softmax_label',)  
+    return sm, ('data',), ('softmax_label',)
 
 def train(symbol, train_iter, valid_iter, data_names, label_names):
     devs = mx.cpu() if args.gpus is None or args.gpus is '' else [
diff --git a/example/ctc/lstm.py b/example/ctc/lstm.py
index 9c493bb..7e18c86 100644
--- a/example/ctc/lstm.py
+++ b/example/ctc/lstm.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint:skip-file
 import sys
 
diff --git a/example/ctc/lstm_ocr.py b/example/ctc/lstm_ocr.py
index 7d437bf..c9928aa 100644
--- a/example/ctc/lstm_ocr.py
+++ b/example/ctc/lstm_ocr.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: disable=C0111,too-many-arguments,too-many-instance-attributes,too-many-locals,redefined-outer-name,fixme
 # pylint: disable=superfluous-parens, no-member, invalid-name
 from __future__ import print_function
diff --git a/example/ctc/ocr_predict.py b/example/ctc/ocr_predict.py
index a07733e..3096a66 100644
--- a/example/ctc/ocr_predict.py
+++ b/example/ctc/ocr_predict.py
@@ -1,4 +1,22 @@
 #!/usr/bin/env python2.7
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding=utf-8
 from __future__ import print_function
 import sys, os
diff --git a/example/dec/dec.py b/example/dec/dec.py
index d8a4514..ac6545a 100644
--- a/example/dec/dec.py
+++ b/example/dec/dec.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 from __future__ import print_function
 import sys
@@ -154,4 +171,4 @@ def mnist_exp(xpu):
 if __name__ == '__main__':
     logging.basicConfig(level=logging.INFO)
     mnist_exp(mx.gpu(0))
-    
+
diff --git a/example/dsd/mlp.py b/example/dsd/mlp.py
index ccb0940..767e592 100644
--- a/example/dsd/mlp.py
+++ b/example/dsd/mlp.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 import os
 import logging
diff --git a/example/dsd/sparse_sgd.py b/example/dsd/sparse_sgd.py
index f11a239..b21e9b9 100644
--- a/example/dsd/sparse_sgd.py
+++ b/example/dsd/sparse_sgd.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from mxnet.ndarray import NDArray, topk, abs as NDabs
 from mxnet.optimizer import SGD, register
 import logging
diff --git a/example/fcn-xs/data.py b/example/fcn-xs/data.py
index 9de0d8d..685b6f7 100644
--- a/example/fcn-xs/data.py
+++ b/example/fcn-xs/data.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 """ file iterator for pasval voc 2012"""
 import mxnet as mx
diff --git a/example/fcn-xs/fcn_xs.py b/example/fcn-xs/fcn_xs.py
index 85961d9..53244a1 100644
--- a/example/fcn-xs/fcn_xs.py
+++ b/example/fcn-xs/fcn_xs.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 import sys, os
 import argparse
diff --git a/example/fcn-xs/image_segmentaion.py b/example/fcn-xs/image_segmentaion.py
index 6d619c1..ddd850f 100644
--- a/example/fcn-xs/image_segmentaion.py
+++ b/example/fcn-xs/image_segmentaion.py
@@ -1,58 +1,75 @@
-# pylint: skip-file
-import numpy as np
-import mxnet as mx
-from PIL import Image
-
-def getpallete(num_cls):
-    # this function is to get the colormap for visualizing the segmentation mask
-    n = num_cls
-    pallete = [0]*(n*3)
-    for j in xrange(0,n):
-            lab = j
-            pallete[j*3+0] = 0
-            pallete[j*3+1] = 0
-            pallete[j*3+2] = 0
-            i = 0
-            while (lab > 0):
-                    pallete[j*3+0] |= (((lab >> 0) & 1) << (7-i))
-                    pallete[j*3+1] |= (((lab >> 1) & 1) << (7-i))
-                    pallete[j*3+2] |= (((lab >> 2) & 1) << (7-i))
-                    i = i + 1
-                    lab >>= 3
-    return pallete
-
-pallete = getpallete(256)
-img = "./person_bicycle.jpg"
-seg = img.replace("jpg", "png")
-model_previx = "FCN8s_VGG16"
-epoch = 19
-ctx = mx.gpu(0)
-
-def get_data(img_path):
-    """get the (1, 3, h, w) np.array data for the img_path"""
-    mean = np.array([123.68, 116.779, 103.939])  # (R,G,B)
-    img = Image.open(img_path)
-    img = np.array(img, dtype=np.float32)
-    reshaped_mean = mean.reshape(1, 1, 3)
-    img = img - reshaped_mean
-    img = np.swapaxes(img, 0, 2)
-    img = np.swapaxes(img, 1, 2)
-    img = np.expand_dims(img, axis=0)
-    return img
-
-def main():
-    fcnxs, fcnxs_args, fcnxs_auxs = mx.model.load_checkpoint(model_previx, epoch)
-    fcnxs_args["data"] = mx.nd.array(get_data(img), ctx)
-    data_shape = fcnxs_args["data"].shape
-    label_shape = (1, data_shape[2]*data_shape[3])
-    fcnxs_args["softmax_label"] = mx.nd.empty(label_shape, ctx)
-    exector = fcnxs.bind(ctx, fcnxs_args ,args_grad=None, grad_req="null", aux_states=fcnxs_args)
-    exector.forward(is_train=False)
-    output = exector.outputs[0]
-    out_img = np.uint8(np.squeeze(output.asnumpy().argmax(axis=1)))
-    out_img = Image.fromarray(out_img)
-    out_img.putpalette(pallete)
-    out_img.save(seg)
-
-if __name__ == "__main__":
-    main()
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# pylint: skip-file
+import numpy as np
+import mxnet as mx
+from PIL import Image
+
+def getpallete(num_cls):
+    # this function is to get the colormap for visualizing the segmentation mask
+    n = num_cls
+    pallete = [0]*(n*3)
+    for j in xrange(0,n):
+            lab = j
+            pallete[j*3+0] = 0
+            pallete[j*3+1] = 0
+            pallete[j*3+2] = 0
+            i = 0
+            while (lab > 0):
+                    pallete[j*3+0] |= (((lab >> 0) & 1) << (7-i))
+                    pallete[j*3+1] |= (((lab >> 1) & 1) << (7-i))
+                    pallete[j*3+2] |= (((lab >> 2) & 1) << (7-i))
+                    i = i + 1
+                    lab >>= 3
+    return pallete
+
+pallete = getpallete(256)
+img = "./person_bicycle.jpg"
+seg = img.replace("jpg", "png")
+model_previx = "FCN8s_VGG16"
+epoch = 19
+ctx = mx.gpu(0)
+
+def get_data(img_path):
+    """get the (1, 3, h, w) np.array data for the img_path"""
+    mean = np.array([123.68, 116.779, 103.939])  # (R,G,B)
+    img = Image.open(img_path)
+    img = np.array(img, dtype=np.float32)
+    reshaped_mean = mean.reshape(1, 1, 3)
+    img = img - reshaped_mean
+    img = np.swapaxes(img, 0, 2)
+    img = np.swapaxes(img, 1, 2)
+    img = np.expand_dims(img, axis=0)
+    return img
+
+def main():
+    fcnxs, fcnxs_args, fcnxs_auxs = mx.model.load_checkpoint(model_previx, epoch)
+    fcnxs_args["data"] = mx.nd.array(get_data(img), ctx)
+    data_shape = fcnxs_args["data"].shape
+    label_shape = (1, data_shape[2]*data_shape[3])
+    fcnxs_args["softmax_label"] = mx.nd.empty(label_shape, ctx)
+    exector = fcnxs.bind(ctx, fcnxs_args ,args_grad=None, grad_req="null", aux_states=fcnxs_args)
+    exector.forward(is_train=False)
+    output = exector.outputs[0]
+    out_img = np.uint8(np.squeeze(output.asnumpy().argmax(axis=1)))
+    out_img = Image.fromarray(out_img)
+    out_img.putpalette(pallete)
+    out_img.save(seg)
+
+if __name__ == "__main__":
+    main()
diff --git a/example/fcn-xs/init_fcnxs.py b/example/fcn-xs/init_fcnxs.py
index c90a45b..ede46b8 100644
--- a/example/fcn-xs/init_fcnxs.py
+++ b/example/fcn-xs/init_fcnxs.py
@@ -1,89 +1,106 @@
-# pylint: skip-file
-import mxnet as mx
-import numpy as np
-import sys
-import logging
-
-logger = logging.getLogger()
-logger.setLevel(logging.INFO)
-
-# make a bilinear interpolation kernel, return a numpy.ndarray
-def upsample_filt(size):
-    factor = (size + 1) // 2
-    if size % 2 == 1:
-        center = factor - 1.0
-    else:
-        center = factor - 0.5
-    og = np.ogrid[:size, :size]
-    return (1 - abs(og[0] - center) / factor) * \
-           (1 - abs(og[1] - center) / factor)
-
-def init_from_vgg16(ctx, fcnxs_symbol, vgg16fc_args, vgg16fc_auxs):
-    fcnxs_args = vgg16fc_args.copy()
-    fcnxs_auxs = vgg16fc_auxs.copy()
-    for k,v in fcnxs_args.items():
-        if(v.context != ctx):
-            fcnxs_args[k] = mx.nd.zeros(v.shape, ctx)
-            v.copyto(fcnxs_args[k])
-    for k,v in fcnxs_auxs.items():
-        if(v.context != ctx):
-            fcnxs_auxs[k] = mx.nd.zeros(v.shape, ctx)
-            v.copyto(fcnxs_auxs[k])
-    data_shape=(1,3,500,500)
-    arg_names = fcnxs_symbol.list_arguments()
-    arg_shapes, _, _ = fcnxs_symbol.infer_shape(data=data_shape)
-    rest_params = dict([(x[0], mx.nd.zeros(x[1], ctx)) for x in zip(arg_names, arg_shapes)
-            if x[0] in ['score_weight', 'score_bias', 'score_pool4_weight', 'score_pool4_bias', \
-                        'score_pool3_weight', 'score_pool3_bias']])
-    fcnxs_args.update(rest_params)
-    deconv_params = dict([(x[0], x[1]) for x in zip(arg_names, arg_shapes)
-            if x[0] in ["bigscore_weight", 'score2_weight', 'score4_weight']])
-    for k, v in deconv_params.items():
-        filt = upsample_filt(v[3])
-        initw = np.zeros(v)
-        initw[range(v[0]), range(v[1]), :, :] = filt  # becareful here is the slice assing
-        fcnxs_args[k] = mx.nd.array(initw, ctx)
-    return fcnxs_args, fcnxs_auxs
-
-def init_from_fcnxs(ctx, fcnxs_symbol, fcnxs_args_from, fcnxs_auxs_from):
-    """ use zero initialization for better convergence, because it tends to oputut 0,
-    and the label 0 stands for background, which may occupy most size of one image.
-    """
-    fcnxs_args = fcnxs_args_from.copy()
-    fcnxs_auxs = fcnxs_auxs_from.copy()
-    for k,v in fcnxs_args.items():
-        if(v.context != ctx):
-            fcnxs_args[k] = mx.nd.zeros(v.shape, ctx)
-            v.copyto(fcnxs_args[k])
-    for k,v in fcnxs_auxs.items():
-        if(v.context != ctx):
-            fcnxs_auxs[k] = mx.nd.zeros(v.shape, ctx)
-            v.copyto(fcnxs_auxs[k])
-    data_shape=(1,3,500,500)
-    arg_names = fcnxs_symbol.list_arguments()
-    arg_shapes, _, _ = fcnxs_symbol.infer_shape(data=data_shape)
-    rest_params = {}
-    deconv_params = {}
-    # this is fcn8s init from fcn16s
-    if 'score_pool3_weight' in arg_names:
-        rest_params = dict([(x[0], mx.nd.zeros(x[1], ctx)) for x in zip(arg_names, arg_shapes)
-            if x[0] in ['score_pool3_bias', 'score_pool3_weight']])
-        deconv_params = dict([(x[0], x[1]) for x in zip(arg_names, arg_shapes) if x[0] \
-            in ["bigscore_weight", 'score4_weight']])
-    # this is fcn16s init from fcn32s
-    elif 'score_pool4_weight' in arg_names:
-        rest_params = dict([(x[0], mx.nd.zeros(x[1], ctx)) for x in zip(arg_names, arg_shapes)
-            if x[0] in ['score_pool4_weight', 'score_pool4_bias']])
-        deconv_params = dict([(x[0], x[1]) for x in zip(arg_names, arg_shapes) if x[0] \
-            in ["bigscore_weight", 'score2_weight']])
-    # this is fcn32s init
-    else:
-        logging.error("you are init the fcn32s model, so you should use init_from_vgg16()")
-        sys.exit()
-    fcnxs_args.update(rest_params)
-    for k, v in deconv_params.items():
-        filt = upsample_filt(v[3])
-        initw = np.zeros(v)
-        initw[range(v[0]), range(v[1]), :, :] = filt  # becareful here is the slice assing
-        fcnxs_args[k] = mx.nd.array(initw, ctx)
-    return fcnxs_args, fcnxs_auxs
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# pylint: skip-file
+import mxnet as mx
+import numpy as np
+import sys
+import logging
+
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
+# make a bilinear interpolation kernel, return a numpy.ndarray
+def upsample_filt(size):
+    factor = (size + 1) // 2
+    if size % 2 == 1:
+        center = factor - 1.0
+    else:
+        center = factor - 0.5
+    og = np.ogrid[:size, :size]
+    return (1 - abs(og[0] - center) / factor) * \
+           (1 - abs(og[1] - center) / factor)
+
+def init_from_vgg16(ctx, fcnxs_symbol, vgg16fc_args, vgg16fc_auxs):
+    fcnxs_args = vgg16fc_args.copy()
+    fcnxs_auxs = vgg16fc_auxs.copy()
+    for k,v in fcnxs_args.items():
+        if(v.context != ctx):
+            fcnxs_args[k] = mx.nd.zeros(v.shape, ctx)
+            v.copyto(fcnxs_args[k])
+    for k,v in fcnxs_auxs.items():
+        if(v.context != ctx):
+            fcnxs_auxs[k] = mx.nd.zeros(v.shape, ctx)
+            v.copyto(fcnxs_auxs[k])
+    data_shape=(1,3,500,500)
+    arg_names = fcnxs_symbol.list_arguments()
+    arg_shapes, _, _ = fcnxs_symbol.infer_shape(data=data_shape)
+    rest_params = dict([(x[0], mx.nd.zeros(x[1], ctx)) for x in zip(arg_names, arg_shapes)
+            if x[0] in ['score_weight', 'score_bias', 'score_pool4_weight', 'score_pool4_bias', \
+                        'score_pool3_weight', 'score_pool3_bias']])
+    fcnxs_args.update(rest_params)
+    deconv_params = dict([(x[0], x[1]) for x in zip(arg_names, arg_shapes)
+            if x[0] in ["bigscore_weight", 'score2_weight', 'score4_weight']])
+    for k, v in deconv_params.items():
+        filt = upsample_filt(v[3])
+        initw = np.zeros(v)
+        initw[range(v[0]), range(v[1]), :, :] = filt  # becareful here is the slice assing
+        fcnxs_args[k] = mx.nd.array(initw, ctx)
+    return fcnxs_args, fcnxs_auxs
+
+def init_from_fcnxs(ctx, fcnxs_symbol, fcnxs_args_from, fcnxs_auxs_from):
+    """ use zero initialization for better convergence, because it tends to oputut 0,
+    and the label 0 stands for background, which may occupy most size of one image.
+    """
+    fcnxs_args = fcnxs_args_from.copy()
+    fcnxs_auxs = fcnxs_auxs_from.copy()
+    for k,v in fcnxs_args.items():
+        if(v.context != ctx):
+            fcnxs_args[k] = mx.nd.zeros(v.shape, ctx)
+            v.copyto(fcnxs_args[k])
+    for k,v in fcnxs_auxs.items():
+        if(v.context != ctx):
+            fcnxs_auxs[k] = mx.nd.zeros(v.shape, ctx)
+            v.copyto(fcnxs_auxs[k])
+    data_shape=(1,3,500,500)
+    arg_names = fcnxs_symbol.list_arguments()
+    arg_shapes, _, _ = fcnxs_symbol.infer_shape(data=data_shape)
+    rest_params = {}
+    deconv_params = {}
+    # this is fcn8s init from fcn16s
+    if 'score_pool3_weight' in arg_names:
+        rest_params = dict([(x[0], mx.nd.zeros(x[1], ctx)) for x in zip(arg_names, arg_shapes)
+            if x[0] in ['score_pool3_bias', 'score_pool3_weight']])
+        deconv_params = dict([(x[0], x[1]) for x in zip(arg_names, arg_shapes) if x[0] \
+            in ["bigscore_weight", 'score4_weight']])
+    # this is fcn16s init from fcn32s
+    elif 'score_pool4_weight' in arg_names:
+        rest_params = dict([(x[0], mx.nd.zeros(x[1], ctx)) for x in zip(arg_names, arg_shapes)
+            if x[0] in ['score_pool4_weight', 'score_pool4_bias']])
+        deconv_params = dict([(x[0], x[1]) for x in zip(arg_names, arg_shapes) if x[0] \
+            in ["bigscore_weight", 'score2_weight']])
+    # this is fcn32s init
+    else:
+        logging.error("you are init the fcn32s model, so you should use init_from_vgg16()")
+        sys.exit()
+    fcnxs_args.update(rest_params)
+    for k, v in deconv_params.items():
+        filt = upsample_filt(v[3])
+        initw = np.zeros(v)
+        initw[range(v[0]), range(v[1]), :, :] = filt  # becareful here is the slice assing
+        fcnxs_args[k] = mx.nd.array(initw, ctx)
+    return fcnxs_args, fcnxs_auxs
diff --git a/example/fcn-xs/run_fcnxs.sh b/example/fcn-xs/run_fcnxs.sh
index 926f3f8..df9a880 100755
--- a/example/fcn-xs/run_fcnxs.sh
+++ b/example/fcn-xs/run_fcnxs.sh
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # train fcn-32s model
 python -u fcn_xs.py --model=fcn32s --prefix=VGG_FC_ILSVRC_16_layers \
        --epoch=74 --init-type=vgg16
diff --git a/example/fcn-xs/solver.py b/example/fcn-xs/solver.py
index dd78e73..cf7298b 100644
--- a/example/fcn-xs/solver.py
+++ b/example/fcn-xs/solver.py
@@ -1,126 +1,143 @@
-# pylint: skip-file
-import numpy as np
-import mxnet as mx
-import time
-import logging
-from collections import namedtuple
-from mxnet import optimizer as opt
-from mxnet.optimizer import get_updater
-from mxnet import metric
-
-# Parameter to pass to batch_end_callback
-BatchEndParam = namedtuple('BatchEndParams', ['epoch', 'nbatch', 'eval_metric'])
-class Solver(object):
-    def __init__(self, symbol, ctx=None,
-                 begin_epoch=0, num_epoch=None,
-                 arg_params=None, aux_params=None,
-                 optimizer='sgd', **kwargs):
-        self.symbol = symbol
-        if ctx is None:
-            ctx = mx.cpu()
-        self.ctx = ctx
-        self.begin_epoch = begin_epoch
-        self.num_epoch = num_epoch
-        self.arg_params = arg_params
-        self.aux_params = aux_params
-        self.optimizer = optimizer
-        self.kwargs = kwargs.copy()
-
-    def fit(self, train_data, eval_data=None,
-            eval_metric='acc',
-            grad_req='write',
-            epoch_end_callback=None,
-            batch_end_callback=None,
-            kvstore='local',
-            logger=None):
-        if logger is None:
-            logger = logging
-        logging.info('Start training with %s', str(self.ctx))
-        arg_shapes, out_shapes, aux_shapes = self.symbol.infer_shape(data=train_data.provide_data[0][1])
-        arg_names = self.symbol.list_arguments()
-        if grad_req != 'null':
-            self.grad_params = {}
-            for name, shape in zip(arg_names, arg_shapes):
-                if not (name.endswith('data') or name.endswith('label')):
-                    self.grad_params[name] = mx.nd.zeros(shape, self.ctx)
-        else:
-            self.grad_params = None
-        aux_names = self.symbol.list_auxiliary_states()
-        self.aux_params = {k : nd.zeros(s) for k, s in zip(aux_names, aux_shapes)}
-        data_name = train_data.data_name
-        label_name = train_data.label_name
-        input_names = [data_name, label_name]
-        self.optimizer = opt.create(self.optimizer, rescale_grad=(1.0/train_data.get_batch_size()), **(self.kwargs))
-        self.updater = get_updater(self.optimizer)
-        eval_metric = metric.create(eval_metric)
-        # begin training
-        for epoch in range(self.begin_epoch, self.num_epoch):
-            nbatch = 0
-            train_data.reset()
-            eval_metric.reset()
-            for data in train_data:
-                nbatch += 1
-                label_shape = data[label_name].shape
-                self.arg_params[data_name] = mx.nd.array(data[data_name], self.ctx)
-                self.arg_params[label_name] = mx.nd.array(data[label_name].reshape(label_shape[0], \
-                    label_shape[1]*label_shape[2]), self.ctx)
-                output_names = self.symbol.list_outputs()
-                self.exector = self.symbol.bind(self.ctx, self.arg_params,
-                                args_grad=self.grad_params,
-                                grad_req=grad_req,
-                                aux_states=self.aux_params)
-                assert len(self.symbol.list_arguments()) == len(self.exector.grad_arrays)
-                update_dict = {name: nd for name, nd in zip(self.symbol.list_arguments(), \
-                    self.exector.grad_arrays) if nd is not None}
-                output_dict = {}
-                output_buff = {}
-                for key, arr in zip(self.symbol.list_outputs(), self.exector.outputs):
-                    output_dict[key] = arr
-                    output_buff[key] = mx.nd.empty(arr.shape, ctx=mx.cpu())
-                self.exector.forward(is_train=True)
-                for key in output_dict:
-                    output_dict[key].copyto(output_buff[key])
-                self.exector.backward()
-                for key, arr in update_dict.items():
-                    if key != "bigscore_weight":
-                        self.updater(key, arr, self.arg_params[key])
-                pred_shape = self.exector.outputs[0].shape
-                label = mx.nd.array(data[label_name].reshape(label_shape[0], label_shape[1]*label_shape[2]))
-                pred = mx.nd.array(output_buff["softmax_output"].asnumpy().reshape(pred_shape[0], \
-                    pred_shape[1], pred_shape[2]*pred_shape[3]))
-                eval_metric.update([label], [pred])
-                self.exector.outputs[0].wait_to_read()
-                batch_end_params = BatchEndParam(epoch=epoch, nbatch=nbatch, eval_metric=eval_metric)
-                batch_end_callback(batch_end_params)
-            if epoch_end_callback is not None:
-                epoch_end_callback(epoch, self.symbol, self.arg_params, self.aux_params)
-            name, value = eval_metric.get()
-            logger.info("                     --->Epoch[%d] Train-%s=%f", epoch, name, value)
-            # evaluation
-            if eval_data:
-                logger.info(" in eval process...")
-                nbatch = 0
-                eval_data.reset()
-                eval_metric.reset()
-                for data in eval_data:
-                    nbatch += 1
-                    label_shape = data[label_name].shape
-                    self.arg_params[data_name] = mx.nd.array(data[data_name], self.ctx)
-                    self.arg_params[label_name] = mx.nd.array(data[label_name].reshape(label_shape[0], \
-                        label_shape[1]*label_shape[2]), self.ctx)
-                    exector = self.symbol.bind(self.ctx, self.arg_params,
-                                    args_grad=self.grad_params,
-                                    grad_req=grad_req,
-                                    aux_states=self.aux_params)
-                    cpu_output_array = mx.nd.zeros(exector.outputs[0].shape)
-                    exector.forward(is_train=False)
-                    exector.outputs[0].copyto(cpu_output_array)
-                    pred_shape = cpu_output_array.shape
-                    label = mx.nd.array(data[label_name].reshape(label_shape[0], \
-                        label_shape[1]*label_shape[2]))
-                    pred = mx.nd.array(cpu_output_array.asnumpy().reshape(pred_shape[0], \
-                        pred_shape[1], pred_shape[2]*pred_shape[3]))
-                    eval_metric.update([label], [pred])
-                    exector.outputs[0].wait_to_read()
-            name, value = eval_metric.get()
-            logger.info('batch[%d] Validation-%s=%f', nbatch, name, value)
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# pylint: skip-file
+import numpy as np
+import mxnet as mx
+import time
+import logging
+from collections import namedtuple
+from mxnet import optimizer as opt
+from mxnet.optimizer import get_updater
+from mxnet import metric
+
+# Parameter to pass to batch_end_callback
+BatchEndParam = namedtuple('BatchEndParams', ['epoch', 'nbatch', 'eval_metric'])
+class Solver(object):
+    def __init__(self, symbol, ctx=None,
+                 begin_epoch=0, num_epoch=None,
+                 arg_params=None, aux_params=None,
+                 optimizer='sgd', **kwargs):
+        self.symbol = symbol
+        if ctx is None:
+            ctx = mx.cpu()
+        self.ctx = ctx
+        self.begin_epoch = begin_epoch
+        self.num_epoch = num_epoch
+        self.arg_params = arg_params
+        self.aux_params = aux_params
+        self.optimizer = optimizer
+        self.kwargs = kwargs.copy()
+
+    def fit(self, train_data, eval_data=None,
+            eval_metric='acc',
+            grad_req='write',
+            epoch_end_callback=None,
+            batch_end_callback=None,
+            kvstore='local',
+            logger=None):
+        if logger is None:
+            logger = logging
+        logging.info('Start training with %s', str(self.ctx))
+        arg_shapes, out_shapes, aux_shapes = self.symbol.infer_shape(data=train_data.provide_data[0][1])
+        arg_names = self.symbol.list_arguments()
+        if grad_req != 'null':
+            self.grad_params = {}
+            for name, shape in zip(arg_names, arg_shapes):
+                if not (name.endswith('data') or name.endswith('label')):
+                    self.grad_params[name] = mx.nd.zeros(shape, self.ctx)
+        else:
+            self.grad_params = None
+        aux_names = self.symbol.list_auxiliary_states()
+        self.aux_params = {k : nd.zeros(s) for k, s in zip(aux_names, aux_shapes)}
+        data_name = train_data.data_name
+        label_name = train_data.label_name
+        input_names = [data_name, label_name]
+        self.optimizer = opt.create(self.optimizer, rescale_grad=(1.0/train_data.get_batch_size()), **(self.kwargs))
+        self.updater = get_updater(self.optimizer)
+        eval_metric = metric.create(eval_metric)
+        # begin training
+        for epoch in range(self.begin_epoch, self.num_epoch):
+            nbatch = 0
+            train_data.reset()
+            eval_metric.reset()
+            for data in train_data:
+                nbatch += 1
+                label_shape = data[label_name].shape
+                self.arg_params[data_name] = mx.nd.array(data[data_name], self.ctx)
+                self.arg_params[label_name] = mx.nd.array(data[label_name].reshape(label_shape[0], \
+                    label_shape[1]*label_shape[2]), self.ctx)
+                output_names = self.symbol.list_outputs()
+                self.exector = self.symbol.bind(self.ctx, self.arg_params,
+                                args_grad=self.grad_params,
+                                grad_req=grad_req,
+                                aux_states=self.aux_params)
+                assert len(self.symbol.list_arguments()) == len(self.exector.grad_arrays)
+                update_dict = {name: nd for name, nd in zip(self.symbol.list_arguments(), \
+                    self.exector.grad_arrays) if nd is not None}
+                output_dict = {}
+                output_buff = {}
+                for key, arr in zip(self.symbol.list_outputs(), self.exector.outputs):
+                    output_dict[key] = arr
+                    output_buff[key] = mx.nd.empty(arr.shape, ctx=mx.cpu())
+                self.exector.forward(is_train=True)
+                for key in output_dict:
+                    output_dict[key].copyto(output_buff[key])
+                self.exector.backward()
+                for key, arr in update_dict.items():
+                    if key != "bigscore_weight":
+                        self.updater(key, arr, self.arg_params[key])
+                pred_shape = self.exector.outputs[0].shape
+                label = mx.nd.array(data[label_name].reshape(label_shape[0], label_shape[1]*label_shape[2]))
+                pred = mx.nd.array(output_buff["softmax_output"].asnumpy().reshape(pred_shape[0], \
+                    pred_shape[1], pred_shape[2]*pred_shape[3]))
+                eval_metric.update([label], [pred])
+                self.exector.outputs[0].wait_to_read()
+                batch_end_params = BatchEndParam(epoch=epoch, nbatch=nbatch, eval_metric=eval_metric)
+                batch_end_callback(batch_end_params)
+            if epoch_end_callback is not None:
+                epoch_end_callback(epoch, self.symbol, self.arg_params, self.aux_params)
+            name, value = eval_metric.get()
+            logger.info("                     --->Epoch[%d] Train-%s=%f", epoch, name, value)
+            # evaluation
+            if eval_data:
+                logger.info(" in eval process...")
+                nbatch = 0
+                eval_data.reset()
+                eval_metric.reset()
+                for data in eval_data:
+                    nbatch += 1
+                    label_shape = data[label_name].shape
+                    self.arg_params[data_name] = mx.nd.array(data[data_name], self.ctx)
+                    self.arg_params[label_name] = mx.nd.array(data[label_name].reshape(label_shape[0], \
+                        label_shape[1]*label_shape[2]), self.ctx)
+                    exector = self.symbol.bind(self.ctx, self.arg_params,
+                                    args_grad=self.grad_params,
+                                    grad_req=grad_req,
+                                    aux_states=self.aux_params)
+                    cpu_output_array = mx.nd.zeros(exector.outputs[0].shape)
+                    exector.forward(is_train=False)
+                    exector.outputs[0].copyto(cpu_output_array)
+                    pred_shape = cpu_output_array.shape
+                    label = mx.nd.array(data[label_name].reshape(label_shape[0], \
+                        label_shape[1]*label_shape[2]))
+                    pred = mx.nd.array(cpu_output_array.asnumpy().reshape(pred_shape[0], \
+                        pred_shape[1], pred_shape[2]*pred_shape[3]))
+                    eval_metric.update([label], [pred])
+                    exector.outputs[0].wait_to_read()
+            name, value = eval_metric.get()
+            logger.info('batch[%d] Validation-%s=%f', nbatch, name, value)
diff --git a/example/fcn-xs/symbol_fcnxs.py b/example/fcn-xs/symbol_fcnxs.py
index a9c4f3b..56888fc 100644
--- a/example/fcn-xs/symbol_fcnxs.py
+++ b/example/fcn-xs/symbol_fcnxs.py
@@ -1,189 +1,206 @@
-# pylint: skip-file
-import mxnet as mx
-
-def filter_map(kernel=1, stride=1, pad=0):
-    return (stride, (kernel-stride)/2-pad)
-
-def compose_fp(fp_first, fp_second):
-    return (fp_first[0]*fp_second[0], fp_first[0]*fp_second[1]+fp_first[1])
-
-def compose_fp_list(fp_list):
-    fp_out = (1.0, 0.0)
-    for fp in fp_list:
-        fp_out = compose_fp(fp_out, fp)
-    return fp_out
-
-def inv_fp(fp_in):
-    return (1.0/fp_in[0], -1.0*fp_in[1]/fp_in[0])
-
-def offset():
-    conv1_1_fp = filter_map(kernel=3, pad=100)
-    conv1_2_fp = conv2_1_fp = conv2_2_fp = conv3_1_fp = conv3_2_fp = conv3_3_fp \
-               = conv4_1_fp = conv4_2_fp = conv4_3_fp = conv5_1_fp = conv5_2_fp \
-               = conv5_3_fp = filter_map(kernel=3, pad=1)
-    pool1_fp = pool2_fp = pool3_fp = pool4_fp = pool5_fp = filter_map(kernel=2, stride=2)
-    fc6_fp = filter_map(kernel=7)
-    fc7_fp = score_fp = score_pool4_fp = score_pool3_fp = filter_map()
-    # for fcn-32s
-    fcn32s_upscore_fp = inv_fp(filter_map(kernel=64, stride=32))
-    fcn32s_upscore_list = [conv1_1_fp, conv1_2_fp, pool1_fp, conv2_1_fp, conv2_2_fp,
-                           pool2_fp, conv3_1_fp, conv3_2_fp, conv3_3_fp, pool3_fp,
-                           conv4_1_fp, conv4_2_fp, conv4_3_fp, pool4_fp, conv5_1_fp,
-                           conv5_2_fp, conv5_3_fp, pool5_fp, fc6_fp, fc7_fp, score_fp,
-                           fcn32s_upscore_fp]
-    crop = {}
-    crop["fcn32s_upscore"] = (-int(round(compose_fp_list(fcn32s_upscore_list)[1])),
-                              -int(round(compose_fp_list(fcn32s_upscore_list)[1])))
-    # for fcn-16s
-    score2_fp = inv_fp(filter_map(kernel=4, stride=2))
-    fcn16s_upscore_fp = inv_fp(filter_map(kernel=32, stride=16))
-    score_pool4c_fp_list = [inv_fp(score2_fp), inv_fp(score_fp), inv_fp(fc7_fp), inv_fp(fc6_fp),
-                            inv_fp(pool5_fp), inv_fp(conv5_3_fp), inv_fp(conv5_2_fp),
-                            inv_fp(conv5_1_fp), score_pool4_fp]
-    crop["score_pool4c"] = (-int(round(compose_fp_list(score_pool4c_fp_list)[1])),
-                            -int(round(compose_fp_list(score_pool4c_fp_list)[1])))
-    fcn16s_upscore_list =  [conv1_1_fp, conv1_2_fp, pool1_fp, conv2_1_fp, conv2_2_fp,
-                            pool2_fp, conv3_1_fp, conv3_2_fp, conv3_3_fp, pool3_fp,
-                            conv4_1_fp, conv4_2_fp, conv4_3_fp, pool4_fp, score_pool4_fp,
-                            inv_fp((1, -crop["score_pool4c"][0])), fcn16s_upscore_fp]
-    crop["fcn16s_upscore"] = (-int(round(compose_fp_list(fcn16s_upscore_list)[1])),
-                              -int(round(compose_fp_list(fcn16s_upscore_list)[1])))
-    # for fcn-8s
-    score4_fp = inv_fp(filter_map(kernel=4, stride=2))
-    fcn8s_upscore_fp = inv_fp(filter_map(kernel=16, stride=8))
-    score_pool3c_fp_list = [inv_fp(score4_fp), (1, -crop["score_pool4c"][0]), inv_fp(score_pool4_fp),
-                            inv_fp(pool4_fp), inv_fp(conv4_3_fp), inv_fp(conv4_2_fp),
-                            inv_fp(conv4_1_fp), score_pool3_fp, score_pool3_fp]
-    crop["score_pool3c"] = (-int(round(compose_fp_list(score_pool3c_fp_list)[1])),
-                            -int(round(compose_fp_list(score_pool3c_fp_list)[1])))
-    fcn8s_upscore_list =  [conv1_1_fp, conv1_2_fp, pool1_fp, conv2_1_fp, conv2_2_fp, pool2_fp,
-                           conv3_1_fp, conv3_2_fp, conv3_3_fp, pool3_fp, score_pool3_fp,
-                           inv_fp((1, -crop["score_pool3c"][0])), fcn8s_upscore_fp]
-    crop["fcn8s_upscore"] = (-int(round(compose_fp_list(fcn8s_upscore_list)[1])),
-                             -int(round(compose_fp_list(fcn8s_upscore_list)[1])))
-    return crop
-
-def vgg16_pool3(input, workspace_default=1024):
-    # group 1
-    conv1_1 = mx.symbol.Convolution(data=input, kernel=(3, 3), pad=(100, 100), num_filter=64,
-                workspace=workspace_default, name="conv1_1")
-    relu1_1 = mx.symbol.Activation(data=conv1_1, act_type="relu", name="relu1_1")
-    conv1_2 = mx.symbol.Convolution(data=relu1_1, kernel=(3, 3), pad=(1, 1), num_filter=64,
-                workspace=workspace_default, name="conv1_2")
-    relu1_2 = mx.symbol.Activation(data=conv1_2, act_type="relu", name="relu1_2")
-    pool1 = mx.symbol.Pooling(data=relu1_2, pool_type="max", kernel=(2, 2), stride=(2,2), name="pool1")
-    # group 2
-    conv2_1 = mx.symbol.Convolution(data=pool1, kernel=(3, 3), pad=(1, 1), num_filter=128,
-                workspace=workspace_default, name="conv2_1")
-    relu2_1 = mx.symbol.Activation(data=conv2_1, act_type="relu", name="relu2_1")
-    conv2_2 = mx.symbol.Convolution(data=relu2_1, kernel=(3, 3), pad=(1, 1), num_filter=128,
-                workspace=workspace_default, name="conv2_2")
-    relu2_2 = mx.symbol.Activation(data=conv2_2, act_type="relu", name="relu2_2")
-    pool2 = mx.symbol.Pooling(data=relu2_2, pool_type="max", kernel=(2, 2), stride=(2,2), name="pool2")
-    # group 3
-    conv3_1 = mx.symbol.Convolution(data=pool2, kernel=(3, 3), pad=(1, 1), num_filter=256,
-                workspace=workspace_default, name="conv3_1")
-    relu3_1 = mx.symbol.Activation(data=conv3_1, act_type="relu", name="relu3_1")
-    conv3_2 = mx.symbol.Convolution(data=relu3_1, kernel=(3, 3), pad=(1, 1), num_filter=256,
-                workspace=workspace_default, name="conv3_2")
-    relu3_2 = mx.symbol.Activation(data=conv3_2, act_type="relu", name="relu3_2")
-    conv3_3 = mx.symbol.Convolution(data=relu3_2, kernel=(3, 3), pad=(1, 1), num_filter=256,
-                workspace=workspace_default, name="conv3_3")
-    relu3_3 = mx.symbol.Activation(data=conv3_3, act_type="relu", name="relu3_3")
-    pool3 = mx.symbol.Pooling(data=relu3_3, pool_type="max", kernel=(2, 2), stride=(2,2), name="pool3")
-    return pool3
-
-def vgg16_pool4(input, workspace_default=1024):
-    # group 4
-    conv4_1 = mx.symbol.Convolution(data=input, kernel=(3, 3), pad=(1, 1), num_filter=512,
-                workspace=workspace_default, name="conv4_1")
-    relu4_1 = mx.symbol.Activation(data=conv4_1, act_type="relu", name="relu4_1")
-    conv4_2 = mx.symbol.Convolution(data=relu4_1, kernel=(3, 3), pad=(1, 1), num_filter=512,
-                workspace=workspace_default, name="conv4_2")
-    relu4_2 = mx.symbol.Activation(data=conv4_2, act_type="relu", name="relu4_2")
-    conv4_3 = mx.symbol.Convolution(data=relu4_2, kernel=(3, 3), pad=(1, 1), num_filter=512,
-                workspace=workspace_default, name="conv4_3")
-    relu4_3 = mx.symbol.Activation(data=conv4_3, act_type="relu", name="relu4_3")
-    pool4 = mx.symbol.Pooling(data=relu4_3, pool_type="max", kernel=(2, 2), stride=(2,2), name="pool4")
-    return pool4
-
-def vgg16_score(input, numclass, workspace_default=1024):
-    # group 5
-    conv5_1 = mx.symbol.Convolution(data=input, kernel=(3, 3), pad=(1, 1), num_filter=512,
-                workspace=workspace_default, name="conv5_1")
-    relu5_1 = mx.symbol.Activation(data=conv5_1, act_type="relu", name="relu5_1")
-    conv5_2 = mx.symbol.Convolution(data=relu5_1, kernel=(3, 3), pad=(1, 1), num_filter=512,
-                workspace=workspace_default, name="conv5_2")
-    relu5_2 = mx.symbol.Activation(data=conv5_2, act_type="relu", name="relu5_2")
-    conv5_3 = mx.symbol.Convolution(data=relu5_2, kernel=(3, 3), pad=(1, 1), num_filter=512,
-                workspace=workspace_default, name="conv5_3")
-    relu5_3 = mx.symbol.Activation(data=conv5_3, act_type="relu", name="relu5_3")
-    pool5 = mx.symbol.Pooling(data=relu5_3, pool_type="max", kernel=(2, 2), stride=(2,2), name="pool5")
-    # group 6
-    fc6 = mx.symbol.Convolution(data=pool5, kernel=(7, 7), num_filter=4096,
-                workspace=workspace_default, name="fc6")
-    relu6 = mx.symbol.Activation(data=fc6, act_type="relu", name="relu6")
-    drop6 = mx.symbol.Dropout(data=relu6, p=0.5, name="drop6")
-    # group 7
-    fc7 = mx.symbol.Convolution(data=drop6, kernel=(1, 1), num_filter=4096,
-                workspace=workspace_default, name="fc7")
-    relu7 = mx.symbol.Activation(data=fc7, act_type="relu", name="relu7")
-    drop7 = mx.symbol.Dropout(data=relu7, p=0.5, name="drop7")
-    # group 8
-    score = mx.symbol.Convolution(data=drop7, kernel=(1, 1), num_filter=numclass,
-                workspace=workspace_default, name="score")
-    return score
-
-def fcnxs_score(input, crop, offset, kernel=(64,64), stride=(32,32), numclass=21, workspace_default=1024):
-    # score out
-    bigscore = mx.symbol.Deconvolution(data=input, kernel=kernel, stride=stride, adj=(stride[0]-1, stride[1]-1),
-               num_filter=numclass, workspace=workspace_default, name="bigscore")
-    upscore = mx.symbol.Crop(*[bigscore, crop], offset=offset, name="upscore")
-    # upscore = mx.symbol.Crop(*[input, crop], offset=offset, name="upscore")
-    softmax = mx.symbol.SoftmaxOutput(data=upscore, multi_output=True, use_ignore=True, ignore_label=255, name="softmax")
-    return softmax
-
-def get_fcn32s_symbol(numclass=21, workspace_default=1024):
-    data = mx.symbol.Variable(name="data")
-    pool3 = vgg16_pool3(data, workspace_default)
-    pool4 = vgg16_pool4(pool3, workspace_default)
-    score = vgg16_score(pool4, numclass, workspace_default)
-    softmax = fcnxs_score(score, data, offset()["fcn32s_upscore"], (64,64), (32,32), numclass, workspace_default)
-    return softmax
-
-def get_fcn16s_symbol(numclass=21, workspace_default=1024):
-    data = mx.symbol.Variable(name="data")
-    pool3 = vgg16_pool3(data, workspace_default)
-    pool4 = vgg16_pool4(pool3, workspace_default)
-    score = vgg16_score(pool4, numclass, workspace_default)
-    # score 2X
-    score2 = mx.symbol.Deconvolution(data=score, kernel=(4, 4), stride=(2, 2), num_filter=numclass,
-                 adj=(1, 1), workspace=workspace_default, name="score2")  # 2X
-    score_pool4 = mx.symbol.Convolution(data=pool4, kernel=(1, 1), num_filter=numclass,
-                 workspace=workspace_default, name="score_pool4")
-    score_pool4c = mx.symbol.Crop(*[score_pool4, score2], offset=offset()["score_pool4c"], name="score_pool4c")
-    score_fused = score2 + score_pool4c
-    softmax = fcnxs_score(score_fused, data, offset()["fcn16s_upscore"], (32, 32), (16, 16), numclass, workspace_default)
-    return softmax
-
-def get_fcn8s_symbol(numclass=21, workspace_default=1024):
-    data = mx.symbol.Variable(name="data")
-    pool3 = vgg16_pool3(data, workspace_default)
-    pool4 = vgg16_pool4(pool3, workspace_default)
-    score = vgg16_score(pool4, numclass, workspace_default)
-    # score 2X
-    score2 = mx.symbol.Deconvolution(data=score, kernel=(4, 4), stride=(2, 2),num_filter=numclass,
-                adj=(1, 1), workspace=workspace_default, name="score2")  # 2X
-    score_pool4 = mx.symbol.Convolution(data=pool4, kernel=(1, 1), num_filter=numclass,
-                workspace=workspace_default, name="score_pool4")
-    score_pool4c = mx.symbol.Crop(*[score_pool4, score2], offset=offset()["score_pool4c"], name="score_pool4c")
-    score_fused = score2 + score_pool4c
-    # score 4X
-    score4 = mx.symbol.Deconvolution(data=score_fused, kernel=(4, 4), stride=(2, 2),num_filter=numclass,
-                adj=(1, 1), workspace=workspace_default, name="score4") # 4X
-    score_pool3 = mx.symbol.Convolution(data=pool3, kernel=(1, 1), num_filter=numclass,
-                workspace=workspace_default, name="score_pool3")
-    score_pool3c = mx.symbol.Crop(*[score_pool3, score4], offset=offset()["score_pool3c"], name="score_pool3c")
-    score_final = score4 + score_pool3c
-    softmax = fcnxs_score(score_final, data, offset()["fcn8s_upscore"], (16, 16), (8, 8), numclass, workspace_default)
-    return softmax
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# pylint: skip-file
+import mxnet as mx
+
+def filter_map(kernel=1, stride=1, pad=0):
+    return (stride, (kernel-stride)/2-pad)
+
+def compose_fp(fp_first, fp_second):
+    return (fp_first[0]*fp_second[0], fp_first[0]*fp_second[1]+fp_first[1])
+
+def compose_fp_list(fp_list):
+    fp_out = (1.0, 0.0)
+    for fp in fp_list:
+        fp_out = compose_fp(fp_out, fp)
+    return fp_out
+
+def inv_fp(fp_in):
+    return (1.0/fp_in[0], -1.0*fp_in[1]/fp_in[0])
+
+def offset():
+    conv1_1_fp = filter_map(kernel=3, pad=100)
+    conv1_2_fp = conv2_1_fp = conv2_2_fp = conv3_1_fp = conv3_2_fp = conv3_3_fp \
+               = conv4_1_fp = conv4_2_fp = conv4_3_fp = conv5_1_fp = conv5_2_fp \
+               = conv5_3_fp = filter_map(kernel=3, pad=1)
+    pool1_fp = pool2_fp = pool3_fp = pool4_fp = pool5_fp = filter_map(kernel=2, stride=2)
+    fc6_fp = filter_map(kernel=7)
+    fc7_fp = score_fp = score_pool4_fp = score_pool3_fp = filter_map()
+    # for fcn-32s
+    fcn32s_upscore_fp = inv_fp(filter_map(kernel=64, stride=32))
+    fcn32s_upscore_list = [conv1_1_fp, conv1_2_fp, pool1_fp, conv2_1_fp, conv2_2_fp,
+                           pool2_fp, conv3_1_fp, conv3_2_fp, conv3_3_fp, pool3_fp,
+                           conv4_1_fp, conv4_2_fp, conv4_3_fp, pool4_fp, conv5_1_fp,
+                           conv5_2_fp, conv5_3_fp, pool5_fp, fc6_fp, fc7_fp, score_fp,
+                           fcn32s_upscore_fp]
+    crop = {}
+    crop["fcn32s_upscore"] = (-int(round(compose_fp_list(fcn32s_upscore_list)[1])),
+                              -int(round(compose_fp_list(fcn32s_upscore_list)[1])))
+    # for fcn-16s
+    score2_fp = inv_fp(filter_map(kernel=4, stride=2))
+    fcn16s_upscore_fp = inv_fp(filter_map(kernel=32, stride=16))
+    score_pool4c_fp_list = [inv_fp(score2_fp), inv_fp(score_fp), inv_fp(fc7_fp), inv_fp(fc6_fp),
+                            inv_fp(pool5_fp), inv_fp(conv5_3_fp), inv_fp(conv5_2_fp),
+                            inv_fp(conv5_1_fp), score_pool4_fp]
+    crop["score_pool4c"] = (-int(round(compose_fp_list(score_pool4c_fp_list)[1])),
+                            -int(round(compose_fp_list(score_pool4c_fp_list)[1])))
+    fcn16s_upscore_list =  [conv1_1_fp, conv1_2_fp, pool1_fp, conv2_1_fp, conv2_2_fp,
+                            pool2_fp, conv3_1_fp, conv3_2_fp, conv3_3_fp, pool3_fp,
+                            conv4_1_fp, conv4_2_fp, conv4_3_fp, pool4_fp, score_pool4_fp,
+                            inv_fp((1, -crop["score_pool4c"][0])), fcn16s_upscore_fp]
+    crop["fcn16s_upscore"] = (-int(round(compose_fp_list(fcn16s_upscore_list)[1])),
+                              -int(round(compose_fp_list(fcn16s_upscore_list)[1])))
+    # for fcn-8s
+    score4_fp = inv_fp(filter_map(kernel=4, stride=2))
+    fcn8s_upscore_fp = inv_fp(filter_map(kernel=16, stride=8))
+    score_pool3c_fp_list = [inv_fp(score4_fp), (1, -crop["score_pool4c"][0]), inv_fp(score_pool4_fp),
+                            inv_fp(pool4_fp), inv_fp(conv4_3_fp), inv_fp(conv4_2_fp),
+                            inv_fp(conv4_1_fp), score_pool3_fp, score_pool3_fp]
+    crop["score_pool3c"] = (-int(round(compose_fp_list(score_pool3c_fp_list)[1])),
+                            -int(round(compose_fp_list(score_pool3c_fp_list)[1])))
+    fcn8s_upscore_list =  [conv1_1_fp, conv1_2_fp, pool1_fp, conv2_1_fp, conv2_2_fp, pool2_fp,
+                           conv3_1_fp, conv3_2_fp, conv3_3_fp, pool3_fp, score_pool3_fp,
+                           inv_fp((1, -crop["score_pool3c"][0])), fcn8s_upscore_fp]
+    crop["fcn8s_upscore"] = (-int(round(compose_fp_list(fcn8s_upscore_list)[1])),
+                             -int(round(compose_fp_list(fcn8s_upscore_list)[1])))
+    return crop
+
+def vgg16_pool3(input, workspace_default=1024):
+    # group 1
+    conv1_1 = mx.symbol.Convolution(data=input, kernel=(3, 3), pad=(100, 100), num_filter=64,
+                workspace=workspace_default, name="conv1_1")
+    relu1_1 = mx.symbol.Activation(data=conv1_1, act_type="relu", name="relu1_1")
+    conv1_2 = mx.symbol.Convolution(data=relu1_1, kernel=(3, 3), pad=(1, 1), num_filter=64,
+                workspace=workspace_default, name="conv1_2")
+    relu1_2 = mx.symbol.Activation(data=conv1_2, act_type="relu", name="relu1_2")
+    pool1 = mx.symbol.Pooling(data=relu1_2, pool_type="max", kernel=(2, 2), stride=(2,2), name="pool1")
+    # group 2
+    conv2_1 = mx.symbol.Convolution(data=pool1, kernel=(3, 3), pad=(1, 1), num_filter=128,
+                workspace=workspace_default, name="conv2_1")
+    relu2_1 = mx.symbol.Activation(data=conv2_1, act_type="relu", name="relu2_1")
+    conv2_2 = mx.symbol.Convolution(data=relu2_1, kernel=(3, 3), pad=(1, 1), num_filter=128,
+                workspace=workspace_default, name="conv2_2")
+    relu2_2 = mx.symbol.Activation(data=conv2_2, act_type="relu", name="relu2_2")
+    pool2 = mx.symbol.Pooling(data=relu2_2, pool_type="max", kernel=(2, 2), stride=(2,2), name="pool2")
+    # group 3
+    conv3_1 = mx.symbol.Convolution(data=pool2, kernel=(3, 3), pad=(1, 1), num_filter=256,
+                workspace=workspace_default, name="conv3_1")
+    relu3_1 = mx.symbol.Activation(data=conv3_1, act_type="relu", name="relu3_1")
+    conv3_2 = mx.symbol.Convolution(data=relu3_1, kernel=(3, 3), pad=(1, 1), num_filter=256,
+                workspace=workspace_default, name="conv3_2")
+    relu3_2 = mx.symbol.Activation(data=conv3_2, act_type="relu", name="relu3_2")
+    conv3_3 = mx.symbol.Convolution(data=relu3_2, kernel=(3, 3), pad=(1, 1), num_filter=256,
+                workspace=workspace_default, name="conv3_3")
+    relu3_3 = mx.symbol.Activation(data=conv3_3, act_type="relu", name="relu3_3")
+    pool3 = mx.symbol.Pooling(data=relu3_3, pool_type="max", kernel=(2, 2), stride=(2,2), name="pool3")
+    return pool3
+
+def vgg16_pool4(input, workspace_default=1024):
+    # group 4
+    conv4_1 = mx.symbol.Convolution(data=input, kernel=(3, 3), pad=(1, 1), num_filter=512,
+                workspace=workspace_default, name="conv4_1")
+    relu4_1 = mx.symbol.Activation(data=conv4_1, act_type="relu", name="relu4_1")
+    conv4_2 = mx.symbol.Convolution(data=relu4_1, kernel=(3, 3), pad=(1, 1), num_filter=512,
+                workspace=workspace_default, name="conv4_2")
+    relu4_2 = mx.symbol.Activation(data=conv4_2, act_type="relu", name="relu4_2")
+    conv4_3 = mx.symbol.Convolution(data=relu4_2, kernel=(3, 3), pad=(1, 1), num_filter=512,
+                workspace=workspace_default, name="conv4_3")
+    relu4_3 = mx.symbol.Activation(data=conv4_3, act_type="relu", name="relu4_3")
+    pool4 = mx.symbol.Pooling(data=relu4_3, pool_type="max", kernel=(2, 2), stride=(2,2), name="pool4")
+    return pool4
+
+def vgg16_score(input, numclass, workspace_default=1024):
+    # group 5
+    conv5_1 = mx.symbol.Convolution(data=input, kernel=(3, 3), pad=(1, 1), num_filter=512,
+                workspace=workspace_default, name="conv5_1")
+    relu5_1 = mx.symbol.Activation(data=conv5_1, act_type="relu", name="relu5_1")
+    conv5_2 = mx.symbol.Convolution(data=relu5_1, kernel=(3, 3), pad=(1, 1), num_filter=512,
+                workspace=workspace_default, name="conv5_2")
+    relu5_2 = mx.symbol.Activation(data=conv5_2, act_type="relu", name="relu5_2")
+    conv5_3 = mx.symbol.Convolution(data=relu5_2, kernel=(3, 3), pad=(1, 1), num_filter=512,
+                workspace=workspace_default, name="conv5_3")
+    relu5_3 = mx.symbol.Activation(data=conv5_3, act_type="relu", name="relu5_3")
+    pool5 = mx.symbol.Pooling(data=relu5_3, pool_type="max", kernel=(2, 2), stride=(2,2), name="pool5")
+    # group 6
+    fc6 = mx.symbol.Convolution(data=pool5, kernel=(7, 7), num_filter=4096,
+                workspace=workspace_default, name="fc6")
+    relu6 = mx.symbol.Activation(data=fc6, act_type="relu", name="relu6")
+    drop6 = mx.symbol.Dropout(data=relu6, p=0.5, name="drop6")
+    # group 7
+    fc7 = mx.symbol.Convolution(data=drop6, kernel=(1, 1), num_filter=4096,
+                workspace=workspace_default, name="fc7")
+    relu7 = mx.symbol.Activation(data=fc7, act_type="relu", name="relu7")
+    drop7 = mx.symbol.Dropout(data=relu7, p=0.5, name="drop7")
+    # group 8
+    score = mx.symbol.Convolution(data=drop7, kernel=(1, 1), num_filter=numclass,
+                workspace=workspace_default, name="score")
+    return score
+
+def fcnxs_score(input, crop, offset, kernel=(64,64), stride=(32,32), numclass=21, workspace_default=1024):
+    # score out
+    bigscore = mx.symbol.Deconvolution(data=input, kernel=kernel, stride=stride, adj=(stride[0]-1, stride[1]-1),
+               num_filter=numclass, workspace=workspace_default, name="bigscore")
+    upscore = mx.symbol.Crop(*[bigscore, crop], offset=offset, name="upscore")
+    # upscore = mx.symbol.Crop(*[input, crop], offset=offset, name="upscore")
+    softmax = mx.symbol.SoftmaxOutput(data=upscore, multi_output=True, use_ignore=True, ignore_label=255, name="softmax")
+    return softmax
+
+def get_fcn32s_symbol(numclass=21, workspace_default=1024):
+    data = mx.symbol.Variable(name="data")
+    pool3 = vgg16_pool3(data, workspace_default)
+    pool4 = vgg16_pool4(pool3, workspace_default)
+    score = vgg16_score(pool4, numclass, workspace_default)
+    softmax = fcnxs_score(score, data, offset()["fcn32s_upscore"], (64,64), (32,32), numclass, workspace_default)
+    return softmax
+
+def get_fcn16s_symbol(numclass=21, workspace_default=1024):
+    data = mx.symbol.Variable(name="data")
+    pool3 = vgg16_pool3(data, workspace_default)
+    pool4 = vgg16_pool4(pool3, workspace_default)
+    score = vgg16_score(pool4, numclass, workspace_default)
+    # score 2X
+    score2 = mx.symbol.Deconvolution(data=score, kernel=(4, 4), stride=(2, 2), num_filter=numclass,
+                 adj=(1, 1), workspace=workspace_default, name="score2")  # 2X
+    score_pool4 = mx.symbol.Convolution(data=pool4, kernel=(1, 1), num_filter=numclass,
+                 workspace=workspace_default, name="score_pool4")
+    score_pool4c = mx.symbol.Crop(*[score_pool4, score2], offset=offset()["score_pool4c"], name="score_pool4c")
+    score_fused = score2 + score_pool4c
+    softmax = fcnxs_score(score_fused, data, offset()["fcn16s_upscore"], (32, 32), (16, 16), numclass, workspace_default)
+    return softmax
+
+def get_fcn8s_symbol(numclass=21, workspace_default=1024):
+    data = mx.symbol.Variable(name="data")
+    pool3 = vgg16_pool3(data, workspace_default)
+    pool4 = vgg16_pool4(pool3, workspace_default)
+    score = vgg16_score(pool4, numclass, workspace_default)
+    # score 2X
+    score2 = mx.symbol.Deconvolution(data=score, kernel=(4, 4), stride=(2, 2),num_filter=numclass,
+                adj=(1, 1), workspace=workspace_default, name="score2")  # 2X
+    score_pool4 = mx.symbol.Convolution(data=pool4, kernel=(1, 1), num_filter=numclass,
+                workspace=workspace_default, name="score_pool4")
+    score_pool4c = mx.symbol.Crop(*[score_pool4, score2], offset=offset()["score_pool4c"], name="score_pool4c")
+    score_fused = score2 + score_pool4c
+    # score 4X
+    score4 = mx.symbol.Deconvolution(data=score_fused, kernel=(4, 4), stride=(2, 2),num_filter=numclass,
+                adj=(1, 1), workspace=workspace_default, name="score4") # 4X
+    score_pool3 = mx.symbol.Convolution(data=pool3, kernel=(1, 1), num_filter=numclass,
+                workspace=workspace_default, name="score_pool3")
+    score_pool3c = mx.symbol.Crop(*[score_pool3, score4], offset=offset()["score_pool3c"], name="score_pool3c")
+    score_final = score4 + score_pool3c
+    softmax = fcnxs_score(score_final, data, offset()["fcn8s_upscore"], (16, 16), (8, 8), numclass, workspace_default)
+    return softmax
diff --git a/example/gan/dcgan.py b/example/gan/dcgan.py
index 5faff9a..981f4a4 100644
--- a/example/gan/dcgan.py
+++ b/example/gan/dcgan.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 import mxnet as mx
 import numpy as np
diff --git a/example/gluon/actor_critic.py b/example/gluon/actor_critic.py
index 9c475ce..6d4474b 100644
--- a/example/gluon/actor_critic.py
+++ b/example/gluon/actor_critic.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 
 import argparse
diff --git a/example/gluon/data.py b/example/gluon/data.py
index 80a50bd..c5ddd0a 100644
--- a/example/gluon/data.py
+++ b/example/gluon/data.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 """ data iterator for mnist """
 import os
diff --git a/example/gluon/dcgan.py b/example/gluon/dcgan.py
index f643b28..ed814df 100644
--- a/example/gluon/dcgan.py
+++ b/example/gluon/dcgan.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import matplotlib as mpl
 mpl.use('Agg')
 from matplotlib import pyplot as plt
diff --git a/example/gluon/image_classification.py b/example/gluon/image_classification.py
index bb1fa8d..3f84ff8 100644
--- a/example/gluon/image_classification.py
+++ b/example/gluon/image_classification.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import division
 
 import argparse, time
diff --git a/example/gluon/lstm_crf.py b/example/gluon/lstm_crf.py
index 8344789..40c8c2b 100644
--- a/example/gluon/lstm_crf.py
+++ b/example/gluon/lstm_crf.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 from mxnet import autograd as ag, ndarray as nd, gluon
 from mxnet.gluon import Block, nn, rnn
diff --git a/example/gluon/mnist.py b/example/gluon/mnist.py
index 9d567d5..198d7ca 100644
--- a/example/gluon/mnist.py
+++ b/example/gluon/mnist.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 from __future__ import print_function
 
diff --git a/example/gluon/super_resolution.py b/example/gluon/super_resolution.py
index d61fb16..acc5ffa 100644
--- a/example/gluon/super_resolution.py
+++ b/example/gluon/super_resolution.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 import argparse, tarfile
 import math
diff --git a/example/gluon/tree_lstm/dataset.py b/example/gluon/tree_lstm/dataset.py
index f9cfce5..4a836dd 100644
--- a/example/gluon/tree_lstm/dataset.py
+++ b/example/gluon/tree_lstm/dataset.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os
 import logging
 logging.basicConfig(level=logging.INFO)
diff --git a/example/gluon/tree_lstm/fetch_and_preprocess.sh b/example/gluon/tree_lstm/fetch_and_preprocess.sh
index dfbf82a..f372392 100755
--- a/example/gluon/tree_lstm/fetch_and_preprocess.sh
+++ b/example/gluon/tree_lstm/fetch_and_preprocess.sh
@@ -1,7 +1,25 @@
 #!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 set -e
 python2.7 scripts/download.py
 
 CLASSPATH="lib:lib/stanford-parser/stanford-parser.jar:lib/stanford-parser/stanford-parser-3.5.1-models.jar"
 javac -cp $CLASSPATH lib/*.java
-python2.7 scripts/preprocess-sick.py
\ No newline at end of file
+python2.7 scripts/preprocess-sick.py
diff --git a/example/gluon/tree_lstm/main.py b/example/gluon/tree_lstm/main.py
index 7903e67..f04a69f 100644
--- a/example/gluon/tree_lstm/main.py
+++ b/example/gluon/tree_lstm/main.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # This example is inspired by https://github.com/dasguptar/treelstm.pytorch
 import argparse, cPickle, math, os, random
 import logging
diff --git a/example/gluon/tree_lstm/scripts/download.py b/example/gluon/tree_lstm/scripts/download.py
index d38b3a4..7ea9303 100644
--- a/example/gluon/tree_lstm/scripts/download.py
+++ b/example/gluon/tree_lstm/scripts/download.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """
 Downloads the following:
 - Stanford parser
diff --git a/example/gluon/tree_lstm/scripts/preprocess-sick.py b/example/gluon/tree_lstm/scripts/preprocess-sick.py
index fd28b58..abbcc5f 100644
--- a/example/gluon/tree_lstm/scripts/preprocess-sick.py
+++ b/example/gluon/tree_lstm/scripts/preprocess-sick.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """
 Preprocessing script for SICK data.
 
diff --git a/example/gluon/tree_lstm/tree_lstm.py b/example/gluon/tree_lstm/tree_lstm.py
index ced7f7e..e96fe26 100644
--- a/example/gluon/tree_lstm/tree_lstm.py
+++ b/example/gluon/tree_lstm/tree_lstm.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 from mxnet.gluon import Block, nn
 from mxnet.gluon.parameter import Parameter
diff --git a/example/gluon/word_language_model/data.py b/example/gluon/word_language_model/data.py
index e3a283b..913963e 100644
--- a/example/gluon/word_language_model/data.py
+++ b/example/gluon/word_language_model/data.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os
 import numpy as np
 import mxnet as mx
diff --git a/example/gluon/word_language_model/get_ptb_data.sh b/example/gluon/word_language_model/get_ptb_data.sh
index 1ec009a..d2641cb 100755
--- a/example/gluon/word_language_model/get_ptb_data.sh
+++ b/example/gluon/word_language_model/get_ptb_data.sh
@@ -1,5 +1,23 @@
 #!/usr/bin/env bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 RNN_DIR=$(cd `dirname $0`; pwd)
 DATA_DIR="${RNN_DIR}/data/"
 
diff --git a/example/gluon/word_language_model/model.py b/example/gluon/word_language_model/model.py
index 91378ce..40e7926 100644
--- a/example/gluon/word_language_model/model.py
+++ b/example/gluon/word_language_model/model.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 from mxnet import gluon
 from mxnet.gluon import nn, rnn
diff --git a/example/gluon/word_language_model/train.py b/example/gluon/word_language_model/train.py
index 5b34c00..0b50499 100644
--- a/example/gluon/word_language_model/train.py
+++ b/example/gluon/word_language_model/train.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import argparse
 import time
 import math
diff --git a/example/image-classification/benchmark.py b/example/image-classification/benchmark.py
index 5b040f3..3096fae 100644
--- a/example/image-classification/benchmark.py
+++ b/example/image-classification/benchmark.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 import logging
 import argparse
diff --git a/example/image-classification/benchmark_score.py b/example/image-classification/benchmark_score.py
index f54b6ae..aeacffa 100644
--- a/example/image-classification/benchmark_score.py
+++ b/example/image-classification/benchmark_score.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """
 Benchmark the scoring performance on various CNNs
 """
diff --git a/example/image-classification/common/data.py b/example/image-classification/common/data.py
index fe27ec2..eb694a4 100755
--- a/example/image-classification/common/data.py
+++ b/example/image-classification/common/data.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 import random
 from mxnet.io import DataBatch, DataIter
diff --git a/example/image-classification/common/find_mxnet.py b/example/image-classification/common/find_mxnet.py
index 24dcaf8..2ce0713 100644
--- a/example/image-classification/common/find_mxnet.py
+++ b/example/image-classification/common/find_mxnet.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os, sys
 try:
     import mxnet as mx
diff --git a/example/image-classification/common/fit.py b/example/image-classification/common/fit.py
index 69baed1..73235fc 100755
--- a/example/image-classification/common/fit.py
+++ b/example/image-classification/common/fit.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 import logging
 import os
diff --git a/example/image-classification/common/modelzoo.py b/example/image-classification/common/modelzoo.py
index c2944cd..1fe14ca 100644
--- a/example/image-classification/common/modelzoo.py
+++ b/example/image-classification/common/modelzoo.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os
 from util import download_file
 
diff --git a/example/image-classification/common/util.py b/example/image-classification/common/util.py
index a25e218..5f70411 100644
--- a/example/image-classification/common/util.py
+++ b/example/image-classification/common/util.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import subprocess
 import os
 import errno
diff --git a/example/image-classification/data/caltech256.sh b/example/image-classification/data/caltech256.sh
index 3befdac..3fc329a 100755
--- a/example/image-classification/data/caltech256.sh
+++ b/example/image-classification/data/caltech256.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 # This file download the caltech 256 dataset
 # (http://www.vision.caltech.edu/Image_Datasets/Caltech256/), and split it into
 # the train and val rec files.
diff --git a/example/image-classification/data/imagenet1k-val.sh b/example/image-classification/data/imagenet1k-val.sh
index 51f8130..13cb551 100755
--- a/example/image-classification/data/imagenet1k-val.sh
+++ b/example/image-classification/data/imagenet1k-val.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 # This file download the imagnet-1k validation dataset and convert it into a rec
 # file. One need to provide the URL for the ILSVRC2012_img_val.tar, which can be
 # find at http://www.image-net.org/download-images
diff --git a/example/image-classification/fine-tune.py b/example/image-classification/fine-tune.py
index 5a2a04d..a5fb243 100644
--- a/example/image-classification/fine-tune.py
+++ b/example/image-classification/fine-tune.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os
 import argparse
 import logging
diff --git a/example/image-classification/predict-cpp/image-classification-predict.cc b/example/image-classification/predict-cpp/image-classification-predict.cc
index a8652c4..fb74ed9 100644
--- a/example/image-classification/predict-cpp/image-classification-predict.cc
+++ b/example/image-classification/predict-cpp/image-classification-predict.cc
@@ -1,3 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
  *  Copyright (c) 2015 by Xiao Liu, pertusa, caprice-j
  * \file image_classification-predict.cpp
@@ -196,7 +215,7 @@ int main(int argc, char* argv[]) {
     const mx_uint input_shape_indptr[2] = { 0, 4 };
     const mx_uint input_shape_data[4] = { 1,
                                         static_cast<mx_uint>(channels),
-                                        static_cast<mx_uint>(height), 
+                                        static_cast<mx_uint>(height),
                                         static_cast<mx_uint>(width)};
     PredictorHandle pred_hnd = 0;
 
diff --git a/example/image-classification/score.py b/example/image-classification/score.py
index d26dddd..f40e649 100644
--- a/example/image-classification/score.py
+++ b/example/image-classification/score.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import argparse
 from common import modelzoo, find_mxnet
 import mxnet as mx
diff --git a/example/image-classification/symbols/alexnet.py b/example/image-classification/symbols/alexnet.py
index e2b512b..f945b9f 100755
--- a/example/image-classification/symbols/alexnet.py
+++ b/example/image-classification/symbols/alexnet.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """
 Reference:
 
diff --git a/example/image-classification/symbols/googlenet.py b/example/image-classification/symbols/googlenet.py
index cc8c7ad..05f33da 100644
--- a/example/image-classification/symbols/googlenet.py
+++ b/example/image-classification/symbols/googlenet.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """References:
 
 Szegedy, Christian, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed, Dragomir
diff --git a/example/image-classification/symbols/inception-bn.py b/example/image-classification/symbols/inception-bn.py
index 7dae9ad..84934a5 100644
--- a/example/image-classification/symbols/inception-bn.py
+++ b/example/image-classification/symbols/inception-bn.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """
 
 Inception + BN, suitable for images with around 224 x 224
diff --git a/example/image-classification/symbols/inception-resnet-v2.py b/example/image-classification/symbols/inception-resnet-v2.py
index b2b0c60..5f31335 100644
--- a/example/image-classification/symbols/inception-resnet-v2.py
+++ b/example/image-classification/symbols/inception-resnet-v2.py
@@ -1,9 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """
-Contains the definition of the Inception Resnet V2 architecture.		
-As described in http://arxiv.org/abs/1602.07261.		
-Inception-v4, Inception-ResNet and the Impact of Residual Connections		
-on Learning		
-Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi		
+Contains the definition of the Inception Resnet V2 architecture.
+As described in http://arxiv.org/abs/1602.07261.
+Inception-v4, Inception-ResNet and the Impact of Residual Connections
+on Learning
+Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi
 """
 import mxnet as mx
 
diff --git a/example/image-classification/symbols/inception-v3.py b/example/image-classification/symbols/inception-v3.py
index 35562d6..5108579 100644
--- a/example/image-classification/symbols/inception-v3.py
+++ b/example/image-classification/symbols/inception-v3.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """
 Inception V3, suitable for images with around 299 x 299
 
diff --git a/example/image-classification/symbols/inception-v4.py b/example/image-classification/symbols/inception-v4.py
index eead5f7..2b4fe6f 100644
--- a/example/image-classification/symbols/inception-v4.py
+++ b/example/image-classification/symbols/inception-v4.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # -*- coding:utf-8 -*-
 __author__ = 'zhangshuai'
 modified_date = '16/7/5'
diff --git a/example/image-classification/symbols/lenet.py b/example/image-classification/symbols/lenet.py
index 6df0299..f2cc106 100644
--- a/example/image-classification/symbols/lenet.py
+++ b/example/image-classification/symbols/lenet.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """
 LeCun, Yann, Leon Bottou, Yoshua Bengio, and Patrick Haffner.
 Gradient-based learning applied to document recognition.
diff --git a/example/image-classification/symbols/mlp.py b/example/image-classification/symbols/mlp.py
index cc569bc..4b190b2 100644
--- a/example/image-classification/symbols/mlp.py
+++ b/example/image-classification/symbols/mlp.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """
 a simple multilayer perceptron
 """
diff --git a/example/image-classification/symbols/mobilenet.py b/example/image-classification/symbols/mobilenet.py
index 8ad584a..42b9636 100644
--- a/example/image-classification/symbols/mobilenet.py
+++ b/example/image-classification/symbols/mobilenet.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 
 def Conv(data, num_filter=1, kernel=(1, 1), stride=(1, 1), pad=(0, 0), num_group=1, name=None, suffix=''):
diff --git a/example/image-classification/symbols/resnet-v1.py b/example/image-classification/symbols/resnet-v1.py
index 0d7bee1..e5752f7 100755
--- a/example/image-classification/symbols/resnet-v1.py
+++ b/example/image-classification/symbols/resnet-v1.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 '''
 Adapted from https://github.com/tornadomeet/ResNet/blob/master/symbol_resnet.py
 (Original author Wei Wu) by Antti-Pekka Hynninen
diff --git a/example/image-classification/symbols/resnet.py b/example/image-classification/symbols/resnet.py
index 41cbc82..be49860 100644
--- a/example/image-classification/symbols/resnet.py
+++ b/example/image-classification/symbols/resnet.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 '''
 Adapted from https://github.com/tornadomeet/ResNet/blob/master/symbol_resnet.py
 Original author Wei Wu
diff --git a/example/image-classification/symbols/resnext.py b/example/image-classification/symbols/resnext.py
index bd5b656..5974943 100644
--- a/example/image-classification/symbols/resnext.py
+++ b/example/image-classification/symbols/resnext.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 '''
 Adapted from https://github.com/tornadomeet/ResNet/blob/master/symbol_resnet.py
 Original author Wei Wu
@@ -29,19 +46,19 @@ def residual_unit(data, num_filter, stride, dim_match, name, bottle_neck=True, n
     """
     if bottle_neck:
         # the same as https://github.com/facebook/fb.resnet.torch#notes, a bit difference with origin paper
-        
+
         conv1 = mx.sym.Convolution(data=data, num_filter=int(num_filter*0.5), kernel=(1,1), stride=(1,1), pad=(0,0),
                                       no_bias=True, workspace=workspace, name=name + '_conv1')
         bn1 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn1')
         act1 = mx.sym.Activation(data=bn1, act_type='relu', name=name + '_relu1')
 
-        
+
         conv2 = mx.sym.Convolution(data=act1, num_filter=int(num_filter*0.5), num_group=num_group, kernel=(3,3), stride=stride, pad=(1,1),
                                       no_bias=True, workspace=workspace, name=name + '_conv2')
         bn2 = mx.sym.BatchNorm(data=conv2, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn2')
         act2 = mx.sym.Activation(data=bn2, act_type='relu', name=name + '_relu2')
 
-        
+
         conv3 = mx.sym.Convolution(data=act2, num_filter=num_filter, kernel=(1,1), stride=(1,1), pad=(0,0), no_bias=True,
                                    workspace=workspace, name=name + '_conv3')
         bn3 = mx.sym.BatchNorm(data=conv3, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn3')
@@ -58,13 +75,13 @@ def residual_unit(data, num_filter, stride, dim_match, name, bottle_neck=True, n
         eltwise =  bn3 + shortcut
         return mx.sym.Activation(data=eltwise, act_type='relu', name=name + '_relu')
     else:
-        
+
         conv1 = mx.sym.Convolution(data=data, num_filter=num_filter, kernel=(3,3), stride=stride, pad=(1,1),
                                       no_bias=True, workspace=workspace, name=name + '_conv1')
         bn1 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn1')
         act1 = mx.sym.Activation(data=bn1, act_type='relu', name=name + '_relu1')
 
-        
+
         conv2 = mx.sym.Convolution(data=act1, num_filter=num_filter, kernel=(3,3), stride=(1,1), pad=(1,1),
                                       no_bias=True, workspace=workspace, name=name + '_conv2')
         bn2 = mx.sym.BatchNorm(data=conv2, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn2')
@@ -124,12 +141,12 @@ def resnext(units, num_stages, filter_list, num_classes, num_group, image_shape,
 
     for i in range(num_stages):
         body = residual_unit(body, filter_list[i+1], (1 if i==0 else 2, 1 if i==0 else 2), False,
-                             name='stage%d_unit%d' % (i + 1, 1), bottle_neck=bottle_neck, num_group=num_group, 
+                             name='stage%d_unit%d' % (i + 1, 1), bottle_neck=bottle_neck, num_group=num_group,
                              bn_mom=bn_mom, workspace=workspace, memonger=memonger)
         for j in range(units[i]-1):
             body = residual_unit(body, filter_list[i+1], (1,1), True, name='stage%d_unit%d' % (i + 1, j + 2),
                                  bottle_neck=bottle_neck, num_group=num_group, bn_mom=bn_mom, workspace=workspace, memonger=memonger)
-            
+
     pool1 = mx.sym.Pooling(data=body, global_pool=True, kernel=(7, 7), pool_type='avg', name='pool1')
     flat = mx.sym.Flatten(data=pool1)
     fc1 = mx.sym.FullyConnected(data=flat, num_hidden=num_classes, name='fc1')
@@ -186,7 +203,7 @@ def get_symbol(num_classes, num_layers, image_shape, num_group=32, conv_workspac
                   num_stages  = num_stages,
                   filter_list = filter_list,
                   num_classes = num_classes,
-                  num_group   = num_group, 
+                  num_group   = num_group,
                   image_shape = image_shape,
                   bottle_neck = bottle_neck,
                   workspace   = conv_workspace,
diff --git a/example/image-classification/symbols/vgg.py b/example/image-classification/symbols/vgg.py
index 8dae74f..ca10136 100644
--- a/example/image-classification/symbols/vgg.py
+++ b/example/image-classification/symbols/vgg.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """References:
 
 Simonyan, Karen, and Andrew Zisserman. "Very deep convolutional networks for
diff --git a/example/image-classification/test_score.py b/example/image-classification/test_score.py
index 19a1d30..0789c92 100644
--- a/example/image-classification/test_score.py
+++ b/example/image-classification/test_score.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """
 test pretrained models
 """
diff --git a/example/image-classification/train_cifar10.py b/example/image-classification/train_cifar10.py
index 0186233..7eb56eb 100644
--- a/example/image-classification/train_cifar10.py
+++ b/example/image-classification/train_cifar10.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os
 import argparse
 import logging
diff --git a/example/image-classification/train_imagenet.py b/example/image-classification/train_imagenet.py
index 760ea6b..5760a9a 100644
--- a/example/image-classification/train_imagenet.py
+++ b/example/image-classification/train_imagenet.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os
 import argparse
 import logging
diff --git a/example/image-classification/train_mnist.py b/example/image-classification/train_mnist.py
index 31ecbfb..2bc4289 100644
--- a/example/image-classification/train_mnist.py
+++ b/example/image-classification/train_mnist.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """
 Train mnist, see more explanation at http://mxnet.io/tutorials/python/mnist.html
 """
@@ -53,9 +70,9 @@ if __name__ == '__main__':
                         help='the number of classes')
     parser.add_argument('--num-examples', type=int, default=60000,
                         help='the number of training examples')
-    
+
     parser.add_argument('--add_stn',  action="store_true", default=False, help='Add Spatial Transformer Network Layer (lenet only)')
-    
+
     fit.add_fit_args(parser)
     parser.set_defaults(
         # network
diff --git a/example/kaggle-ndsb1/gen_img_list.py b/example/kaggle-ndsb1/gen_img_list.py
index 2da5d70..adfc4fe 100644
--- a/example/kaggle-ndsb1/gen_img_list.py
+++ b/example/kaggle-ndsb1/gen_img_list.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 import csv
 import os
@@ -26,7 +43,7 @@ random.seed(888)
 
 fo_name=os.path.join(args.out_folder+args.out_file)
 fo = csv.writer(open(fo_name, "w"), delimiter='\t', lineterminator='\n')
-    
+
 if args.train:
     tr_fo_name=os.path.join(args.out_folder+"tr.lst")
     va_fo_name=os.path.join(args.out_folder+"va.lst")
@@ -58,7 +75,7 @@ random.shuffle(img_lst)
 #write
 for item in img_lst:
     fo.writerow(item)
-        
+
 
 
 ## If training, split into train and validation lists (tr.lst and va.lst)
diff --git a/example/kaggle-ndsb1/predict_dsb.py b/example/kaggle-ndsb1/predict_dsb.py
index 483243a..2be2ecc 100644
--- a/example/kaggle-ndsb1/predict_dsb.py
+++ b/example/kaggle-ndsb1/predict_dsb.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 import find_mxnet
 import submission_dsb
diff --git a/example/kaggle-ndsb1/submission_dsb.py b/example/kaggle-ndsb1/submission_dsb.py
index a2644f8..2695c1a 100644
--- a/example/kaggle-ndsb1/submission_dsb.py
+++ b/example/kaggle-ndsb1/submission_dsb.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 import pandas as pd
 import os
@@ -14,9 +31,9 @@ def gen_sub(predictions,test_lst_path="test.lst",submission_path="submission.csv
     ## check sampleSubmission.csv from kaggle website to view submission format
     header = "acantharia_protist_big_center,acantharia_protist_halo,acantharia_protist,amphipods,appendicularian_fritillaridae,appendicularian_s_shape,appendicularian_slight_curve,appendicularian_straight,artifacts_edge,artifacts,chaetognath_non_sagitta,chaetognath_other,chaetognath_sagitta,chordate_type1,copepod_calanoid_eggs,copepod_calanoid_eucalanus,copepod_calanoid_flatheads,copepod_calanoid_frillyAntennae,copepod_calanoid_large_side_antennatucked,copepod_calanoid_large,copepod_cala [...]
 
-        
+
     # read first line to know the number of columns and column to use
-    img_lst = pd.read_csv(test_lst_path,sep="/",header=None, nrows=1) 
+    img_lst = pd.read_csv(test_lst_path,sep="/",header=None, nrows=1)
     columns = img_lst.columns.tolist() # get the columns
     cols_to_use = columns[len(columns)-1] # drop the last one
     cols_to_use= map(int, str(cols_to_use)) ## convert scalar to list
@@ -28,15 +45,15 @@ def gen_sub(predictions,test_lst_path="test.lst",submission_path="submission.csv
 
     df = pd.DataFrame(predictions,columns = header, index=img_lst)
     df.index.name = 'image'
-    
+
     print("Saving csv to %s" % submission_path)
     df.to_csv(submission_path)
-     
+
     print("Compress with gzip")
     os.system("gzip -f %s" % submission_path)
-    
+
     print("  stored in %s.gz" % submission_path)
 
-   
+
 
 
diff --git a/example/kaggle-ndsb1/symbol_dsb.py b/example/kaggle-ndsb1/symbol_dsb.py
index 43898a1..0a4db8f 100644
--- a/example/kaggle-ndsb1/symbol_dsb.py
+++ b/example/kaggle-ndsb1/symbol_dsb.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import find_mxnet
 import mxnet as mx
 
diff --git a/example/kaggle-ndsb1/train_dsb.py b/example/kaggle-ndsb1/train_dsb.py
index 19beb02..5cec0f6 100644
--- a/example/kaggle-ndsb1/train_dsb.py
+++ b/example/kaggle-ndsb1/train_dsb.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import find_mxnet
 import mxnet as mx
 import logging
@@ -23,7 +40,7 @@ parser.add_argument('--clip-gradient', type=float, default=5.,
                     help='clip min/max gradient to prevent extreme value')
 parser.add_argument('--num-epochs', type=int, default=100,
                     help='the number of training epochs')
-parser.add_argument('--load-epoch', type=int, 
+parser.add_argument('--load-epoch', type=int,
                     help="load the model on an epoch using the model-prefix")
 parser.add_argument('--batch-size', type=int, default=64,
                     help='the batch size')
@@ -35,7 +52,7 @@ parser.add_argument('--num-examples', type=int, default=20000,
                     help='the number of training examples')
 parser.add_argument('--num-classes', type=int, default=121,
                     help='the number of classes')
-parser.add_argument('--log-file', type=str, 
+parser.add_argument('--log-file', type=str,
 		    help='the name of log file')
 parser.add_argument('--log-dir', type=str, default="/tmp/",
                     help='directory of the log file')
@@ -63,7 +80,7 @@ def get_iterator(args, kv):
         rand_crop   = True,
         rand_mirror = True,
     )
-    
+
     # validate data iterator
     val = mx.io.ImageRecordIter(
         path_imgrec = args.data_dir + "va.rec",
diff --git a/example/kaggle-ndsb1/training_curves.py b/example/kaggle-ndsb1/training_curves.py
index e4ffd94..67f25f0 100644
--- a/example/kaggle-ndsb1/training_curves.py
+++ b/example/kaggle-ndsb1/training_curves.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 ## based on https://github.com/dmlc/mxnet/issues/1302
 ## Parses the model fit log file and generates a train/val vs epoch plot
 import matplotlib.pyplot as plt
diff --git a/example/kaggle-ndsb2/Preprocessing.py b/example/kaggle-ndsb2/Preprocessing.py
index 64d15e0..29b4ba0 100644
--- a/example/kaggle-ndsb2/Preprocessing.py
+++ b/example/kaggle-ndsb2/Preprocessing.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """Preprocessing script.
 
 This script walks over the directories and dump the frames into a csv file
diff --git a/example/kaggle-ndsb2/Train.py b/example/kaggle-ndsb2/Train.py
index 22aa3ed..51e308a 100644
--- a/example/kaggle-ndsb2/Train.py
+++ b/example/kaggle-ndsb2/Train.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """Training script, this is converted from a ipython notebook
 """
 
diff --git a/example/memcost/inception_memcost.py b/example/memcost/inception_memcost.py
index 45e8590..c539e73 100644
--- a/example/memcost/inception_memcost.py
+++ b/example/memcost/inception_memcost.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 import sys
 sys.path.append('../../python/')
diff --git a/example/model-parallel-lstm/get_ptb_data.sh b/example/model-parallel-lstm/get_ptb_data.sh
index 1ec009a..d2641cb 100755
--- a/example/model-parallel-lstm/get_ptb_data.sh
+++ b/example/model-parallel-lstm/get_ptb_data.sh
@@ -1,5 +1,23 @@
 #!/usr/bin/env bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 RNN_DIR=$(cd `dirname $0`; pwd)
 DATA_DIR="${RNN_DIR}/data/"
 
diff --git a/example/model-parallel-lstm/lstm.py b/example/model-parallel-lstm/lstm.py
index 795eb6e..c24017f 100644
--- a/example/model-parallel-lstm/lstm.py
+++ b/example/model-parallel-lstm/lstm.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint:skip-file
 import sys
 sys.path.insert(0, "../../python")
@@ -152,7 +169,7 @@ def setup_rnn_model(default_ctx,
     models = {}
     buckets.reverse()
     for bucket_key in buckets:
-        # bind max_len first 
+        # bind max_len first
         rnn_sym = lstm_unroll(num_lstm_layer=num_lstm_layer,
                           num_hidden=num_hidden,
                           seq_len=seq_len,
@@ -190,7 +207,7 @@ def setup_rnn_model(default_ctx,
                 args_grad[name] = mx.nd.zeros(shape, ctx)
             if not name.startswith("t"):
                 print("%s group=%s, ctx=%s" % (name, group, str(ctx)))
-        
+
         #bind with shared executor
         rnn_exec = None
         if max_len == bucket_key:
@@ -220,7 +237,7 @@ def setup_rnn_model(default_ctx,
                              h=arg_dict["l%d_init_h" % i]) for i in range(num_lstm_layer)]
 
         seq_data = [rnn_exec.arg_dict["t%d_data" % i] for i in range(seq_len)]
-        # we don't need to store the last state 
+        # we don't need to store the last state
         last_states = None
 
         if concat_decode:
@@ -235,7 +252,7 @@ def setup_rnn_model(default_ctx,
                      seq_data=seq_data, seq_labels=seq_labels, seq_outputs=seq_outputs,
                      param_blocks=param_blocks)
         models[bucket_key] = model
-    buckets.reverse()    
+    buckets.reverse()
     return models
 
 
@@ -256,7 +273,7 @@ def set_rnn_inputs(m, X, begin):
 def set_rnn_inputs_from_batch(m, batch, batch_seq_length, batch_size):
   X = batch.data
   for seqidx in range(batch_seq_length):
-    idx = seqidx 
+    idx = seqidx
     next_idx = (seqidx + 1) % batch_seq_length
     x = X[idx, :]
     y = X[next_idx, :]
@@ -295,20 +312,20 @@ def train_lstm(model, X_train_batch, X_val_batch,
         nbatch = 0
         train_nll = 0
         tic = time.time()
-        for data_batch in X_train_batch:  
+        for data_batch in X_train_batch:
             batch_seq_length = data_batch.bucket_key
             m = model[batch_seq_length]
             # reset init state
             for state in m.init_states:
               state.c[:] = 0.0
               state.h[:] = 0.0
-              
+
             head_grad = []
             if use_loss:
               ctx = m.seq_outputs[0].context
               head_grad = [mx.nd.ones((1,), ctx) for x in m.seq_outputs]
 
-            set_rnn_inputs_from_batch(m, data_batch, batch_seq_length, batch_size)  
+            set_rnn_inputs_from_batch(m, data_batch, batch_seq_length, batch_size)
 
             m.rnn_exec.forward(is_train=True)
             # probability of each label class, used to evaluate nll
@@ -390,7 +407,7 @@ def train_lstm(model, X_train_batch, X_val_batch,
             else:
                 val_nll += sum([x.asscalar() for x in seq_loss]) / batch_size
             nbatch += batch_size
-            
+
         perp = np.exp(val_nll / nbatch)
         print("Iter [%d] Val: NLL=%.3f, Perp=%.3f" % (
             iteration, val_nll / nbatch, np.exp(val_nll / nbatch)))
@@ -401,7 +418,7 @@ def train_lstm(model, X_train_batch, X_val_batch,
         X_val_batch.reset()
         X_train_batch.reset()
 
-# is this function being used? 
+# is this function being used?
 def setup_rnn_sample_model(ctx,
                            params,
                            num_lstm_layer,
diff --git a/example/model-parallel-lstm/lstm_ptb.py b/example/model-parallel-lstm/lstm_ptb.py
index 20ce896..0141338 100644
--- a/example/model-parallel-lstm/lstm_ptb.py
+++ b/example/model-parallel-lstm/lstm_ptb.py
@@ -1,10 +1,27 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint:skip-file
 import lstm
 import sys
 sys.path.insert(0, "../../python")
 import mxnet as mx
 import numpy as np
-# reuse the bucket_io library 
+# reuse the bucket_io library
 sys.path.insert(0, "../rnn")
 from bucket_io import BucketSentenceIter, default_build_vocab
 
diff --git a/example/module/lstm_bucketing.py b/example/module/lstm_bucketing.py
index dc00ef5..ecc7e7b 100644
--- a/example/module/lstm_bucketing.py
+++ b/example/module/lstm_bucketing.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: disable=C0111,too-many-arguments,too-many-instance-attributes,too-many-locals,redefined-outer-name,fixme
 # pylint: disable=superfluous-parens, no-member, invalid-name
 import sys
diff --git a/example/module/mnist_mlp.py b/example/module/mnist_mlp.py
index 6d9d6bf..d2737dc 100644
--- a/example/module/mnist_mlp.py
+++ b/example/module/mnist_mlp.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 import os, sys
 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
diff --git a/example/module/python_loss.py b/example/module/python_loss.py
index d139789..9680ac6 100644
--- a/example/module/python_loss.py
+++ b/example/module/python_loss.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 import numpy as np
 import mxnet as mx
diff --git a/example/module/sequential_module.py b/example/module/sequential_module.py
index 4659457..48e1046 100644
--- a/example/module/sequential_module.py
+++ b/example/module/sequential_module.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 import os, sys
 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
diff --git a/example/module/train_cifar10.py b/example/module/train_cifar10.py
index ec3be57..a96e8d9 100644
--- a/example/module/train_cifar10.py
+++ b/example/module/train_cifar10.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """Train CIFAR-10 classifier in MXNet.
 Demonstrates using the Module class.
 """
@@ -132,7 +149,7 @@ def do_train(args, callback_args=None):
     (train, val) = get_iterator(args, kv)
 
     if args.gpus is None or args.gpus == '':
-        devs = mx.cpu() 
+        devs = mx.cpu()
     elif type(args.gpus) == str:
         devs = [mx.gpu(int(i)) for i in args.gpus.split(',')]
     else:
diff --git a/example/multi-task/data.py b/example/multi-task/data.py
index d39821f..0ca8e1f 100644
--- a/example/multi-task/data.py
+++ b/example/multi-task/data.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 """ data iterator for mnist """
 import sys
diff --git a/example/multi-task/example_multi_task.py b/example/multi-task/example_multi_task.py
index 8ee396f..853d435 100644
--- a/example/multi-task/example_multi_task.py
+++ b/example/multi-task/example_multi_task.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 import sys
 sys.path.insert(0, "../../python/")
diff --git a/example/nce-loss/get_text8.sh b/example/nce-loss/get_text8.sh
index ccd4a08..e1390eb 100755
--- a/example/nce-loss/get_text8.sh
+++ b/example/nce-loss/get_text8.sh
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 mkdir -p ./data/
 cd ./data/
 wget http://mattmahoney.net/dc/text8.zip
diff --git a/example/nce-loss/lstm_word.py b/example/nce-loss/lstm_word.py
index 3b39207..2372991 100644
--- a/example/nce-loss/lstm_word.py
+++ b/example/nce-loss/lstm_word.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint:skip-file
 from __future__ import print_function
 import logging
@@ -55,7 +72,7 @@ def get_net(vocab_size, seq_len, num_label, num_lstm_layer, num_hidden):
         state = LSTMState(c=mx.sym.Variable("l%d_init_c" % i),
                           h=mx.sym.Variable("l%d_init_h" % i))
         last_states.append(state)
-        
+
     data = mx.sym.Variable('data')
     label = mx.sym.Variable('label')
     label_weight = mx.sym.Variable('label_weight')
@@ -76,7 +93,7 @@ def get_net(vocab_size, seq_len, num_label, num_lstm_layer, num_hidden):
     probs = []
     for seqidx in range(seq_len):
         hidden = datavec[seqidx]
-        
+
         for i in range(num_lstm_layer):
             next_state = lstm(num_hidden, indata = hidden,
                               prev_state = last_states[i],
@@ -84,7 +101,7 @@ def get_net(vocab_size, seq_len, num_label, num_lstm_layer, num_hidden):
                               seqidx = seqidx, layeridx = i)
             hidden = next_state.h
             last_states[i] = next_state
-            
+
         probs.append(nce_loss(data = hidden,
                               label = labelvec[seqidx],
                               label_weight = labelweightvec[seqidx],
@@ -149,7 +166,7 @@ class DataIter(mx.io.DataIter):
         self.provide_data = [('data', (batch_size, seq_len))] + init_states
         self.provide_label = [('label', (self.batch_size, seq_len, num_label)),
                               ('label_weight', (self.batch_size, seq_len, num_label))]
-        
+
     def sample_ne(self):
         return self.negative[random.randint(0, len(self.negative) - 1)]
 
@@ -203,7 +220,7 @@ if __name__ == '__main__':
 
     data_train = DataIter("./data/text8", batch_size, seq_len, num_label,
                           init_states)
-    
+
     network = get_net(data_train.vocab_size, seq_len, num_label, num_lstm_layer, num_hidden)
     options, args = parser.parse_args()
     devs = mx.cpu()
@@ -216,7 +233,7 @@ if __name__ == '__main__':
                                  momentum = 0.9,
                                  wd = 0.0000,
                                  initializer=mx.init.Xavier(factor_type="in", magnitude=2.34))
-    
+
     metric = NceLSTMAuc()
     model.fit(X = data_train,
               eval_metric = metric,
diff --git a/example/nce-loss/nce.py b/example/nce-loss/nce.py
index abe4135..7f57dfd 100644
--- a/example/nce-loss/nce.py
+++ b/example/nce-loss/nce.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint:skip-file
 import sys
 sys.path.insert(0, "../../python")
diff --git a/example/nce-loss/toy_nce.py b/example/nce-loss/toy_nce.py
index 9770be0..39da7c7 100644
--- a/example/nce-loss/toy_nce.py
+++ b/example/nce-loss/toy_nce.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint:skip-file
 import logging
 import sys, random, time
@@ -19,7 +36,7 @@ def get_net(vocab_size, num_label):
                     embed_weight = embed_weight,
                     vocab_size = vocab_size,
                     num_hidden = 100,
-                    num_label = num_label)    
+                    num_label = num_label)
     return ret
 
 class SimpleBatch(object):
@@ -91,10 +108,10 @@ if __name__ == '__main__':
     vocab_size = 10000
     feature_size = 100
     num_label = 6
-    
+
     data_train = DataIter(100000, batch_size, vocab_size, num_label, feature_size)
     data_test = DataIter(1000, batch_size, vocab_size, num_label, feature_size)
-    
+
     network = get_net(vocab_size, num_label)
     devs = [mx.cpu()]
     model = mx.model.FeedForward(ctx = devs,
@@ -104,7 +121,7 @@ if __name__ == '__main__':
                                  momentum = 0.9,
                                  wd = 0.00001,
                                  initializer=mx.init.Xavier(factor_type="in", magnitude=2.34))
-    
+
     metric = NceAccuracy()
     model.fit(X = data_train, eval_data = data_test,
               eval_metric = metric,
diff --git a/example/nce-loss/toy_softmax.py b/example/nce-loss/toy_softmax.py
index 66f9cdc..ff6ff43 100644
--- a/example/nce-loss/toy_softmax.py
+++ b/example/nce-loss/toy_softmax.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint:skip-file
 import logging
 import sys, random, time
@@ -83,7 +100,7 @@ if __name__ == '__main__':
 
     data_train = DataIter(100000, batch_size, vocab_size, num_label, feature_size)
     data_test = DataIter(1000, batch_size, vocab_size, num_label, feature_size)
-    
+
     network = get_net(vocab_size)
     devs = mx.cpu()
     model = mx.model.FeedForward(ctx = devs,
@@ -93,7 +110,7 @@ if __name__ == '__main__':
                                  momentum = 0.9,
                                  wd = 0.0000,
                                  initializer=mx.init.Xavier(factor_type="in", magnitude=2.34))
-    
+
     model.fit(X = data_train, eval_data = data_test,
               batch_end_callback = mx.callback.Speedometer(batch_size, 50),)
 
diff --git a/example/nce-loss/wordvec.py b/example/nce-loss/wordvec.py
index 24b7830..887d586 100644
--- a/example/nce-loss/wordvec.py
+++ b/example/nce-loss/wordvec.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint:skip-file
 from __future__ import print_function
 import logging
@@ -30,7 +47,7 @@ def get_net(vocab_size, num_input, num_label):
                     embed_weight = embed_weight,
                     vocab_size = vocab_size,
                     num_hidden = 100,
-                    num_label = num_label)    
+                    num_label = num_label)
 
 def load_data(name):
     buf = open(name).read()
@@ -82,7 +99,7 @@ class DataIter(mx.io.DataIter):
         self.provide_data = [('data', (batch_size, num_label - 1))]
         self.provide_label = [('label', (self.batch_size, num_label)),
                               ('label_weight', (self.batch_size, num_label))]
-        
+
     def sample_ne(self):
         return self.negative[random.randint(0, len(self.negative) - 1)]
 
@@ -126,11 +143,11 @@ if __name__ == '__main__':
                       help = "use gpu")
     batch_size = 256
     num_label = 5
-    
+
     data_train = DataIter("./data/text8", batch_size, num_label)
-    
+
     network = get_net(data_train.vocab_size, num_label - 1, num_label)
-    
+
     options, args = parser.parse_args()
     devs = mx.cpu()
     if options.gpu == True:
@@ -143,7 +160,7 @@ if __name__ == '__main__':
                                  wd = 0.0000,
                                  initializer=mx.init.Xavier(factor_type="in", magnitude=2.34))
 
-    
+
     metric = NceAuc()
     model.fit(X = data_train,
               eval_metric = metric,
diff --git a/example/nce-loss/wordvec_subwords.py b/example/nce-loss/wordvec_subwords.py
index 049dc9d..c8d46a1 100644
--- a/example/nce-loss/wordvec_subwords.py
+++ b/example/nce-loss/wordvec_subwords.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint:skip-file
 import logging
 import sys, random, time, math
diff --git a/example/neural-style/download.sh b/example/neural-style/download.sh
index a58640a..d5303a7 100755
--- a/example/neural-style/download.sh
+++ b/example/neural-style/download.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 mkdir -p model
 cd model
 wget https://github.com/dmlc/web-data/raw/master/mxnet/neural-style/model/vgg19.params
diff --git a/example/neural-style/end_to_end/basic.py b/example/neural-style/end_to_end/basic.py
index ed9d3f6..1763e88 100644
--- a/example/neural-style/end_to_end/basic.py
+++ b/example/neural-style/end_to_end/basic.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import sys
 sys.path.insert(0, "../../mxnet/python/")
 
diff --git a/example/neural-style/end_to_end/boost_inference.py b/example/neural-style/end_to_end/boost_inference.py
index 72427be..0ec8308 100644
--- a/example/neural-style/end_to_end/boost_inference.py
+++ b/example/neural-style/end_to_end/boost_inference.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import sys
 sys.path.insert(0, "../mxnet/python")
 
diff --git a/example/neural-style/end_to_end/boost_train.py b/example/neural-style/end_to_end/boost_train.py
index 9100cc1..fa525e7 100644
--- a/example/neural-style/end_to_end/boost_train.py
+++ b/example/neural-style/end_to_end/boost_train.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import sys
 sys.path.insert(0, "../../mxnet/python")
 
diff --git a/example/neural-style/end_to_end/data_processing.py b/example/neural-style/end_to_end/data_processing.py
index 80f1bcd..1c1ab49 100644
--- a/example/neural-style/end_to_end/data_processing.py
+++ b/example/neural-style/end_to_end/data_processing.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import numpy as np
 from skimage import io, transform
 from skimage.restoration import denoise_tv_chambolle
diff --git a/example/neural-style/end_to_end/gen_v3.py b/example/neural-style/end_to_end/gen_v3.py
index dbc83b1..7962e68 100644
--- a/example/neural-style/end_to_end/gen_v3.py
+++ b/example/neural-style/end_to_end/gen_v3.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 
 # coding: utf-8
 
diff --git a/example/neural-style/end_to_end/gen_v4.py b/example/neural-style/end_to_end/gen_v4.py
index 379e904..fb4e6d1 100644
--- a/example/neural-style/end_to_end/gen_v4.py
+++ b/example/neural-style/end_to_end/gen_v4.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 
 # coding: utf-8
 
diff --git a/example/neural-style/end_to_end/model_vgg19.py b/example/neural-style/end_to_end/model_vgg19.py
index 6e287b5..0d369ae 100644
--- a/example/neural-style/end_to_end/model_vgg19.py
+++ b/example/neural-style/end_to_end/model_vgg19.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 import os, sys
 from collections import namedtuple
diff --git a/example/neural-style/find_mxnet.py b/example/neural-style/find_mxnet.py
index 2e3970d..b919a2a 100644
--- a/example/neural-style/find_mxnet.py
+++ b/example/neural-style/find_mxnet.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 try:
     import mxnet as mx
 except ImportError:
diff --git a/example/neural-style/model_vgg19.py b/example/neural-style/model_vgg19.py
index 3344a27..aa83bc3 100644
--- a/example/neural-style/model_vgg19.py
+++ b/example/neural-style/model_vgg19.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import find_mxnet
 import mxnet as mx
 import os, sys
diff --git a/example/neural-style/nstyle.py b/example/neural-style/nstyle.py
index 3eec33d..e3bc8bc 100644
--- a/example/neural-style/nstyle.py
+++ b/example/neural-style/nstyle.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import find_mxnet
 import mxnet as mx
 import numpy as np
@@ -196,7 +213,7 @@ def train_nstyle(args, callback=None):
     img = mx.nd.zeros(content_np.shape, ctx=dev)
     img[:] = mx.rnd.uniform(-0.1, 0.1, img.shape)
 
-    lr = mx.lr_scheduler.FactorScheduler(step=args.lr_sched_delay, 
+    lr = mx.lr_scheduler.FactorScheduler(step=args.lr_sched_delay,
             factor=args.lr_sched_factor)
 
     optimizer = mx.optimizer.NAG(
diff --git a/example/numpy-ops/custom_softmax.py b/example/numpy-ops/custom_softmax.py
index cbd9a02..162215f 100644
--- a/example/numpy-ops/custom_softmax.py
+++ b/example/numpy-ops/custom_softmax.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 import os
 from data import mnist_iterator
@@ -23,7 +40,7 @@ class Softmax(mx.operator.CustomOp):
 class SoftmaxProp(mx.operator.CustomOpProp):
     def __init__(self):
         super(SoftmaxProp, self).__init__(need_top_grad=False)
-    
+
     def list_arguments(self):
         return ['data', 'label']
 
diff --git a/example/numpy-ops/data.py b/example/numpy-ops/data.py
index d39821f..0ca8e1f 100644
--- a/example/numpy-ops/data.py
+++ b/example/numpy-ops/data.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 """ data iterator for mnist """
 import sys
diff --git a/example/numpy-ops/ndarray_softmax.py b/example/numpy-ops/ndarray_softmax.py
index 5c31768..aa8555e 100644
--- a/example/numpy-ops/ndarray_softmax.py
+++ b/example/numpy-ops/ndarray_softmax.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 from data import mnist_iterator
 import mxnet as mx
@@ -10,7 +27,7 @@ class NDArraySoftmax(mx.operator.NDArrayOp):
         super(NDArraySoftmax, self).__init__(False)
         self.fwd_kernel = None
         self.bwd_kernel = None
-    
+
     def list_arguments(self):
         return ['data', 'label']
 
diff --git a/example/numpy-ops/numpy_softmax.py b/example/numpy-ops/numpy_softmax.py
index 3f9f6c8..f90783b 100644
--- a/example/numpy-ops/numpy_softmax.py
+++ b/example/numpy-ops/numpy_softmax.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 from data import mnist_iterator
 import mxnet as mx
@@ -8,7 +25,7 @@ import logging
 class NumpySoftmax(mx.operator.NumpyOp):
     def __init__(self):
         super(NumpySoftmax, self).__init__(False)
-    
+
     def list_arguments(self):
         return ['data', 'label']
 
diff --git a/example/numpy-ops/weighted_logistic_regression.py b/example/numpy-ops/weighted_logistic_regression.py
index 7094b3a..26b5fb2 100644
--- a/example/numpy-ops/weighted_logistic_regression.py
+++ b/example/numpy-ops/weighted_logistic_regression.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os
 import numpy as np
 import mxnet as mx
diff --git a/example/profiler/profiler_executor.py b/example/profiler/profiler_executor.py
index e70574d..26e3e1b 100644
--- a/example/profiler/profiler_executor.py
+++ b/example/profiler/profiler_executor.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 import argparse
 import os, sys
diff --git a/example/profiler/profiler_imageiter.py b/example/profiler/profiler_imageiter.py
index af4c5d1..e16b9b7 100644
--- a/example/profiler/profiler_imageiter.py
+++ b/example/profiler/profiler_imageiter.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os
 # uncomment to set the number of worker threads.
 # os.environ["MXNET_CPU_WORKER_NTHREADS"] = "4"
@@ -8,7 +25,7 @@ import numpy as np
 
 
 def run_imageiter(path_rec, n, batch_size = 32):
-    
+
     data = mx.img.ImageIter(batch_size=batch_size,
                             data_shape=(3, 224, 224),
                             path_imgrec=path_rec,
@@ -26,4 +43,4 @@ if __name__ == '__main__':
     mx.profiler.profiler_set_config(mode='all', filename='profile_imageiter.json')
     mx.profiler.profiler_set_state('run')
     run_imageiter('test.rec', 20)  # See http://mxnet.io/tutorials/python/image_io.html for how to create .rec files.
-    mx.profiler.profiler_set_state('stop')
\ No newline at end of file
+    mx.profiler.profiler_set_state('stop')
diff --git a/example/profiler/profiler_matmul.py b/example/profiler/profiler_matmul.py
index baa9623..1b1cf74 100644
--- a/example/profiler/profiler_matmul.py
+++ b/example/profiler/profiler_matmul.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 import mxnet as mx
 import argparse
diff --git a/example/profiler/profiler_ndarray.py b/example/profiler/profiler_ndarray.py
index bb4d658..67ea87b 100644
--- a/example/profiler/profiler_ndarray.py
+++ b/example/profiler/profiler_ndarray.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os
 import mxnet as mx
 import numpy as np
diff --git a/example/python-howto/data.py b/example/python-howto/data.py
index d39821f..0ca8e1f 100644
--- a/example/python-howto/data.py
+++ b/example/python-howto/data.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 """ data iterator for mnist """
 import sys
diff --git a/example/python-howto/data_iter.py b/example/python-howto/data_iter.py
index 34e9a41..81c8988 100644
--- a/example/python-howto/data_iter.py
+++ b/example/python-howto/data_iter.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """Create a Cifar data iterator.
 
 This example shows how to create a iterator reading from recordio,
diff --git a/example/python-howto/debug_conv.py b/example/python-howto/debug_conv.py
index 3c38d20..9de421d 100644
--- a/example/python-howto/debug_conv.py
+++ b/example/python-howto/debug_conv.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 
 data_shape = (1,3,5,5)
@@ -19,4 +36,4 @@ mod.init_params()
 input_data = mx.nd.ones(data_shape)
 mod.forward(data_batch=SimpleData([input_data]))
 res = mod.get_outputs()[0].asnumpy()
-print(res)
\ No newline at end of file
+print(res)
diff --git a/example/python-howto/monitor_weights.py b/example/python-howto/monitor_weights.py
index 8dcca1f..c54e649 100644
--- a/example/python-howto/monitor_weights.py
+++ b/example/python-howto/monitor_weights.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 from data import mnist_iterator
 import mxnet as mx
@@ -27,6 +44,6 @@ model = mx.model.FeedForward(
 def norm_stat(d):
     return mx.nd.norm(d)/np.sqrt(d.size)
 mon = mx.mon.Monitor(100, norm_stat)
-model.fit(X=train, eval_data=val, monitor=mon, 
+model.fit(X=train, eval_data=val, monitor=mon,
           batch_end_callback = mx.callback.Speedometer(100, 100))
 
diff --git a/example/python-howto/multiple_outputs.py b/example/python-howto/multiple_outputs.py
index 97ce469..43b4538 100644
--- a/example/python-howto/multiple_outputs.py
+++ b/example/python-howto/multiple_outputs.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """Create a Multiple output configuration.
 
 This example shows how to create a multiple output configuration.
diff --git a/example/rcnn/demo.py b/example/rcnn/demo.py
index 34ea327..b594033 100644
--- a/example/rcnn/demo.py
+++ b/example/rcnn/demo.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import argparse
 import os
 import cv2
diff --git a/example/rcnn/rcnn/config.py b/example/rcnn/rcnn/config.py
index 445c243..17738f0 100644
--- a/example/rcnn/rcnn/config.py
+++ b/example/rcnn/rcnn/config.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import numpy as np
 from easydict import EasyDict as edict
 
diff --git a/example/rcnn/rcnn/core/callback.py b/example/rcnn/rcnn/core/callback.py
index 5d48b9f..bacff96 100644
--- a/example/rcnn/rcnn/core/callback.py
+++ b/example/rcnn/rcnn/core/callback.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import time
 import logging
 import mxnet as mx
diff --git a/example/rcnn/rcnn/core/loader.py b/example/rcnn/rcnn/core/loader.py
index 3f5cf3c..826ee20 100644
--- a/example/rcnn/rcnn/core/loader.py
+++ b/example/rcnn/rcnn/core/loader.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 import numpy as np
 from mxnet.executor_manager import _split_input_slice
diff --git a/example/rcnn/rcnn/core/metric.py b/example/rcnn/rcnn/core/metric.py
index 5808190..d33edb6 100644
--- a/example/rcnn/rcnn/core/metric.py
+++ b/example/rcnn/rcnn/core/metric.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 import numpy as np
 
diff --git a/example/rcnn/rcnn/core/module.py b/example/rcnn/rcnn/core/module.py
index bf28f8e..337f0f3 100644
--- a/example/rcnn/rcnn/core/module.py
+++ b/example/rcnn/rcnn/core/module.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """A `MutableModule` implement the `BaseModule` API, and allows input shape
 varying with training iterations. If shapes vary, executors will rebind,
 using shared arrays from the initial module binded with maximum shape.
diff --git a/example/rcnn/rcnn/core/tester.py b/example/rcnn/rcnn/core/tester.py
index 0ccc47d..651b2a9 100644
--- a/example/rcnn/rcnn/core/tester.py
+++ b/example/rcnn/rcnn/core/tester.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import cPickle
 import os
 import time
diff --git a/example/rcnn/rcnn/cython/nms_kernel.cu b/example/rcnn/rcnn/cython/nms_kernel.cu
index 038a590..047a5e0 100644
--- a/example/rcnn/rcnn/cython/nms_kernel.cu
+++ b/example/rcnn/rcnn/cython/nms_kernel.cu
@@ -1,3 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 // ------------------------------------------------------------------
 // Faster R-CNN
 // Copyright (c) 2015 Microsoft
diff --git a/example/rcnn/rcnn/cython/setup.py b/example/rcnn/rcnn/cython/setup.py
index 7864607..e50478b 100644
--- a/example/rcnn/rcnn/cython/setup.py
+++ b/example/rcnn/rcnn/cython/setup.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # --------------------------------------------------------
 # Fast R-CNN
 # Copyright (c) 2015 Microsoft
diff --git a/example/rcnn/rcnn/dataset/__init__.py b/example/rcnn/rcnn/dataset/__init__.py
index 266f344..1a706e9 100644
--- a/example/rcnn/rcnn/dataset/__init__.py
+++ b/example/rcnn/rcnn/dataset/__init__.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from imdb import IMDB
 from pascal_voc import PascalVOC
 from coco import coco
diff --git a/example/rcnn/rcnn/dataset/coco.py b/example/rcnn/rcnn/dataset/coco.py
index 00c4c41..9ca5a74 100644
--- a/example/rcnn/rcnn/dataset/coco.py
+++ b/example/rcnn/rcnn/dataset/coco.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import cPickle
 import cv2
 import os
diff --git a/example/rcnn/rcnn/dataset/ds_utils.py b/example/rcnn/rcnn/dataset/ds_utils.py
index 131644b..e6f839b 100644
--- a/example/rcnn/rcnn/dataset/ds_utils.py
+++ b/example/rcnn/rcnn/dataset/ds_utils.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import numpy as np
 
 
@@ -13,4 +30,4 @@ def filter_small_boxes(boxes, min_size):
     w = boxes[:, 2] - boxes[:, 0]
     h = boxes[:, 3] - boxes[:, 1]
     keep = np.where((w >= min_size) & (h > min_size))[0]
-    return keep
\ No newline at end of file
+    return keep
diff --git a/example/rcnn/rcnn/dataset/imdb.py b/example/rcnn/rcnn/dataset/imdb.py
index acdcd50..b9038c5 100644
--- a/example/rcnn/rcnn/dataset/imdb.py
+++ b/example/rcnn/rcnn/dataset/imdb.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """
 General image database
 An image database creates a list of relative image path called image_set_index and
diff --git a/example/rcnn/rcnn/dataset/pascal_voc.py b/example/rcnn/rcnn/dataset/pascal_voc.py
index 2135971..091c4e8 100644
--- a/example/rcnn/rcnn/dataset/pascal_voc.py
+++ b/example/rcnn/rcnn/dataset/pascal_voc.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """
 Pascal VOC database
 This class loads ground truth notations from standard Pascal VOC XML data formats
diff --git a/example/rcnn/rcnn/dataset/pascal_voc_eval.py b/example/rcnn/rcnn/dataset/pascal_voc_eval.py
index 54fa12d..e584ed7 100644
--- a/example/rcnn/rcnn/dataset/pascal_voc_eval.py
+++ b/example/rcnn/rcnn/dataset/pascal_voc_eval.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """
 given a pascal voc imdb, compute mAP
 """
diff --git a/example/rcnn/rcnn/io/image.py b/example/rcnn/rcnn/io/image.py
index d4155a0..e468e46 100644
--- a/example/rcnn/rcnn/io/image.py
+++ b/example/rcnn/rcnn/io/image.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import numpy as np
 import cv2
 import os
diff --git a/example/rcnn/rcnn/io/rcnn.py b/example/rcnn/rcnn/io/rcnn.py
index 807447c..f9613d6 100644
--- a/example/rcnn/rcnn/io/rcnn.py
+++ b/example/rcnn/rcnn/io/rcnn.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """
 Fast R-CNN:
 data =
diff --git a/example/rcnn/rcnn/io/rpn.py b/example/rcnn/rcnn/io/rpn.py
index 52fe1a5..20cd1ce 100644
--- a/example/rcnn/rcnn/io/rpn.py
+++ b/example/rcnn/rcnn/io/rpn.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """
 RPN:
 data =
diff --git a/example/rcnn/rcnn/logger.py b/example/rcnn/rcnn/logger.py
index 2806e1a..e822017 100644
--- a/example/rcnn/rcnn/logger.py
+++ b/example/rcnn/rcnn/logger.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import logging
 
 # set up logger
diff --git a/example/rcnn/rcnn/processing/bbox_regression.py b/example/rcnn/rcnn/processing/bbox_regression.py
index d5b48a7..d5330f4 100644
--- a/example/rcnn/rcnn/processing/bbox_regression.py
+++ b/example/rcnn/rcnn/processing/bbox_regression.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """
 This file has functions about generating bounding box regression targets
 """
diff --git a/example/rcnn/rcnn/processing/bbox_transform.py b/example/rcnn/rcnn/processing/bbox_transform.py
index 7a8667e..04fa81f 100644
--- a/example/rcnn/rcnn/processing/bbox_transform.py
+++ b/example/rcnn/rcnn/processing/bbox_transform.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import numpy as np
 from ..cython.bbox import bbox_overlaps_cython
 
diff --git a/example/rcnn/rcnn/processing/generate_anchor.py b/example/rcnn/rcnn/processing/generate_anchor.py
index 8996a3a..0e97d6e 100644
--- a/example/rcnn/rcnn/processing/generate_anchor.py
+++ b/example/rcnn/rcnn/processing/generate_anchor.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """
 Generate base anchors on index 0
 """
diff --git a/example/rcnn/rcnn/processing/nms.py b/example/rcnn/rcnn/processing/nms.py
index 230139c..eca8d58 100644
--- a/example/rcnn/rcnn/processing/nms.py
+++ b/example/rcnn/rcnn/processing/nms.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import numpy as np
 from ..cython.cpu_nms import cpu_nms
 try:
diff --git a/example/rcnn/rcnn/pycocotools/__init__.py b/example/rcnn/rcnn/pycocotools/__init__.py
index 3f7d85b..2f4e0d4 100644
--- a/example/rcnn/rcnn/pycocotools/__init__.py
+++ b/example/rcnn/rcnn/pycocotools/__init__.py
@@ -1 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 __author__ = 'tylin'
diff --git a/example/rcnn/rcnn/pycocotools/coco.py b/example/rcnn/rcnn/pycocotools/coco.py
index ca35cc0..5cc835a 100644
--- a/example/rcnn/rcnn/pycocotools/coco.py
+++ b/example/rcnn/rcnn/pycocotools/coco.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 __author__ = 'tylin'
 __version__ = '2.0'
 # Interface for accessing the Microsoft COCO dataset.
@@ -425,4 +442,4 @@ class COCO:
         """
         rle = self.annToRLE(ann)
         m = maskUtils.decode(rle)
-        return m
\ No newline at end of file
+        return m
diff --git a/example/rcnn/rcnn/pycocotools/cocoeval.py b/example/rcnn/rcnn/pycocotools/cocoeval.py
index a5dd185..8b78026 100644
--- a/example/rcnn/rcnn/pycocotools/cocoeval.py
+++ b/example/rcnn/rcnn/pycocotools/cocoeval.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 __author__ = 'tsungyi'
 
 import numpy as np
@@ -525,4 +542,4 @@ class Params:
             raise Exception('iouType not supported')
         self.iouType = iouType
         # useSegm is deprecated
-        self.useSegm = None
\ No newline at end of file
+        self.useSegm = None
diff --git a/example/rcnn/rcnn/pycocotools/mask.py b/example/rcnn/rcnn/pycocotools/mask.py
index f49b873..48c050c 100644
--- a/example/rcnn/rcnn/pycocotools/mask.py
+++ b/example/rcnn/rcnn/pycocotools/mask.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 __author__ = 'tsungyi'
 
 import _mask
@@ -100,4 +117,4 @@ def toBbox(rleObjs):
     if type(rleObjs) == list:
         return _mask.toBbox(rleObjs)
     else:
-        return _mask.toBbox([rleObjs])[0]
\ No newline at end of file
+        return _mask.toBbox([rleObjs])[0]
diff --git a/example/rcnn/rcnn/pycocotools/maskApi.h b/example/rcnn/rcnn/pycocotools/maskApi.h
index ebc7892..56b4c0c 100644
--- a/example/rcnn/rcnn/pycocotools/maskApi.h
+++ b/example/rcnn/rcnn/pycocotools/maskApi.h
@@ -1,3 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /**************************************************************************
 * Microsoft COCO Toolbox.      version 2.0
 * Data, paper, and tutorials available at:  http://mscoco.org/
diff --git a/example/rcnn/rcnn/pycocotools/setup.py b/example/rcnn/rcnn/pycocotools/setup.py
index 5e836f1..d7074e9 100644
--- a/example/rcnn/rcnn/pycocotools/setup.py
+++ b/example/rcnn/rcnn/pycocotools/setup.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from distutils.core import setup
 from Cython.Build import cythonize
 from distutils.extension import Extension
diff --git a/example/rcnn/rcnn/symbol/__init__.py b/example/rcnn/rcnn/symbol/__init__.py
index f359ed8..113b52c 100644
--- a/example/rcnn/rcnn/symbol/__init__.py
+++ b/example/rcnn/rcnn/symbol/__init__.py
@@ -1,2 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from symbol_vgg import *
 from symbol_resnet import *
diff --git a/example/rcnn/rcnn/symbol/proposal.py b/example/rcnn/rcnn/symbol/proposal.py
index dd0bb15..6498151 100644
--- a/example/rcnn/rcnn/symbol/proposal.py
+++ b/example/rcnn/rcnn/symbol/proposal.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """
 Proposal Operator transform anchor coordinates into ROI coordinates with prediction results on
 classification probability and bounding box prediction results, and image size and scale information.
diff --git a/example/rcnn/rcnn/symbol/proposal_target.py b/example/rcnn/rcnn/symbol/proposal_target.py
index 6f1a6ff..e0444f9 100644
--- a/example/rcnn/rcnn/symbol/proposal_target.py
+++ b/example/rcnn/rcnn/symbol/proposal_target.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """
 Proposal Target Operator selects foreground and background roi and assigns label, bbox_transform to them.
 """
diff --git a/example/rcnn/rcnn/symbol/symbol_resnet.py b/example/rcnn/rcnn/symbol/symbol_resnet.py
index ad60de9..f914d11 100644
--- a/example/rcnn/rcnn/symbol/symbol_resnet.py
+++ b/example/rcnn/rcnn/symbol/symbol_resnet.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 import proposal
 import proposal_target
diff --git a/example/rcnn/rcnn/symbol/symbol_vgg.py b/example/rcnn/rcnn/symbol/symbol_vgg.py
index 34860a4..f04ba89 100644
--- a/example/rcnn/rcnn/symbol/symbol_vgg.py
+++ b/example/rcnn/rcnn/symbol/symbol_vgg.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 import proposal
 import proposal_target
@@ -130,7 +147,7 @@ def get_vgg_rcnn_test(num_classes=config.NUM_CLASSES):
 
     # shared convolutional layer
     relu5_3 = get_vgg_conv(data)
-    
+
     # Fast R-CNN
     pool5 = mx.symbol.ROIPooling(
         name='roi_pool5', data=relu5_3, rois=rois, pooled_size=(7, 7), spatial_scale=1.0 / config.RCNN_FEAT_STRIDE)
diff --git a/example/rcnn/rcnn/tools/reeval.py b/example/rcnn/rcnn/tools/reeval.py
index 22e5e20..a7ae898 100644
--- a/example/rcnn/rcnn/tools/reeval.py
+++ b/example/rcnn/rcnn/tools/reeval.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import argparse
 import cPickle
 import os
diff --git a/example/rcnn/rcnn/tools/test_rcnn.py b/example/rcnn/rcnn/tools/test_rcnn.py
index 83a9fac..2c5c222 100644
--- a/example/rcnn/rcnn/tools/test_rcnn.py
+++ b/example/rcnn/rcnn/tools/test_rcnn.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import argparse
 import pprint
 import mxnet as mx
diff --git a/example/rcnn/rcnn/tools/test_rpn.py b/example/rcnn/rcnn/tools/test_rpn.py
index 09f6af7..f2244a5 100644
--- a/example/rcnn/rcnn/tools/test_rpn.py
+++ b/example/rcnn/rcnn/tools/test_rpn.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import argparse
 import pprint
 import mxnet as mx
diff --git a/example/rcnn/rcnn/tools/train_rcnn.py b/example/rcnn/rcnn/tools/train_rcnn.py
index 3f1cde3..c5417b3 100644
--- a/example/rcnn/rcnn/tools/train_rcnn.py
+++ b/example/rcnn/rcnn/tools/train_rcnn.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import argparse
 import pprint
 import mxnet as mx
diff --git a/example/rcnn/rcnn/tools/train_rpn.py b/example/rcnn/rcnn/tools/train_rpn.py
index 87b92c8..aaaf570 100644
--- a/example/rcnn/rcnn/tools/train_rpn.py
+++ b/example/rcnn/rcnn/tools/train_rpn.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import argparse
 import pprint
 import mxnet as mx
diff --git a/example/rcnn/rcnn/utils/combine_model.py b/example/rcnn/rcnn/utils/combine_model.py
index 5518dda..eabe937 100644
--- a/example/rcnn/rcnn/utils/combine_model.py
+++ b/example/rcnn/rcnn/utils/combine_model.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from load_model import load_checkpoint
 from save_model import save_checkpoint
 
diff --git a/example/rcnn/rcnn/utils/load_data.py b/example/rcnn/rcnn/utils/load_data.py
index 4700229..816b3b3 100644
--- a/example/rcnn/rcnn/utils/load_data.py
+++ b/example/rcnn/rcnn/utils/load_data.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import numpy as np
 from ..logger import logger
 from ..config import config
diff --git a/example/rcnn/rcnn/utils/load_model.py b/example/rcnn/rcnn/utils/load_model.py
index 6f83548..0dc0752 100644
--- a/example/rcnn/rcnn/utils/load_model.py
+++ b/example/rcnn/rcnn/utils/load_model.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 
 
diff --git a/example/rcnn/rcnn/utils/save_model.py b/example/rcnn/rcnn/utils/save_model.py
index 1c98869..f27fb61 100644
--- a/example/rcnn/rcnn/utils/save_model.py
+++ b/example/rcnn/rcnn/utils/save_model.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 
 
diff --git a/example/rcnn/script/additional_deps.sh b/example/rcnn/script/additional_deps.sh
index 17e3d0b..0e6599c 100755
--- a/example/rcnn/script/additional_deps.sh
+++ b/example/rcnn/script/additional_deps.sh
@@ -1,5 +1,23 @@
 #!/usr/bin/env bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 # install additional dependencies
 sudo apt install python-pip python-dev unzip python-matplotlib
 sudo pip install cython scikit-image easydict
diff --git a/example/rcnn/script/get_coco.sh b/example/rcnn/script/get_coco.sh
index d49046c..a2f8f90 100755
--- a/example/rcnn/script/get_coco.sh
+++ b/example/rcnn/script/get_coco.sh
@@ -1,5 +1,23 @@
 #!/usr/bin/env bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 # make a data folder
 if ! [ -e data ]
 then
diff --git a/example/rcnn/script/get_pretrained_model.sh b/example/rcnn/script/get_pretrained_model.sh
index f63128d..746be0b 100755
--- a/example/rcnn/script/get_pretrained_model.sh
+++ b/example/rcnn/script/get_pretrained_model.sh
@@ -1,5 +1,23 @@
 #!/usr/bin/env bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 # make a model folder
 if ! [ -e model ]
 then
diff --git a/example/rcnn/script/get_selective_search.sh b/example/rcnn/script/get_selective_search.sh
index 728bd8f..487c653 100755
--- a/example/rcnn/script/get_selective_search.sh
+++ b/example/rcnn/script/get_selective_search.sh
@@ -1,5 +1,23 @@
 #!/usr/bin/env bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 # make a data folder
 if ! [ -e data ]
 then
diff --git a/example/rcnn/script/get_voc.sh b/example/rcnn/script/get_voc.sh
index c0cefa9..060b793 100755
--- a/example/rcnn/script/get_voc.sh
+++ b/example/rcnn/script/get_voc.sh
@@ -1,5 +1,23 @@
 #!/usr/bin/env bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 # make a data folder
 if ! [ -e data ]
 then
diff --git a/example/rcnn/script/resnet_voc07.sh b/example/rcnn/script/resnet_voc07.sh
index a5d16ec..3cb421f 100755
--- a/example/rcnn/script/resnet_voc07.sh
+++ b/example/rcnn/script/resnet_voc07.sh
@@ -1,5 +1,23 @@
 #!/usr/bin/env bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 # run this experiment with
 # nohup bash script/resnet_voc07.sh 0,1 &> resnet_voc07.log &
 # to use gpu 0,1 to train, gpu 0 to test and write logs to resnet_voc07.log
diff --git a/example/rcnn/script/resnet_voc0712.sh b/example/rcnn/script/resnet_voc0712.sh
index c993d49..aa2bd39 100755
--- a/example/rcnn/script/resnet_voc0712.sh
+++ b/example/rcnn/script/resnet_voc0712.sh
@@ -1,5 +1,23 @@
 #!/usr/bin/env bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 # run this experiment with
 # nohup bash script/resnet_voc0712.sh 0,1 &> resnet_voc0712.log &
 # to use gpu 0,1 to train, gpu 0 to test and write logs to resnet_voc0712.log
diff --git a/example/rcnn/script/vgg_alter_voc07.sh b/example/rcnn/script/vgg_alter_voc07.sh
index 1345f4f..72ee0cd 100755
--- a/example/rcnn/script/vgg_alter_voc07.sh
+++ b/example/rcnn/script/vgg_alter_voc07.sh
@@ -1,5 +1,23 @@
 #!/usr/bin/env bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 # run this experiment with
 # nohup bash script/vgg_alter_voc07.sh 0,1 &> vgg_voc07.log &
 # to use gpu 0,1 to train, gpu 0 to test and write logs to vgg_voc07.log
diff --git a/example/rcnn/script/vgg_fast_rcnn.sh b/example/rcnn/script/vgg_fast_rcnn.sh
index 7d863b5..cafd2ea 100755
--- a/example/rcnn/script/vgg_fast_rcnn.sh
+++ b/example/rcnn/script/vgg_fast_rcnn.sh
@@ -1,5 +1,23 @@
 #!/usr/bin/env bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 # run this experiment with
 # nohup bash script/vgg_fast_rcnn.sh 0,1 &> vgg_fast_rcnn.log &
 # to use gpu 0,1 to train, gpu 0 to test and write logs to vgg_fast_rcnn.log
diff --git a/example/rcnn/script/vgg_voc07.sh b/example/rcnn/script/vgg_voc07.sh
index 4b70f7c..22249e1 100755
--- a/example/rcnn/script/vgg_voc07.sh
+++ b/example/rcnn/script/vgg_voc07.sh
@@ -1,5 +1,23 @@
 #!/usr/bin/env bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 # run this experiment with
 # nohup bash script/vgg_voc07.sh 0,1 &> vgg_voc07.log &
 # to use gpu 0,1 to train, gpu 0 to test and write logs to vgg_voc07.log
diff --git a/example/rcnn/script/vgg_voc0712.sh b/example/rcnn/script/vgg_voc0712.sh
index ff24904..22416da 100755
--- a/example/rcnn/script/vgg_voc0712.sh
+++ b/example/rcnn/script/vgg_voc0712.sh
@@ -1,5 +1,23 @@
 #!/usr/bin/env bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 # run this experiment with
 # nohup bash script/vgg_voc0712.sh 0,1 &> vgg_voc0712.log &
 # to use gpu 0,1 to train, gpu 0 to test and write logs to vgg_voc0712.log
diff --git a/example/rcnn/test.py b/example/rcnn/test.py
index 12fe697..2989bc0 100644
--- a/example/rcnn/test.py
+++ b/example/rcnn/test.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import argparse
 import mxnet as mx
 from rcnn.logger import logger
diff --git a/example/rcnn/train_alternate.py b/example/rcnn/train_alternate.py
index 74f16b9..7158160 100644
--- a/example/rcnn/train_alternate.py
+++ b/example/rcnn/train_alternate.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import argparse
 import mxnet as mx
 
diff --git a/example/rcnn/train_end2end.py b/example/rcnn/train_end2end.py
index b8b1c5c..5c94293 100644
--- a/example/rcnn/train_end2end.py
+++ b/example/rcnn/train_end2end.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import argparse
 import pprint
 import mxnet as mx
diff --git a/example/recommenders/crossentropy.py b/example/recommenders/crossentropy.py
index 79fee74..51648b0 100644
--- a/example/recommenders/crossentropy.py
+++ b/example/recommenders/crossentropy.py
@@ -1,4 +1,22 @@
 #!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """Cross-entropy loss layer for MXNet.
 """
 import os
@@ -12,13 +30,13 @@ class CrossEntropyLoss(mx.operator.CustomOp):
     """An output layer that calculates gradient for cross-entropy loss
     y * log(p) + (1-y) * log(p)
     for label "y" and prediction "p".
-    However, the output of this layer is the original prediction -- same as 
+    However, the output of this layer is the original prediction -- same as
     the "data" input, making it useful for tasks like "predict".
     If you actually want to use the calculated loss, see CrossEntropyLoss op.
 
     This is useful for multi-label prediction where each possible output
     label is considered independently.
-    Cross-entropy loss provides a very large penalty for guessing 
+    Cross-entropy loss provides a very large penalty for guessing
     the wrong answer (0 or 1) confidently.
     The gradient calculation is optimized for y only being 0 or 1.
     """
@@ -93,7 +111,7 @@ if __name__ == "__main__":
     print("Simple test of cross-entropy")
     data = mx.symbol.Variable('data')
     labs = mx.symbol.Variable('labs')
-    net = mx.symbol.Custom(data=data, label=labs, name='ce', 
+    net = mx.symbol.Custom(data=data, label=labs, name='ce',
             op_type='CrossEntropyLoss')
     rand = np.random.RandomState(seed=123)
     for i in range(20):
diff --git a/example/recommenders/matrix_fact.py b/example/recommenders/matrix_fact.py
index 90be41e..73f561a 100644
--- a/example/recommenders/matrix_fact.py
+++ b/example/recommenders/matrix_fact.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import math
 import mxnet as mx
 import numpy as np
diff --git a/example/recommenders/movielens_data.py b/example/recommenders/movielens_data.py
index 157e8c2..3d664fb 100644
--- a/example/recommenders/movielens_data.py
+++ b/example/recommenders/movielens_data.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """MovieLens data handling: download, parse, and expose as DataIter
 """
 
@@ -22,13 +39,13 @@ def load_mldata_iter(filename, batch_size):
     user = mx.nd.array(user)
     item = mx.nd.array(item)
     score = mx.nd.array(score)
-    return mx.io.NDArrayIter(data={'user':user,'item':item},label={'score':score}, 
+    return mx.io.NDArrayIter(data={'user':user,'item':item},label={'score':score},
                              batch_size=batch_size, shuffle=True)
 
 def ensure_local_data(prefix):
     if not os.path.exists("%s.zip" % prefix):
         print("Downloading MovieLens data: %s" % prefix)
-        os.system("wget http://files.grouplens.org/datasets/movielens/%s.zip" % prefix) 
+        os.system("wget http://files.grouplens.org/datasets/movielens/%s.zip" % prefix)
         os.system("unzip %s.zip" % prefix)
 
 
@@ -36,7 +53,7 @@ def get_data_iter(batch_size, prefix='ml-100k'):
     """Returns a pair of NDArrayDataIter, one for train, one for test.
     """
     ensure_local_data(prefix)
-    return (load_mldata_iter('./%s/u1.base' % prefix, batch_size), 
+    return (load_mldata_iter('./%s/u1.base' % prefix, batch_size),
             load_mldata_iter('./%s/u1.test' % prefix, batch_size))
 
 def max_id(fname):
diff --git a/example/recommenders/negativesample.py b/example/recommenders/negativesample.py
index ecbd85e..0b4ea84 100644
--- a/example/recommenders/negativesample.py
+++ b/example/recommenders/negativesample.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """DataIter for negative sampling.
 """
 import mxnet as mx
@@ -8,7 +25,7 @@ class NegativeSamplingDataIter(mx.io.DataIter):
     Assumes that all the relevant inputs are in data, not labels.
     Drops (replaces) any labels in the original DataIter.
 
-    It only shuffles one of the input data columns, specified in the 
+    It only shuffles one of the input data columns, specified in the
     constructor as shuffle_data_idx.  So if the original input data
     has three columns, ('item_ids', 'item_words', 'users') and you want
     to keep the two "item_*" together, then set `shuffle_data_idx=2`
@@ -46,7 +63,7 @@ class NegativeSamplingDataIter(mx.io.DataIter):
         self._sampled_queue = []
 
     def _push_queue(self, data_list, labels):
-        """Takes a list of numpy arrays for data, 
+        """Takes a list of numpy arrays for data,
         and a numpy array for labels.
         Converts to minibatches and puts it on the queue.
         """
diff --git a/example/recommenders/randomproj.py b/example/recommenders/randomproj.py
index 539f50e..ba080a0 100644
--- a/example/recommenders/randomproj.py
+++ b/example/recommenders/randomproj.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """Random projection layers in MXNet as custom python ops.
 Currently slow and memory-inefficient, but functional.
 """
@@ -34,7 +51,7 @@ class RandomBagOfWordsProjection(mx.operator.CustomOp):
 
     def _get_mask(self, idx, in_data):
         """Returns the mask by which to multiply the parts of the embedding layer.
-        In this version, we have no weights to apply.  
+        In this version, we have no weights to apply.
         """
         mask = idx >= 0  # bool False for -1 values that should be removed. shape=(b,mnz)
         mask = np.expand_dims(mask,2) # shape = (b,mnz,1)
@@ -98,7 +115,7 @@ class SparseRandomProjection(RandomBagOfWordsProjection):
         mask = np.expand_dims(mask,2) # shape = (b,mnz,1)
         mask = np.repeat(mask, self._proj_dim, axis=2) # shape = (b,mnz,d)
         return mask
-        
+
 
 @mx.operator.register("SparseRandomProjection")
 class SparseRandomProjectionProp(RandomBagOfWordsProjectionProp):
@@ -121,8 +138,8 @@ if __name__ == "__main__":
     print("Simple test of proj layer")
     data = mx.symbol.Variable('data')
     vals = mx.symbol.Variable('vals')
-    net = mx.symbol.Custom(indexes=data, values=vals, name='rproj', 
-            op_type='SparseRandomProjection', 
+    net = mx.symbol.Custom(indexes=data, values=vals, name='rproj',
+            op_type='SparseRandomProjection',
             vocab_size=999, output_dim=29)
     d = mx.nd.zeros(shape=(3,100))
     v = mx.nd.ones(shape=(3,100))
diff --git a/example/recommenders/recotools.py b/example/recommenders/recotools.py
index f368170..250baa5 100644
--- a/example/recommenders/recotools.py
+++ b/example/recommenders/recotools.py
@@ -1,7 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 
 from negativesample import NegativeSamplingDataIter
-import randomproj 
+import randomproj
 import crossentropy
 
 def CosineLoss(a, b, label):
diff --git a/example/recommenders/symbol_alexnet.py b/example/recommenders/symbol_alexnet.py
index 20a3547..e5d02f0 100644
--- a/example/recommenders/symbol_alexnet.py
+++ b/example/recommenders/symbol_alexnet.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """
 Reference:
 
diff --git a/example/reinforcement-learning/a3c/a3c.py b/example/reinforcement-learning/a3c/a3c.py
index 19ab230..4d89a24 100644
--- a/example/reinforcement-learning/a3c/a3c.py
+++ b/example/reinforcement-learning/a3c/a3c.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 import mxnet as mx
 import numpy as np
@@ -76,7 +93,7 @@ def train():
 
     if args.kv_store == 'dist_sync':
         epoch_size /= kv.num_workers
-    
+
     # disable kvstore for single device
     if 'local' in kv.type and (
             args.gpus is None or len(args.gpus.split(',')) is 1):
@@ -164,7 +181,7 @@ def train():
                 print('h', h[0].asnumpy())
                 err += (adv**2).mean()
                 score += r[i]
-                final_score *= (1-D[i]) 
+                final_score *= (1-D[i])
                 final_score += score * D[i]
                 score *= 1-D[i]
                 T += D[i].sum()
diff --git a/example/reinforcement-learning/a3c/launcher.py b/example/reinforcement-learning/a3c/launcher.py
index 8a4a7d1..e0bda21 100644
--- a/example/reinforcement-learning/a3c/launcher.py
+++ b/example/reinforcement-learning/a3c/launcher.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """Submission job for local jobs."""
 # pylint: disable=invalid-name
 from __future__ import absolute_import
diff --git a/example/reinforcement-learning/a3c/rl_data.py b/example/reinforcement-learning/a3c/rl_data.py
index 0d16bca..ad78975 100644
--- a/example/reinforcement-learning/a3c/rl_data.py
+++ b/example/reinforcement-learning/a3c/rl_data.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 import mxnet as mx
 import numpy as np
diff --git a/example/reinforcement-learning/a3c/sym.py b/example/reinforcement-learning/a3c/sym.py
index d3e1767..c48d752 100644
--- a/example/reinforcement-learning/a3c/sym.py
+++ b/example/reinforcement-learning/a3c/sym.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 
 def get_symbol_atari(act_dim):
diff --git a/example/reinforcement-learning/ddpg/ddpg.py b/example/reinforcement-learning/ddpg/ddpg.py
index 4ded9b9..aa34e4d 100644
--- a/example/reinforcement-learning/ddpg/ddpg.py
+++ b/example/reinforcement-learning/ddpg/ddpg.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from replay_mem import ReplayMem
 from utils import discount_return, sample_rewards
 import rllab.misc.logger as logger
diff --git a/example/reinforcement-learning/ddpg/policies.py b/example/reinforcement-learning/ddpg/policies.py
index 2a625c8..2bae8f6 100644
--- a/example/reinforcement-learning/ddpg/policies.py
+++ b/example/reinforcement-learning/ddpg/policies.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from utils import define_policy
 import mxnet as mx
 
@@ -40,7 +57,7 @@ class DeterministicMLPPolicy(Policy):
 
         self.obs = mx.symbol.Variable("obs")
         self.act = define_policy(
-            self.obs, 
+            self.obs,
             self.env_spec.action_space.flat_dim)
 
     def get_output_symbol(self):
@@ -59,7 +76,7 @@ class DeterministicMLPPolicy(Policy):
 
         raise NotImplementedError
 
-    def define_exe(self, ctx, init, updater, input_shapes=None, args=None, 
+    def define_exe(self, ctx, init, updater, input_shapes=None, args=None,
                     grad_req=None):
 
         # define an executor, initializer and updater for batch version
@@ -71,7 +88,7 @@ class DeterministicMLPPolicy(Policy):
         for name, arr in self.arg_dict.items():
             if name not in input_shapes:
                 init(name, arr)
-                
+
         self.updater = updater
 
         # define an executor for sampled single observation
@@ -110,4 +127,4 @@ class DeterministicMLPPolicy(Policy):
 
 
 
-        
\ No newline at end of file
+
diff --git a/example/reinforcement-learning/ddpg/qfuncs.py b/example/reinforcement-learning/ddpg/qfuncs.py
index 21957c4..7dbc1d6 100644
--- a/example/reinforcement-learning/ddpg/qfuncs.py
+++ b/example/reinforcement-learning/ddpg/qfuncs.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from utils import define_qfunc
 import mxnet as mx
 
@@ -47,7 +64,7 @@ class ContinuousMLPQ(QFunc):
         self.loss = mx.symbol.MakeLoss(loss_exp, name="qfunc_loss")
         self.loss = mx.symbol.Group([self.loss, mx.symbol.BlockGrad(self.qval)])
 
-    def define_exe(self, ctx, init, updater, input_shapes=None, args=None, 
+    def define_exe(self, ctx, init, updater, input_shapes=None, args=None,
                     grad_req=None):
 
         # define an executor, initializer and updater for batch version loss
@@ -55,11 +72,11 @@ class ContinuousMLPQ(QFunc):
         self.arg_arrays = self.exe.arg_arrays
         self.grad_arrays = self.exe.grad_arrays
         self.arg_dict = self.exe.arg_dict
-        
+
         for name, arr in self.arg_dict.items():
             if name not in input_shapes:
                 init(name, arr)
-                
+
         self.updater = updater
 
     def update_params(self, obs, act, yval):
diff --git a/example/reinforcement-learning/ddpg/replay_mem.py b/example/reinforcement-learning/ddpg/replay_mem.py
index 885d7da..47e9bc8 100644
--- a/example/reinforcement-learning/ddpg/replay_mem.py
+++ b/example/reinforcement-learning/ddpg/replay_mem.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 import numpy as np
 
@@ -5,7 +22,7 @@ import numpy as np
 class ReplayMem(object):
 
     def __init__(
-        self, 
+        self,
         obs_dim,
         act_dim,
         memory_size=1000000):
diff --git a/example/reinforcement-learning/ddpg/run.py b/example/reinforcement-learning/ddpg/run.py
index 0cd162f..043cd99 100644
--- a/example/reinforcement-learning/ddpg/run.py
+++ b/example/reinforcement-learning/ddpg/run.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from ddpg import DDPG
 from rllab.envs.box2d.cartpole_env import CartpoleEnv
 from rllab.envs.normalized_env import normalize
@@ -32,4 +49,4 @@ algo = DDPG(
     policy_lr=1e-4,
     seed=SEED)
 
-algo.train()
\ No newline at end of file
+algo.train()
diff --git a/example/reinforcement-learning/ddpg/strategies.py b/example/reinforcement-learning/ddpg/strategies.py
index c346e9e..d73ad06 100644
--- a/example/reinforcement-learning/ddpg/strategies.py
+++ b/example/reinforcement-learning/ddpg/strategies.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import numpy as np
 
 
@@ -28,7 +45,7 @@ class OUStrategy(BaseStrategy):
         self.sigma = sigma
         self.action_space = env_spec.action_space
         self.state = np.ones(self.action_space.flat_dim) * self.mu
-        
+
     def evolve_state(self):
 
         x = self.state
@@ -47,9 +64,9 @@ class OUStrategy(BaseStrategy):
     	obs = obs.reshape((1, -1))
         action = policy.get_action(obs)
         increment = self.evolve_state()
-        
-        return np.clip(action + increment, 
-                       self.action_space.low, 
+
+        return np.clip(action + increment,
+                       self.action_space.low,
                        self.action_space.high)
 
 
diff --git a/example/reinforcement-learning/ddpg/utils.py b/example/reinforcement-learning/ddpg/utils.py
index 8c063db..a9a4456 100644
--- a/example/reinforcement-learning/ddpg/utils.py
+++ b/example/reinforcement-learning/ddpg/utils.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 import numpy as np
 
diff --git a/example/reinforcement-learning/dqn/atari_game.py b/example/reinforcement-learning/dqn/atari_game.py
index 369016f..5c1314f 100644
--- a/example/reinforcement-learning/dqn/atari_game.py
+++ b/example/reinforcement-learning/dqn/atari_game.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 __author__ = 'sxjscience'
 
 import mxnet as mx
diff --git a/example/reinforcement-learning/dqn/base.py b/example/reinforcement-learning/dqn/base.py
index f433d5d..ce82f2b 100644
--- a/example/reinforcement-learning/dqn/base.py
+++ b/example/reinforcement-learning/dqn/base.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import absolute_import, division, print_function
 
 import mxnet as mx
diff --git a/example/reinforcement-learning/dqn/dqn_demo.py b/example/reinforcement-learning/dqn/dqn_demo.py
index 000a796..750da7a 100644
--- a/example/reinforcement-learning/dqn/dqn_demo.py
+++ b/example/reinforcement-learning/dqn/dqn_demo.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 import mxnet.ndarray as nd
 import numpy
diff --git a/example/reinforcement-learning/dqn/dqn_run_test.py b/example/reinforcement-learning/dqn/dqn_run_test.py
index 1a918eb..2abf273 100644
--- a/example/reinforcement-learning/dqn/dqn_run_test.py
+++ b/example/reinforcement-learning/dqn/dqn_run_test.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 import mxnet.ndarray as nd
 import numpy
diff --git a/example/reinforcement-learning/dqn/game.py b/example/reinforcement-learning/dqn/game.py
index 0e1b7f1..e17cc6f 100644
--- a/example/reinforcement-learning/dqn/game.py
+++ b/example/reinforcement-learning/dqn/game.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 
 DEFAULT_MAX_EPISODE_STEP = 1000000
 
@@ -29,4 +46,4 @@ class Game(object):
         return self.replay_memory.latest_slice()
 
     def play(self, a):
-        raise NotImplementedError
\ No newline at end of file
+        raise NotImplementedError
diff --git a/example/reinforcement-learning/dqn/operators.py b/example/reinforcement-learning/dqn/operators.py
index e8180c4..0c9b588 100644
--- a/example/reinforcement-learning/dqn/operators.py
+++ b/example/reinforcement-learning/dqn/operators.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 import mxnet.ndarray as nd
 import numpy
diff --git a/example/reinforcement-learning/dqn/replay_memory.py b/example/reinforcement-learning/dqn/replay_memory.py
index 42f4866..02691a0 100644
--- a/example/reinforcement-learning/dqn/replay_memory.py
+++ b/example/reinforcement-learning/dqn/replay_memory.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import absolute_import, division, print_function
 
 import mxnet as mx
diff --git a/example/reinforcement-learning/dqn/utils.py b/example/reinforcement-learning/dqn/utils.py
index 7d84bba..bae11e1 100644
--- a/example/reinforcement-learning/dqn/utils.py
+++ b/example/reinforcement-learning/dqn/utils.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import absolute_import, division, print_function
 
 import os
diff --git a/example/reinforcement-learning/parallel_actor_critic/config.py b/example/reinforcement-learning/parallel_actor_critic/config.py
index 48ef1d0..e962cf9 100644
--- a/example/reinforcement-learning/parallel_actor_critic/config.py
+++ b/example/reinforcement-learning/parallel_actor_critic/config.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 
 
diff --git a/example/reinforcement-learning/parallel_actor_critic/envs.py b/example/reinforcement-learning/parallel_actor_critic/envs.py
index 09f30d7..a537df6 100644
--- a/example/reinforcement-learning/parallel_actor_critic/envs.py
+++ b/example/reinforcement-learning/parallel_actor_critic/envs.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import numpy as np
 
 
diff --git a/example/reinforcement-learning/parallel_actor_critic/model.py b/example/reinforcement-learning/parallel_actor_critic/model.py
index 8fba78f..b90af67 100644
--- a/example/reinforcement-learning/parallel_actor_critic/model.py
+++ b/example/reinforcement-learning/parallel_actor_critic/model.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from itertools import chain
 import numpy as np
 import scipy.signal
diff --git a/example/reinforcement-learning/parallel_actor_critic/train.py b/example/reinforcement-learning/parallel_actor_critic/train.py
index d559ff3..128a550 100644
--- a/example/reinforcement-learning/parallel_actor_critic/train.py
+++ b/example/reinforcement-learning/parallel_actor_critic/train.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """Trains an `Agent` using trajectories from multiple environments."""
 
 import argparse
diff --git a/example/rnn-time-major/bucket_io.py b/example/rnn-time-major/bucket_io.py
index 5cf2c81..950b0c0 100644
--- a/example/rnn-time-major/bucket_io.py
+++ b/example/rnn-time-major/bucket_io.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: disable=C0111,too-many-arguments,too-many-instance-attributes,too-many-locals,redefined-outer-name,fixme
 # pylint: disable=superfluous-parens, no-member, invalid-name
 from __future__ import print_function
@@ -58,7 +75,7 @@ def default_gen_buckets(sentences, batch_size, the_vocab):
 
     tl = 0
     buckets = []
-    for l, n in len_dict.items(): # TODO: There are better heuristic ways to do this    
+    for l, n in len_dict.items(): # TODO: There are better heuristic ways to do this
         if n + tl >= batch_size:
             buckets.append(l)
             tl = 0
@@ -217,7 +234,7 @@ class BucketSentenceIter(mx.io.DataIter):
             i_idx = self.bucket_curr_idx[i_bucket]
             idx = self.bucket_idx_all[i_bucket][i_idx:i_idx+self.batch_size]
             self.bucket_curr_idx[i_bucket] += self.batch_size
-            
+
             init_state_names = [x[0] for x in self.init_states]
 
             if self.time_major:
diff --git a/example/rnn-time-major/get_ptb_data.sh b/example/rnn-time-major/get_ptb_data.sh
index 1ec009a..d2641cb 100755
--- a/example/rnn-time-major/get_ptb_data.sh
+++ b/example/rnn-time-major/get_ptb_data.sh
@@ -1,5 +1,23 @@
 #!/usr/bin/env bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 RNN_DIR=$(cd `dirname $0`; pwd)
 DATA_DIR="${RNN_DIR}/data/"
 
diff --git a/example/rnn-time-major/rnn_cell_demo.py b/example/rnn-time-major/rnn_cell_demo.py
index cb69c55..c29d1dd 100644
--- a/example/rnn-time-major/rnn_cell_demo.py
+++ b/example/rnn-time-major/rnn_cell_demo.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """A simple demo of new RNN cell with PTB language model."""
 
 ################################################################################
@@ -100,7 +117,7 @@ if __name__ == '__main__':
         # RNN cell takes input of shape (time, batch, feature)
         rnn = mx.sym.RNN(data=embed, state_size=num_hidden,
                          num_layers=num_lstm_layer, mode='lstm',
-                         name='LSTM', 
+                         name='LSTM',
                          # The following params can be omitted
                          # provided we do not need to apply the
                          # workarounds mentioned above
@@ -134,7 +151,7 @@ if __name__ == '__main__':
     if len(buckets) == 1:
         mod = mx.mod.Module(*sym_gen(buckets[0]), context=contexts)
     else:
-        mod = mx.mod.BucketingModule(sym_gen, 
+        mod = mx.mod.BucketingModule(sym_gen,
                                      default_bucket_key=data_train.default_bucket_key,
                                      context=contexts)
 
diff --git a/example/rnn/cudnn_lstm_bucketing.py b/example/rnn/cudnn_lstm_bucketing.py
index fbf32bb..e9c3237 100644
--- a/example/rnn/cudnn_lstm_bucketing.py
+++ b/example/rnn/cudnn_lstm_bucketing.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import numpy as np
 import mxnet as mx
 import argparse
diff --git a/example/rnn/get_ptb_data.sh b/example/rnn/get_ptb_data.sh
index 1ec009a..d2641cb 100755
--- a/example/rnn/get_ptb_data.sh
+++ b/example/rnn/get_ptb_data.sh
@@ -1,5 +1,23 @@
 #!/usr/bin/env bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 RNN_DIR=$(cd `dirname $0`; pwd)
 DATA_DIR="${RNN_DIR}/data/"
 
diff --git a/example/rnn/lstm_bucketing.py b/example/rnn/lstm_bucketing.py
index 609276a..2e7bc65 100644
--- a/example/rnn/lstm_bucketing.py
+++ b/example/rnn/lstm_bucketing.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import numpy as np
 import mxnet as mx
 import argparse
diff --git a/example/rnn/old/bucket_io.py b/example/rnn/old/bucket_io.py
index f515e34..21f96ef 100644
--- a/example/rnn/old/bucket_io.py
+++ b/example/rnn/old/bucket_io.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: disable=C0111,too-many-arguments,too-many-instance-attributes,too-many-locals,redefined-outer-name,fixme
 # pylint: disable=superfluous-parens, no-member, invalid-name
 from __future__ import print_function
@@ -58,7 +75,7 @@ def default_gen_buckets(sentences, batch_size, the_vocab):
 
     tl = 0
     buckets = []
-    for l, n in len_dict.items(): # TODO: There are better heuristic ways to do this    
+    for l, n in len_dict.items(): # TODO: There are better heuristic ways to do this
         if n + tl >= batch_size:
             buckets.append(l)
             tl = 0
@@ -210,7 +227,7 @@ class BucketSentenceIter(mx.io.DataIter):
                 self.data_buffer.append(data)
 
         if self.model_parallel:
-            # Transpose data if model parallel 
+            # Transpose data if model parallel
             for i in range(len(self.data)):
                 bucket_data = self.data[i]
                 self.data[i] = np.transpose(bucket_data)
@@ -222,8 +239,8 @@ class BucketSentenceIter(mx.io.DataIter):
             i_idx = self.bucket_curr_idx[i_bucket]
             idx = self.bucket_idx_all[i_bucket][i_idx:i_idx+self.batch_size]
             self.bucket_curr_idx[i_bucket] += self.batch_size
-            
-            # Model parallelism 
+
+            # Model parallelism
             if self.model_parallel:
                 if self.data[i_bucket][:, idx].shape[1] == 0:
                     print("WARNING: detected shape " + str(self.data[i_bucket][:, idx].shape))
@@ -231,7 +248,7 @@ class BucketSentenceIter(mx.io.DataIter):
                 data[:] = self.data[i_bucket][:, idx]
                 data_batch = ModelParallelBatch(data, self.buckets[i_bucket])
                 yield data_batch
-            
+
             # Data parallelism
             else:
                 init_state_names = [x[0] for x in self.init_states]
@@ -239,7 +256,7 @@ class BucketSentenceIter(mx.io.DataIter):
 
                 for sentence in data:
                     assert len(sentence) == self.buckets[i_bucket]
-                
+
                 label = self.label_buffer[i_bucket]
                 label[:, :-1] = data[:, 1:]
                 label[:, -1] = 0
@@ -255,4 +272,4 @@ class BucketSentenceIter(mx.io.DataIter):
 
 
     def reset(self):
-        self.bucket_curr_idx = [0 for x in self.data]
\ No newline at end of file
+        self.bucket_curr_idx = [0 for x in self.data]
diff --git a/example/rnn/old/get_ptb_data.sh b/example/rnn/old/get_ptb_data.sh
index 1ec009a..d2641cb 100755
--- a/example/rnn/old/get_ptb_data.sh
+++ b/example/rnn/old/get_ptb_data.sh
@@ -1,5 +1,23 @@
 #!/usr/bin/env bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 RNN_DIR=$(cd `dirname $0`; pwd)
 DATA_DIR="${RNN_DIR}/data/"
 
diff --git a/example/rnn/old/gru.py b/example/rnn/old/gru.py
index 5b5138b..e6ec095 100644
--- a/example/rnn/old/gru.py
+++ b/example/rnn/old/gru.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: disable=C0111,too-many-arguments,too-many-instance-attributes,too-many-locals,redefined-outer-name,fixme
 # pylint: disable=superfluous-parens, no-member, invalid-name
 import sys
diff --git a/example/rnn/old/gru_bucketing.py b/example/rnn/old/gru_bucketing.py
index 859d449..226018c 100644
--- a/example/rnn/old/gru_bucketing.py
+++ b/example/rnn/old/gru_bucketing.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: disable=C0111,too-many-arguments,too-many-instance-attributes,too-many-locals,redefined-outer-name,fixme
 # pylint: disable=superfluous-parens, no-member, invalid-name
 import sys
diff --git a/example/rnn/old/lstm.py b/example/rnn/old/lstm.py
index d67b0db..84509a3 100644
--- a/example/rnn/old/lstm.py
+++ b/example/rnn/old/lstm.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint:skip-file
 import sys
 sys.path.insert(0, "../../python")
diff --git a/example/rnn/old/lstm_bucketing.py b/example/rnn/old/lstm_bucketing.py
index 78fa4f8..3e34947 100644
--- a/example/rnn/old/lstm_bucketing.py
+++ b/example/rnn/old/lstm_bucketing.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: disable=C0111,too-many-arguments,too-many-instance-attributes,too-many-locals,redefined-outer-name,fixme
 # pylint: disable=superfluous-parens, no-member, invalid-name
 import sys
diff --git a/example/rnn/old/rnn.py b/example/rnn/old/rnn.py
index 136c403..fe7bdbd 100644
--- a/example/rnn/old/rnn.py
+++ b/example/rnn/old/rnn.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import sys
 sys.path.insert(0, "../../python/")
 import mxnet as mx
diff --git a/example/rnn/old/rnn_cell_demo.py b/example/rnn/old/rnn_cell_demo.py
index 2c798e2..3223e93 100644
--- a/example/rnn/old/rnn_cell_demo.py
+++ b/example/rnn/old/rnn_cell_demo.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """A simple demo of new RNN cell with PTB language model."""
 
 import os
@@ -87,12 +104,12 @@ if __name__ == '__main__':
         # RNN cell takes input of shape (time, batch, feature)
         rnn = mx.sym.RNN(data=embed_tm, state_size=num_hidden,
                          num_layers=num_lstm_layer, mode='lstm',
-                         name='LSTM', 
+                         name='LSTM',
                          # The following params can be omitted
                          # provided we do not need to apply the
                          # workarounds mentioned above
                          state=rnn_h_init,
-                         state_cell=rnn_c_init, 
+                         state_cell=rnn_c_init,
                          parameters=rnn_params)
 
         # the RNN cell output is of shape (time, batch, dim)
diff --git a/example/rnn/old/rnn_model.py b/example/rnn/old/rnn_model.py
index 2135abd..6fe0d22 100644
--- a/example/rnn/old/rnn_model.py
+++ b/example/rnn/old/rnn_model.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 
 # pylint: disable=C0111,too-many-arguments,too-many-instance-attributes,too-many-locals,redefined-outer-name,fixme
 # pylint: disable=superfluous-parens, no-member, invalid-name
diff --git a/example/speech-demo/config_util.py b/example/speech-demo/config_util.py
index 9e2ecc4..6fd6a50 100644
--- a/example/speech-demo/config_util.py
+++ b/example/speech-demo/config_util.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import re
 import os
 import sys
diff --git a/example/speech-demo/decode_mxnet.py b/example/speech-demo/decode_mxnet.py
index 4680fbe..deb9c30 100644
--- a/example/speech-demo/decode_mxnet.py
+++ b/example/speech-demo/decode_mxnet.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import re
 import sys
 sys.path.insert(0, "../../python")
@@ -80,7 +97,7 @@ if __name__ == '__main__':
     num_epoch = args.config.getint('train', 'num_epoch')
     model_name = get_checkpoint_path(args)
     logging.basicConfig(level=logging.DEBUG, format='%(asctime)-15s %(message)s')
-    
+
     # load the model
     sym, arg_params, aux_params = mx.model.load_checkpoint(model_name, num_epoch)
 
@@ -89,7 +106,7 @@ if __name__ == '__main__':
         buckets = list(map(int, re.split(r'\W+', buckets)))
         data_test   = BucketSentenceIter(test_sets, buckets, batch_size, init_states, feat_dim=feat_dim, has_label=False)
         def sym_gen(seq_len):
-            sym = lstm_unroll(num_lstm_layer, seq_len, feat_dim, num_hidden=num_hidden, 
+            sym = lstm_unroll(num_lstm_layer, seq_len, feat_dim, num_hidden=num_hidden,
                               num_label=label_dim, take_softmax=True, num_hidden_proj=num_hidden_proj)
             data_names = ['data'] + state_names
             label_names = ['softmax_label']
@@ -102,7 +119,7 @@ if __name__ == '__main__':
         data_test = SimpleIter(test_sets, batch_size, init_states, feat_dim=feat_dim, label_dim=label_dim,
                 label_mean_sets=label_mean_sets, has_label=False)
         def sym_gen(seq_len):
-            sym = lstm_unroll(num_lstm_layer, seq_len, feat_dim, num_hidden=num_hidden, 
+            sym = lstm_unroll(num_lstm_layer, seq_len, feat_dim, num_hidden=num_hidden,
                               num_label=label_dim, take_softmax=False, num_hidden_proj=num_hidden_proj)
             data_names = ['data'] + state_names
             label_names = []
@@ -127,7 +144,7 @@ if __name__ == '__main__':
     # set the parameters
     module.bind(data_shapes=data_test.provide_data, label_shapes=None, for_training=False)
     module.set_params(arg_params=arg_params, aux_params=aux_params)
-    
+
     kaldiWriter = KaldiWriteOut(None, out_file)
     kaldiWriter.open_or_fd()
     for preds, i_batch, batch in module.iter_predict(data_test):
diff --git a/example/speech-demo/decode_mxnet.sh b/example/speech-demo/decode_mxnet.sh
index e5209b8..d300d0e 100755
--- a/example/speech-demo/decode_mxnet.sh
+++ b/example/speech-demo/decode_mxnet.sh
@@ -1,14 +1,32 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 # Copyright 2012-2013 Karel Vesely, Daniel Povey
 # 	    2015 Yu Zhang
 # Apache 2.0
 
-# Begin configuration section.  
+# Begin configuration section.
 nnet= # Optionally pre-select network to use for getting state-likelihoods
 feature_transform= # Optionally pre-select feature transform (in front of nnet)
 model= # Optionally pre-select transition model
-class_frame_counts= # Optionally pre-select class-counts used to compute PDF priors 
+class_frame_counts= # Optionally pre-select class-counts used to compute PDF priors
 
 stage=0 # stage=1 skips lattice generation
 nj=4
@@ -44,7 +62,7 @@ mkdir -p $dir/log
 echo $nj > $dir/num_jobs
 
 if [ -z "$model" ]; then # if --model <mdl> was not specified on the command line...
-  if [ -z $iter ]; then model=$srcdir/final.mdl; 
+  if [ -z $iter ]; then model=$srcdir/final.mdl;
   else model=$srcdir/$iter.mdl; fi
 fi
 
diff --git a/example/speech-demo/io_func/convert2kaldi.py b/example/speech-demo/io_func/convert2kaldi.py
index ffa8c4d..eac8ee6 100644
--- a/example/speech-demo/io_func/convert2kaldi.py
+++ b/example/speech-demo/io_func/convert2kaldi.py
@@ -1,4 +1,21 @@
-# Copyright 2013    Yajie Miao    Carnegie Mellon University 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Copyright 2013    Yajie Miao    Carnegie Mellon University
 
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -28,7 +45,7 @@ def _nnet2kaldi(nnet_spec, set_layer_num = -1, filein='nnet.in',
     _nnet2kaldi_main(nnet_spec, set_layer_num=set_layer_num, filein=filein,
                     fileout=fileout, activation=activation, withfinal=withfinal, maxout=False)
 
-def _nnet2kaldi_maxout(nnet_spec, pool_size = 1, set_layer_num = -1, 
+def _nnet2kaldi_maxout(nnet_spec, pool_size = 1, set_layer_num = -1,
                       filein='nnet.in', fileout='nnet.out', activation='sigmoid', withfinal=True):
     _nnet2kaldi_main(nnet_spec, set_layer_num=set_layer_num, filein=filein,
                     fileout=fileout, activation=activation, withfinal=withfinal,
@@ -110,4 +127,4 @@ def _nnet2kaldi_main(nnet_spec, set_layer_num = -1, filein='nnet.in',
         fout.write('[ ' + b_layer.strip() + ' ]' + '\n')
         fout.write('<softmax> ' + str(output_size) + ' ' + str(output_size) + '\n')
 
-    fout.close();
\ No newline at end of file
+    fout.close();
diff --git a/example/speech-demo/io_func/feat_io.py b/example/speech-demo/io_func/feat_io.py
index 83d417e..6a7e424 100644
--- a/example/speech-demo/io_func/feat_io.py
+++ b/example/speech-demo/io_func/feat_io.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 import os
 import sys
diff --git a/example/speech-demo/io_func/feat_readers/common.py b/example/speech-demo/io_func/feat_readers/common.py
index a7b6413..742d3e2 100644
--- a/example/speech-demo/io_func/feat_readers/common.py
+++ b/example/speech-demo/io_func/feat_readers/common.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import numpy
 import os
 
@@ -55,4 +72,4 @@ def getReader(fileformat, featureFile, labelFile):
 		return reader_kaldi.kaldiReader(featureFile, labelFile)
 	else:
 		msg = "Error: Specified format '{}' is not supported".format(fileformat)
-		raise Exception(msg)
\ No newline at end of file
+		raise Exception(msg)
diff --git a/example/speech-demo/io_func/feat_readers/reader_atrack.py b/example/speech-demo/io_func/feat_readers/reader_atrack.py
index 0bf1dee..e8db0fd 100644
--- a/example/speech-demo/io_func/feat_readers/reader_atrack.py
+++ b/example/speech-demo/io_func/feat_readers/reader_atrack.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import numpy
 import numpy as num
 import stats
@@ -30,11 +47,11 @@ class atrackReader(BaseReader):
         -1.677172 -1076449904 -1867655489
         -1.562828 -1077409088 -1073035073
         """
-            
+
         f = open(self.featureFile, "rb")
         header = num.fromfile(f, dtype=num.dtype('>i4'), count=7)
         self.checkHeader(header)
-        
+
         frameSize = header[1]
         numSamples = header[2]
 
@@ -46,4 +63,4 @@ class atrackReader(BaseReader):
 
         self._markDone()
 
-        return a, ReadLabel(self.labelFile)
\ No newline at end of file
+        return a, ReadLabel(self.labelFile)
diff --git a/example/speech-demo/io_func/feat_readers/reader_bvec.py b/example/speech-demo/io_func/feat_readers/reader_bvec.py
index ac68bf4..3a0f745 100644
--- a/example/speech-demo/io_func/feat_readers/reader_bvec.py
+++ b/example/speech-demo/io_func/feat_readers/reader_bvec.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 import struct
 import array
@@ -22,7 +39,7 @@ class bvecReader(BaseReader):
             print('Num samples = {}'.format(numSamples))
             print('dim = {}'.format(dim))
 
-            dt = numpy.dtype([('sample',(numpy.float32,dim))]) 
+            dt = numpy.dtype([('sample',(numpy.float32,dim))])
             samples = numpy.fromfile(f,dt.newbyteorder('>'),count=numSamples)
 
         self._markDone()
diff --git a/example/speech-demo/io_func/feat_readers/reader_htk.py b/example/speech-demo/io_func/feat_readers/reader_htk.py
index b04d6f3..dca24d9 100644
--- a/example/speech-demo/io_func/feat_readers/reader_htk.py
+++ b/example/speech-demo/io_func/feat_readers/reader_htk.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import numpy
 import stats
 from common import *
@@ -24,7 +41,7 @@ class htkReader(BaseReader):
             # print 'Sample period = {}'.format(sampPeriod)
             # print 'Sample size = {}'.format(sampSize)
             # print 'Sample kind = {}'.format(sampKind)
-            dt = numpy.dtype([('sample',(numpy.float32,sampSize/4))]) 
+            dt = numpy.dtype([('sample',(numpy.float32,sampSize/4))])
             samples = numpy.fromfile(f,dt.newbyteorder('>' if self.byteOrder==ByteOrder.BigEndian else '<'),count=numSamples)
 
         self._markDone()
@@ -33,5 +50,5 @@ class htkReader(BaseReader):
             labels = None
         else:
             labels = ReadLabel(self.labelFile)
-            
+
         return samples[:]['sample'], labels
diff --git a/example/speech-demo/io_func/feat_readers/reader_kaldi.py b/example/speech-demo/io_func/feat_readers/reader_kaldi.py
index bad6a9d..345934a 100644
--- a/example/speech-demo/io_func/feat_readers/reader_kaldi.py
+++ b/example/speech-demo/io_func/feat_readers/reader_kaldi.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from common import *
 
 import random
@@ -95,7 +112,7 @@ class kaldiReader(BaseReader):
         feat_rows = kaldi.MatrixF_NumRows(feat_value)
         feat_cols = kaldi.MatrixF_NumCols(feat_value)
         feat_data = kaldi.MatrixF_Data(feat_value)
-        
+
         # never use numpy.ndarray(buf=) or numpy.ctypeslib.as_array
         # because you don't know if Python or C owns buffer
         # (even if you numpy.copy() resulting array)
@@ -114,7 +131,7 @@ class kaldiReader(BaseReader):
         if self.targets_rspecifier is not None:
             if kaldi.RAPReader_HasKey(self.targets_reader, utt):
                 tgt_value = kaldi.RAPReader_Value(self.targets_reader, utt)
-                
+
                 tgts = numpy.empty((feat_rows,), dtype=numpy.int32)
                 # ok to use memmove because this is 1-dimensional array I made in C (no stride)
                 tgts_numpy_ptr = ctypes.cast(tgts.ctypes.data, c_int_ptr)
@@ -125,7 +142,7 @@ class kaldiReader(BaseReader):
                 tgts = None
         else:
             tgts = None
-        
+
         kaldi.SBFMReader_Next(self.feature_reader)
 
         #print "FEATS:", feats[0:5][0:5]
diff --git a/example/speech-demo/io_func/feat_readers/stats.py b/example/speech-demo/io_func/feat_readers/stats.py
index 70033eb..a2c8473 100644
--- a/example/speech-demo/io_func/feat_readers/stats.py
+++ b/example/speech-demo/io_func/feat_readers/stats.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 import numpy
 
diff --git a/example/speech-demo/io_func/feat_readers/writer_kaldi.py b/example/speech-demo/io_func/feat_readers/writer_kaldi.py
index f331160..0f8fb93 100644
--- a/example/speech-demo/io_func/feat_readers/writer_kaldi.py
+++ b/example/speech-demo/io_func/feat_readers/writer_kaldi.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import sys
 import numpy
 import struct
diff --git a/example/speech-demo/io_func/info.py b/example/speech-demo/io_func/info.py
index 64bb77d..eaf95ab 100644
--- a/example/speech-demo/io_func/info.py
+++ b/example/speech-demo/io_func/info.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os
 
 _mydir = os.path.dirname(__file__) or '.'
diff --git a/example/speech-demo/io_func/kaldi_parser.py b/example/speech-demo/io_func/kaldi_parser.py
index 8b1d678..10a373d 100644
--- a/example/speech-demo/io_func/kaldi_parser.py
+++ b/example/speech-demo/io_func/kaldi_parser.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 import struct
 import numpy as num
@@ -199,4 +216,4 @@ if __name__ == '__main__':
             fout.write('<maxout> ' + str(int(layers[i + 1])) + ' ' + str(output_size) + '\n')
         else:
             fout.write('<sigmoid> ' + str(output_size) + ' ' + str(output_size) + '\n')
-"""
\ No newline at end of file
+"""
diff --git a/example/speech-demo/io_func/model_io.py b/example/speech-demo/io_func/model_io.py
index 1849663..8b6e043 100755
--- a/example/speech-demo/io_func/model_io.py
+++ b/example/speech-demo/io_func/model_io.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 import numpy as np
 import os
@@ -59,7 +76,7 @@ def _nnet2file(layers, set_layer_num = -1, filename='nnet.out', activation='sigm
         nnet_dict[dict_a] = array_2_string(layers[i].delta_params[0].get_value())
         dict_a = str(i) + ' ' + activation + ' db'
         nnet_dict[dict_a] = array_2_string(layers[i].delta_params[1].get_value())
-    
+
         if layers[i].kahan:
             logger.info("Loading hidden kahan")
             dict_a = str(i) + ' ' + activation + ' W_carry'
@@ -71,9 +88,9 @@ def _nnet2file(layers, set_layer_num = -1, filename='nnet.out', activation='sigm
             #dict_a = str(i) + ' ' + activation + ' db_carry'
             #nnet_dict[dict_a] = array_2_string(layers[i].delta_params_carry[1].get_value())
 
-    if withfinal: 
+    if withfinal:
         logger.info("Saving final layer ")
-        
+
         dict_a = 'logreg W'
         nnet_dict[dict_a] = array_2_string((1.0 - factor[-1]) * layers[-1].params[0].get_value())
         dict_a = 'logreg b'
@@ -96,7 +113,7 @@ def _nnet2file(layers, set_layer_num = -1, filename='nnet.out', activation='sigm
             #dict_a = 'logreg db_carry'
             #nnet_dict[dict_a] = array_2_string(layers[-1].delta_params_carry[1].get_value())
 
-    utils.pickle_save(nnet_dict, filename)   
+    utils.pickle_save(nnet_dict, filename)
 
 def zero(x):
     x.set_value(np.zeros_like(x.get_value(borrow=True), dtype=theano.config.floatX))
@@ -147,14 +164,14 @@ def _file2nnet(layers, set_layer_num = -1, filename='nnet.in', activation='sigmo
 
         dict_key = str(i) + ' ' + activation + ' W'
         layers[i].params[0].set_value(factors[i] * factor * np.asarray(string_2_array(nnet_dict[dict_key]), dtype=theano.config.floatX))
-        dict_key = str(i) + ' ' + activation + ' b' 
+        dict_key = str(i) + ' ' + activation + ' b'
         layers[i].params[1].set_value(np.asarray(string_2_array(nnet_dict[dict_key]), dtype=theano.config.floatX))
 
         if gradients:
             dict_key = str(i) + ' ' + activation + ' dW'
             layers[i].delta_params[0].set_value(np.asarray(string_2_array(nnet_dict[dict_key]), dtype=theano.config.floatX))
-            dict_key = str(i) + ' ' + activation + ' db' 
-            layers[i].delta_params[1].set_value(np.asarray(string_2_array(nnet_dict[dict_key]), dtype=theano.config.floatX))            
+            dict_key = str(i) + ' ' + activation + ' db'
+            layers[i].delta_params[1].set_value(np.asarray(string_2_array(nnet_dict[dict_key]), dtype=theano.config.floatX))
         else:
             zero(layers[i].delta_params[0])
             zero(layers[i].delta_params[1])
@@ -164,12 +181,12 @@ def _file2nnet(layers, set_layer_num = -1, filename='nnet.in', activation='sigmo
             logger.info("Loading hidden kahan")
             dict_key = str(i) + ' ' + activation + ' W_carry'
             layers[i].params_carry[0].set_value(np.asarray(string_2_array(nnet_dict[dict_key]), dtype=theano.config.floatX))
-            dict_key = str(i) + ' ' + activation + ' b_carry' 
-            layers[i].params_carry[1].set_value(np.asarray(string_2_array(nnet_dict[dict_key]), dtype=theano.config.floatX))            
+            dict_key = str(i) + ' ' + activation + ' b_carry'
+            layers[i].params_carry[1].set_value(np.asarray(string_2_array(nnet_dict[dict_key]), dtype=theano.config.floatX))
             #dict_key = str(i) + ' ' + activation + ' dW_carry'
             #layers[i].delta_params_carry[0].set_value(np.asarray(string_2_array(nnet_dict[dict_key]), dtype=theano.config.floatX))
-            #dict_key = str(i) + ' ' + activation + ' db_carry' 
-            #layers[i].delta_params_carry[1].set_value(np.asarray(string_2_array(nnet_dict[dict_key]), dtype=theano.config.floatX))            
+            #dict_key = str(i) + ' ' + activation + ' db_carry'
+            #layers[i].delta_params_carry[1].set_value(np.asarray(string_2_array(nnet_dict[dict_key]), dtype=theano.config.floatX))
 
         if layers[i].sync:
             layers[i].params_sync[0].set_value(layers[i].params[0].get_value().astype('float32'))
@@ -197,12 +214,12 @@ def _file2nnet(layers, set_layer_num = -1, filename='nnet.in', activation='sigmo
             logger.info("Loading softmax kahan")
             dict_key = 'logreg W_carry'
             layers[-1].params_carry[0].set_value(np.asarray(string_2_array(nnet_dict[dict_key]), dtype=theano.config.floatX))
-            dict_key = 'logreg b_carry' 
-            layers[-1].params_carry[1].set_value(np.asarray(string_2_array(nnet_dict[dict_key]), dtype=theano.config.floatX))            
+            dict_key = 'logreg b_carry'
+            layers[-1].params_carry[1].set_value(np.asarray(string_2_array(nnet_dict[dict_key]), dtype=theano.config.floatX))
             #dict_key = 'logreg dW_carry'
             #layers[-1].delta_params_carry[0].set_value(np.asarray(string_2_array(nnet_dict[dict_key]), dtype=theano.config.floatX))
-            #dict_key = 'logreg db_carry' 
-            #layers[-1].delta_params_carry[1].set_value(np.asarray(string_2_array(nnet_dict[dict_key]), dtype=theano.config.floatX))            
+            #dict_key = 'logreg db_carry'
+            #layers[-1].delta_params_carry[1].set_value(np.asarray(string_2_array(nnet_dict[dict_key]), dtype=theano.config.floatX))
 
         if layers[-1].sync:
             layers[-1].params_sync[0].set_value(layers[-1].params[0].get_value().astype('float32'))
@@ -220,10 +237,10 @@ def _cnn2file(conv_layers, filename='nnet.out', activation='sigmoid', withfinal=
     for i in xrange(n_layers):
        conv_layer = conv_layers[i]
        filter_shape = conv_layer.filter_shape
-       
+
        for next_X in xrange(filter_shape[0]):
            for this_X in xrange(filter_shape[1]):
-               dict_a = 'W ' + str(i) + ' ' + str(next_X) + ' ' + str(this_X) 
+               dict_a = 'W ' + str(i) + ' ' + str(next_X) + ' ' + str(this_X)
                if i == 0:
                    nnet_dict[dict_a] = array_2_string(input_factor * (conv_layer.W.get_value())[next_X, this_X])
                else:
@@ -231,7 +248,7 @@ def _cnn2file(conv_layers, filename='nnet.out', activation='sigmoid', withfinal=
 
        dict_a = 'b ' + str(i)
        nnet_dict[dict_a] = array_2_string(conv_layer.b.get_value())
-    
+
     with open(filename, 'wb') as fp:
         json.dump(nnet_dict, fp, indent=2, sort_keys = True)
         fp.flush()
@@ -252,7 +269,7 @@ def _file2cnn(conv_layers, filename='nnet.in', activation='sigmoid', withfinal=T
                 dict_a = 'W ' + str(i) + ' ' + str(next_X) + ' ' + str(this_X)
                 W_array[next_X, this_X, :, :] = factor * np.asarray(string_2_array(nnet_dict[dict_a]))
 
-        conv_layer.W.set_value(W_array) 
+        conv_layer.W.set_value(W_array)
 
         dict_a = 'b ' + str(i)
-        conv_layer.b.set_value(np.asarray(string_2_array(nnet_dict[dict_a]), dtype=theano.config.floatX)) 
+        conv_layer.b.set_value(np.asarray(string_2_array(nnet_dict[dict_a]), dtype=theano.config.floatX))
diff --git a/example/speech-demo/io_func/regr_feat_io.py b/example/speech-demo/io_func/regr_feat_io.py
index 2f3c4ec..a1737bf 100644
--- a/example/speech-demo/io_func/regr_feat_io.py
+++ b/example/speech-demo/io_func/regr_feat_io.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os
 import sys
 import random
@@ -36,7 +53,7 @@ class RegrDataReadStream(object):
     def make_shared(self):
         self.input.make_shared()
         self.output.make_shared()
-        
+
     def get_shared(self):
         iret = self.input.get_shared()
         oret = self.output.get_shared()
@@ -56,13 +73,13 @@ class RegrDataReadStream(object):
 
     def load_next_block(self):
         a = self.input.load_next_block()
-        b = self.output.load_next_block()        
+        b = self.output.load_next_block()
         assert(a == b)
         return a
 
     def get_state(self):
         a = self.input.get_state()
-        b = self.output.get_state()   
+        b = self.output.get_state()
         assert(a[0] == b[0])
         assert(a[2] == b[2])
         assert(a[3] == b[3])
@@ -72,4 +89,4 @@ class RegrDataReadStream(object):
 
     def set_state(self, state):
         self.input.set_state(state)
-        self.output.set_state(state)        
+        self.output.set_state(state)
diff --git a/example/speech-demo/io_func/utils.py b/example/speech-demo/io_func/utils.py
index 513261e..4ba8496 100644
--- a/example/speech-demo/io_func/utils.py
+++ b/example/speech-demo/io_func/utils.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import sys, subprocess, pickle, os, json, logging, socket
 import logging.config
 import datetime
diff --git a/example/speech-demo/io_util.py b/example/speech-demo/io_util.py
index 926f20f..e5bd74c 100644
--- a/example/speech-demo/io_util.py
+++ b/example/speech-demo/io_util.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 import numpy as np
 import sys
@@ -310,7 +327,7 @@ class TruncatedSentenceIter(mx.io.DataIter):
             self.data = [mx.nd.zeros((batch_size, truncate_len, feat_dim))]
             if has_label:
                 self.label = [mx.nd.zeros((batch_size, truncate_len))]
- 
+
         self.init_state_names = [x[0] for x in init_states]
         self.init_state_arrays = [mx.nd.zeros(x[1]) for x in init_states]
 
@@ -368,14 +385,14 @@ class TruncatedSentenceIter(mx.io.DataIter):
         next_utt_idx = self.batch_size
         is_pad = [False] * self.batch_size
         pad = 0
-        
+
         if self.time_major:
             np_data_buffer = np.zeros((self.truncate_len, self.batch_size, self.feat_dim))
             np_label_buffer = np.zeros((self.truncate_len, self.batch_size))
         else:
             np_data_buffer = np.zeros((self.batch_size, self.truncate_len, self.feat_dim))
             np_label_buffer = np.zeros((self.batch_size, self.truncate_len))
- 
+
         utt_id_buffer = [None] * self.batch_size
 
         data_names = [self.data_name] + self.init_state_names
@@ -432,7 +449,7 @@ class TruncatedSentenceIter(mx.io.DataIter):
                     else:
                         np_data_buffer[i, :n_take, :] = fea_utt[idx_take]
                         np_label_buffer[i, :n_take] = self.labels[idx][idx_take]
- 
+
                     if n_take < self.truncate_len:
                         if self.time_major:
                             np_data_buffer[n_take:, i, :] = 0
@@ -440,7 +457,7 @@ class TruncatedSentenceIter(mx.io.DataIter):
                         else:
                             np_data_buffer[i, n_take:, :] = 0
                             np_label_buffer[i, n_take:] = 0
- 
+
                         effective_sample_count -= self.truncate_len - n_take
 
                     utt_inside_idx[i] += n_take
@@ -450,11 +467,11 @@ class TruncatedSentenceIter(mx.io.DataIter):
             if pad == self.batch_size:
                 # finished all the senteces
                 break
-            
+
             self.data[0][:] = np_data_buffer
             self.label[0][:] = np_label_buffer
- 
-            data_batch = SimpleBatch(data_names, 
+
+            data_batch = SimpleBatch(data_names,
                                      self.data + self.init_state_arrays,
                                      label_names, self.label, bucket_key=None,
                                      utt_id=utt_id_buffer,
diff --git a/example/speech-demo/lstm_proj.py b/example/speech-demo/lstm_proj.py
index ae2271c..a27518c 100644
--- a/example/speech-demo/lstm_proj.py
+++ b/example/speech-demo/lstm_proj.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint:skip-file
 import mxnet as mx
 import numpy as np
@@ -17,7 +34,7 @@ def lstm(num_hidden, indata, prev_state, param, seqidx, layeridx, dropout=0., nu
     """LSTM Cell symbol"""
     if dropout > 0.:
         indata = mx.sym.Dropout(data=indata, p=dropout)
-    
+
     i2h = mx.sym.FullyConnected(data=indata,
                                 weight=param.i2h_weight,
                                 bias=param.i2h_bias,
@@ -115,7 +132,7 @@ def lstm_unroll(num_lstm_layer, seq_len, input_size,
     pred = mx.sym.Reshape(pred, shape=(-1, num_label))
     label = mx.sym.Reshape(label, shape=(-1,))
     if take_softmax:
-        sm = mx.sym.SoftmaxOutput(data=pred, label=label, ignore_label=0, 
+        sm = mx.sym.SoftmaxOutput(data=pred, label=label, ignore_label=0,
                                   use_ignore=True, name='softmax')
     else:
         sm = pred
diff --git a/example/speech-demo/make_stats.py b/example/speech-demo/make_stats.py
index 440f514..64991db 100644
--- a/example/speech-demo/make_stats.py
+++ b/example/speech-demo/make_stats.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import re
 import sys
 sys.path.insert(0, "../../python")
diff --git a/example/speech-demo/python_wrap/ctypes.cc b/example/speech-demo/python_wrap/ctypes.cc
index cd77d47..a2c7946 100644
--- a/example/speech-demo/python_wrap/ctypes.cc
+++ b/example/speech-demo/python_wrap/ctypes.cc
@@ -1,3 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 #include <iostream>
 
 #include "util/table-types.h"
@@ -143,7 +162,7 @@ extern "C" {
 
   RAPReader* RAPReader_new_char(char * rspecifier) {
     return new RAPReader(rspecifier);
-  }  
+  }
 
   //bool  HasKey (const std::string &key)
   int RAPReader_HasKey(RAPReader* r, char * key) {
@@ -178,7 +197,7 @@ extern "C" {
       }
       vals[row] = pair.first;
     }
-    
+
     return vals;
   }
 
diff --git a/example/speech-demo/python_wrap/example_usage/example.py b/example/speech-demo/python_wrap/example_usage/example.py
index 766bb6e..d930327 100644
--- a/example/speech-demo/python_wrap/example_usage/example.py
+++ b/example/speech-demo/python_wrap/example_usage/example.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 import ctypes
 import numpy
@@ -53,7 +70,7 @@ if __name__ == "__main__":
     print("-------- Kaldi SBFMReader and MatrixF class example --------")
 
     reader = kaldi.SBFMReader_new_char("scp:data.scp")
-    
+
     # data.scp has exactly one utterance, assert it's there
     assert(not kaldi.SBFMReader_Done(reader))
 
@@ -63,7 +80,7 @@ if __name__ == "__main__":
     feat_rows = kaldi.MatrixF_NumRows(feat_value)
     feat_cols = kaldi.MatrixF_NumCols(feat_value)
     feat_data = kaldi.MatrixF_Data(feat_value)
-    
+
     # never use numpy.ndarray(buf=) or numpy.ctypeslib.as_array
     # because you don't know if Python or C owns buffer
     # (even if you numpy.copy() resulting array)
diff --git a/example/speech-demo/run_ami.sh b/example/speech-demo/run_ami.sh
index 6c4dc13..0103fd1 100755
--- a/example/speech-demo/run_ami.sh
+++ b/example/speech-demo/run_ami.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 # This script trains and evaluate LSTM models. There is no
 # discriminative training yet.
 # In this recipe, MXNet directly read Kaldi features and labels,
diff --git a/example/speech-demo/run_timit.sh b/example/speech-demo/run_timit.sh
index 4bc037d..023ae6f 100755
--- a/example/speech-demo/run_timit.sh
+++ b/example/speech-demo/run_timit.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 # This script trains and evaluate LSTM models. There is no
 # discriminative training yet.
 # In this recipe, MXNet directly read Kaldi features and labels,
diff --git a/example/speech-demo/speechSGD.py b/example/speech-demo/speechSGD.py
index 37fb89d..931f40a 100644
--- a/example/speech-demo/speechSGD.py
+++ b/example/speech-demo/speechSGD.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 
 from mxnet.ndarray import NDArray, zeros, clip, sqrt
@@ -44,7 +61,7 @@ class speechSGD(mx.optimizer.Optimizer):
             return None
         else:
             return zeros(weight.shape, weight.context, dtype=weight.dtype)
-    
+
     def _get_lr(self, index):
         """get learning rate for index.
 
diff --git a/example/speech-demo/tests/test_nothing.py b/example/speech-demo/tests/test_nothing.py
index 1436522..d6e810f 100644
--- a/example/speech-demo/tests/test_nothing.py
+++ b/example/speech-demo/tests/test_nothing.py
@@ -1,2 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 def test_nothing():
-	pass
\ No newline at end of file
+	pass
diff --git a/example/speech-demo/tests/test_system.py b/example/speech-demo/tests/test_system.py
index 9d2a4b9..a64879a 100644
--- a/example/speech-demo/tests/test_system.py
+++ b/example/speech-demo/tests/test_system.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 from pdnn.run_DNN import run_DNN
 from pdnn.run_RBM import run_RBM
@@ -37,7 +54,7 @@ def test_rbm_dnn():
         "with_final": 1
     }
     mnist_conf["train_rbm"]["max_iters"] = 0
-    run_RBM(mnist_conf)    
+    run_RBM(mnist_conf)
 
 def test_sda_dnn():
     banner("sda dnn")
@@ -60,7 +77,7 @@ def test_sda_dnn():
         "with_final": 1
     }
     mnist_conf["train_sda"]["max_iters"] = 1
-    run_SDA(mnist_conf)    
+    run_SDA(mnist_conf)
 
 def test_dnn_eval():
     banner("dnn cv")
@@ -82,7 +99,7 @@ def test_dnn_eval():
     eval_DNN(mnist_conf)
 
     mnist_conf["eval_dnn"] = {"mode": "per-feat", "batch_size": 1024}
-    eval_DNN(mnist_conf)    
+    eval_DNN(mnist_conf)
 
 def test_dropout():
     banner("dropout")
diff --git a/example/speech-demo/train_lstm_proj.py b/example/speech-demo/train_lstm_proj.py
index d2a7a27..5749b0c 100644
--- a/example/speech-demo/train_lstm_proj.py
+++ b/example/speech-demo/train_lstm_proj.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import re
 import sys
 sys.path.insert(0, "../../python")
@@ -129,7 +146,7 @@ def do_training(training_method, args, module, data_train, data_val):
     mkpath(os.path.dirname(get_checkpoint_path(args)))
 
     batch_size = data_train.batch_size
-    batch_end_callbacks = [mx.callback.Speedometer(batch_size, 
+    batch_end_callbacks = [mx.callback.Speedometer(batch_size,
                                                    args.config.getint('train', 'show_every'))]
     eval_allow_extra = True if training_method == METHOD_TBPTT else False
     eval_metric = [mx.metric.np(CrossEntropy, allow_extra_outputs=eval_allow_extra),
diff --git a/example/speech_recognition/arch_deepspeech.py b/example/speech_recognition/arch_deepspeech.py
index 4288b24..e5b3d43 100644
--- a/example/speech_recognition/arch_deepspeech.py
+++ b/example/speech_recognition/arch_deepspeech.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: disable=C0111, too-many-statements, too-many-locals
 # pylint: too-many-arguments,too-many-instance-attributes,too-many-locals,redefined-outer-name,fixme
 # pylint: disable=superfluous-parens, no-member, invalid-name
diff --git a/example/speech_recognition/config_util.py b/example/speech_recognition/config_util.py
index e8b7fda..0973391 100644
--- a/example/speech_recognition/config_util.py
+++ b/example/speech_recognition/config_util.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import argparse
 import os
 import re
diff --git a/example/speech_recognition/flac_to_wav.sh b/example/speech_recognition/flac_to_wav.sh
index a622b60..7fd53f8 100644
--- a/example/speech_recognition/flac_to_wav.sh
+++ b/example/speech_recognition/flac_to_wav.sh
@@ -1,8 +1,25 @@
-# Convert all .flac files within this folder to .wav files
-
-find . -iname "*.flac" | wc
-
-for flacfile in `find . -iname "*.flac"`
-do
-    sox "${flacfile%.*}.flac" -e signed -b 16 -c 1 -r 16000 "${flacfile%.*}.wav"
-done
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Convert all .flac files within this folder to .wav files
+
+find . -iname "*.flac" | wc
+
+for flacfile in `find . -iname "*.flac"`
+do
+    sox "${flacfile%.*}.flac" -e signed -b 16 -c 1 -r 16000 "${flacfile%.*}.wav"
+done
diff --git a/example/speech_recognition/label_util.py b/example/speech_recognition/label_util.py
index 3eb56c5..dab1d1e 100644
--- a/example/speech_recognition/label_util.py
+++ b/example/speech_recognition/label_util.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # -*- coding: utf-8 -*-
 
 import csv
diff --git a/example/speech_recognition/log_util.py b/example/speech_recognition/log_util.py
index 097cfbd..e61407f 100644
--- a/example/speech_recognition/log_util.py
+++ b/example/speech_recognition/log_util.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import logging
 import logging.handlers
 
diff --git a/example/speech_recognition/main.py b/example/speech_recognition/main.py
index 4ecb116..e450263 100644
--- a/example/speech_recognition/main.py
+++ b/example/speech_recognition/main.py
@@ -1,351 +1,368 @@
-import json
-import os
-import sys
-from collections import namedtuple
-from datetime import datetime
-from config_util import parse_args, parse_contexts, generate_file_path
-from train import do_training
-import mxnet as mx
-from stt_io_iter import STTIter
-from label_util import LabelUtil
-from log_util import LogUtil
-import numpy as np
-from stt_datagenerator import DataGenerator
-from stt_metric import STTMetric
-from stt_bi_graphemes_util import generate_bi_graphemes_dictionary
-from stt_bucketing_module import STTBucketingModule
-from stt_io_bucketingiter import BucketSTTIter
-sys.path.insert(0, "../../python")
-
-# os.environ['MXNET_ENGINE_TYPE'] = "NaiveEngine"
-os.environ['MXNET_ENGINE_TYPE'] = "ThreadedEnginePerDevice"
-os.environ['MXNET_ENABLE_GPU_P2P'] = "0"
-
-class WHCS:
-    width = 0
-    height = 0
-    channel = 0
-    stride = 0
-
-class ConfigLogger(object):
-    def __init__(self, log):
-        self.__log = log
-
-    def __call__(self, config):
-        self.__log.info("Config:")
-        config.write(self)
-
-    def write(self, data):
-        # stripping the data makes the output nicer and avoids empty lines
-        line = data.strip()
-        self.__log.info(line)
-
-def load_labelutil(labelUtil, is_bi_graphemes, language="en"):
-    if language == "en":
-        if is_bi_graphemes:
-            try:
-                labelUtil.load_unicode_set("resources/unicodemap_en_baidu_bi_graphemes.csv")
-            except:
-                raise Exception("There is no resources/unicodemap_en_baidu_bi_graphemes.csv." +
-                                " Please set overwrite_bi_graphemes_dictionary True at train section")
-        else:
-            labelUtil.load_unicode_set("resources/unicodemap_en_baidu.csv")
-    else:
-        raise Exception("Error: Language Type: %s" % language)
-
-
-
-def load_data(args):
-    mode = args.config.get('common', 'mode')
-    if mode not in ['train', 'predict', 'load']:
-        raise Exception('mode must be the one of the followings - train,predict,load')
-    batch_size = args.config.getint('common', 'batch_size')
-
-    whcs = WHCS()
-    whcs.width = args.config.getint('data', 'width')
-    whcs.height = args.config.getint('data', 'height')
-    whcs.channel = args.config.getint('data', 'channel')
-    whcs.stride = args.config.getint('data', 'stride')
-    save_dir = 'checkpoints'
-    model_name = args.config.get('common', 'prefix')
-    is_bi_graphemes = args.config.getboolean('common', 'is_bi_graphemes')
-    overwrite_meta_files = args.config.getboolean('train', 'overwrite_meta_files')
-    overwrite_bi_graphemes_dictionary = args.config.getboolean('train', 'overwrite_bi_graphemes_dictionary')
-    max_duration = args.config.getfloat('data', 'max_duration')
-    language = args.config.get('data', 'language')
-
-    log = LogUtil().getlogger()
-    labelUtil = LabelUtil.getInstance()
-    if mode == "train" or mode == "load":
-        data_json = args.config.get('data', 'train_json')
-        val_json = args.config.get('data', 'val_json')
-        datagen = DataGenerator(save_dir=save_dir, model_name=model_name)
-        datagen.load_train_data(data_json, max_duration=max_duration)
-        datagen.load_validation_data(val_json, max_duration=max_duration)
-        if is_bi_graphemes:
-            if not os.path.isfile("resources/unicodemap_en_baidu_bi_graphemes.csv") or overwrite_bi_graphemes_dictionary:
-                load_labelutil(labelUtil=labelUtil, is_bi_graphemes=False, language=language)
-                generate_bi_graphemes_dictionary(datagen.train_texts+datagen.val_texts)
-        load_labelutil(labelUtil=labelUtil, is_bi_graphemes=is_bi_graphemes, language=language)
-        args.config.set('arch', 'n_classes', str(labelUtil.get_count()))
-
-        if mode == "train":
-            if overwrite_meta_files:
-                log.info("Generate mean and std from samples")
-                normalize_target_k = args.config.getint('train', 'normalize_target_k')
-                datagen.sample_normalize(normalize_target_k, True)
-            else:
-                log.info("Read mean and std from meta files")
-                datagen.get_meta_from_file(
-                    np.loadtxt(generate_file_path(save_dir, model_name, 'feats_mean')),
-                    np.loadtxt(generate_file_path(save_dir, model_name, 'feats_std')))
-        elif mode == "load":
-            # get feat_mean and feat_std to normalize dataset
-            datagen.get_meta_from_file(
-                np.loadtxt(generate_file_path(save_dir, model_name, 'feats_mean')),
-                np.loadtxt(generate_file_path(save_dir, model_name, 'feats_std')))
-
-    elif mode == 'predict':
-        test_json = args.config.get('data', 'test_json')
-        datagen = DataGenerator(save_dir=save_dir, model_name=model_name)
-        datagen.load_train_data(test_json, max_duration=max_duration)
-        labelutil = load_labelutil(labelUtil, is_bi_graphemes, language="en")
-        args.config.set('arch', 'n_classes', str(labelUtil.get_count()))
-        datagen.get_meta_from_file(
-            np.loadtxt(generate_file_path(save_dir, model_name, 'feats_mean')),
-            np.loadtxt(generate_file_path(save_dir, model_name, 'feats_std')))
-
-    is_batchnorm = args.config.getboolean('arch', 'is_batchnorm')
-    if batch_size == 1 and is_batchnorm and (mode == 'train' or mode == 'load'):
-        raise Warning('batch size 1 is too small for is_batchnorm')
-
-    # sort file paths by its duration in ascending order to implement sortaGrad
-    if mode == "train" or mode == "load":
-        max_t_count = datagen.get_max_seq_length(partition="train")
-        max_label_length = \
-            datagen.get_max_label_length(partition="train", is_bi_graphemes=is_bi_graphemes)
-    elif mode == "predict":
-        max_t_count = datagen.get_max_seq_length(partition="test")
-        max_label_length = \
-            datagen.get_max_label_length(partition="test", is_bi_graphemes=is_bi_graphemes)
-
-    args.config.set('arch', 'max_t_count', str(max_t_count))
-    args.config.set('arch', 'max_label_length', str(max_label_length))
-    from importlib import import_module
-    prepare_data_template = import_module(args.config.get('arch', 'arch_file'))
-    init_states = prepare_data_template.prepare_data(args)
-    sort_by_duration = (mode == "train")
-    is_bucketing = args.config.getboolean('arch', 'is_bucketing')
-    save_feature_as_csvfile = args.config.getboolean('train', 'save_feature_as_csvfile')
-    if is_bucketing:
-        buckets = json.loads(args.config.get('arch', 'buckets'))
-        data_loaded = BucketSTTIter(partition="train",
-                                    count=datagen.count,
-                                    datagen=datagen,
-                                    batch_size=batch_size,
-                                    num_label=max_label_length,
-                                    init_states=init_states,
-                                    seq_length=max_t_count,
-                                    width=whcs.width,
-                                    height=whcs.height,
-                                    sort_by_duration=sort_by_duration,
-                                    is_bi_graphemes=is_bi_graphemes,
-                                    buckets=buckets,
-                                    save_feature_as_csvfile=save_feature_as_csvfile)
-    else:
-        data_loaded = STTIter(partition="train",
-                              count=datagen.count,
-                              datagen=datagen,
-                              batch_size=batch_size,
-                              num_label=max_label_length,
-                              init_states=init_states,
-                              seq_length=max_t_count,
-                              width=whcs.width,
-                              height=whcs.height,
-                              sort_by_duration=sort_by_duration,
-                              is_bi_graphemes=is_bi_graphemes,
-                              save_feature_as_csvfile=save_feature_as_csvfile)
-
-    if mode == 'train' or mode == 'load':
-        if is_bucketing:
-            validation_loaded = BucketSTTIter(partition="validation",
-                                              count=datagen.val_count,
-                                              datagen=datagen,
-                                              batch_size=batch_size,
-                                              num_label=max_label_length,
-                                              init_states=init_states,
-                                              seq_length=max_t_count,
-                                              width=whcs.width,
-                                              height=whcs.height,
-                                              sort_by_duration=False,
-                                              is_bi_graphemes=is_bi_graphemes,
-                                              buckets=buckets,
-                                              save_feature_as_csvfile=save_feature_as_csvfile)
-        else:
-            validation_loaded = STTIter(partition="validation",
-                                        count=datagen.val_count,
-                                        datagen=datagen,
-                                        batch_size=batch_size,
-                                        num_label=max_label_length,
-                                        init_states=init_states,
-                                        seq_length=max_t_count,
-                                        width=whcs.width,
-                                        height=whcs.height,
-                                        sort_by_duration=False,
-                                        is_bi_graphemes=is_bi_graphemes,
-                                        save_feature_as_csvfile=save_feature_as_csvfile)
-        return data_loaded, validation_loaded, args
-    elif mode == 'predict':
-        return data_loaded, args
-
-
-def load_model(args, contexts, data_train):
-    # load model from model_name prefix and epoch of model_num_epoch with gpu contexts of contexts
-    mode = args.config.get('common', 'mode')
-    load_optimizer_states = args.config.getboolean('load', 'load_optimizer_states')
-    is_start_from_batch = args.config.getboolean('load', 'is_start_from_batch')
-
-    from importlib import import_module
-    symbol_template = import_module(args.config.get('arch', 'arch_file'))
-    is_bucketing = args.config.getboolean('arch', 'is_bucketing')
-
-    if mode == 'train':
-        if is_bucketing:
-            bucketing_arch = symbol_template.BucketingArch(args)
-            model_loaded = bucketing_arch.get_sym_gen()
-        else:
-            model_loaded = symbol_template.arch(args)
-        model_num_epoch = None
-    elif mode == 'load' or mode == 'predict':
-        model_file = args.config.get('common', 'model_file')
-        model_name = os.path.splitext(model_file)[0]
-        model_num_epoch = int(model_name[-4:])
-        if is_bucketing:
-            bucketing_arch = symbol_template.BucketingArch(args)
-            model_loaded = bucketing_arch.get_sym_gen()
-        else:
-            model_path = 'checkpoints/' + str(model_name[:-5])
-
-            data_names = [x[0] for x in data_train.provide_data]
-            label_names = [x[0] for x in data_train.provide_label]
-
-            model_loaded = mx.module.Module.load(
-                prefix=model_path, epoch=model_num_epoch, context=contexts,
-                data_names=data_names, label_names=label_names,
-                load_optimizer_states=load_optimizer_states)
-        if is_start_from_batch:
-            import re
-            model_num_epoch = int(re.findall('\d+', model_file)[0])
-
-    return model_loaded, model_num_epoch
-
-
-if __name__ == '__main__':
-    if len(sys.argv) <= 1:
-        raise Exception('cfg file path must be provided. ' +
-                        'ex)python main.py --configfile examplecfg.cfg')
-    args = parse_args(sys.argv[1])
-    # set parameters from cfg file
-    # give random seed
-    random_seed = args.config.getint('common', 'random_seed')
-    mx_random_seed = args.config.getint('common', 'mx_random_seed')
-    # random seed for shuffling data list
-    if random_seed != -1:
-        np.random.seed(random_seed)
-    # set mx.random.seed to give seed for parameter initialization
-    if mx_random_seed != -1:
-        mx.random.seed(mx_random_seed)
-    else:
-        mx.random.seed(hash(datetime.now()))
-    # set log file name
-    log_filename = args.config.get('common', 'log_filename')
-    log = LogUtil(filename=log_filename).getlogger()
-
-    # set parameters from data section(common)
-    mode = args.config.get('common', 'mode')
-    if mode not in ['train', 'predict', 'load']:
-        raise Exception(
-            'Define mode in the cfg file first. ' +
-            'train or predict or load can be the candidate for the mode.')
-
-    # get meta file where character to number conversions are defined
-
-    contexts = parse_contexts(args)
-    num_gpu = len(contexts)
-    batch_size = args.config.getint('common', 'batch_size')
-    # check the number of gpus is positive divisor of the batch size for data parallel
-    if batch_size % num_gpu != 0:
-        raise Exception('num_gpu should be positive divisor of batch_size')
-    if mode == "train" or mode == "load":
-        data_train, data_val, args = load_data(args)
-    elif mode == "predict":
-        data_train, args = load_data(args)
-    is_batchnorm = args.config.getboolean('arch', 'is_batchnorm')
-    is_bucketing = args.config.getboolean('arch', 'is_bucketing')
-
-    # log current config
-    config_logger = ConfigLogger(log)
-    config_logger(args.config)
-
-    # load model
-    model_loaded, model_num_epoch = load_model(args, contexts, data_train)
-    # if mode is 'train', it trains the model
-    if mode == 'train':
-        if is_bucketing:
-            module = STTBucketingModule(
-                sym_gen=model_loaded,
-                default_bucket_key=data_train.default_bucket_key,
-                context=contexts
-                )
-        else:
-            data_names = [x[0] for x in data_train.provide_data]
-            label_names = [x[0] for x in data_train.provide_label]
-            module = mx.mod.Module(model_loaded, context=contexts,
-                                   data_names=data_names, label_names=label_names)
-        do_training(args=args, module=module, data_train=data_train, data_val=data_val)
-    # if mode is 'load', it loads model from the checkpoint and continues the training.
-    elif mode == 'load':
-        do_training(args=args, module=model_loaded, data_train=data_train, data_val=data_val,
-                    begin_epoch=model_num_epoch + 1)
-    # if mode is 'predict', it predict label from the input by the input model
-    elif mode == 'predict':
-        # predict through data
-        if is_bucketing:
-            max_t_count = args.config.getint('arch', 'max_t_count')
-            load_optimizer_states = args.config.getboolean('load', 'load_optimizer_states')
-            model_file = args.config.get('common', 'model_file')
-            model_name = os.path.splitext(model_file)[0]
-            model_num_epoch = int(model_name[-4:])
-
-            model_path = 'checkpoints/' + str(model_name[:-5])
-            model = STTBucketingModule(
-                sym_gen=model_loaded,
-                default_bucket_key=data_train.default_bucket_key,
-                context=contexts
-                )
-
-            model.bind(data_shapes=data_train.provide_data,
-                       label_shapes=data_train.provide_label,
-                       for_training=True)
-            _, arg_params, aux_params = mx.model.load_checkpoint(model_path, model_num_epoch)
-            model.set_params(arg_params, aux_params)
-            model_loaded = model
-        else:
-            model_loaded.bind(for_training=False, data_shapes=data_train.provide_data,
-                              label_shapes=data_train.provide_label)
-        max_t_count = args.config.getint('arch', 'max_t_count')
-        eval_metric = STTMetric(batch_size=batch_size, num_gpu=num_gpu)
-        if is_batchnorm:
-            for nbatch, data_batch in enumerate(data_train):
-                model_loaded.forward(data_batch, is_train=False)
-                model_loaded.update_metric(eval_metric, data_batch.label)
-        else:
-            #model_loaded.score(eval_data=data_train, num_batch=None,
-            #                   eval_metric=eval_metric, reset=True)
-            for nbatch, data_batch in enumerate(data_train):
-                model_loaded.forward(data_batch, is_train=False)
-                model_loaded.update_metric(eval_metric, data_batch.label)
-    else:
-        raise Exception(
-            'Define mode in the cfg file first. ' +
-            'train or predict or load can be the candidate for the mode')
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import json
+import os
+import sys
+from collections import namedtuple
+from datetime import datetime
+from config_util import parse_args, parse_contexts, generate_file_path
+from train import do_training
+import mxnet as mx
+from stt_io_iter import STTIter
+from label_util import LabelUtil
+from log_util import LogUtil
+import numpy as np
+from stt_datagenerator import DataGenerator
+from stt_metric import STTMetric
+from stt_bi_graphemes_util import generate_bi_graphemes_dictionary
+from stt_bucketing_module import STTBucketingModule
+from stt_io_bucketingiter import BucketSTTIter
+# NOTE(review): this runs after ``import mxnet as mx`` earlier in the import
+# block, so it cannot influence which mxnet package that import picked up;
+# it only affects imports resolved later.
+sys.path.insert(0, "../../python")
+
+# Engine configuration is passed to MXNet through environment variables.
+# NOTE(review): these are assigned after mxnet has been imported - confirm
+# that mxnet reads them lazily rather than at import time.
+# os.environ['MXNET_ENGINE_TYPE'] = "NaiveEngine"
+os.environ['MXNET_ENGINE_TYPE'] = "ThreadedEnginePerDevice"
+os.environ['MXNET_ENABLE_GPU_P2P'] = "0"
+
+class WHCS:
+    """Plain holder for the width/height/channel/stride settings.
+
+    Every attribute starts at 0 on the class; ``load_data`` overwrites them
+    per instance with the integers read from the ``data`` config section.
+    """
+    width = height = channel = stride = 0
+
+class ConfigLogger(object):
+    """File-like adapter that dumps a config object through a logger.
+
+    Calling the instance logs a "Config:" header and then hands itself to
+    ``config.write`` as the output stream, so every chunk the config emits
+    ends up in ``log.info``.
+    """
+
+    def __init__(self, log):
+        # logger object; only its ``info`` method is used
+        self.__log = log
+
+    def __call__(self, config):
+        """Log the header, then let *config* write itself into this object."""
+        emit = self.__log.info
+        emit("Config:")
+        config.write(self)
+
+    def write(self, data):
+        """Log one chunk of text with leading/trailing whitespace removed."""
+        # stripped so trailing newlines do not pad the log records
+        self.__log.info(data.strip())
+
+def load_labelutil(labelUtil, is_bi_graphemes, language="en"):
+    """Load the character map for ``language`` into ``labelUtil``.
+
+    :param labelUtil: object exposing ``load_unicode_set(path)``
+    :param is_bi_graphemes: when true, load the bi-grapheme map instead of
+        the single-character map
+    :param language: language code; only "en" is handled
+    :raises Exception: for any language other than "en", or when loading the
+        bi-grapheme map fails
+    """
+    if language != "en":
+        raise Exception("Error: Language Type: %s" % language)
+
+    if not is_bi_graphemes:
+        labelUtil.load_unicode_set("resources/unicodemap_en_baidu.csv")
+        return
+
+    try:
+        labelUtil.load_unicode_set("resources/unicodemap_en_baidu_bi_graphemes.csv")
+    except Exception:
+        # Only ordinary errors are turned into the hint below; catching
+        # everything would also convert KeyboardInterrupt/SystemExit.
+        raise Exception("There is no resources/unicodemap_en_baidu_bi_graphemes.csv." +
+                        " Please set overwrite_bi_graphemes_dictionary True at train section")
+
+
+
+def load_data(args):
+    """Build the data iterator(s) for the mode selected in the cfg file.
+
+    Reads the ``common``, ``data``, ``train`` and ``arch`` sections of
+    ``args.config``, loads the utterance lists and the feature mean/std, and
+    writes ``n_classes``, ``max_t_count`` and ``max_label_length`` back into
+    the ``arch`` section before the architecture module prepares its states.
+
+    :param args: parsed arguments carrying a ``config`` object
+    :returns: ``(data_loaded, validation_loaded, args)`` in 'train'/'load'
+        mode, ``(data_loaded, args)`` in 'predict' mode
+    :raises Exception: when the mode is not one of train/predict/load
+    :raises Warning: when batch norm is enabled with a batch size of 1 in
+        'train'/'load' mode
+    """
+    mode = args.config.get('common', 'mode')
+    if mode not in ['train', 'predict', 'load']:
+        raise Exception('mode must be the one of the followings - train,predict,load')
+    # the mode was validated above, so "not predict" means train or load
+    is_training = mode != 'predict'
+    batch_size = args.config.getint('common', 'batch_size')
+
+    whcs = WHCS()
+    whcs.width = args.config.getint('data', 'width')
+    whcs.height = args.config.getint('data', 'height')
+    whcs.channel = args.config.getint('data', 'channel')
+    whcs.stride = args.config.getint('data', 'stride')
+    save_dir = 'checkpoints'
+    model_name = args.config.get('common', 'prefix')
+    is_bi_graphemes = args.config.getboolean('common', 'is_bi_graphemes')
+    overwrite_meta_files = args.config.getboolean('train', 'overwrite_meta_files')
+    overwrite_bi_graphemes_dictionary = args.config.getboolean('train', 'overwrite_bi_graphemes_dictionary')
+    max_duration = args.config.getfloat('data', 'max_duration')
+    language = args.config.get('data', 'language')
+
+    log = LogUtil().getlogger()
+    labelUtil = LabelUtil.getInstance()
+
+    def _read_saved_meta():
+        # feature mean/std previously stored under the checkpoint directory
+        datagen.get_meta_from_file(
+            np.loadtxt(generate_file_path(save_dir, model_name, 'feats_mean')),
+            np.loadtxt(generate_file_path(save_dir, model_name, 'feats_std')))
+
+    if is_training:
+        data_json = args.config.get('data', 'train_json')
+        val_json = args.config.get('data', 'val_json')
+        datagen = DataGenerator(save_dir=save_dir, model_name=model_name)
+        datagen.load_train_data(data_json, max_duration=max_duration)
+        datagen.load_validation_data(val_json, max_duration=max_duration)
+        if is_bi_graphemes and (
+                not os.path.isfile("resources/unicodemap_en_baidu_bi_graphemes.csv")
+                or overwrite_bi_graphemes_dictionary):
+            # the bi-grapheme dictionary is generated with the
+            # single-character map loaded
+            load_labelutil(labelUtil=labelUtil, is_bi_graphemes=False, language=language)
+            generate_bi_graphemes_dictionary(datagen.train_texts+datagen.val_texts)
+        load_labelutil(labelUtil=labelUtil, is_bi_graphemes=is_bi_graphemes, language=language)
+        args.config.set('arch', 'n_classes', str(labelUtil.get_count()))
+
+        if mode == "train":
+            if overwrite_meta_files:
+                log.info("Generate mean and std from samples")
+                normalize_target_k = args.config.getint('train', 'normalize_target_k')
+                datagen.sample_normalize(normalize_target_k, True)
+            else:
+                log.info("Read mean and std from meta files")
+                _read_saved_meta()
+        else:
+            # 'load': get feat_mean and feat_std to normalize dataset
+            _read_saved_meta()
+    else:
+        test_json = args.config.get('data', 'test_json')
+        datagen = DataGenerator(save_dir=save_dir, model_name=model_name)
+        # the test description file is loaded through the train-data loader
+        datagen.load_train_data(test_json, max_duration=max_duration)
+        # NOTE(review): the language is fixed to "en" here instead of using
+        # the configured ``language`` - confirm this is intended.
+        load_labelutil(labelUtil, is_bi_graphemes, language="en")
+        args.config.set('arch', 'n_classes', str(labelUtil.get_count()))
+        _read_saved_meta()
+
+    is_batchnorm = args.config.getboolean('arch', 'is_batchnorm')
+    if batch_size == 1 and is_batchnorm and is_training:
+        raise Warning('batch size 1 is too small for is_batchnorm')
+
+    # NOTE(review): predict mode queries partition "test" although the data
+    # above was loaded with load_train_data - verify against DataGenerator.
+    stats_partition = "train" if is_training else "test"
+    max_t_count = datagen.get_max_seq_length(partition=stats_partition)
+    max_label_length = \
+        datagen.get_max_label_length(partition=stats_partition, is_bi_graphemes=is_bi_graphemes)
+
+    # the architecture module reads these two values back in prepare_data
+    args.config.set('arch', 'max_t_count', str(max_t_count))
+    args.config.set('arch', 'max_label_length', str(max_label_length))
+    from importlib import import_module
+    prepare_data_template = import_module(args.config.get('arch', 'arch_file'))
+    init_states = prepare_data_template.prepare_data(args)
+    # sort file paths by duration (sortaGrad) only when training from scratch
+    sort_by_duration = (mode == "train")
+    is_bucketing = args.config.getboolean('arch', 'is_bucketing')
+    save_feature_as_csvfile = args.config.getboolean('train', 'save_feature_as_csvfile')
+    # the bucket list is only read from the config when bucketing is enabled
+    buckets = json.loads(args.config.get('arch', 'buckets')) if is_bucketing else None
+
+    def _build_iter(partition, count, sort):
+        # arguments shared by every iterator; only the bucketing iterator
+        # additionally receives ``buckets``
+        shared = dict(partition=partition,
+                      count=count,
+                      datagen=datagen,
+                      batch_size=batch_size,
+                      num_label=max_label_length,
+                      init_states=init_states,
+                      seq_length=max_t_count,
+                      width=whcs.width,
+                      height=whcs.height,
+                      sort_by_duration=sort,
+                      is_bi_graphemes=is_bi_graphemes,
+                      save_feature_as_csvfile=save_feature_as_csvfile)
+        if is_bucketing:
+            return BucketSTTIter(buckets=buckets, **shared)
+        return STTIter(**shared)
+
+    # the main iterator always uses partition "train", also in predict mode
+    data_loaded = _build_iter("train", datagen.count, sort_by_duration)
+
+    if not is_training:
+        return data_loaded, args
+
+    validation_loaded = _build_iter("validation", datagen.val_count, False)
+    return data_loaded, validation_loaded, args
+
+
+def load_model(args, contexts, data_train):
+    """Create or restore the model described by the cfg file.
+
+    In 'train' mode the architecture module named by ``arch.arch_file``
+    supplies a fresh symbol (or a symbol generator when bucketing). In
+    'load'/'predict' mode the epoch is parsed from ``common.model_file``,
+    whose stem is expected to end in a four-digit epoch preceded by one
+    separator character; without bucketing the module is restored from
+    ``checkpoints/<prefix>``.
+
+    :param args: parsed arguments carrying a ``config`` object
+    :param contexts: device contexts passed on to ``Module.load``
+    :param data_train: iterator providing ``provide_data``/``provide_label``
+        (only read when a non-bucketing checkpoint is restored)
+    :returns: ``(model_loaded, model_num_epoch)``; the epoch is ``None`` in
+        'train' mode
+    :raises Exception: when the mode is not one of train/predict/load
+    """
+    import re
+    from importlib import import_module
+
+    mode = args.config.get('common', 'mode')
+    load_optimizer_states = args.config.getboolean('load', 'load_optimizer_states')
+    is_start_from_batch = args.config.getboolean('load', 'is_start_from_batch')
+
+    symbol_template = import_module(args.config.get('arch', 'arch_file'))
+    is_bucketing = args.config.getboolean('arch', 'is_bucketing')
+
+    if mode == 'train':
+        model_num_epoch = None
+        if is_bucketing:
+            model_loaded = symbol_template.BucketingArch(args).get_sym_gen()
+        else:
+            model_loaded = symbol_template.arch(args)
+    elif mode == 'load' or mode == 'predict':
+        model_file = args.config.get('common', 'model_file')
+        model_name = os.path.splitext(model_file)[0]
+        # the last four characters of the stem are the epoch number
+        model_num_epoch = int(model_name[-4:])
+        if is_bucketing:
+            # with bucketing only the symbol generator is built here
+            model_loaded = symbol_template.BucketingArch(args).get_sym_gen()
+        else:
+            # drop the epoch digits and the separator in front of them
+            model_path = 'checkpoints/' + str(model_name[:-5])
+
+            data_names = [x[0] for x in data_train.provide_data]
+            label_names = [x[0] for x in data_train.provide_label]
+
+            model_loaded = mx.module.Module.load(
+                prefix=model_path, epoch=model_num_epoch, context=contexts,
+                data_names=data_names, label_names=label_names,
+                load_optimizer_states=load_optimizer_states)
+        if is_start_from_batch:
+            # use the first number in the file name as the epoch instead;
+            # raw string so that \d is a regex class, not a string escape
+            model_num_epoch = int(re.findall(r'\d+', model_file)[0])
+    else:
+        # fail with a clear message instead of an unbound local at return
+        raise Exception('mode must be the one of the followings - train,predict,load')
+
+    return model_loaded, model_num_epoch
+
+
+if __name__ == '__main__':
+    # Script entry point: read the cfg file named on the command line, build
+    # the data iterators and the model, then train, resume or predict
+    # depending on the configured mode.
+    if len(sys.argv) < 2:
+        raise Exception('cfg file path must be provided. ' +
+                        'ex)python main.py --configfile examplecfg.cfg')
+    args = parse_args(sys.argv[1])
+
+    # seeds: -1 in the cfg file means "do not use a fixed seed"
+    random_seed = args.config.getint('common', 'random_seed')
+    mx_random_seed = args.config.getint('common', 'mx_random_seed')
+    if random_seed != -1:
+        # numpy seed, used for shuffling the data list
+        np.random.seed(random_seed)
+    # mxnet seed for parameter initialization; time-based when not fixed
+    mx.random.seed(mx_random_seed if mx_random_seed != -1 else hash(datetime.now()))
+
+    # logger writing to the configured file
+    log_filename = args.config.get('common', 'log_filename')
+    log = LogUtil(filename=log_filename).getlogger()
+
+    mode = args.config.get('common', 'mode')
+    if mode not in ('train', 'predict', 'load'):
+        raise Exception(
+            'Define mode in the cfg file first. ' +
+            'train or predict or load can be the candidate for the mode.')
+
+    contexts = parse_contexts(args)
+    num_gpu = len(contexts)
+    batch_size = args.config.getint('common', 'batch_size')
+    # for data parallelism the batch must split evenly over the contexts
+    if batch_size % num_gpu:
+        raise Exception('num_gpu should be positive divisor of batch_size')
+
+    # load_data returns no validation iterator in predict mode
+    if mode == 'predict':
+        data_train, args = load_data(args)
+    else:
+        data_train, data_val, args = load_data(args)
+    is_batchnorm = args.config.getboolean('arch', 'is_batchnorm')
+    is_bucketing = args.config.getboolean('arch', 'is_bucketing')
+
+    # dump the effective configuration into the log
+    ConfigLogger(log)(args.config)
+
+    model_loaded, model_num_epoch = load_model(args, contexts, data_train)
+
+    if mode == 'train':
+        # wrap the fresh symbol (or symbol generator) in a module and train
+        if is_bucketing:
+            module = STTBucketingModule(sym_gen=model_loaded,
+                                        default_bucket_key=data_train.default_bucket_key,
+                                        context=contexts)
+        else:
+            data_names = [x[0] for x in data_train.provide_data]
+            label_names = [x[0] for x in data_train.provide_label]
+            module = mx.mod.Module(model_loaded, context=contexts,
+                                   data_names=data_names, label_names=label_names)
+        do_training(args=args, module=module, data_train=data_train, data_val=data_val)
+    elif mode == 'load':
+        # continue training from the epoch after the loaded checkpoint
+        do_training(args=args, module=model_loaded, data_train=data_train, data_val=data_val,
+                    begin_epoch=model_num_epoch + 1)
+    elif mode == 'predict':
+        if not is_bucketing:
+            model_loaded.bind(for_training=False, data_shapes=data_train.provide_data,
+                              label_shapes=data_train.provide_label)
+        else:
+            # load_model only returned a symbol generator here, so the
+            # module is built and its parameters restored at this point
+            max_t_count = args.config.getint('arch', 'max_t_count')
+            load_optimizer_states = args.config.getboolean('load', 'load_optimizer_states')
+            model_file = args.config.get('common', 'model_file')
+            model_name = os.path.splitext(model_file)[0]
+            model_num_epoch = int(model_name[-4:])
+            model_path = 'checkpoints/' + str(model_name[:-5])
+
+            model = STTBucketingModule(sym_gen=model_loaded,
+                                       default_bucket_key=data_train.default_bucket_key,
+                                       context=contexts)
+            # NOTE(review): bound with for_training=True although only
+            # forward passes follow - confirm this is intended.
+            model.bind(data_shapes=data_train.provide_data,
+                       label_shapes=data_train.provide_label,
+                       for_training=True)
+            _, arg_params, aux_params = mx.model.load_checkpoint(model_path, model_num_epoch)
+            model.set_params(arg_params, aux_params)
+            model_loaded = model
+        max_t_count = args.config.getint('arch', 'max_t_count')
+        eval_metric = STTMetric(batch_size=batch_size, num_gpu=num_gpu)
+        # the same forward/metric loop runs with and without batch norm
+        for data_batch in data_train:
+            model_loaded.forward(data_batch, is_train=False)
+            model_loaded.update_metric(eval_metric, data_batch.label)
+    else:
+        raise Exception(
+            'Define mode in the cfg file first. ' +
+            'train or predict or load can be the candidate for the mode')
diff --git a/example/speech_recognition/singleton.py b/example/speech_recognition/singleton.py
index 16f129b..aa9531b 100644
--- a/example/speech_recognition/singleton.py
+++ b/example/speech_recognition/singleton.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import logging as log
 
 class Singleton:
diff --git a/example/speech_recognition/stt_bi_graphemes_util.py b/example/speech_recognition/stt_bi_graphemes_util.py
index b8246a0..7ac8314 100644
--- a/example/speech_recognition/stt_bi_graphemes_util.py
+++ b/example/speech_recognition/stt_bi_graphemes_util.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import csv
 from collections import Counter
 
diff --git a/example/speech_recognition/stt_bucketing_module.py b/example/speech_recognition/stt_bucketing_module.py
index 796a336..073f6bf 100644
--- a/example/speech_recognition/stt_bucketing_module.py
+++ b/example/speech_recognition/stt_bucketing_module.py
@@ -1,13 +1,30 @@
-import mxnet as mx
-
-
-class STTBucketingModule(mx.mod.BucketingModule):
-
-    def save_checkpoint(self, prefix, epoch, save_optimizer_states=False):
-        symbol, data_names, label_names = self._sym_gen(self._default_bucket_key)
-        symbol.save('%s-symbol.json' % prefix)
-        param_name = '%s-%04d.params' % (prefix, epoch)
-        self.save_params(param_name)
-        if save_optimizer_states:
-            state_name = '%s-%04d.states' % (prefix, epoch)
-            self._curr_module.save_optimizer_states(state_name)
\ No newline at end of file
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import mxnet as mx
+
+
+class STTBucketingModule(mx.mod.BucketingModule):
+    """BucketingModule that saves the default bucket's symbol in checkpoints."""
+
+    def save_checkpoint(self, prefix, epoch, save_optimizer_states=False):
+        """Write ``<prefix>-symbol.json`` and ``<prefix>-<epoch>.params``.
+
+        The epoch is zero-padded to four digits. When
+        ``save_optimizer_states`` is true, the current module's optimizer
+        states are also written to ``<prefix>-<epoch>.states``.
+        """
+        # only the symbol of the generated triple is needed here
+        symbol, _, _ = self._sym_gen(self._default_bucket_key)
+        symbol.save('%s-symbol.json' % prefix)
+        self.save_params('%s-%04d.params' % (prefix, epoch))
+        if not save_optimizer_states:
+            return
+        self._curr_module.save_optimizer_states('%s-%04d.states' % (prefix, epoch))
diff --git a/example/speech_recognition/stt_datagenerator.py b/example/speech_recognition/stt_datagenerator.py
index d2a7b4b..8fafa79 100644
--- a/example/speech_recognition/stt_datagenerator.py
+++ b/example/speech_recognition/stt_datagenerator.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import absolute_import, division, print_function
 
 import json
@@ -55,7 +72,7 @@ class DataGenerator(object):
         """
         return spectrogram_from_file(
             audio_clip, step=self.step, window=self.window,
-            max_freq=self.max_freq, overwrite=overwrite, 
+            max_freq=self.max_freq, overwrite=overwrite,
             save_feature_as_csvfile=save_feature_as_csvfile)
 
     def load_metadata_from_desc_file(self, desc_file, partition='train',
diff --git a/example/speech_recognition/stt_io_bucketingiter.py b/example/speech_recognition/stt_io_bucketingiter.py
index 9655688..41b93f3 100644
--- a/example/speech_recognition/stt_io_bucketingiter.py
+++ b/example/speech_recognition/stt_io_bucketingiter.py
@@ -1,148 +1,165 @@
-from __future__ import print_function
-import mxnet as mx
-import sys
-sys.path.insert(0, "../../python")
-
-import bisect
-import random
-import numpy as np
-
-BATCH_SIZE = 1
-SEQ_LENGTH = 0
-NUM_GPU = 1
-
-
-def get_label(buf, num_lable):
-    ret = np.zeros(num_lable)
-    for i in range(len(buf)):
-        ret[i] = int(buf[i])
-    return ret
-
-
-class BucketSTTIter(mx.io.DataIter):
-    def __init__(self, count, datagen, batch_size, num_label, init_states, seq_length, width, height,
-                 sort_by_duration=True,
-                 is_bi_graphemes=False,
-                 partition="train",
-                 buckets=[],
-                 save_feature_as_csvfile=False
-                 ):
-        super(BucketSTTIter, self).__init__()
-
-        self.maxLabelLength = num_label
-        # global param
-        self.batch_size = batch_size
-        self.count = count
-        self.num_label = num_label
-        self.init_states = init_states
-        self.init_state_arrays = [mx.nd.zeros(x[1]) for x in init_states]
-        self.width = width
-        self.height = height
-        self.datagen = datagen
-        self.label = None
-        self.is_bi_graphemes = is_bi_graphemes
-        # self.partition = datagen.partition
-        if partition == 'train':
-            durations = datagen.train_durations
-            audio_paths = datagen.train_audio_paths
-            texts = datagen.train_texts
-        elif partition == 'validation':
-            durations = datagen.val_durations
-            audio_paths = datagen.val_audio_paths
-            texts = datagen.val_texts
-        elif partition == 'test':
-            durations = datagen.test_durations
-            audio_paths = datagen.test_audio_paths
-            texts = datagen.test_texts
-        else:
-            raise Exception("Invalid partition to load metadata. "
-                            "Must be train/validation/test")
-        # if sortagrad
-        if sort_by_duration:
-            durations, audio_paths, texts = datagen.sort_by_duration(durations,
-                                                                     audio_paths,
-                                                                     texts)
-        else:
-            durations = durations
-            audio_paths = audio_paths
-            texts = texts
-        self.trainDataList = zip(durations, audio_paths, texts)
-
-        self.trainDataIter = iter(self.trainDataList)
-        self.is_first_epoch = True
-
-        data_lengths = [int(d*100) for d in durations]
-        if len(buckets) == 0:
-            buckets = [i for i, j in enumerate(np.bincount(data_lengths))
-                       if j >= batch_size]
-        if len(buckets) == 0:
-            raise Exception('There is no valid buckets. It may occured by large batch_size for each buckets. max bincount:%d batch_size:%d' % (max(np.bincount(data_lengths)), batch_size))
-        buckets.sort()
-        ndiscard = 0
-        self.data = [[] for _ in buckets]
-        for i, sent in enumerate(data_lengths):
-            buck = bisect.bisect_left(buckets, sent)
-            if buck == len(buckets):
-                ndiscard += 1
-                continue
-            self.data[buck].append(self.trainDataList[i])
-        if ndiscard != 0:
-            print("WARNING: discarded %d sentences longer than the largest bucket."% ndiscard)
-        
-        self.buckets = buckets
-        self.nddata = []
-        self.ndlabel = []
-        self.default_bucket_key = max(buckets)
-
-        self.idx = []
-        for i, buck in enumerate(self.data):
-            self.idx.extend([(i, j) for j in range(0, len(buck) - batch_size + 1, batch_size)])
-        self.curr_idx = 0
-
-        self.provide_data = [('data', (self.batch_size, self.default_bucket_key , width * height))] + init_states
-        self.provide_label = [('label', (self.batch_size, self.maxLabelLength))]
-        self.save_feature_as_csvfile=save_feature_as_csvfile
-
-        #self.reset()
-
-    def reset(self):
-        """Resets the iterator to the beginning of the data."""
-        self.curr_idx = 0
-        random.shuffle(self.idx)
-        for buck in self.data:
-            np.random.shuffle(buck)
-
-    def next(self):
-        """Returns the next batch of data."""
-        if self.curr_idx == len(self.idx):
-            raise StopIteration
-        i, j = self.idx[self.curr_idx]
-        self.curr_idx += 1
-
-        audio_paths = []
-        texts = []
-        for duration, audio_path, text in self.data[i][j:j+self.batch_size]:
-            audio_paths.append(audio_path)
-            texts.append(text)
-
-        if self.is_first_epoch:
-            data_set = self.datagen.prepare_minibatch(audio_paths, texts, overwrite=True,
-                                                      is_bi_graphemes=self.is_bi_graphemes,
-                                                      seq_length=self.buckets[i],
-                                                      save_feature_as_csvfile=self.save_feature_as_csvfile)
-        else:
-            data_set = self.datagen.prepare_minibatch(audio_paths, texts, overwrite=False,
-                                                      is_bi_graphemes=self.is_bi_graphemes,
-                                                      seq_length=self.buckets[i],
-                                                      save_feature_as_csvfile=self.save_feature_as_csvfile)
-
-        data_all = [mx.nd.array(data_set['x'])] + self.init_state_arrays
-        label_all = [mx.nd.array(data_set['y'])]
-
-        self.label = label_all
-        provide_data = [('data', (self.batch_size, self.buckets[i], self.width * self.height))] + self.init_states
-
-        return mx.io.DataBatch(data_all, label_all, pad=0,
-                               bucket_key=self.buckets[i],
-                               provide_data=provide_data,
-                               provide_label=self.provide_label)
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import print_function
+import mxnet as mx
+import sys
+sys.path.insert(0, "../../python")
+
+import bisect
+import random
+import numpy as np
+
+BATCH_SIZE = 1
+SEQ_LENGTH = 0
+NUM_GPU = 1
+
+
+def get_label(buf, num_lable):
+    ret = np.zeros(num_lable)
+    for i in range(len(buf)):
+        ret[i] = int(buf[i])
+    return ret
+
+
+class BucketSTTIter(mx.io.DataIter):
+    def __init__(self, count, datagen, batch_size, num_label, init_states, seq_length, width, height,
+                 sort_by_duration=True,
+                 is_bi_graphemes=False,
+                 partition="train",
+                 buckets=[],
+                 save_feature_as_csvfile=False
+                 ):
+        super(BucketSTTIter, self).__init__()
+
+        self.maxLabelLength = num_label
+        # global param
+        self.batch_size = batch_size
+        self.count = count
+        self.num_label = num_label
+        self.init_states = init_states
+        self.init_state_arrays = [mx.nd.zeros(x[1]) for x in init_states]
+        self.width = width
+        self.height = height
+        self.datagen = datagen
+        self.label = None
+        self.is_bi_graphemes = is_bi_graphemes
+        # self.partition = datagen.partition
+        if partition == 'train':
+            durations = datagen.train_durations
+            audio_paths = datagen.train_audio_paths
+            texts = datagen.train_texts
+        elif partition == 'validation':
+            durations = datagen.val_durations
+            audio_paths = datagen.val_audio_paths
+            texts = datagen.val_texts
+        elif partition == 'test':
+            durations = datagen.test_durations
+            audio_paths = datagen.test_audio_paths
+            texts = datagen.test_texts
+        else:
+            raise Exception("Invalid partition to load metadata. "
+                            "Must be train/validation/test")
+        # if sortagrad
+        if sort_by_duration:
+            durations, audio_paths, texts = datagen.sort_by_duration(durations,
+                                                                     audio_paths,
+                                                                     texts)
+        else:
+            durations = durations
+            audio_paths = audio_paths
+            texts = texts
+        self.trainDataList = zip(durations, audio_paths, texts)
+
+        self.trainDataIter = iter(self.trainDataList)
+        self.is_first_epoch = True
+
+        data_lengths = [int(d*100) for d in durations]
+        if len(buckets) == 0:
+            buckets = [i for i, j in enumerate(np.bincount(data_lengths))
+                       if j >= batch_size]
+        if len(buckets) == 0:
+            raise Exception('There is no valid buckets. It may occured by large batch_size for each buckets. max bincount:%d batch_size:%d' % (max(np.bincount(data_lengths)), batch_size))
+        buckets.sort()
+        ndiscard = 0
+        self.data = [[] for _ in buckets]
+        for i, sent in enumerate(data_lengths):
+            buck = bisect.bisect_left(buckets, sent)
+            if buck == len(buckets):
+                ndiscard += 1
+                continue
+            self.data[buck].append(self.trainDataList[i])
+        if ndiscard != 0:
+            print("WARNING: discarded %d sentences longer than the largest bucket."% ndiscard)
+
+        self.buckets = buckets
+        self.nddata = []
+        self.ndlabel = []
+        self.default_bucket_key = max(buckets)
+
+        self.idx = []
+        for i, buck in enumerate(self.data):
+            self.idx.extend([(i, j) for j in range(0, len(buck) - batch_size + 1, batch_size)])
+        self.curr_idx = 0
+
+        self.provide_data = [('data', (self.batch_size, self.default_bucket_key , width * height))] + init_states
+        self.provide_label = [('label', (self.batch_size, self.maxLabelLength))]
+        self.save_feature_as_csvfile=save_feature_as_csvfile
+
+        #self.reset()
+
+    def reset(self):
+        """Resets the iterator to the beginning of the data."""
+        self.curr_idx = 0
+        random.shuffle(self.idx)
+        for buck in self.data:
+            np.random.shuffle(buck)
+
+    def next(self):
+        """Returns the next batch of data."""
+        if self.curr_idx == len(self.idx):
+            raise StopIteration
+        i, j = self.idx[self.curr_idx]
+        self.curr_idx += 1
+
+        audio_paths = []
+        texts = []
+        for duration, audio_path, text in self.data[i][j:j+self.batch_size]:
+            audio_paths.append(audio_path)
+            texts.append(text)
+
+        if self.is_first_epoch:
+            data_set = self.datagen.prepare_minibatch(audio_paths, texts, overwrite=True,
+                                                      is_bi_graphemes=self.is_bi_graphemes,
+                                                      seq_length=self.buckets[i],
+                                                      save_feature_as_csvfile=self.save_feature_as_csvfile)
+        else:
+            data_set = self.datagen.prepare_minibatch(audio_paths, texts, overwrite=False,
+                                                      is_bi_graphemes=self.is_bi_graphemes,
+                                                      seq_length=self.buckets[i],
+                                                      save_feature_as_csvfile=self.save_feature_as_csvfile)
+
+        data_all = [mx.nd.array(data_set['x'])] + self.init_state_arrays
+        label_all = [mx.nd.array(data_set['y'])]
+
+        self.label = label_all
+        provide_data = [('data', (self.batch_size, self.buckets[i], self.width * self.height))] + self.init_states
+
+        return mx.io.DataBatch(data_all, label_all, pad=0,
+                               bucket_key=self.buckets[i],
+                               provide_data=provide_data,
+                               provide_label=self.provide_label)
diff --git a/example/speech_recognition/stt_io_iter.py b/example/speech_recognition/stt_io_iter.py
index 5ae6519..6c9bacd 100644
--- a/example/speech_recognition/stt_io_iter.py
+++ b/example/speech_recognition/stt_io_iter.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 
 import sys
diff --git a/example/speech_recognition/stt_layer_batchnorm.py b/example/speech_recognition/stt_layer_batchnorm.py
index 5b73f4f..eb61ba6 100644
--- a/example/speech_recognition/stt_layer_batchnorm.py
+++ b/example/speech_recognition/stt_layer_batchnorm.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 
 
diff --git a/example/speech_recognition/stt_layer_conv.py b/example/speech_recognition/stt_layer_conv.py
index ab0035e..c34ddf2 100644
--- a/example/speech_recognition/stt_layer_conv.py
+++ b/example/speech_recognition/stt_layer_conv.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 
 
diff --git a/example/speech_recognition/stt_layer_fc.py b/example/speech_recognition/stt_layer_fc.py
index f435922..b3db1b1 100644
--- a/example/speech_recognition/stt_layer_fc.py
+++ b/example/speech_recognition/stt_layer_fc.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 
 from stt_layer_batchnorm import batchnorm
diff --git a/example/speech_recognition/stt_layer_gru.py b/example/speech_recognition/stt_layer_gru.py
index 89af1c7..0dd1328 100644
--- a/example/speech_recognition/stt_layer_gru.py
+++ b/example/speech_recognition/stt_layer_gru.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from collections import namedtuple
 
 import mxnet as mx
diff --git a/example/speech_recognition/stt_layer_lstm.py b/example/speech_recognition/stt_layer_lstm.py
index 93b4ca0..4adbbd4 100644
--- a/example/speech_recognition/stt_layer_lstm.py
+++ b/example/speech_recognition/stt_layer_lstm.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint:skip-file
 from collections import namedtuple
 
diff --git a/example/speech_recognition/stt_layer_slice.py b/example/speech_recognition/stt_layer_slice.py
index 6b434ec..ac7eae9 100644
--- a/example/speech_recognition/stt_layer_slice.py
+++ b/example/speech_recognition/stt_layer_slice.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 
 
diff --git a/example/speech_recognition/stt_layer_warpctc.py b/example/speech_recognition/stt_layer_warpctc.py
index 9f97adf..c821f9c 100644
--- a/example/speech_recognition/stt_layer_warpctc.py
+++ b/example/speech_recognition/stt_layer_warpctc.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 
 
diff --git a/example/speech_recognition/stt_metric.py b/example/speech_recognition/stt_metric.py
index 1c5f440..fc1916b 100644
--- a/example/speech_recognition/stt_metric.py
+++ b/example/speech_recognition/stt_metric.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 import numpy as np
 
diff --git a/example/speech_recognition/stt_utils.py b/example/speech_recognition/stt_utils.py
index 3c7ffce..0539d59 100644
--- a/example/speech_recognition/stt_utils.py
+++ b/example/speech_recognition/stt_utils.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import logging
 import os
 import os.path
@@ -104,7 +121,7 @@ def spectrogram_from_file(filename, step=10, window=20, max_freq=None,
     """
 
     csvfilename = filename.replace(".wav", ".csv")
-    if (os.path.isfile(csvfilename) is False) or overwrite: 
+    if (os.path.isfile(csvfilename) is False) or overwrite:
         with soundfile.SoundFile(filename) as sound_file:
             audio = sound_file.read(dtype='float32')
             sample_rate = sound_file.samplerate
diff --git a/example/speech_recognition/train.py b/example/speech_recognition/train.py
index f3a7555..0d04e4e 100644
--- a/example/speech_recognition/train.py
+++ b/example/speech_recognition/train.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import sys
 
 sys.path.insert(0, "../../python")
diff --git a/example/ssd/config/config.py b/example/ssd/config/config.py
index 278b770..38a07b5 100644
--- a/example/ssd/config/config.py
+++ b/example/ssd/config/config.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os
 from utils import DotDict, namedtuple_with_defaults, zip_namedtuple, config_as_dict
 
diff --git a/example/ssd/config/utils.py b/example/ssd/config/utils.py
index 1d66655..5c8af6a 100644
--- a/example/ssd/config/utils.py
+++ b/example/ssd/config/utils.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import collections
 
 class DotDict(dict):
diff --git a/example/ssd/data/demo/download_demo_images.py b/example/ssd/data/demo/download_demo_images.py
index 8546aa5..554ba7e 100755
--- a/example/ssd/data/demo/download_demo_images.py
+++ b/example/ssd/data/demo/download_demo_images.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os
 
 wd = os.path.dirname(os.path.realpath(__file__))
diff --git a/example/ssd/dataset/concat_db.py b/example/ssd/dataset/concat_db.py
index da9e151..cb6c99e 100644
--- a/example/ssd/dataset/concat_db.py
+++ b/example/ssd/dataset/concat_db.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from imdb import Imdb
 import random
 
diff --git a/example/ssd/dataset/imdb.py b/example/ssd/dataset/imdb.py
index 279fe9c..4fbb5d8 100644
--- a/example/ssd/dataset/imdb.py
+++ b/example/ssd/dataset/imdb.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import numpy as np
 import os.path as osp
 
diff --git a/example/ssd/dataset/iterator.py b/example/ssd/dataset/iterator.py
index 5cefece..8b6857b 100644
--- a/example/ssd/dataset/iterator.py
+++ b/example/ssd/dataset/iterator.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 import numpy as np
 import cv2
diff --git a/example/ssd/dataset/mscoco.py b/example/ssd/dataset/mscoco.py
index b46b227..ff2a753 100644
--- a/example/ssd/dataset/mscoco.py
+++ b/example/ssd/dataset/mscoco.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os
 import numpy as np
 from imdb import Imdb
diff --git a/example/ssd/dataset/pascal_voc.py b/example/ssd/dataset/pascal_voc.py
index 31e287e..d986890 100644
--- a/example/ssd/dataset/pascal_voc.py
+++ b/example/ssd/dataset/pascal_voc.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 import os
 import numpy as np
diff --git a/example/ssd/dataset/pycocotools/__init__.py b/example/ssd/dataset/pycocotools/__init__.py
index 3f7d85b..2f4e0d4 100755
--- a/example/ssd/dataset/pycocotools/__init__.py
+++ b/example/ssd/dataset/pycocotools/__init__.py
@@ -1 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 __author__ = 'tylin'
diff --git a/example/ssd/dataset/pycocotools/coco.py b/example/ssd/dataset/pycocotools/coco.py
index a8939f6..4dd54ad 100755
--- a/example/ssd/dataset/pycocotools/coco.py
+++ b/example/ssd/dataset/pycocotools/coco.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 __author__ = 'tylin'
 __version__ = '2.0'
 # Interface for accessing the Microsoft COCO dataset.
diff --git a/example/ssd/dataset/testdb.py b/example/ssd/dataset/testdb.py
index 7477d77..9a4b985 100644
--- a/example/ssd/dataset/testdb.py
+++ b/example/ssd/dataset/testdb.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os
 from imdb import Imdb
 
diff --git a/example/ssd/dataset/yolo_format.py b/example/ssd/dataset/yolo_format.py
index ce6605f..f1b73d0 100644
--- a/example/ssd/dataset/yolo_format.py
+++ b/example/ssd/dataset/yolo_format.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os
 import numpy as np
 from imdb import Imdb
diff --git a/example/ssd/demo.py b/example/ssd/demo.py
index bda4606..5212675 100644
--- a/example/ssd/demo.py
+++ b/example/ssd/demo.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import argparse
 import tools.find_mxnet
 import mxnet as mx
diff --git a/example/ssd/deploy.py b/example/ssd/deploy.py
index aa70cac..415f334 100644
--- a/example/ssd/deploy.py
+++ b/example/ssd/deploy.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 import argparse
 import tools.find_mxnet
diff --git a/example/ssd/detect/detector.py b/example/ssd/detect/detector.py
index 19b78f6..b6adac1 100644
--- a/example/ssd/detect/detector.py
+++ b/example/ssd/detect/detector.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 import mxnet as mx
 import numpy as np
diff --git a/example/ssd/evaluate.py b/example/ssd/evaluate.py
index 65e0b30..4e7f0a4 100644
--- a/example/ssd/evaluate.py
+++ b/example/ssd/evaluate.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import argparse
 import tools.find_mxnet
 import mxnet as mx
diff --git a/example/ssd/evaluate/eval_metric.py b/example/ssd/evaluate/eval_metric.py
index bb03e01..bb2b77b 100644
--- a/example/ssd/evaluate/eval_metric.py
+++ b/example/ssd/evaluate/eval_metric.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 import numpy as np
 
diff --git a/example/ssd/evaluate/eval_voc.py b/example/ssd/evaluate/eval_voc.py
index d16856e..0ba7f7e 100644
--- a/example/ssd/evaluate/eval_voc.py
+++ b/example/ssd/evaluate/eval_voc.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """
 given a pascal voc imdb, compute mAP
 """
diff --git a/example/ssd/evaluate/evaluate_net.py b/example/ssd/evaluate/evaluate_net.py
index 4c629f8..7f1a32d 100644
--- a/example/ssd/evaluate/evaluate_net.py
+++ b/example/ssd/evaluate/evaluate_net.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 import os
 import sys
diff --git a/example/ssd/symbol/common.py b/example/ssd/symbol/common.py
index 474d3ea..ea58c15 100644
--- a/example/ssd/symbol/common.py
+++ b/example/ssd/symbol/common.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 import numpy as np
 
diff --git a/example/ssd/symbol/inceptionv3.py b/example/ssd/symbol/inceptionv3.py
index 1c38ae6..6022ce5 100644
--- a/example/ssd/symbol/inceptionv3.py
+++ b/example/ssd/symbol/inceptionv3.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """
 Inception V3, suitable for images with around 299 x 299
 
diff --git a/example/ssd/symbol/legacy_vgg16_ssd_300.py b/example/ssd/symbol/legacy_vgg16_ssd_300.py
index 257fdd6..c1f8ea7 100644
--- a/example/ssd/symbol/legacy_vgg16_ssd_300.py
+++ b/example/ssd/symbol/legacy_vgg16_ssd_300.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 from common import legacy_conv_act_layer
 from common import multibox_layer
diff --git a/example/ssd/symbol/legacy_vgg16_ssd_512.py b/example/ssd/symbol/legacy_vgg16_ssd_512.py
index 15424f9..6cc3aa2 100644
--- a/example/ssd/symbol/legacy_vgg16_ssd_512.py
+++ b/example/ssd/symbol/legacy_vgg16_ssd_512.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 from common import legacy_conv_act_layer
 from common import multibox_layer
diff --git a/example/ssd/symbol/resnet.py b/example/ssd/symbol/resnet.py
index 9c121e8..d7dc3cc 100644
--- a/example/ssd/symbol/resnet.py
+++ b/example/ssd/symbol/resnet.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 '''
 Adapted from https://github.com/tornadomeet/ResNet/blob/master/symbol_resnet.py
 Original author Wei Wu
diff --git a/example/ssd/symbol/symbol_builder.py b/example/ssd/symbol/symbol_builder.py
index 48c9d1d..4cd7f88 100644
--- a/example/ssd/symbol/symbol_builder.py
+++ b/example/ssd/symbol/symbol_builder.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 from common import multi_layer_feature, multibox_layer
 
diff --git a/example/ssd/symbol/symbol_factory.py b/example/ssd/symbol/symbol_factory.py
index ef156f1..c451cd6 100644
--- a/example/ssd/symbol/symbol_factory.py
+++ b/example/ssd/symbol/symbol_factory.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """Presets for various network configurations"""
 import logging
 import symbol_builder
diff --git a/example/ssd/symbol/vgg16_reduced.py b/example/ssd/symbol/vgg16_reduced.py
index c6a55b1..16535e6 100644
--- a/example/ssd/symbol/vgg16_reduced.py
+++ b/example/ssd/symbol/vgg16_reduced.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 
 def get_symbol(num_classes=1000, **kwargs):
diff --git a/example/ssd/tools/caffe_converter/caffe_parse/parse_from_protobuf.py b/example/ssd/tools/caffe_converter/caffe_parse/parse_from_protobuf.py
index b828ca2..862049a 100644
--- a/example/ssd/tools/caffe_converter/caffe_parse/parse_from_protobuf.py
+++ b/example/ssd/tools/caffe_converter/caffe_parse/parse_from_protobuf.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from google.protobuf import text_format
 import numpy as np
 import caffe_parse.caffe_pb2 as caffe_pb2
diff --git a/example/ssd/tools/caffe_converter/convert_model.py b/example/ssd/tools/caffe_converter/convert_model.py
index a06b655..f17a3f2 100644
--- a/example/ssd/tools/caffe_converter/convert_model.py
+++ b/example/ssd/tools/caffe_converter/convert_model.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 import sys
 import os.path as osp
diff --git a/example/ssd/tools/caffe_converter/convert_symbol.py b/example/ssd/tools/caffe_converter/convert_symbol.py
index 63b044a..10510aa 100644
--- a/example/ssd/tools/caffe_converter/convert_symbol.py
+++ b/example/ssd/tools/caffe_converter/convert_symbol.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 from google.protobuf import text_format
 import argparse
diff --git a/example/ssd/tools/caffe_converter/mean_image.py b/example/ssd/tools/caffe_converter/mean_image.py
index d28a750..e07c6fb 100644
--- a/example/ssd/tools/caffe_converter/mean_image.py
+++ b/example/ssd/tools/caffe_converter/mean_image.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 import numpy as np
 import argparse
diff --git a/example/ssd/tools/find_mxnet.py b/example/ssd/tools/find_mxnet.py
index 66545f3..0ad64cc 100644
--- a/example/ssd/tools/find_mxnet.py
+++ b/example/ssd/tools/find_mxnet.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 try:
     import mxnet as mx
 except ImportError:
diff --git a/example/ssd/tools/prepare_coco.sh b/example/ssd/tools/prepare_coco.sh
index 1b11f93..fd34bd5 100644
--- a/example/ssd/tools/prepare_coco.sh
+++ b/example/ssd/tools/prepare_coco.sh
@@ -1,4 +1,22 @@
 #!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 python $DIR/prepare_dataset.py --dataset coco --set train2014,valminusminival2014 --target $DIR/../data/train.lst  --root $DIR/../data/coco
 python $DIR/prepare_dataset.py --dataset coco --set minival2014 --target $DIR/../data/val.lst --shuffle False --root $DIR/../data/coco
diff --git a/example/ssd/tools/prepare_dataset.py b/example/ssd/tools/prepare_dataset.py
index 12b090c..9b4fceb 100644
--- a/example/ssd/tools/prepare_dataset.py
+++ b/example/ssd/tools/prepare_dataset.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 import sys, os
 import argparse
diff --git a/example/ssd/tools/prepare_pascal.sh b/example/ssd/tools/prepare_pascal.sh
index 954327a..1c23cd5 100644
--- a/example/ssd/tools/prepare_pascal.sh
+++ b/example/ssd/tools/prepare_pascal.sh
@@ -1,4 +1,22 @@
 #!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 python $DIR/prepare_dataset.py --dataset pascal --year 2007,2012 --set trainval --target $DIR/../data/train.lst
 python $DIR/prepare_dataset.py --dataset pascal --year 2007 --set test --target $DIR/../data/val.lst --shuffle False
diff --git a/example/ssd/tools/rand_sampler.py b/example/ssd/tools/rand_sampler.py
index d2ed3ad..7f0cb6f 100644
--- a/example/ssd/tools/rand_sampler.py
+++ b/example/ssd/tools/rand_sampler.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import numpy as np
 import math
 
diff --git a/example/ssd/tools/visualize_net.py b/example/ssd/tools/visualize_net.py
index f72d6a6..b3b714a 100644
--- a/example/ssd/tools/visualize_net.py
+++ b/example/ssd/tools/visualize_net.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 import find_mxnet
 import mxnet as mx
diff --git a/example/ssd/train.py b/example/ssd/train.py
index 62ad2c4..f08aafb 100644
--- a/example/ssd/train.py
+++ b/example/ssd/train.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import argparse
 import tools.find_mxnet
 import mxnet as mx
diff --git a/example/ssd/train/metric.py b/example/ssd/train/metric.py
index c7d0078..731f8fc 100644
--- a/example/ssd/train/metric.py
+++ b/example/ssd/train/metric.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 import numpy as np
 
diff --git a/example/ssd/train/train_net.py b/example/ssd/train/train_net.py
index 54cae05..767e324 100644
Binary files a/example/ssd/train/train_net.py and b/example/ssd/train/train_net.py differ
diff --git a/example/stochastic-depth/sd_cifar10.py b/example/stochastic-depth/sd_cifar10.py
index 9c6f273..c123562 100644
--- a/example/stochastic-depth/sd_cifar10.py
+++ b/example/stochastic-depth/sd_cifar10.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 ###########################################################################################
 # Implementation of the stochastic depth algorithm described in the paper
 #
diff --git a/example/stochastic-depth/sd_mnist.py b/example/stochastic-depth/sd_mnist.py
index 8a13d4b..7eb9374 100644
--- a/example/stochastic-depth/sd_mnist.py
+++ b/example/stochastic-depth/sd_mnist.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 ################################################################################
 # A sanity check mainly for debugging purpose. See sd_cifar10.py for a non-trivial
 # example of stochastic depth on cifar10.
diff --git a/example/stochastic-depth/sd_module.py b/example/stochastic-depth/sd_module.py
index ae8cfe0..f30913d 100644
--- a/example/stochastic-depth/sd_module.py
+++ b/example/stochastic-depth/sd_module.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import logging
 import mxnet as mx
 import numpy as np
diff --git a/example/svm_mnist/svm_mnist.py b/example/svm_mnist/svm_mnist.py
index ac2702e..6795401 100644
--- a/example/svm_mnist/svm_mnist.py
+++ b/example/svm_mnist/svm_mnist.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 
 #############################################################
 ## Please read the README.md document for better reference ##
diff --git a/example/torch/data.py b/example/torch/data.py
index d39821f..0ca8e1f 100644
--- a/example/torch/data.py
+++ b/example/torch/data.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 """ data iterator for mnist """
 import sys
diff --git a/example/torch/torch_function.py b/example/torch/torch_function.py
index 4ea4558..af285de 100644
--- a/example/torch/torch_function.py
+++ b/example/torch/torch_function.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 import mxnet as mx
 x = mx.th.randn(2, 2, ctx=mx.cpu(0))
diff --git a/example/torch/torch_module.py b/example/torch/torch_module.py
index 02eacc3..1595173 100644
--- a/example/torch/torch_module.py
+++ b/example/torch/torch_module.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 from data import mnist_iterator
 import mxnet as mx
diff --git a/example/utils/get_data.py b/example/utils/get_data.py
index 64a7532..861d16c 100644
--- a/example/utils/get_data.py
+++ b/example/utils/get_data.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os
 import mxnet as mx
 
diff --git a/example/warpctc/infer_ocr.py b/example/warpctc/infer_ocr.py
index 2d496f0..d469990 100644
--- a/example/warpctc/infer_ocr.py
+++ b/example/warpctc/infer_ocr.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding=utf-8
 # pylint: disable=C0111,too-many-arguments,too-many-instance-attributes,too-many-locals,redefined-outer-name,fixme
 # pylint: disable=superfluous-parens, no-member, invalid-name
diff --git a/example/warpctc/lstm.py b/example/warpctc/lstm.py
index 4be4a0d..9e0e05c 100644
--- a/example/warpctc/lstm.py
+++ b/example/warpctc/lstm.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint:skip-file
 import sys
 sys.path.insert(0, "../../python")
diff --git a/example/warpctc/lstm_model.py b/example/warpctc/lstm_model.py
index e9c8aa7..d359f1a 100644
--- a/example/warpctc/lstm_model.py
+++ b/example/warpctc/lstm_model.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 
 # pylint: disable=C0111,too-many-arguments,too-many-instance-attributes,too-many-locals,redefined-outer-name,fixme
 # pylint: disable=superfluous-parens, no-member, invalid-name
@@ -51,4 +68,4 @@ class LSTMInferenceModel(object):
         for key in self.states_dict.keys():
             self.states_dict[key].copyto(self.executor.arg_dict[key])
         prob = self.executor.outputs[0].asnumpy()
-        return prob
\ No newline at end of file
+        return prob
diff --git a/example/warpctc/lstm_ocr.py b/example/warpctc/lstm_ocr.py
index 540c676..49df98a 100644
--- a/example/warpctc/lstm_ocr.py
+++ b/example/warpctc/lstm_ocr.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: disable=C0111,too-many-arguments,too-many-instance-attributes,too-many-locals,redefined-outer-name,fixme
 # pylint: disable=superfluous-parens, no-member, invalid-name
 from __future__ import print_function
@@ -80,8 +97,8 @@ class OCRIter(mx.io.DataIter):
             label_all = [mx.nd.array(label)]
             data_names = ['data'] + init_state_names
             label_names = ['label']
-            
-            
+
+
             data_batch = SimpleBatch(data_names, data_all, label_names, label_all)
             yield data_batch
 
@@ -198,14 +215,14 @@ if __name__ == '__main__':
     import logging
     head = '%(asctime)-15s %(message)s'
     logging.basicConfig(level=logging.DEBUG, format=head)
-    
+
     print('begin fit')
 
     prefix = 'ocr'
     model.fit(X=data_train, eval_data=data_val,
               eval_metric = mx.metric.np(Accuracy),
               # Use the following eval_metric if your num_label >= 10, or varies in a wide range
-              # eval_metric = mx.metric.np(Accuracy_LCS), 
+              # eval_metric = mx.metric.np(Accuracy_LCS),
               batch_end_callback=mx.callback.Speedometer(BATCH_SIZE, 50),
               epoch_end_callback = mx.callback.do_checkpoint(prefix, 1))
 
diff --git a/example/warpctc/ocr_predict.py b/example/warpctc/ocr_predict.py
index a07733e..3096a66 100644
--- a/example/warpctc/ocr_predict.py
+++ b/example/warpctc/ocr_predict.py
@@ -1,4 +1,22 @@
 #!/usr/bin/env python2.7
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding=utf-8
 from __future__ import print_function
 import sys, os
diff --git a/example/warpctc/toy_ctc.py b/example/warpctc/toy_ctc.py
index 46bab57..c7b0ccc 100644
--- a/example/warpctc/toy_ctc.py
+++ b/example/warpctc/toy_ctc.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: disable=C0111,too-many-arguments,too-many-instance-attributes,too-many-locals,redefined-outer-name,fixme
 # pylint: disable=superfluous-parens, no-member, invalid-name
 from __future__ import print_function
@@ -68,13 +85,13 @@ class DataIter(mx.io.DataIter):
                 num, img = gen_rand()
                 data.append(img)
                 label.append(get_label(num))
-                
+
             data_all = [mx.nd.array(data)] + self.init_state_arrays
             label_all = [mx.nd.array(label)]
             data_names = ['data'] + init_state_names
             label_names = ['label']
-            
-            
+
+
             data_batch = SimpleBatch(data_names, data_all, label_names, label_all)
             yield data_batch
 
@@ -94,7 +111,7 @@ def ctc_label(p):
             continue
         ret.append(c2)
     return ret
-        
+
 
 def Accuracy(label, pred):
     global BATCH_SIZE
@@ -154,7 +171,7 @@ if __name__ == '__main__':
     import logging
     head = '%(asctime)-15s %(message)s'
     logging.basicConfig(level=logging.DEBUG, format=head)
-    
+
     print('begin fit')
 
     model.fit(X=data_train, eval_data=data_val,
diff --git a/include/mxnet/base.h b/include/mxnet/base.h
index 739105b..5064204 100644
--- a/include/mxnet/base.h
+++ b/include/mxnet/base.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file base.h
  * \brief configuation of mxnet as well as basic data structure.
  */
diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h
index 3b8d54c..7a45099 100644
--- a/include/mxnet/c_api.h
+++ b/include/mxnet/c_api.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file c_api.h
  * \brief C API of mxnet
  */
diff --git a/include/mxnet/c_lapack_api.h b/include/mxnet/c_lapack_api.h
index 440d284..1ae90a9 100644
--- a/include/mxnet/c_lapack_api.h
+++ b/include/mxnet/c_lapack_api.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2017 by Contributors
  * \file c_lapack_api.h
  * \brief Unified interface for LAPACK calls from within mxnet.
  *  Purpose is to hide the platform specific differences.
diff --git a/include/mxnet/c_predict_api.h b/include/mxnet/c_predict_api.h
index df60c84..8cf153e 100644
--- a/include/mxnet/c_predict_api.h
+++ b/include/mxnet/c_predict_api.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file c_predict_api.h
  * \brief C predict API of mxnet, contains a minimum API to run prediction.
  *  This file is self-contained, and do not dependent on any other files.
diff --git a/include/mxnet/engine.h b/include/mxnet/engine.h
index ed46c84..e997a2b 100644
--- a/include/mxnet/engine.h
+++ b/include/mxnet/engine.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file engine.h
  * \brief Engine that schedules all the operations according to dependency.
  */
diff --git a/include/mxnet/executor.h b/include/mxnet/executor.h
index 9308587..a74d3b0 100644
--- a/include/mxnet/executor.h
+++ b/include/mxnet/executor.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file executor.h
  * \brief Symbolic executor interface of mxnet.
  * \author Min Lin, Bing Xu
diff --git a/include/mxnet/io.h b/include/mxnet/io.h
index b4429a9..68c1ede 100644
--- a/include/mxnet/io.h
+++ b/include/mxnet/io.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file io.h
  * \brief mxnet io data structure and data iterator
  */
diff --git a/include/mxnet/kvstore.h b/include/mxnet/kvstore.h
index 11db28e..d2924ec 100644
--- a/include/mxnet/kvstore.h
+++ b/include/mxnet/kvstore.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file kvstore.h
  * \brief key-value store interface for mxnet
  */
diff --git a/include/mxnet/mxrtc.h b/include/mxnet/mxrtc.h
index 9de59f6..8d7facc 100644
--- a/include/mxnet/mxrtc.h
+++ b/include/mxnet/mxrtc.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file mxrtc.h
  * \brief Wrapper for NVRTC
  * \author Junyuan Xie
diff --git a/include/mxnet/ndarray.h b/include/mxnet/ndarray.h
index e349b30..d40b549 100644
--- a/include/mxnet/ndarray.h
+++ b/include/mxnet/ndarray.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file ndarray.h
  * \brief NDArray interface that handles array arithematics.
  */
diff --git a/include/mxnet/op_attr_types.h b/include/mxnet/op_attr_types.h
index dbf9a07..1ba0737 100644
--- a/include/mxnet/op_attr_types.h
+++ b/include/mxnet/op_attr_types.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file op_attr_types.h
  * \brief Additional operator attributes
  *  beside the ones provided by NNVM
diff --git a/include/mxnet/operator.h b/include/mxnet/operator.h
index 09a6433..2245db0 100644
--- a/include/mxnet/operator.h
+++ b/include/mxnet/operator.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file operator.h
  * \brief Operator interface of mxnet.
  * \author Naiyan Wang
diff --git a/include/mxnet/operator_util.h b/include/mxnet/operator_util.h
index 0f27b10..92ef2ec 100644
--- a/include/mxnet/operator_util.h
+++ b/include/mxnet/operator_util.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file operator_util.h
  * \brief Utility functions and registries to help quickly build new operators.
  *  [Deprecated]
diff --git a/include/mxnet/resource.h b/include/mxnet/resource.h
index 93b8352..1ca1fc6 100644
--- a/include/mxnet/resource.h
+++ b/include/mxnet/resource.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file resource.h
  * \brief Global resource allocation handling.
  */
diff --git a/include/mxnet/storage.h b/include/mxnet/storage.h
index 1b76523..bfb42de 100644
--- a/include/mxnet/storage.h
+++ b/include/mxnet/storage.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file storage.h
  * \brief Storage manager across multiple devices.
  */
diff --git a/include/mxnet/tensor_blob.h b/include/mxnet/tensor_blob.h
index 1928aa4..18bf4fa 100755
--- a/include/mxnet/tensor_blob.h
+++ b/include/mxnet/tensor_blob.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2014 by Contributors
  * \file tensor_blob.h
  * \brief TBlob class that holds common representation of
  *  arbirary dimension tensor, can be used to transformed
diff --git a/matlab/get_inception_model.sh b/matlab/get_inception_model.sh
index aa0092d..af2479b 100755
--- a/matlab/get_inception_model.sh
+++ b/matlab/get_inception_model.sh
@@ -1,5 +1,23 @@
 #!/usr/bin/env bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 MATLAB_DIR=$(cd `dirname $0`; pwd)
 DATA_DIR="${MATLAB_DIR}/data/"
 
diff --git a/perl-package/AI-MXNet/examples/get_ptb_data.sh b/perl-package/AI-MXNet/examples/get_ptb_data.sh
index 1ec009a..d2641cb 100755
--- a/perl-package/AI-MXNet/examples/get_ptb_data.sh
+++ b/perl-package/AI-MXNet/examples/get_ptb_data.sh
@@ -1,5 +1,23 @@
 #!/usr/bin/env bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 RNN_DIR=$(cd `dirname $0`; pwd)
 DATA_DIR="${RNN_DIR}/data/"
 
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet.pm b/perl-package/AI-MXNet/lib/AI/MXNet.pm
index 54fb6b3..1d21253 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet;
 use v5.14.0;
 use strict;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Base.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Base.pm
index 69f8e43..d5ff0dd 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Base.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Base.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::Base;
 use strict;
 use warnings;
@@ -12,7 +29,7 @@ use Exporter;
 use base qw(Exporter);
 use List::Util qw(shuffle);
 
-@AI::MXNet::Base::EXPORT = qw(product enumerate assert zip check_call build_param_doc 
+@AI::MXNet::Base::EXPORT = qw(product enumerate assert zip check_call build_param_doc
                               pdl cat dog svd bisect_left pdl_shuffle
                               DTYPE_STR_TO_MX DTYPE_MX_TO_STR DTYPE_MX_TO_PDL
                               DTYPE_PDL_TO_MX DTYPE_MX_TO_PERL GRAD_REQ_MAP);
@@ -239,12 +256,12 @@ sub build_param_doc
     $remove_dup //= 1;
     my %param_keys;
     my @param_str;
-    zip(sub { 
+    zip(sub {
             my ($key, $type_info, $desc) = @_;
             return if exists $param_keys{$key} and $remove_dup;
             $param_keys{$key} = 1;
             my $ret = sprintf("%s : %s", $key, $type_info);
-            $ret .= "\n    ".$desc if length($desc); 
+            $ret .= "\n    ".$desc if length($desc);
             push @param_str,  $ret;
         },
         $arg_names, $arg_types, $arg_descs
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/CachedOp.pm b/perl-package/AI-MXNet/lib/AI/MXNet/CachedOp.pm
index ede4826..f3c21ed 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/CachedOp.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/CachedOp.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::CachedOp;
 
 =head1 NAME
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Callback.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Callback.pm
index 04aaea0..da33097 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Callback.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Callback.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::Callback;
 use strict;
 use warnings;
@@ -92,7 +109,7 @@ extends 'AI::MXNet::Callback';
 
 =head1 NAME
 
-    AI::MXNet::Speedometer - A callback that logs training speed 
+    AI::MXNet::Speedometer - A callback that logs training speed
 =cut
 
 =head1 DESCRIPTION
@@ -244,4 +261,4 @@ method LogValidationMetricsCallback()
     AI::MXNet::LogValidationMetricsCallback->new
 }
 
-1;
\ No newline at end of file
+1;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Context.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Context.pm
index 68628a8..2eca424 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Context.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Context.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::Context;
 use strict;
 use warnings;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Contrib.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Contrib.pm
index cb6bc01..a81030b 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Contrib.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Contrib.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::Contrib;
 use strict;
 use warnings;
@@ -9,4 +26,4 @@ sub symbol { 'AI::MXNet::Contrib::Symbol'  }
 sub nd     { 'AI::MXNet::Contrib::NDArray' }
 sub autograd { 'AI::MXNet::Contrib::AutoGrad' }
 
-1;
\ No newline at end of file
+1;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Contrib/AutoGrad.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Contrib/AutoGrad.pm
index 6d9c103..ff65998 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Contrib/AutoGrad.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Contrib/AutoGrad.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::Contrib::AutoGrad;
 use strict;
 use warnings;
@@ -224,4 +241,4 @@ method test_section(CodeRef $sub)
     __PACKAGE__->set_is_training(1) if $prev;
 }
 
-1;
\ No newline at end of file
+1;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Contrib/NDArray.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Contrib/NDArray.pm
index 239f1c4..78aed8f 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Contrib/NDArray.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Contrib/NDArray.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::Contrib::NDArray;
 use strict;
 use warnings;
@@ -10,4 +27,4 @@ sub AUTOLOAD {
     return AI::MXNet::NDArray->$sub(@_);
 }
 
-1;
\ No newline at end of file
+1;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Contrib/Symbol.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Contrib/Symbol.pm
index c67cdad..efe785d 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Contrib/Symbol.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Contrib/Symbol.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::Contrib::Symbol;
 use strict;
 use warnings;
@@ -10,4 +27,4 @@ sub AUTOLOAD {
     return AI::MXNet::Symbol->$sub(@_);
 }
 
-1;
\ No newline at end of file
+1;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Executor.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Executor.pm
index a2ab786..20a6f58 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Executor.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Executor.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::Executor;
 use strict;
 use warnings;
@@ -9,7 +26,7 @@ use AI::MXNet::Function::Parameters;
 
 has 'handle'            => (is => 'ro', isa => 'ExecutorHandle', required => 1);
 has 'arg_arrays'        => (is => 'rw', isa => 'Maybe[ArrayRef[AI::MXNet::NDArray]]');
-has 'grad_arrays'       => (is => 'rw', isa => 'Maybe[ArrayRef[Undef|AI::MXNet::NDArray]]'); 
+has 'grad_arrays'       => (is => 'rw', isa => 'Maybe[ArrayRef[Undef|AI::MXNet::NDArray]]');
 has 'aux_arrays'        => (is => 'rw', isa => 'Maybe[ArrayRef[AI::MXNet::NDArray]]');
 has '_symbol'           => (is => 'rw', init_arg => 'symbol',    isa => 'AI::MXNet::Symbol');
 has '_ctx'              => (is => 'rw', init_arg => 'ctx',       isa => 'AI::MXNet::Context' );
@@ -420,7 +437,7 @@ method copy_params_from(
 method reshape(HashRef[Shape] $kwargs, Int :$partial_shaping=0, Int :$allow_up_sizing=0)
 {
     my ($arg_shapes, undef, $aux_shapes) = $self->_symbol->infer_shape(%{ $kwargs });
-    confess("Insufficient argument shapes provided.") 
+    confess("Insufficient argument shapes provided.")
         unless defined $arg_shapes;
     my %new_arg_dict;
     my %new_grad_dict;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Executor/Group.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Executor/Group.pm
index 35f1b57..611c931 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Executor/Group.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Executor/Group.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::Executor::Group;
 use strict;
 use warnings;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Function/Parameters.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Function/Parameters.pm
index 0212520..e4bbc90 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Function/Parameters.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Function/Parameters.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::Function::Parameters;
 use strict;
 use warnings;
@@ -32,4 +49,4 @@ sub import {
     };
 }
 
-1;
\ No newline at end of file
+1;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/IO.pm b/perl-package/AI-MXNet/lib/AI/MXNet/IO.pm
index 73a672e..7a61cd9 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/IO.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/IO.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::IO;
 use strict;
 use warnings;
@@ -784,7 +801,7 @@ method _init_io_module()
             no strict 'refs';
             {
                 *{__PACKAGE__."::$name"} = $data_iter;
-            } 
+            }
         }
     }
 }
@@ -792,4 +809,4 @@ method _init_io_module()
 # Initialize the io in startups
 __PACKAGE__->_init_io_module;
 
-1;
\ No newline at end of file
+1;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Image.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Image.pm
index 50e4a41..b996b02 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Image.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Image.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::Image;
 use strict;
 use warnings;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Initializer.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Initializer.pm
index e6beffb..182327d 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Initializer.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Initializer.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::InitDesc;
 use Mouse;
 use AI::MXNet::Function::Parameters;
@@ -104,7 +121,7 @@ method register()
     {
         my $existing = $init_registry{ $name };
         warn(
-            "WARNING: New initializer $self.$name" 
+            "WARNING: New initializer $self.$name"
             ."is overriding existing initializer $existing.$name"
         );
     }
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/KVStore.pm b/perl-package/AI-MXNet/lib/AI/MXNet/KVStore.pm
index 465cfd6..eff57a3 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/KVStore.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/KVStore.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::KVStore;
 use strict;
 use warnings;
@@ -13,7 +30,7 @@ use AI::MXNet::Function::Parameters;
 
     AI::MXNet::KVStore - Key value store interface of MXNet.
 
-=head1 DESCRIPTION 
+=head1 DESCRIPTION
 
     Key value store interface of MXNet for parameter synchronization, over multiple devices.
 =cut
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/KVStoreServer.pm b/perl-package/AI-MXNet/lib/AI/MXNet/KVStoreServer.pm
index 6b018af..4c274b9 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/KVStoreServer.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/KVStoreServer.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::KVStoreServer;
 use strict;
 use warnings;
@@ -30,7 +47,7 @@ has 'init_logging' => (is => 'rw', isa => 'Int', default => 0);
 # return the server controller
 method _controller()
 {
-    return  sub { 
+    return  sub {
         my ($cmd_id, $cmd_body) = @_;
         if (not $self->init_logging)
         {
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/LRScheduler.pm b/perl-package/AI-MXNet/lib/AI/MXNet/LRScheduler.pm
index a9ffb37..27420f4 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/LRScheduler.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/LRScheduler.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::LRScheduler;
 use strict;
 use warnings;
@@ -173,4 +190,4 @@ method call(Int $num_update)
     return $self->base_lr;
 }
 
-1;
\ No newline at end of file
+1;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Logging.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Logging.pm
index d6d3744..f3039cc 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Logging.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Logging.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::Logging;
 ## TODO
 use Mouse;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Metric.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Metric.pm
index fbb93b0..6504481 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Metric.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Metric.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::Metric;
 use strict;
 use warnings;
@@ -484,7 +501,7 @@ method update(ArrayRef[AI::MXNet::NDArray] $labels, ArrayRef[AI::MXNet::NDArray]
         my $label_shape = $label->shape->at(0);
         my $pred_shape  = $pred->shape->at(-1);
         confess(
-            "Size of label  $label_shape and 
+            "Size of label  $label_shape and
             .first dimension of pred $pred_shape do not match"
         ) unless $label_shape == $pred_shape;
         my $prob = $pred->index($label);
@@ -599,4 +616,4 @@ method create(Metric|ArrayRef[Metric] $metric, %kwargs)
     }
 }
 
-1;
\ No newline at end of file
+1;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Module.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Module.pm
index ba70fd0..967a511 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Module.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Module.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 ## TODO
 ## this class is here because of https://github.com/gfx/p5-Mouse/pull/67
 ## once 2.4.7 version of Mouse in Ubuntu for affected Perl version
@@ -173,7 +190,7 @@ has 'state_names'       => (is => 'rw', isa => 'Maybe[ArrayRef[Str]]');
 has 'logger'            => (is => 'ro', default => sub { AI::MXNet::Logging->get_logger });
 has '_p'                => (is => 'rw', init_arg => undef);
 has 'context'           => (
-    is => 'ro', 
+    is => 'ro',
     isa => 'AI::MXNet::Context|ArrayRef[AI::MXNet::Context]',
     default => sub { AI::MXNet::Context->cpu }
 );
@@ -952,4 +969,4 @@ method _kvstore()
     $self->_p->_kvstore;
 }
 
-1;
\ No newline at end of file
+1;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Module/Base.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Module/Base.pm
index 293696db..7a9e3de 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Module/Base.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Module/Base.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::BatchEndParam;
 use Mouse;
 use AI::MXNet::Function::Parameters;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Module/Bucketing.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Module/Bucketing.pm
index af768f0..531f41d 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Module/Bucketing.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Module/Bucketing.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::Module::Bucketing;
 use Mouse;
 use AI::MXNet::Function::Parameters;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Monitor.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Monitor.pm
index 7ac989c..9934617 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Monitor.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Monitor.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::Monitor;
 use Mouse;
 use AI::MXNet::Function::Parameters;
@@ -153,7 +170,7 @@ method toc()
         my $s = '';
         for my $v (@{ $v_list })
         {
-            confess("the argument must be NDArray") 
+            confess("the argument must be NDArray")
                 unless blessed($v) and $v->isa('AI::MXNet::NDArray');
             if($v->size == 1)
             {
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/NDArray.pm b/perl-package/AI-MXNet/lib/AI/MXNet/NDArray.pm
index edeb9b1..1f58a74 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/NDArray.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/NDArray.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::NDArray;
 
 =head1 NAME
@@ -68,7 +85,7 @@ method at(Index @indices)
     my $isize = @indices;
     confess("Dimensions size $dsize < indexes size $isize")
         if $dsize < $isize;
-    confess("Dimensions size $dsize = indexes size $isize, 
+    confess("Dimensions size $dsize = indexes size $isize,
                    ndarray only supports either ->at on dimension 0
                    or full crop")
         if $isize > 1 and $dsize != $isize;
@@ -78,7 +95,7 @@ method at(Index @indices)
         confess("Dimension $i mismatch Idx: $idx >= Dim Size: $dim_size")
             if $idx >= $dim_size or ($idx + $dim_size) < 0;
         ++$i;
-    }, \@indices, $shape);  
+    }, \@indices, $shape);
     $i = 0;
     for my $v (@indices)
     {
@@ -181,7 +198,7 @@ method _sync_copyfrom(ArrayRef|PDL|PDL::Matrix $source_array)
         my $convert_func = $pdl_type->convertfunc;
         $source_array = $source_array->$convert_func;
     }
-    $source_array = pdl($pdl_type, [@{ $source_array->unpdl } ? $source_array->unpdl->[0] : 0 ]) 
+    $source_array = pdl($pdl_type, [@{ $source_array->unpdl } ? $source_array->unpdl->[0] : 0 ])
         unless @{ $source_array->shape->unpdl };
     my $pdl_shape = $source_array->shape->unpdl;
     my $pdl_shape_str = join(',', ref($source_array) eq 'PDL' ? reverse @{ $pdl_shape } : @{ $pdl_shape });
@@ -222,7 +239,7 @@ method aspdl()
     my $pdl = PDL->new_from_specification($pdl_type, reverse @{ $self->shape });
     my $perl_pack_type = DTYPE_MX_TO_PERL->{$dtype};
     my $buf = pack("$perl_pack_type*", (0)x$self->size);
-    check_call(AI::MXNetCAPI::NDArraySyncCopyToCPU($self->handle, $buf, $self->size)); 
+    check_call(AI::MXNetCAPI::NDArraySyncCopyToCPU($self->handle, $buf, $self->size));
     ## special handling for float16
     if($perl_pack_type eq 'S')
     {
@@ -253,7 +270,7 @@ method asmpdl()
     my $pdl = PDL::Matrix->new_from_specification($pdl_type, @{ $self->shape });
     my $perl_pack_type = DTYPE_MX_TO_PERL->{$dtype};
     my $buf = pack("$perl_pack_type*", (0)x$self->size);
-    check_call(AI::MXNetCAPI::NDArraySyncCopyToCPU($self->handle, $buf, $self->size)); 
+    check_call(AI::MXNetCAPI::NDArraySyncCopyToCPU($self->handle, $buf, $self->size));
     ## special handling for float16
     if($perl_pack_type eq 'S')
     {
@@ -394,7 +411,7 @@ method moveaxis(Int $source, Int $dest)
 
 =head2 broadcast_to
 
-    Broadcasting the current NDArray into the given shape. 
+    Broadcasting the current NDArray into the given shape.
 
     Parameters
     ---------
@@ -404,7 +421,7 @@ method moveaxis(Int $source, Int $dest)
 method broadcast_to(Shape $shape)
 {
     my $cur_shape = $self->shape;
-    my $err_str = "operands could not be broadcast together with remapped shapes" 
+    my $err_str = "operands could not be broadcast together with remapped shapes"
                   ."[original->remapped]: [@$cur_shape] and requested shape [@$shape]";
     if(@$shape < @$cur_shape)
     {
@@ -494,7 +511,7 @@ method context()
 
     Returns
     -------
-    a data type string ('float32', 'float64', 'float16', 'uint8', 'int32') 
+    a data type string ('float32', 'float64', 'float16', 'uint8', 'int32')
     representing the data type of the ndarray.
     'float32' is the default dtype for the ndarray class.
 =cut
@@ -707,7 +724,7 @@ method stringify($other=, $reverse=)
 method iadd(AI::MXNet::NDArray|Num $other, $reverse=)
 {
     confess('trying to add to a readonly NDArray') unless $self->writable;
-    return ref $other 
+    return ref $other
         ? __PACKAGE__->broadcast_add($self, $other, { out => $self })
         : __PACKAGE__->_plus_scalar($self, $other, { out => $self })
 }
@@ -752,9 +769,9 @@ method multiply(AI::MXNet::NDArray|Num $other, $reverse=)
 method imultiply(AI::MXNet::NDArray|Num $other, $reverse=)
 {
     confess('trying to add to a readonly NDArray') unless $self->writable;
-    return ref $other 
-        ? __PACKAGE__->broadcast_mul($self, $other, { out => $self }) 
-        : __PACKAGE__->_mul_scalar($self, $other, { out => $self }) 
+    return ref $other
+        ? __PACKAGE__->broadcast_mul($self, $other, { out => $self })
+        : __PACKAGE__->_mul_scalar($self, $other, { out => $self })
 }
 
 method divide(AI::MXNet::NDArray|Num $other, $reverse=)
@@ -770,9 +787,9 @@ method divide(AI::MXNet::NDArray|Num $other, $reverse=)
 method idivide(AI::MXNet::NDArray|Num $other, $reverse=)
 {
     confess('trying to add to a readonly NDArray') unless $self->writable;
-    return ref $other 
-        ? __PACKAGE__->broadcast_div($self, $other, { out => $self }) 
-        : __PACKAGE__->_div_scalar($self, $other, { out => $self }) 
+    return ref $other
+        ? __PACKAGE__->broadcast_div($self, $other, { out => $self })
+        : __PACKAGE__->_div_scalar($self, $other, { out => $self })
 }
 
 method power(AI::MXNet::NDArray|Num $other, $reverse=)
@@ -1094,11 +1111,11 @@ method concatenate(ArrayRef[AI::MXNet::NDArray] $arrays, Index :$axis=0, :$alway
         $shape_axis += $arr->shape->[$axis];
         my $arr_shape_rest1 = [@{ $arr->shape }[0..($axis-1)]];
         my $arr_shape_rest2 = [@{ $arr->shape }[($axis+1)..(@{ $arr->shape }-1)]];
-        confess("first array $arrays->[0] and $i array $arr do not match") 
+        confess("first array $arrays->[0] and $i array $arr do not match")
             unless  join(',',@$arr_shape_rest1) eq join(',',@$shape_rest1);
-        confess("first array $arrays->[0] and $i array $arr do not match") 
+        confess("first array $arrays->[0] and $i array $arr do not match")
             unless  join(',',@$arr_shape_rest2) eq join(',',@$shape_rest2);
-        confess("first array $arrays->[0] and $i array $arr dtypes do not match") 
+        confess("first array $arrays->[0] and $i array $arr dtypes do not match")
             unless  join(',',@$arr_shape_rest2) eq join(',',@$shape_rest2);
         $i++;
     }
@@ -1118,8 +1135,8 @@ method concatenate(ArrayRef[AI::MXNet::NDArray] $arrays, Index :$axis=0, :$alway
             $begin->[$axis] = $idx;
             $end->[$axis] = $idx+$arr->shape->[$axis];
             __PACKAGE__->_crop_assign(
-                $ret, $arr, 
-                { 
+                $ret, $arr,
+                {
                     out => $ret,
                     begin => $begin,
                     end => $end
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/NDArray/Base.pm b/perl-package/AI-MXNet/lib/AI/MXNet/NDArray/Base.pm
index 7fb6d0e..b514361 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/NDArray/Base.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/NDArray/Base.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::NDArray::Base;
 use strict;
 use warnings;
@@ -100,7 +117,7 @@ func _make_ndarray_function($handle, $func_name)
         }
         for my $key (keys %kwargs)
         {
-            $kwargs{ $key } = "(" .join(", ", @{ $kwargs{ $key } }) .")" 
+            $kwargs{ $key } = "(" .join(", ", @{ $kwargs{ $key } }) .")"
                 if ref $kwargs{ $key } eq 'ARRAY';
         }
         my $out = check_call(AI::MXNetCAPI::ImperativeInvoke(
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/NDArray/Doc.pm b/perl-package/AI-MXNet/lib/AI/MXNet/NDArray/Doc.pm
index a1a7812..fc44812 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/NDArray/Doc.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/NDArray/Doc.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::NDArray::Doc;
 use strict;
 use warnings;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/NDArray/Slice.pm b/perl-package/AI-MXNet/lib/AI/MXNet/NDArray/Slice.pm
index a52f8ee..40312eb 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/NDArray/Slice.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/NDArray/Slice.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::NDArray::Slice;
 use strict;
 use warnings;
@@ -13,7 +30,7 @@ use AI::MXNet::Function::Parameters;
 has parent => (is => 'ro', isa => 'AI::MXNet::NDArray', required => 1);
 has begin  => (is => 'ro', isa => 'Shape', required => 1);
 has end    => (is => 'ro', isa => 'Shape', required => 1);
-use overload 
+use overload
     '.=' => \&set,
     '='  => sub { $_[0] },
     '""' => \&notsupported,
@@ -37,10 +54,10 @@ method set(AcceptableInput $value, $reverse=)
 {
     confess("set value must be defined") unless defined $value;
     confess("${\ $self->parent } is not writable") unless $self->parent->writable;
-    my $shape = []; 
+    my $shape = [];
     zip(
         sub { my ($begin, $end) = @_; push @$shape, ($end-$begin); },
-        $self->begin, 
+        $self->begin,
         $self->end
     );
     if(ref $value)
@@ -58,12 +75,12 @@ method set(AcceptableInput $value, $reverse=)
             $value = AI::MXNet::NDArray->array($value, ctx => $self->parent->context);
         }
         confess("value $value does not match slice dim sizes [@$shape]")
-            if @{$value->shape} != @$shape;    
+            if @{$value->shape} != @$shape;
         zip(
-            sub { 
-                my ($dsize, $vdsize) = @_; 
-                confess("Slice [@$shape]  != $value given as value") 
-                    if $dsize != $vdsize; 
+            sub {
+                my ($dsize, $vdsize) = @_;
+                confess("Slice [@$shape]  != $value given as value")
+                    if $dsize != $vdsize;
             },
             $shape,
             $value->shape
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Optimizer.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Optimizer.pm
index 08b9565..c6f6822 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Optimizer.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Optimizer.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::Optimizer;
 use strict;
 use warnings;
@@ -33,7 +50,7 @@ method register()
     {
         my $existing = $opt_registry{ $name };
         warn(
-            "WARNING: New optimizer $self.$name" 
+            "WARNING: New optimizer $self.$name"
             ."is overriding existing optimizer $existing.$name"
         );
     }
@@ -505,7 +522,7 @@ method update(
     if($self->clip_gradient)
     {
         $grad = AI::MXNet::NDArray->clip(
-            $grad, 
+            $grad,
             -$self->clip_gradient,
             $self->clip_gradient
         );
@@ -566,7 +583,7 @@ method create_state(Index $index, AI::MXNet::NDArray $weight)
 }
 
 method update(
-    Index $index, 
+    Index $index,
     AI::MXNet::NDArray $weight,
     AI::MXNet::NDArray $grad,
     AI::MXNet::NDArray|Undef $state
@@ -678,7 +695,7 @@ method create_state(Index $index, AI::MXNet::NDArray $weight)
 }
 
 method update(
-    Index $index, 
+    Index $index,
     AI::MXNet::NDArray $weight,
     AI::MXNet::NDArray $grad,
     ArrayRef[AI::MXNet::NDArray] $state
@@ -748,7 +765,7 @@ has '+learning_rate'       => (default => 0.05);
 method create_state(Index $index, AI::MXNet::NDArray $weight)
 {
     return AI::MXNet::NDArray->zeros(
-                $weight->shape, 
+                $weight->shape,
                 ctx => $weight->context
     );  # history
 }
@@ -1025,7 +1042,7 @@ extends 'AI::MXNet::Optimizer';
 method create_state(Index $index, AI::MXNet::NDArray $weight)
 {
     return AI::MXNet::NDArray->zeros(
-                $weight->shape, 
+                $weight->shape,
                 ctx => $weight->context
     );
 }
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Profiler.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Profiler.pm
index 6398fcb..47d7a0d 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Profiler.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Profiler.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::Profiler;
 use strict;
 use warnings;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/RNN.pm b/perl-package/AI-MXNet/lib/AI/MXNet/RNN.pm
index 13dc4f2..1ccab31 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/RNN.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/RNN.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::RNN;
 use strict;
 use warnings;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/RNN/Cell.pm b/perl-package/AI-MXNet/lib/AI/MXNet/RNN/Cell.pm
index c7523aa..0221a90 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/RNN/Cell.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/RNN/Cell.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::RNN::Params;
 use Mouse;
 use AI::MXNet::Function::Parameters;
@@ -560,7 +577,7 @@ use Mouse;
 use AI::MXNet::Base;
 extends 'AI::MXNet::RNN::Cell';
 
-=head1 NAME 
+=head1 NAME
 
     AI::MXNet::RNN::LSTMCell
 =cut
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/RNN/IO.pm b/perl-package/AI-MXNet/lib/AI/MXNet/RNN/IO.pm
index 731f776..be3bdbd 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/RNN/IO.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/RNN/IO.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::RNN::IO;
 use strict;
 use warnings;
@@ -289,4 +306,4 @@ method next()
     );
 }
 
-1;
\ No newline at end of file
+1;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Random.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Random.pm
index dd17523..9ca013c 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Random.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Random.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::Random;
 use strict;
 use warnings;
@@ -59,4 +76,4 @@ for my $method (
     }
 }
 
-1;
\ No newline at end of file
+1;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/RecordIO.pm b/perl-package/AI-MXNet/lib/AI/MXNet/RecordIO.pm
index f0833bf..2027a90 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/RecordIO.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/RecordIO.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::RecordIO;
 use strict;
 use warnings;
@@ -24,7 +41,7 @@ use Mouse;
 has 'uri'         => (is => 'ro', isa => 'Str', required => 1);
 has 'flag'        => (is => 'ro', isa => enum([qw/r w/]), required => 1);
 has 'handle'      => (is => 'rw', isa => 'RecordIOHandle');
-has [qw/writable 
+has [qw/writable
         is_open/] => (is => 'rw', isa => 'Bool');
 
 sub BUILD
@@ -336,4 +353,4 @@ method write_idx(Int $idx, Str $buf)
     push @{ $self->keys }, $idx;
 }
 
-1;
\ No newline at end of file
+1;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Rtc.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Rtc.pm
index 92edcaf..09dc662 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Rtc.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Rtc.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::Rtc;
 use strict;
 use warnings;
@@ -124,4 +141,4 @@ method push(
     );
 }
 
-1;
\ No newline at end of file
+1;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Symbol.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Symbol.pm
index 8b14f4e..a5298c7 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Symbol.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Symbol.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::Symbol;
 
 =head1 NAME
@@ -340,7 +357,7 @@ method attr_dict()
 
 method _set_attr(Str @args)
 {
-    my %kwargs = @args; 
+    my %kwargs = @args;
     while(my ($key, $val) = each(%kwargs))
     {
         check_call(
@@ -492,7 +509,7 @@ method list_inputs()
 
 method infer_type(Str|Undef @args)
 {
-    my ($positional_arguments, $kwargs, $kwargs_order) = _parse_arguments("Dtype", @args); 
+    my ($positional_arguments, $kwargs, $kwargs_order) = _parse_arguments("Dtype", @args);
     my $sdata = [];
     my $keys  = [];
     if(@$positional_arguments)
@@ -710,7 +727,7 @@ method _get_ndarray_inputs(
     my ($arg_handles, $arg_arrays) = ([], []);
     if(ref $args eq 'ARRAY')
     {
-        confess("Length of $arg_key do not match number of arguments") 
+        confess("Length of $arg_key do not match number of arguments")
             unless @$args == @$arg_names;
         @{ $arg_handles } = map { $_->handle } @{ $args };
         $arg_arrays = $args;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Symbol/AttrScope.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Symbol/AttrScope.pm
index 0171684..c728ed1 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Symbol/AttrScope.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Symbol/AttrScope.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::Symbol::AttrScope;
 use strict;
 use warnings;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Symbol/Base.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Symbol/Base.pm
index 69ff952..4282f12 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Symbol/Base.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Symbol/Base.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::Symbol::Base;
 use strict;
 use warnings;
@@ -68,7 +85,7 @@ sub _compose
 # Create an atomic symbol function by handle and funciton name
 func _make_atomic_symbol_function($handle, $name)
 {
-    my ($real_name, $desc, $arg_names, 
+    my ($real_name, $desc, $arg_names,
         $arg_types, $arg_descs, $key_var_num_args,
         $ret_type) = @{ check_call(AI::MXNetCAPI::SymbolGetAtomicSymbolInfo($handle)) };
     $ret_type //= '';
@@ -76,7 +93,7 @@ func _make_atomic_symbol_function($handle, $name)
     my $doc_str = build_doc($func_name,
                             $desc,
                             $arg_names,
-                            $arg_types, 
+                            $arg_types,
                             $arg_descs,
                             $key_var_num_args,
                             $ret_type
@@ -162,7 +179,7 @@ method _init_symbol_module()
             no strict 'refs';
             {
                 *{__PACKAGE__."::$name"} = $function;
-            } 
+            }
         }
     }
 }
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Symbol/Doc.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Symbol/Doc.pm
index 2485f21..1d9a2c1 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Symbol/Doc.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Symbol/Doc.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::Symbol::Doc;
 use strict;
 use warnings;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Symbol/NameManager.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Symbol/NameManager.pm
index 1e31730..109949c 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Symbol/NameManager.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Symbol/NameManager.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::Symbol::NameManager;
 use strict;
 use warnings;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/TestUtils.pm b/perl-package/AI-MXNet/lib/AI/MXNet/TestUtils.pm
index 52050fa..ea918c0 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/TestUtils.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/TestUtils.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::TestUtils;
 use strict;
 use warnings;
@@ -399,4 +416,4 @@ func dies_like($code, $regexp)
     }
 }
 
-1;
\ No newline at end of file
+1;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Types.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Types.pm
index 424591e..e48ae3c 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Types.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Types.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::Types;
 use strict;
 use warnings;
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Util/Printable.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Util/Printable.pm
index 4e8f805..1ae6c2d 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Util/Printable.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Util/Printable.pm
@@ -1,5 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::Util::Printable;
 use strict;
 use warnings;
 use Data::Dumper qw();
-use overload '""' => sub { print Data::Dumper->new([shift])->Purity(1)->Deepcopy(1)->Terse(1)->Dump };
\ No newline at end of file
+use overload '""' => sub { print Data::Dumper->new([shift])->Purity(1)->Deepcopy(1)->Terse(1)->Dump };
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Visualization.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Visualization.pm
index d6ea5aa..4cdc135 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Visualization.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Visualization.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNet::Visualization;
 use strict;
 use warnings;
@@ -37,7 +54,7 @@ use JSON::PP;
     my $softmax = mx->symbol->SoftmaxOutput(data => $fc2, name => 'softmax');
 
     ## creates the image file working directory
-    mx->viz->plot_network($softmax, save_format => 'png')->render("network.png"); 
+    mx->viz->plot_network($softmax, save_format => 'png')->render("network.png");
 
 =head1 DESCRIPTION
 
@@ -408,4 +425,4 @@ method render($output=)
     return $self->graph->$method($output);
 }
 
-1;
\ No newline at end of file
+1;
diff --git a/perl-package/AI-MXNetCAPI/lib/AI/MXNetCAPI.pm b/perl-package/AI-MXNetCAPI/lib/AI/MXNetCAPI.pm
index 48ebe80..f092057 100644
--- a/perl-package/AI-MXNetCAPI/lib/AI/MXNetCAPI.pm
+++ b/perl-package/AI-MXNetCAPI/lib/AI/MXNetCAPI.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::MXNetCAPI;
 use base qw(DynaLoader);
 bootstrap AI::MXNetCAPI;
diff --git a/perl-package/AI-NNVMCAPI/lib/AI/NNVMCAPI.pm b/perl-package/AI-NNVMCAPI/lib/AI/NNVMCAPI.pm
index 62d4dd2..134d922 100644
--- a/perl-package/AI-NNVMCAPI/lib/AI/NNVMCAPI.pm
+++ b/perl-package/AI-NNVMCAPI/lib/AI/NNVMCAPI.pm
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 package AI::NNVMCAPI;
 use base qw(DynaLoader);
 bootstrap AI::NNVMCAPI;
diff --git a/perl-package/test.sh b/perl-package/test.sh
index 5aef8e6..c8509c1 100755
--- a/perl-package/test.sh
+++ b/perl-package/test.sh
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 MXNET_HOME=${PWD}
 export LD_LIBRARY_PATH=${MXNET_HOME}/lib
 export PERL5LIB=${MXNET_HOME}/perl5/lib/perl5
diff --git a/plugin/caffe/caffe_blob.cc b/plugin/caffe/caffe_blob.cc
index c6d5156..697efbf 100644
--- a/plugin/caffe/caffe_blob.cc
+++ b/plugin/caffe/caffe_blob.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file caffe_blob.cc
  * \brief Implementations of SetDataGradToBlob given various device/dimension
  * \author Haoran Wang
diff --git a/plugin/caffe/caffe_blob.h b/plugin/caffe/caffe_blob.h
index 3037031..666d269 100644
--- a/plugin/caffe/caffe_blob.h
+++ b/plugin/caffe/caffe_blob.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file caffe_blob.h
  * \brief conversion between tensor and caffeBlob
  * \author Haoran Wang
diff --git a/plugin/caffe/caffe_common.cc b/plugin/caffe/caffe_common.cc
index 722b191..53513a1 100644
--- a/plugin/caffe/caffe_common.cc
+++ b/plugin/caffe/caffe_common.cc
@@ -1,8 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file caffe_common.h
  * \brief Common functions for caffeOp and caffeLoss symbols
- * \author Haoran Wang 
+ * \author Haoran Wang
 */
 #include<mshadow/tensor.h>
 #include<caffe/common.hpp>
diff --git a/plugin/caffe/caffe_common.h b/plugin/caffe/caffe_common.h
index 6ee3c26..8565d9e 100644
--- a/plugin/caffe/caffe_common.h
+++ b/plugin/caffe/caffe_common.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file caffe_common.h
  * \brief Common functions for caffeOp and caffeLoss symbols
  * \author Haoran Wang
diff --git a/plugin/caffe/caffe_data_iter.cc b/plugin/caffe/caffe_data_iter.cc
index ecf7762..2682298 100644
--- a/plugin/caffe/caffe_data_iter.cc
+++ b/plugin/caffe/caffe_data_iter.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file caffe_data_iter.cc
  * \brief register mnist iterator
 */
diff --git a/plugin/caffe/caffe_fieldentry.h b/plugin/caffe/caffe_fieldentry.h
index a020cf9..47d246f 100644
--- a/plugin/caffe/caffe_fieldentry.h
+++ b/plugin/caffe/caffe_fieldentry.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file caffe_fieldentry.h
  * \brief Implement FieldEntry<caffe::LayerParameter>
  * \author Haoran Wang
diff --git a/plugin/caffe/caffe_loss-inl.h b/plugin/caffe/caffe_loss-inl.h
index 038ee14..37bfcf0 100644
--- a/plugin/caffe/caffe_loss-inl.h
+++ b/plugin/caffe/caffe_loss-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file caffe_loss-inl.h
  * \brief Caffe Operator
  * \author Haoran Wang
diff --git a/plugin/caffe/caffe_loss.cc b/plugin/caffe/caffe_loss.cc
index a51f126..ce697d6 100644
--- a/plugin/caffe/caffe_loss.cc
+++ b/plugin/caffe/caffe_loss.cc
@@ -1,8 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file caffe_loss.cc
- * \brief caffe loss 
- * \author Haoran Wang 
+ * \brief caffe loss
+ * \author Haoran Wang
 */
 #include "./caffe_loss-inl.h"
 
diff --git a/plugin/caffe/caffe_loss.cu b/plugin/caffe/caffe_loss.cu
index 55489ca..2002cf2 100644
--- a/plugin/caffe/caffe_loss.cu
+++ b/plugin/caffe/caffe_loss.cu
@@ -1,8 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file caffe_loss_gpu.cc
- * \brief caffe loss 
- * \author Haoran Wang 
+ * \brief caffe loss
+ * \author Haoran Wang
 */
 #include "./caffe_loss-inl.h"
 
diff --git a/plugin/caffe/caffe_op-inl.h b/plugin/caffe/caffe_op-inl.h
index 1950865..43b9b5a 100644
--- a/plugin/caffe/caffe_op-inl.h
+++ b/plugin/caffe/caffe_op-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file caffe_op-inl.h
  * \brief Caffe Operator
  * \author Haoran Wang
diff --git a/plugin/caffe/caffe_op.cc b/plugin/caffe/caffe_op.cc
index 90cb4da..5198cca 100644
--- a/plugin/caffe/caffe_op.cc
+++ b/plugin/caffe/caffe_op.cc
@@ -1,8 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file caffe_op.cc
  * \brief caffe operator
- * \author Haoran Wang 
+ * \author Haoran Wang
 */
 #include "./caffe_op-inl.h"
 namespace mxnet {
diff --git a/plugin/caffe/caffe_op.cu b/plugin/caffe/caffe_op.cu
index c52f2b6..be6c20a 100644
--- a/plugin/caffe/caffe_op.cu
+++ b/plugin/caffe/caffe_op.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file caffe_operator_gpu.cc
  * \brief caffe operator
  * \author Haoran Wang
diff --git a/plugin/caffe/caffe_stream.cc b/plugin/caffe/caffe_stream.cc
index 99202bf..03badda 100644
--- a/plugin/caffe/caffe_stream.cc
+++ b/plugin/caffe/caffe_stream.cc
@@ -1,8 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file caffe_stream.cc
  * \brief define stream opertors >> and <<
- * \author Haoran Wang 
+ * \author Haoran Wang
 */
 #include"caffe_stream.h"
 
diff --git a/plugin/caffe/caffe_stream.h b/plugin/caffe/caffe_stream.h
index de9edb8..b9a08d0 100644
--- a/plugin/caffe/caffe_stream.h
+++ b/plugin/caffe/caffe_stream.h
@@ -1,8 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file caffe_stream.h
  * \brief define stream opertors >> and <<
- * \author Haoran Wang 
+ * \author Haoran Wang
 */
 #ifndef PLUGIN_CAFFE_CAFFE_STREAM_H_
 #define PLUGIN_CAFFE_CAFFE_STREAM_H_
diff --git a/plugin/opencv/__init__.py b/plugin/opencv/__init__.py
index 0725751..bcf6d1e 100644
--- a/plugin/opencv/__init__.py
+++ b/plugin/opencv/__init__.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=wildcard-import
 
diff --git a/plugin/opencv/cv_api.cc b/plugin/opencv/cv_api.cc
index 78bec01..b0bcbbc 100644
--- a/plugin/opencv/cv_api.cc
+++ b/plugin/opencv/cv_api.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file cv_api.h
  * \brief C API for opencv
  * \author Junyuan Xie
diff --git a/plugin/opencv/cv_api.h b/plugin/opencv/cv_api.h
index fc224d0..e04357b 100644
--- a/plugin/opencv/cv_api.h
+++ b/plugin/opencv/cv_api.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file cv_api.h
  * \brief C API for opencv
  * \author Junyuan Xie
diff --git a/plugin/opencv/opencv.py b/plugin/opencv/opencv.py
index 43b73b6..52138af 100644
--- a/plugin/opencv/opencv.py
+++ b/plugin/opencv/opencv.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=too-many-arguments,no-member,invalid-name
 
diff --git a/plugin/sframe/iter_sframe.cc b/plugin/sframe/iter_sframe.cc
index d91c0de..2a987e2 100644
--- a/plugin/sframe/iter_sframe.cc
+++ b/plugin/sframe/iter_sframe.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file iter_sframe_image.cc
  * \brief
  * \author Bing Xu
diff --git a/plugin/torch/torch_base.cc b/plugin/torch/torch_base.cc
index af102d6..89f832c 100644
--- a/plugin/torch/torch_base.cc
+++ b/plugin/torch/torch_base.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file torch_base.cc
  * \brief torch_state
  * \author Junyuan Xie
diff --git a/plugin/torch/torch_base.h b/plugin/torch/torch_base.h
index acfefe7..3aaaa2f 100644
--- a/plugin/torch/torch_base.h
+++ b/plugin/torch/torch_base.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file torch_base.h
  * \brief Torch interface.
  * \author Junyuan Xie
diff --git a/plugin/torch/torch_criterion-inl.h b/plugin/torch/torch_criterion-inl.h
index 174ebf2..7f592f1 100644
--- a/plugin/torch/torch_criterion-inl.h
+++ b/plugin/torch/torch_criterion-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file torch_module-inl.h
  * \brief torch module operator
  * \author Min Lin
diff --git a/plugin/torch/torch_criterion.cc b/plugin/torch/torch_criterion.cc
index a54be46..bdfb2f4 100644
--- a/plugin/torch/torch_criterion.cc
+++ b/plugin/torch/torch_criterion.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file activation.cc
  * \brief activation op
  * \author Junyuan Xie
diff --git a/plugin/torch/torch_criterion.cu b/plugin/torch/torch_criterion.cu
index 57730a0..68c519c 100644
--- a/plugin/torch/torch_criterion.cu
+++ b/plugin/torch/torch_criterion.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file activation.cc
  * \brief activation op
  * \author Bing Xu
diff --git a/plugin/torch/torch_function.cc b/plugin/torch/torch_function.cc
index b47ab56..a1c5ff5 100644
--- a/plugin/torch/torch_function.cc
+++ b/plugin/torch/torch_function.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file torch_base.cc
  * \brief torch_state
  * \author Junyuan Xie
diff --git a/plugin/torch/torch_function.h b/plugin/torch/torch_function.h
index 0151d5a..8fb2ccf 100644
--- a/plugin/torch/torch_function.h
+++ b/plugin/torch/torch_function.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file torch_function.h
  * \brief Torch interface.
  * \author Junyuan Xie
diff --git a/plugin/torch/torch_module-inl.h b/plugin/torch/torch_module-inl.h
index 31138fd..15b569f 100644
--- a/plugin/torch/torch_module-inl.h
+++ b/plugin/torch/torch_module-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file torch_module-inl.h
  * \brief torch module operator
  * \author Min Lin
diff --git a/plugin/torch/torch_module.cc b/plugin/torch/torch_module.cc
index 81dc481..658669f 100644
--- a/plugin/torch/torch_module.cc
+++ b/plugin/torch/torch_module.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file activation.cc
  * \brief activation op
  * \author Bing Xu
diff --git a/plugin/torch/torch_module.cu b/plugin/torch/torch_module.cu
index a298a23..caf9eb1 100644
--- a/plugin/torch/torch_module.cu
+++ b/plugin/torch/torch_module.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file activation.cc
  * \brief activation op
  * \author Bing Xu
diff --git a/plugin/warpctc/warpctc-inl.h b/plugin/warpctc/warpctc-inl.h
index 328c08b..d492656 100644
--- a/plugin/warpctc/warpctc-inl.h
+++ b/plugin/warpctc/warpctc-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file warpctc-inl.h
  * \brief warpctc operator
  * \author Liang Xiang
diff --git a/plugin/warpctc/warpctc.cc b/plugin/warpctc/warpctc.cc
index db88a33..0ff61be 100644
--- a/plugin/warpctc/warpctc.cc
+++ b/plugin/warpctc/warpctc.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file warpctc.cc
  * \brief warpctc op
  * \author Liang Xiang
diff --git a/plugin/warpctc/warpctc.cu b/plugin/warpctc/warpctc.cu
index 186c4d0..7562a12 100644
--- a/plugin/warpctc/warpctc.cu
+++ b/plugin/warpctc/warpctc.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file warpctc.cc
  * \brief warpctc op
  * \author Liang Xiang
diff --git a/prepare_mkl.sh b/prepare_mkl.sh
index a320c44..9769731 100755
--- a/prepare_mkl.sh
+++ b/prepare_mkl.sh
@@ -1,4 +1,22 @@
 #!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # set -ex
 #
 # All modification made by Intel Corporation: © 2016 Intel Corporation
diff --git a/python/mxnet/__init__.py b/python/mxnet/__init__.py
index d878f9b..3c3ce76 100644
--- a/python/mxnet/__init__.py
+++ b/python/mxnet/__init__.py
@@ -1,4 +1,22 @@
 #!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 """MXNet: a concise, fast and flexible framework for deep learning."""
 from __future__ import absolute_import
diff --git a/python/mxnet/_ctypes/__init__.py b/python/mxnet/_ctypes/__init__.py
index 2708cc5..a9433ed 100644
--- a/python/mxnet/_ctypes/__init__.py
+++ b/python/mxnet/_ctypes/__init__.py
@@ -1 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 "ctypes module"
diff --git a/python/mxnet/_ctypes/ndarray.py b/python/mxnet/_ctypes/ndarray.py
index 396c57a..5a50f80 100644
--- a/python/mxnet/_ctypes/ndarray.py
+++ b/python/mxnet/_ctypes/ndarray.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=invalid-name, protected-access, too-many-arguments
 # pylint: disable=global-statement, unused-import
diff --git a/python/mxnet/_ctypes/symbol.py b/python/mxnet/_ctypes/symbol.py
index 5cbff55..3ec2ddc 100644
--- a/python/mxnet/_ctypes/symbol.py
+++ b/python/mxnet/_ctypes/symbol.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=invalid-name, protected-access, too-many-arguments,  global-statement
 """Symbolic configuration API."""
diff --git a/python/mxnet/_cy2/__init__.py b/python/mxnet/_cy2/__init__.py
index 910cbe2..1961cd9 100644
--- a/python/mxnet/_cy2/__init__.py
+++ b/python/mxnet/_cy2/__init__.py
@@ -1 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """Namespace for cython generated modules for python2"""
diff --git a/python/mxnet/_cy3/__init__.py b/python/mxnet/_cy3/__init__.py
index e89f266..44dcca5 100644
--- a/python/mxnet/_cy3/__init__.py
+++ b/python/mxnet/_cy3/__init__.py
@@ -1 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """Namespace for cython generated modules for python3"""
diff --git a/python/mxnet/_ndarray_internal.py b/python/mxnet/_ndarray_internal.py
index 52ec16d..8f151f1 100644
--- a/python/mxnet/_ndarray_internal.py
+++ b/python/mxnet/_ndarray_internal.py
@@ -1 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """NDArray namespace used to register internal functions."""
diff --git a/python/mxnet/_symbol_internal.py b/python/mxnet/_symbol_internal.py
index 58a8e4b..cd6ae41 100644
--- a/python/mxnet/_symbol_internal.py
+++ b/python/mxnet/_symbol_internal.py
@@ -1 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """Symbol namespace used to register internal functions."""
diff --git a/python/mxnet/attribute.py b/python/mxnet/attribute.py
index b860402..15d38f8 100644
--- a/python/mxnet/attribute.py
+++ b/python/mxnet/attribute.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 """Attribute scoping support for symbolic API."""
 from __future__ import absolute_import
diff --git a/python/mxnet/autograd.py b/python/mxnet/autograd.py
index 2c3feab..7340851 100644
--- a/python/mxnet/autograd.py
+++ b/python/mxnet/autograd.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 """Autograd for NDArray."""
 from __future__ import absolute_import
diff --git a/python/mxnet/base.py b/python/mxnet/base.py
index ddaeb6e..7d5a5bf 100644
--- a/python/mxnet/base.py
+++ b/python/mxnet/base.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=invalid-name, no-member
 """ctypes library of mxnet and helper functions."""
diff --git a/python/mxnet/callback.py b/python/mxnet/callback.py
index 1214600..8c9f64a 100644
--- a/python/mxnet/callback.py
+++ b/python/mxnet/callback.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 """Callback functions that can be used to track various status during epoch."""
 from __future__ import absolute_import
diff --git a/python/mxnet/context.py b/python/mxnet/context.py
index 9822a6d..9798b48 100644
--- a/python/mxnet/context.py
+++ b/python/mxnet/context.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 """Context management API of mxnet."""
 from __future__ import absolute_import
diff --git a/python/mxnet/contrib/__init__.py b/python/mxnet/contrib/__init__.py
index c46fa2a..2730bc4 100644
--- a/python/mxnet/contrib/__init__.py
+++ b/python/mxnet/contrib/__init__.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 """Experimental contributions"""
 
diff --git a/python/mxnet/contrib/autograd.py b/python/mxnet/contrib/autograd.py
index 9074e45..c7fb6e1 100644
--- a/python/mxnet/contrib/autograd.py
+++ b/python/mxnet/contrib/autograd.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 """Autograd for NDArray."""
 from __future__ import absolute_import
diff --git a/python/mxnet/contrib/ndarray.py b/python/mxnet/contrib/ndarray.py
index cf1815c..3c86fe7 100644
--- a/python/mxnet/contrib/ndarray.py
+++ b/python/mxnet/contrib/ndarray.py
@@ -1,2 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 """NDArray namespace used to register contrib functions"""
diff --git a/python/mxnet/contrib/symbol.py b/python/mxnet/contrib/symbol.py
index 81c5ce8..1d53345 100644
--- a/python/mxnet/contrib/symbol.py
+++ b/python/mxnet/contrib/symbol.py
@@ -1,2 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 """Symbol namespace used to register contrib functions"""
diff --git a/python/mxnet/contrib/tensorboard.py b/python/mxnet/contrib/tensorboard.py
index 40e3104..2bb766e 100644
--- a/python/mxnet/contrib/tensorboard.py
+++ b/python/mxnet/contrib/tensorboard.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 """TensorBoard functions that can be used to log various status during epoch."""
 from __future__ import absolute_import
diff --git a/python/mxnet/executor.py b/python/mxnet/executor.py
index d2b108c..baff834 100644
--- a/python/mxnet/executor.py
+++ b/python/mxnet/executor.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=invalid-name, protected-access, too-many-locals, too-many-arguments
 """Symbolic Executor component of MXNet."""
diff --git a/python/mxnet/executor_manager.py b/python/mxnet/executor_manager.py
index 0fb9eb3..33c6c97 100644
--- a/python/mxnet/executor_manager.py
+++ b/python/mxnet/executor_manager.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=invalid-name, protected-access, too-many-locals, too-many-arguments, too-many-statements
 """Executor manager."""
diff --git a/python/mxnet/gluon/__init__.py b/python/mxnet/gluon/__init__.py
index e3c341e..089340e 100644
--- a/python/mxnet/gluon/__init__.py
+++ b/python/mxnet/gluon/__init__.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=wildcard-import
 """Neural network module."""
diff --git a/python/mxnet/gluon/block.py b/python/mxnet/gluon/block.py
index cfc5e57..74a9058 100644
--- a/python/mxnet/gluon/block.py
+++ b/python/mxnet/gluon/block.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable= arguments-differ
 """Base container class for all neural network models."""
diff --git a/python/mxnet/gluon/data/__init__.py b/python/mxnet/gluon/data/__init__.py
index a062325..23ae3e9 100644
--- a/python/mxnet/gluon/data/__init__.py
+++ b/python/mxnet/gluon/data/__init__.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=wildcard-import
 """Dataset utilities."""
diff --git a/python/mxnet/gluon/data/dataloader.py b/python/mxnet/gluon/data/dataloader.py
index b251deb..6497c7e 100644
--- a/python/mxnet/gluon/data/dataloader.py
+++ b/python/mxnet/gluon/data/dataloader.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=
 """Dataset generator."""
diff --git a/python/mxnet/gluon/data/dataset.py b/python/mxnet/gluon/data/dataset.py
index aefff0a..f3dd691 100644
--- a/python/mxnet/gluon/data/dataset.py
+++ b/python/mxnet/gluon/data/dataset.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=
 """Dataset container."""
diff --git a/python/mxnet/gluon/data/sampler.py b/python/mxnet/gluon/data/sampler.py
index f6cedf0..80f115e 100644
--- a/python/mxnet/gluon/data/sampler.py
+++ b/python/mxnet/gluon/data/sampler.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=
 """Dataset sampler."""
diff --git a/python/mxnet/gluon/data/vision.py b/python/mxnet/gluon/data/vision.py
index 36c4642..4ddbbbd 100644
--- a/python/mxnet/gluon/data/vision.py
+++ b/python/mxnet/gluon/data/vision.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=
 """Dataset container."""
diff --git a/python/mxnet/gluon/loss.py b/python/mxnet/gluon/loss.py
index 38fdcb8..2b31840 100644
--- a/python/mxnet/gluon/loss.py
+++ b/python/mxnet/gluon/loss.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=arguments-differ
 """ losses for training neural networks """
diff --git a/python/mxnet/gluon/model_zoo/__init__.py b/python/mxnet/gluon/model_zoo/__init__.py
index aa5f148..b8c32af 100644
--- a/python/mxnet/gluon/model_zoo/__init__.py
+++ b/python/mxnet/gluon/model_zoo/__init__.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 """Predefined and pretrained models."""
 
diff --git a/python/mxnet/gluon/model_zoo/custom_layers.py b/python/mxnet/gluon/model_zoo/custom_layers.py
index da1ca86..cf91876 100644
--- a/python/mxnet/gluon/model_zoo/custom_layers.py
+++ b/python/mxnet/gluon/model_zoo/custom_layers.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable= arguments-differ
 """Custom neural network layers in model_zoo."""
diff --git a/python/mxnet/gluon/model_zoo/model_store.py b/python/mxnet/gluon/model_zoo/model_store.py
index 6a11626..e3c48ba 100644
--- a/python/mxnet/gluon/model_zoo/model_store.py
+++ b/python/mxnet/gluon/model_zoo/model_store.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 """Model zoo for pre-trained models."""
 from __future__ import print_function
diff --git a/python/mxnet/gluon/model_zoo/vision/__init__.py b/python/mxnet/gluon/model_zoo/vision/__init__.py
index 56e46f9..e4016db 100644
--- a/python/mxnet/gluon/model_zoo/vision/__init__.py
+++ b/python/mxnet/gluon/model_zoo/vision/__init__.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=wildcard-import, arguments-differ
 r"""Module for pre-defined neural network models.
diff --git a/python/mxnet/gluon/model_zoo/vision/alexnet.py b/python/mxnet/gluon/model_zoo/vision/alexnet.py
index 86ff932..4d5bc8c 100644
--- a/python/mxnet/gluon/model_zoo/vision/alexnet.py
+++ b/python/mxnet/gluon/model_zoo/vision/alexnet.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable= arguments-differ
 """Alexnet, implemented in Gluon."""
diff --git a/python/mxnet/gluon/model_zoo/vision/densenet.py b/python/mxnet/gluon/model_zoo/vision/densenet.py
index 9e1ff77..57dbe5d 100644
--- a/python/mxnet/gluon/model_zoo/vision/densenet.py
+++ b/python/mxnet/gluon/model_zoo/vision/densenet.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable= arguments-differ
 """DenseNet, implemented in Gluon."""
diff --git a/python/mxnet/gluon/model_zoo/vision/inception.py b/python/mxnet/gluon/model_zoo/vision/inception.py
index 8a28666..1afd3e3 100644
--- a/python/mxnet/gluon/model_zoo/vision/inception.py
+++ b/python/mxnet/gluon/model_zoo/vision/inception.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable= arguments-differ
 """Inception, implemented in Gluon."""
diff --git a/python/mxnet/gluon/model_zoo/vision/resnet.py b/python/mxnet/gluon/model_zoo/vision/resnet.py
index 48ba079..78bc726 100644
--- a/python/mxnet/gluon/model_zoo/vision/resnet.py
+++ b/python/mxnet/gluon/model_zoo/vision/resnet.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable= arguments-differ
 """ResNets, implemented in Gluon."""
diff --git a/python/mxnet/gluon/model_zoo/vision/squeezenet.py b/python/mxnet/gluon/model_zoo/vision/squeezenet.py
index bfcb5cb..1a14201 100644
--- a/python/mxnet/gluon/model_zoo/vision/squeezenet.py
+++ b/python/mxnet/gluon/model_zoo/vision/squeezenet.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable= arguments-differ
 """SqueezeNet, implemented in Gluon."""
diff --git a/python/mxnet/gluon/model_zoo/vision/vgg.py b/python/mxnet/gluon/model_zoo/vision/vgg.py
index 96a4fa1..2f4daf9 100644
--- a/python/mxnet/gluon/model_zoo/vision/vgg.py
+++ b/python/mxnet/gluon/model_zoo/vision/vgg.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable= arguments-differ
 """VGG, implemented in Gluon."""
diff --git a/python/mxnet/gluon/nn/__init__.py b/python/mxnet/gluon/nn/__init__.py
index e4191b2..0fc1ff1 100644
--- a/python/mxnet/gluon/nn/__init__.py
+++ b/python/mxnet/gluon/nn/__init__.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=wildcard-import
 """Neural network layers."""
diff --git a/python/mxnet/gluon/nn/basic_layers.py b/python/mxnet/gluon/nn/basic_layers.py
index afe2df7..63411ca 100644
--- a/python/mxnet/gluon/nn/basic_layers.py
+++ b/python/mxnet/gluon/nn/basic_layers.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable= arguments-differ
 """Basic neural network layers."""
diff --git a/python/mxnet/gluon/nn/conv_layers.py b/python/mxnet/gluon/nn/conv_layers.py
index 9094edd..e49340d 100644
--- a/python/mxnet/gluon/nn/conv_layers.py
+++ b/python/mxnet/gluon/nn/conv_layers.py
@@ -1,5 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
-# pylint: disable= arguments-differ
+# pylint: disable= arguments-differ, too-many-lines
 """Convolutional neural network layers."""
 from ..block import HybridBlock
 from ... import symbol
diff --git a/python/mxnet/gluon/parameter.py b/python/mxnet/gluon/parameter.py
index bdc9674..69f6038 100644
--- a/python/mxnet/gluon/parameter.py
+++ b/python/mxnet/gluon/parameter.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=
 """Neural network parameter."""
diff --git a/python/mxnet/gluon/rnn/__init__.py b/python/mxnet/gluon/rnn/__init__.py
index b4554ad..24cce54 100644
--- a/python/mxnet/gluon/rnn/__init__.py
+++ b/python/mxnet/gluon/rnn/__init__.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=wildcard-import
 """Recurrent neural network module."""
diff --git a/python/mxnet/gluon/rnn/rnn_cell.py b/python/mxnet/gluon/rnn/rnn_cell.py
index 87c656c..c9186fd 100644
--- a/python/mxnet/gluon/rnn/rnn_cell.py
+++ b/python/mxnet/gluon/rnn/rnn_cell.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=no-member, invalid-name, protected-access, no-self-use
 # pylint: disable=too-many-branches, too-many-arguments, no-self-use
diff --git a/python/mxnet/gluon/rnn/rnn_layer.py b/python/mxnet/gluon/rnn/rnn_layer.py
index deb6898..a9bcee5 100644
--- a/python/mxnet/gluon/rnn/rnn_layer.py
+++ b/python/mxnet/gluon/rnn/rnn_layer.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=no-member, invalid-name, protected-access, no-self-use
 # pylint: disable=too-many-branches, too-many-arguments, no-self-use
diff --git a/python/mxnet/gluon/trainer.py b/python/mxnet/gluon/trainer.py
index e8aae71..bb2cc76 100644
--- a/python/mxnet/gluon/trainer.py
+++ b/python/mxnet/gluon/trainer.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=
 """Parameter optimizer."""
diff --git a/python/mxnet/gluon/utils.py b/python/mxnet/gluon/utils.py
index 505fbc5..7d9c378 100644
--- a/python/mxnet/gluon/utils.py
+++ b/python/mxnet/gluon/utils.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=
 """Parallelization utility optimizer."""
diff --git a/python/mxnet/image/__init__.py b/python/mxnet/image/__init__.py
index d484006..9bb55fb 100644
--- a/python/mxnet/image/__init__.py
+++ b/python/mxnet/image/__init__.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=wildcard-import
 """Image Iterators and image augmentation functions"""
diff --git a/python/mxnet/image/detection.py b/python/mxnet/image/detection.py
index 0a16ac3..142ba25 100644
--- a/python/mxnet/image/detection.py
+++ b/python/mxnet/image/detection.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: disable=unused-import
 """Read images and perform augmentations for object detection."""
 
diff --git a/python/mxnet/image/image.py b/python/mxnet/image/image.py
index cf17ecf..02cd3cd 100644
--- a/python/mxnet/image/image.py
+++ b/python/mxnet/image/image.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: disable=no-member, too-many-lines, redefined-builtin, protected-access, unused-import, invalid-name
 # pylint: disable=too-many-arguments, too-many-locals, no-name-in-module, too-many-branches, too-many-statements
 """Read individual image files and perform augmentations."""
diff --git a/python/mxnet/initializer.py b/python/mxnet/initializer.py
index a5962b6..78afa2d 100755
--- a/python/mxnet/initializer.py
+++ b/python/mxnet/initializer.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """Weight initializer."""
 from __future__ import absolute_import, print_function
 
diff --git a/python/mxnet/io.py b/python/mxnet/io.py
index bb791ce..0404e34 100644
--- a/python/mxnet/io.py
+++ b/python/mxnet/io.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """Data iterators for common data formats."""
 from __future__ import absolute_import
 from collections import OrderedDict, namedtuple
diff --git a/python/mxnet/kvstore.py b/python/mxnet/kvstore.py
index 10b83b0..fd00911 100644
--- a/python/mxnet/kvstore.py
+++ b/python/mxnet/kvstore.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 """ Key value store interface of MXNet for parameter synchronization."""
 from __future__ import absolute_import
diff --git a/python/mxnet/kvstore_server.py b/python/mxnet/kvstore_server.py
index c6d0b07..1bb995a 100644
--- a/python/mxnet/kvstore_server.py
+++ b/python/mxnet/kvstore_server.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 """A server node for the key value store."""
 from __future__ import absolute_import
diff --git a/python/mxnet/libinfo.py b/python/mxnet/libinfo.py
index a247566..d6521c5 100644
--- a/python/mxnet/libinfo.py
+++ b/python/mxnet/libinfo.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 """Information about mxnet."""
 from __future__ import absolute_import
diff --git a/python/mxnet/log.py b/python/mxnet/log.py
index 46d97f6..6dcaedb 100644
--- a/python/mxnet/log.py
+++ b/python/mxnet/log.py
@@ -1,4 +1,22 @@
 #!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # -*- coding: utf-8 -*-
 # pylint: disable= protected-access, invalid-name
 """Logging utilities."""
diff --git a/python/mxnet/lr_scheduler.py b/python/mxnet/lr_scheduler.py
index 9998fc8..e4af77a 100644
--- a/python/mxnet/lr_scheduler.py
+++ b/python/mxnet/lr_scheduler.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """Scheduling learning rate."""
 import logging
 
diff --git a/python/mxnet/metric.py b/python/mxnet/metric.py
index 2fe38ab..1a8e67d 100644
--- a/python/mxnet/metric.py
+++ b/python/mxnet/metric.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=no-member, too-many-lines
 
diff --git a/python/mxnet/misc.py b/python/mxnet/misc.py
index b158981..13b7dc2 100644
--- a/python/mxnet/misc.py
+++ b/python/mxnet/misc.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: disable=invalid-name
 """Learning rate scheduler."""
 
diff --git a/python/mxnet/model.py b/python/mxnet/model.py
index 5780ac1..01b3fa5 100644
--- a/python/mxnet/model.py
+++ b/python/mxnet/model.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: disable=fixme, invalid-name, too-many-arguments, too-many-locals, too-many-lines
 # pylint: disable=too-many-branches, too-many-statements
 """MXNet model module"""
diff --git a/python/mxnet/module/__init__.py b/python/mxnet/module/__init__.py
index 9164137..32ecbb9 100644
--- a/python/mxnet/module/__init__.py
+++ b/python/mxnet/module/__init__.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """A module is like a FeedForward model. But we would like to make it
 easier to compose, similar to Torch modules.
 """
diff --git a/python/mxnet/module/base_module.py b/python/mxnet/module/base_module.py
index cacce25..3123462 100644
--- a/python/mxnet/module/base_module.py
+++ b/python/mxnet/module/base_module.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: disable=fixme, too-many-arguments, too-many-locals, too-many-public-methods, too-many-branches
 """`BaseModule` defines an API for modules."""
 
diff --git a/python/mxnet/module/bucketing_module.py b/python/mxnet/module/bucketing_module.py
index 7a1be96..f3c7ecb 100644
--- a/python/mxnet/module/bucketing_module.py
+++ b/python/mxnet/module/bucketing_module.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: disable=too-many-instance-attributes, too-many-arguments, protected-access
 # pylint: disable=too-many-public-methods
 """A `BucketingModule` implement the `BaseModule` API, and allows multiple
diff --git a/python/mxnet/module/executor_group.py b/python/mxnet/module/executor_group.py
index 169e81e..0f3c079 100755
--- a/python/mxnet/module/executor_group.py
+++ b/python/mxnet/module/executor_group.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: disable=too-many-instance-attributes,too-many-locals
 # pylint: disable=too-many-branches,too-many-statements,too-many-arguments
 """Executor group is a convenient tool for managing a group of executors."""
diff --git a/python/mxnet/module/module.py b/python/mxnet/module/module.py
index b31ea0f..058edd5 100644
--- a/python/mxnet/module/module.py
+++ b/python/mxnet/module/module.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: disable=too-many-instance-attributes, too-many-arguments, protected-access, too-many-branches
 # pylint: disable=too-many-public-methods
 """A `Module` implement the `BaseModule` API by wrapping a `Symbol` and one or
diff --git a/python/mxnet/module/python_module.py b/python/mxnet/module/python_module.py
index af7c76e..2d4343c 100644
--- a/python/mxnet/module/python_module.py
+++ b/python/mxnet/module/python_module.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: disable=too-many-instance-attributes, too-many-arguments
 """Provide some handy classes for user to implement a simple computation module
 in Python easily.
diff --git a/python/mxnet/module/sequential_module.py b/python/mxnet/module/sequential_module.py
index b2644c8..642a398 100644
--- a/python/mxnet/module/sequential_module.py
+++ b/python/mxnet/module/sequential_module.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: disable=too-many-arguments, too-many-locals, too-many-instance-attributes
 """`SequentialModule` is a container module that chains a number of modules together."""
 
diff --git a/python/mxnet/monitor.py b/python/mxnet/monitor.py
index 15be41d..e3185a1 100644
--- a/python/mxnet/monitor.py
+++ b/python/mxnet/monitor.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=protected-access, logging-format-interpolation, invalid-name, no-member, too-many-branches
 """Monitor outputs, weights, and gradients for debugging."""
diff --git a/python/mxnet/name.py b/python/mxnet/name.py
index 8003073..966d382 100644
--- a/python/mxnet/name.py
+++ b/python/mxnet/name.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 """Automatic naming support for symbolic API."""
 from __future__ import absolute_import
diff --git a/python/mxnet/ndarray.py b/python/mxnet/ndarray.py
index d4a0cdb..42f0ff5 100644
--- a/python/mxnet/ndarray.py
+++ b/python/mxnet/ndarray.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable= too-many-lines, redefined-builtin, protected-access
 # pylint: disable=import-error, no-name-in-module, undefined-variable
@@ -764,7 +781,7 @@ fixed-size items.
     def size(self):
         """Number of elements in the array.
 
-        Equivalent to the product of the array’s dimensions.
+        Equivalent to the product of the array's dimensions.
 
         Examples
         --------
@@ -803,7 +820,7 @@ fixed-size items.
 
     @property
     def dtype(self):
-        """Data-type of the array’s elements.
+        """Data-type of the array's elements.
 
         Returns
         -------
diff --git a/python/mxnet/ndarray_doc.py b/python/mxnet/ndarray_doc.py
index 9cc4545..0c51036 100644
--- a/python/mxnet/ndarray_doc.py
+++ b/python/mxnet/ndarray_doc.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=unused-argument, too-many-arguments
 """Extra symbol documents"""
diff --git a/python/mxnet/notebook/__init__.py b/python/mxnet/notebook/__init__.py
index 71a30e3..d605d74 100644
--- a/python/mxnet/notebook/__init__.py
+++ b/python/mxnet/notebook/__init__.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: disable=invalid-name, missing-docstring, no-init, old-style-class, multiple-statements
 
 """MXNet notebook: an easy to use visualization platform"""
diff --git a/python/mxnet/notebook/callback.py b/python/mxnet/notebook/callback.py
index e9c6e97..56321b7 100644
--- a/python/mxnet/notebook/callback.py
+++ b/python/mxnet/notebook/callback.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: disable=fixme, invalid-name, missing-docstring, no-init, old-style-class, multiple-statements
 # pylint: disable=arguments-differ, too-many-arguments, no-member
 """Visualization callback function
diff --git a/python/mxnet/operator.py b/python/mxnet/operator.py
index 8274838..692c7fe 100644
--- a/python/mxnet/operator.py
+++ b/python/mxnet/operator.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=invalid-name, protected-access, too-many-arguments, no-self-use, too-many-locals, broad-except
 """numpy interface for operators."""
diff --git a/python/mxnet/optimizer.py b/python/mxnet/optimizer.py
index 934566e..1ef9cc8 100644
--- a/python/mxnet/optimizer.py
+++ b/python/mxnet/optimizer.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """Weight updating functions."""
 import math
 import pickle
diff --git a/python/mxnet/profiler.py b/python/mxnet/profiler.py
index 0d4b8fb..7356ed0 100644
--- a/python/mxnet/profiler.py
+++ b/python/mxnet/profiler.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=fixme, invalid-name, too-many-arguments, too-many-locals, too-many-lines
 # pylint: disable=too-many-branches, too-many-statements
diff --git a/python/mxnet/random.py b/python/mxnet/random.py
index 91c2f50..29b250d 100644
--- a/python/mxnet/random.py
+++ b/python/mxnet/random.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=no-member, protected-access, unused-import, no-name-in-module
 """Random number interface of MXNet."""
diff --git a/python/mxnet/recordio.py b/python/mxnet/recordio.py
index 6661ac5..39f442b 100644
--- a/python/mxnet/recordio.py
+++ b/python/mxnet/recordio.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """Read and write for the RecordIO data format."""
 from __future__ import absolute_import
 from collections import namedtuple
diff --git a/python/mxnet/registry.py b/python/mxnet/registry.py
index fdd095e..4a4f22f 100644
--- a/python/mxnet/registry.py
+++ b/python/mxnet/registry.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=no-member
 
diff --git a/python/mxnet/rnn/__init__.py b/python/mxnet/rnn/__init__.py
index 99b0a2d..dbf382e 100644
--- a/python/mxnet/rnn/__init__.py
+++ b/python/mxnet/rnn/__init__.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=wildcard-import
 """Recurrent neural network module."""
diff --git a/python/mxnet/rnn/io.py b/python/mxnet/rnn/io.py
index 8cfce96..ab51b09 100644
--- a/python/mxnet/rnn/io.py
+++ b/python/mxnet/rnn/io.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=too-many-arguments, too-many-locals
 """Definition of various recurrent neural network cells."""
diff --git a/python/mxnet/rnn/rnn.py b/python/mxnet/rnn/rnn.py
index 10343c3..47307c5 100644
--- a/python/mxnet/rnn/rnn.py
+++ b/python/mxnet/rnn/rnn.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=too-many-arguments, no-member
 """Functions for constructing recurrent neural networks."""
diff --git a/python/mxnet/rnn/rnn_cell.py b/python/mxnet/rnn/rnn_cell.py
index c8213a2..1c34520 100644
--- a/python/mxnet/rnn/rnn_cell.py
+++ b/python/mxnet/rnn/rnn_cell.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=no-member, invalid-name, protected-access, no-self-use
 # pylint: disable=too-many-branches, too-many-arguments, no-self-use
diff --git a/python/mxnet/rtc.py b/python/mxnet/rtc.py
index 759fc3d..9da38c6 100644
--- a/python/mxnet/rtc.py
+++ b/python/mxnet/rtc.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """Interface to runtime cuda kernel compile module."""
 from __future__ import absolute_import
 
diff --git a/python/mxnet/symbol.py b/python/mxnet/symbol.py
index a6c2b0a..14cb381 100644
--- a/python/mxnet/symbol.py
+++ b/python/mxnet/symbol.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=invalid-name, protected-access, too-many-arguments, too-many-lines
 # pylint: disable=import-error, no-name-in-module
diff --git a/python/mxnet/symbol_doc.py b/python/mxnet/symbol_doc.py
index dff5383..3cb1997 100644
--- a/python/mxnet/symbol_doc.py
+++ b/python/mxnet/symbol_doc.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=unused-argument, too-many-arguments
 """Extra symbol documents
diff --git a/python/mxnet/test_utils.py b/python/mxnet/test_utils.py
index 83c773f..c5587f8 100644
--- a/python/mxnet/test_utils.py
+++ b/python/mxnet/test_utils.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """Tools for testing."""
 # pylint: disable=too-many-lines
 from __future__ import absolute_import, print_function, division
diff --git a/python/mxnet/torch.py b/python/mxnet/torch.py
index 765c96b..b7fce6d 100644
--- a/python/mxnet/torch.py
+++ b/python/mxnet/torch.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 """Interface for NDArray functions executed by torch backend.
 Install Torch and compile with USE_TORCH=1 to use this module."""
diff --git a/python/mxnet/visualization.py b/python/mxnet/visualization.py
index e67fee4..4dbf680 100644
--- a/python/mxnet/visualization.py
+++ b/python/mxnet/visualization.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # coding: utf-8
 # pylint: disable=invalid-name, too-many-locals, fixme
 # pylint: disable=too-many-branches, too-many-statements
diff --git a/python/setup.py b/python/setup.py
index f5bd55d..14c8121 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: disable=invalid-name, exec-used
 """Setup mxnet package."""
 from __future__ import absolute_import
diff --git a/scala-package/core/scripts/get_cifar_data.sh b/scala-package/core/scripts/get_cifar_data.sh
index eba3a27..9ec1c39 100755
--- a/scala-package/core/scripts/get_cifar_data.sh
+++ b/scala-package/core/scripts/get_cifar_data.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 set -e
 
 if [ ! -z "$MXNET_DATA_DIR" ]; then
diff --git a/scala-package/core/scripts/get_mnist_data.sh b/scala-package/core/scripts/get_mnist_data.sh
index a4cfe11..97e151b 100755
--- a/scala-package/core/scripts/get_mnist_data.sh
+++ b/scala-package/core/scripts/get_mnist_data.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 set -e
 
 if [ ! -z "$MXNET_DATA_DIR" ]; then
diff --git a/scala-package/examples/scripts/customop/run_customop.sh b/scala-package/examples/scripts/customop/run_customop.sh
index bd425ed..b11bb89 100644
--- a/scala-package/examples/scripts/customop/run_customop.sh
+++ b/scala-package/examples/scripts/customop/run_customop.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 MXNET_ROOT=$(cd "$(dirname $0)/../../../.."; pwd)
 CLASS_PATH=$MXNET_ROOT/scala-package/assembly/linux-x86_64-gpu/target/*:$MXNET_ROOT/scala-package/examples/target/*:$MXNET_ROOT/scala-package/examples/target/classes/lib/*
 
diff --git a/scala-package/examples/scripts/customop/run_customopwithrtc.sh b/scala-package/examples/scripts/customop/run_customopwithrtc.sh
index 6009193..160525e 100644
--- a/scala-package/examples/scripts/customop/run_customopwithrtc.sh
+++ b/scala-package/examples/scripts/customop/run_customopwithrtc.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 MXNET_ROOT=$(cd "$(dirname $0)/../../../.."; pwd)
 CLASS_PATH=$MXNET_ROOT/scala-package/assembly/linux-x86_64-gpu/target/*:$MXNET_ROOT/scala-package/examples/target/*:$MXNET_ROOT/scala-package/examples/target/classes/lib/*
 
diff --git a/scala-package/examples/scripts/module/mnist_mlp.sh b/scala-package/examples/scripts/module/mnist_mlp.sh
index 0b450d7..6bb9636 100755
--- a/scala-package/examples/scripts/module/mnist_mlp.sh
+++ b/scala-package/examples/scripts/module/mnist_mlp.sh
@@ -1,4 +1,22 @@
 #!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 ROOT_DIR=$(cd `dirname $0`/../../..; pwd)
 CLASSPATH=$ROOT_DIR/assembly/osx-x86_64-cpu/target/*:$ROOT_DIR/examples/target/*:$ROOT_DIR/examples/target/classes/lib/*
 
diff --git a/scala-package/examples/scripts/module/run_sequential_module.sh b/scala-package/examples/scripts/module/run_sequential_module.sh
index 15cc7dd..9d9edb7 100644
--- a/scala-package/examples/scripts/module/run_sequential_module.sh
+++ b/scala-package/examples/scripts/module/run_sequential_module.sh
@@ -1,4 +1,22 @@
 #!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 ROOT_DIR=$(cd `dirname $0`/../../..; pwd)
 CLASSPATH=$ROOT_DIR/assembly/linux-x86_64-cpu/target/*:$ROOT_DIR/examples/target/*:$ROOT_DIR/examples/target/classes/lib/*
 
diff --git a/scala-package/examples/scripts/neuralstyle_end2end/run_test_end2end.sh b/scala-package/examples/scripts/neuralstyle_end2end/run_test_end2end.sh
index 10bc2da..1c683bf 100644
--- a/scala-package/examples/scripts/neuralstyle_end2end/run_test_end2end.sh
+++ b/scala-package/examples/scripts/neuralstyle_end2end/run_test_end2end.sh
@@ -1,9 +1,27 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 MXNET_ROOT=$(cd "$(dirname $0)/../../../.."; pwd)
 CLASS_PATH=$MXNET_ROOT/scala-package/assembly/linux-x86_64-gpu/target/*:$MXNET_ROOT/scala-package/examples/target/*:$MXNET_ROOT/scala-package/examples/target/classes/lib/*
 
-INPUT_IMG=$1 
+INPUT_IMG=$1
 MODEL_DIR=$2
 OUTPUT_DIR=$3
 GPU=0
@@ -13,4 +31,4 @@ java -Xmx1024m -cp $CLASS_PATH \
 	--model-path $MODEL_DIR \
 	--input-image $INPUT_IMG \
 	--output-path $OUTPUT_DIR \
-	--gpu $GPU
\ No newline at end of file
+	--gpu $GPU
diff --git a/scala-package/examples/scripts/neuralstyle_end2end/run_train_end2end.sh b/scala-package/examples/scripts/neuralstyle_end2end/run_train_end2end.sh
index 3ede06a..fa08ff3 100644
--- a/scala-package/examples/scripts/neuralstyle_end2end/run_train_end2end.sh
+++ b/scala-package/examples/scripts/neuralstyle_end2end/run_train_end2end.sh
@@ -1,12 +1,30 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 MXNET_ROOT=$(cd "$(dirname $0)/../../../.."; pwd)
 CLASS_PATH=$MXNET_ROOT/scala-package/assembly/linux-x86_64-gpu/target/*:$MXNET_ROOT/scala-package/examples/target/*:$MXNET_ROOT/scala-package/examples/target/classes/lib/*
 
 # more details please refer to
 # https://github.com/Ldpe2G/mxnet/blob/develop/example/neural-style/end_to_end/README.md
-TRAIN_DATA_PATH=$1 
-STYLE_IMG=$2 
+TRAIN_DATA_PATH=$1
+STYLE_IMG=$2
 VGG_MODEL_PATH=$3
 SAVE_MODEL_DIR=$4
 GPU=0
@@ -17,4 +35,4 @@ java -Xmx1024m -cp $CLASS_PATH \
 	--vgg--model-path  $VGG_MODEL_PATH \
 	--save--model-path $SAVE_MODEL_DIR \
 	--style-image $STYLE_IMG \
-	--gpu $GPU
\ No newline at end of file
+	--gpu $GPU
diff --git a/scala-package/examples/scripts/profiler/run_profiler_matmul.sh b/scala-package/examples/scripts/profiler/run_profiler_matmul.sh
index b54a422..54aafaf 100644
--- a/scala-package/examples/scripts/profiler/run_profiler_matmul.sh
+++ b/scala-package/examples/scripts/profiler/run_profiler_matmul.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 MXNET_ROOT=$(cd "$(dirname $0)/../../../.."; pwd)
 CLASS_PATH=$MXNET_ROOT/scala-package/assembly/linux-x86_64-gpu/target/*:$MXNET_ROOT/scala-package/examples/target/*:$MXNET_ROOT/scala-package/examples/target/classes/lib/*
 
diff --git a/scala-package/examples/scripts/profiler/run_profiler_ndarray.sh b/scala-package/examples/scripts/profiler/run_profiler_ndarray.sh
index 4a849c5..711fe54 100644
--- a/scala-package/examples/scripts/profiler/run_profiler_ndarray.sh
+++ b/scala-package/examples/scripts/profiler/run_profiler_ndarray.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 MXNET_ROOT=$(cd "$(dirname $0)/../../../.."; pwd)
 CLASS_PATH=$MXNET_ROOT/scala-package/assembly/linux-x86_64-gpu/target/*:$MXNET_ROOT/scala-package/examples/target/*:$MXNET_ROOT/scala-package/examples/target/classes/lib/*
 
diff --git a/scala-package/examples/scripts/rnn/run_test_charrnn.sh b/scala-package/examples/scripts/rnn/run_test_charrnn.sh
index 04eee36..a4ed913 100644
--- a/scala-package/examples/scripts/rnn/run_test_charrnn.sh
+++ b/scala-package/examples/scripts/rnn/run_test_charrnn.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 MXNET_ROOT=$(cd "$(dirname $0)/../../../.."; pwd)
 OS=$(uname)
 if [ "$OS" = "Darwin" ]; then
diff --git a/scala-package/examples/scripts/rnn/run_train_charrnn.sh b/scala-package/examples/scripts/rnn/run_train_charrnn.sh
index 07b7dda..2e9a3a2 100755
--- a/scala-package/examples/scripts/rnn/run_train_charrnn.sh
+++ b/scala-package/examples/scripts/rnn/run_train_charrnn.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 MXNET_ROOT=$(cd "$(dirname $0)/../../../.."; pwd)
 OS=$(uname)
 if [ "$OS" = "Darwin" ]; then
diff --git a/scala-package/examples/scripts/run_cnntextclassification.sh b/scala-package/examples/scripts/run_cnntextclassification.sh
index a7cf7c0..7939b06 100644
--- a/scala-package/examples/scripts/run_cnntextclassification.sh
+++ b/scala-package/examples/scripts/run_cnntextclassification.sh
@@ -1,16 +1,34 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 MXNET_ROOT=$(cd "$(dirname $0)/../../.."; pwd)
 CLASS_PATH=$MXNET_ROOT/scala-package/assembly/linux-x86_64-gpu/target/*:$MXNET_ROOT/scala-package/examples/target/*:$MXNET_ROOT/scala-package/examples/target/classes/lib/*
 
 # which gpu card to use, -1 means cpu
 GPU=$1
 # the mr dataset path, you should put the pos and neg file in the same folder
-MR_DATASET_PATH=$2  
+MR_DATASET_PATH=$2
 # the trained word2vec file path, binary or text format
-W2V_FILE_PATH=$3  
+W2V_FILE_PATH=$3
 # whether the format of the word2vec file is binary,1 means binary, 0 means text
-W2V_FORMAT_BIN=$4 
+W2V_FORMAT_BIN=$4
 BATCH_SIZE=$5
 SAVE_MODEL_PATH=$6
 
diff --git a/scala-package/examples/scripts/run_gan_mnist.sh b/scala-package/examples/scripts/run_gan_mnist.sh
index 2d3c545..951241f 100644
--- a/scala-package/examples/scripts/run_gan_mnist.sh
+++ b/scala-package/examples/scripts/run_gan_mnist.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 MXNET_ROOT=$(cd "$(dirname $0)/../../.."; pwd)
 CLASS_PATH=$MXNET_ROOT/scala-package/assembly/linux-x86_64-gpu/target/*:$MXNET_ROOT/scala-package/examples/target/*:$MXNET_ROOT/scala-package/examples/target/classes/lib/*
 
@@ -10,7 +28,7 @@ GPU=$1
 # you can get the mnist data using the script core/scripts/get_mnist_data.sh
 MNIST_DATA_PATH=$2
 
-# the path to save the generated results  
+# the path to save the generated results
 OUTPUT_PATH=$3
 
 java -Xmx4G -cp $CLASS_PATH \
diff --git a/scala-package/examples/scripts/run_multitask.sh b/scala-package/examples/scripts/run_multitask.sh
index 1642cc8..9e6a489 100644
--- a/scala-package/examples/scripts/run_multitask.sh
+++ b/scala-package/examples/scripts/run_multitask.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 MXNET_ROOT=$(cd "$(dirname $0)/../../.."; pwd)
 CLASS_PATH=$MXNET_ROOT/scala-package/assembly/linux-x86_64-gpu/target/*:$MXNET_ROOT/scala-package/examples/target/*:$MXNET_ROOT/scala-package/examples/target/classes/lib/*
 
diff --git a/scala-package/examples/scripts/run_neuralstyle.sh b/scala-package/examples/scripts/run_neuralstyle.sh
index 5fbfc32..a9c2e5c 100644
--- a/scala-package/examples/scripts/run_neuralstyle.sh
+++ b/scala-package/examples/scripts/run_neuralstyle.sh
@@ -1,9 +1,27 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 MXNET_ROOT=$(cd "$(dirname $0)/../../.."; pwd)
 CLASS_PATH=$MXNET_ROOT/scala-package/assembly/linux-x86_64-gpu/target/*:$MXNET_ROOT/scala-package/examples/target/*:$MXNET_ROOT/scala-package/examples/target/classes/lib/*
-INPUT_IMG=$1 
-STYLE_IMG=$2 
+INPUT_IMG=$1
+STYLE_IMG=$2
 MODEL_PATH=$MXNET_ROOT/example/neural-style/model/vgg19.params
 OUTPUT_DIR=$MXNET_ROOT/example/neural-style/output
 
@@ -12,4 +30,4 @@ java -Xmx1024m -cp $CLASS_PATH \
 	--content-image $INPUT_IMG  \
 	--style-image  $STYLE_IMG \
 	--model-path  $MODEL_PATH \
-	--output-dir $OUTPUT_DIR 
+	--output-dir $OUTPUT_DIR
diff --git a/scala-package/examples/scripts/run_visualization.sh b/scala-package/examples/scripts/run_visualization.sh
index 6f686ad..a4b545e 100644
--- a/scala-package/examples/scripts/run_visualization.sh
+++ b/scala-package/examples/scripts/run_visualization.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 MXNET_ROOT=$(cd "$(dirname $0)/../../.."; pwd)
 CLASS_PATH=$MXNET_ROOT/scala-package/assembly/linux-x86_64-cpu/target/*:$MXNET_ROOT/scala-package/examples/target/*:$MXNET_ROOT/scala-package/examples/target/classes/lib/*
 
@@ -15,4 +33,4 @@ NET=$2
 java -Xmx1024m -cp $CLASS_PATH \
 	ml.dmlc.mxnetexamples.visualization.ExampleVis \
 	--out-dir $OUT_DIR  \
-	--net $NET 
+	--net $NET
diff --git a/scala-package/init-native/src/main/native/ml_dmlc_mxnet_init_native_c_api.cc b/scala-package/init-native/src/main/native/ml_dmlc_mxnet_init_native_c_api.cc
index d6daa00..114510c 100644
--- a/scala-package/init-native/src/main/native/ml_dmlc_mxnet_init_native_c_api.cc
+++ b/scala-package/init-native/src/main/native/ml_dmlc_mxnet_init_native_c_api.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file ml_dmlc_mxnet_native_c_api.cc
  * \brief JNI function implementations
  */
diff --git a/scala-package/native/src/main/native/jni_helper_func.h b/scala-package/native/src/main/native/jni_helper_func.h
index d1abd93..009bbec 100644
--- a/scala-package/native/src/main/native/jni_helper_func.h
+++ b/scala-package/native/src/main/native/jni_helper_func.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file jni_helper_func.h
  * \brief Helper functions for operating JVM objects
  */
diff --git a/scala-package/native/src/main/native/ml_dmlc_mxnet_native_c_api.cc b/scala-package/native/src/main/native/ml_dmlc_mxnet_native_c_api.cc
index 07fd075..166f6b7 100644
--- a/scala-package/native/src/main/native/ml_dmlc_mxnet_native_c_api.cc
+++ b/scala-package/native/src/main/native/ml_dmlc_mxnet_native_c_api.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file ml_dmlc_mxnet_native_c_api.cc
  * \brief JNI function implementations
  */
diff --git a/scala-package/spark/bin/run-mnist-example.sh b/scala-package/spark/bin/run-mnist-example.sh
index dc2f3ad..cae1938 100755
--- a/scala-package/spark/bin/run-mnist-example.sh
+++ b/scala-package/spark/bin/run-mnist-example.sh
@@ -1,4 +1,22 @@
 #!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 CURR_DIR=$(cd `dirname $0`; pwd)
 MODULE_DIR=$(cd $CURR_DIR/../; pwd)
 ROOT_DIR=$(cd $CURR_DIR/../../; pwd)
diff --git a/setup-utils/install-mxnet-amz-linux.sh b/setup-utils/install-mxnet-amz-linux.sh
index b8564a5..66788a9 100644
--- a/setup-utils/install-mxnet-amz-linux.sh
+++ b/setup-utils/install-mxnet-amz-linux.sh
@@ -1,4 +1,22 @@
 #!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 ######################################################################
 # This script installs MXNet for Python along with all required dependencies on a Amazon Linux Machine.
 ######################################################################
diff --git a/setup-utils/install-mxnet-fedora-python.sh b/setup-utils/install-mxnet-fedora-python.sh
index 54b716b..8611666 100644
--- a/setup-utils/install-mxnet-fedora-python.sh
+++ b/setup-utils/install-mxnet-fedora-python.sh
@@ -1,4 +1,22 @@
 #!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 ######################################################################
 # This script installs MXNet for Python along with all required dependencies on a Fedora Machine.
 # Tested on Fedora 21.0 + distro.
@@ -24,8 +42,8 @@ sudo yum install numpy
 echo "Installing Python setuptools..."
 sudo yum install -y python-setuptools python-pip
 
-echo "Adding MXNet path to your ~/.bashrc file"		
-echo "export PYTHONPATH=$MXNET_HOME/python:$PYTHONPATH" >> ~/.bashrc		
+echo "Adding MXNet path to your ~/.bashrc file"
+echo "export PYTHONPATH=$MXNET_HOME/python:$PYTHONPATH" >> ~/.bashrc
 source ~/.bashrc
 
 echo "Install Graphviz for plotting MXNet network graph..."
diff --git a/setup-utils/install-mxnet-osx-python.sh b/setup-utils/install-mxnet-osx-python.sh
index f9e4e77..8bfb7da 100755
--- a/setup-utils/install-mxnet-osx-python.sh
+++ b/setup-utils/install-mxnet-osx-python.sh
@@ -1,4 +1,22 @@
 #!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 #
 # This script installs the dependencies and compiles
 # MXNet source.
@@ -12,7 +30,7 @@ export MXNET_GITPATH="https://github.com/dmlc/mxnet.git"
 if [ -z ${MXNET_TAG} ];
 then
 	#
-	# TODO: Change this to latest tag 
+	# TODO: Change this to latest tag
 	#       to avoid updating this value for every release
 	#
 	export MXNET_TAG="v0.10.0"
@@ -138,17 +156,17 @@ install_brew() {
 	echo "END: Check/Install/Update Homebrew"
 	echo $LINE
 	echo " "
-	
+
 	echo "BEGIN: Install dependent brew packages for MXNet: ${BREW_PKGS}"
-	
+
 	chkret brew tap homebrew/science
-	
+
 	# install each individually to see progress for each
 	for pkg in ${BREW_PKGS}
 	do
 		chkret brew_pkg_install ${pkg}
 	done
-	
+
 	echo "END: Install dependent brew packages for MXNet: ${BREW_PKGS}"
 	echo $LINE
 	echo " "
@@ -223,7 +241,7 @@ install_port () {
 	#	sudo mkdir -p /usr/local/opt/openblas/lib
 	#	sudo ln -s /opt/local/lib/libopenblas.a /usr/local/opt/openblas/lib/libopenblas.a
 	#fi
-	
+
 	echo " "
 	echo "END: Install dependent port packages for MXNet: ${PORT_PKGS}"
 	echo $LINE
@@ -265,9 +283,9 @@ install_mac_pkg_manager() {
 		export PKG_MGR=""
 	else
 		export MAC_PKG_ASK=0
-	
+
 		while true; do
-			echo "NOTE: Using the already installed package manager: $PKG_MGR" 
+			echo "NOTE: Using the already installed package manager: $PKG_MGR"
 			read -p "Do you want to continue? (y/n): " response
 			echo " "
 			case $response in
@@ -368,7 +386,7 @@ compile_mxnet() {
 	echo "BEGIN: Compile MXNet"
 	cd ${MXNET_HOME}
 	chkret cp make/osx.mk ./config.mk.tmp
-	
+
 	touch ./config.mk
 	# rm any old setting of USE_BLAS, if present in config file
 	egrep -v "^USE_BLAS" ./config.mk.tmp                   >> ./config.mk
@@ -386,7 +404,7 @@ compile_mxnet() {
 		echo "ADD_LDFLAGS += -L/usr/local/lib/graphviz/"        >> ./config.mk
 	fi
 	echo " "
-	
+
 	echo "NOTE: The following compile-time configurations will be used."
 	echo "      If you want to change any of them, edit the following file"
 	echo "      in another terminal window and then press enter to continue."
@@ -452,7 +470,7 @@ END
 		echo " "
 		echo $LINE
 		echo " "
-		rm -f mxnet_test.log mxnet_test.expected 
+		rm -f mxnet_test.log mxnet_test.expected
 		exit 0
 	else
 		echo " "
diff --git a/setup-utils/install-mxnet-ubuntu-python.sh b/setup-utils/install-mxnet-ubuntu-python.sh
index ba06074..8aa0d02 100644
--- a/setup-utils/install-mxnet-ubuntu-python.sh
+++ b/setup-utils/install-mxnet-ubuntu-python.sh
@@ -1,4 +1,22 @@
 #!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 ######################################################################
 # This script installs MXNet for Python along with all required dependencies on a Ubuntu Machine.
 # Tested on Ubuntu 14.0 + distro.
diff --git a/setup-utils/install-mxnet-ubuntu-r.sh b/setup-utils/install-mxnet-ubuntu-r.sh
index 8f4c07d..ca46d7b 100644
--- a/setup-utils/install-mxnet-ubuntu-r.sh
+++ b/setup-utils/install-mxnet-ubuntu-r.sh
@@ -1,4 +1,22 @@
 #!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 ######################################################################
 # This script installs MXNet for R along with all required dependencies on a Ubuntu Machine.
 # Tested on Ubuntu 14.04+ distro.
@@ -32,7 +50,7 @@ sudo apt-get -y install libcurl4-openssl-dev libssl-dev
 # Needed for R XML
 sudo apt-get install libxml2-dev
 
-# Needed for R Cairo 
+# Needed for R Cairo
 sudo apt-get install libxt-dev
 
 sudo Rscript -e "install.packages('devtools', repo = 'https://cran.rstudio.com')"
diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc
index 214e6ed..93458d2 100644
--- a/src/c_api/c_api.cc
+++ b/src/c_api/c_api.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file c_api.cc
  * \brief C API of mxnet
  */
diff --git a/src/c_api/c_api_common.h b/src/c_api/c_api_common.h
index d8857f8..846b539 100644
--- a/src/c_api/c_api_common.h
+++ b/src/c_api/c_api_common.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file c_api_common.h
  * \brief Error handling for C API.
  */
diff --git a/src/c_api/c_api_error.cc b/src/c_api/c_api_error.cc
index 4ee6a35..4d93b90 100644
--- a/src/c_api/c_api_error.cc
+++ b/src/c_api/c_api_error.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file c_api_error.cc
  * \brief C error handling
  */
diff --git a/src/c_api/c_api_executor.cc b/src/c_api/c_api_executor.cc
index 3ba3154..a4c48e4 100644
--- a/src/c_api/c_api_executor.cc
+++ b/src/c_api/c_api_executor.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file c_api_executor.cc
  * \brief C API of mxnet
  */
diff --git a/src/c_api/c_api_ndarray.cc b/src/c_api/c_api_ndarray.cc
index a37e314..8960518 100644
--- a/src/c_api/c_api_ndarray.cc
+++ b/src/c_api/c_api_ndarray.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file c_api_ndarray.cc
  * \brief C API of mxnet
  */
diff --git a/src/c_api/c_api_symbolic.cc b/src/c_api/c_api_symbolic.cc
index d3603e9..e2c29b8 100644
--- a/src/c_api/c_api_symbolic.cc
+++ b/src/c_api/c_api_symbolic.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file c_api_symbolic.cc
  * \brief C API of mxnet
  */
diff --git a/src/c_api/c_predict_api.cc b/src/c_api/c_predict_api.cc
index 1dd784b..5ca0149 100644
--- a/src/c_api/c_predict_api.cc
+++ b/src/c_api/c_predict_api.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file c_predict_api.cc
  * \brief C predict API of mxnet
  */
diff --git a/src/common/cuda_utils.h b/src/common/cuda_utils.h
index d0defc3..3c4d1a8 100644
--- a/src/common/cuda_utils.h
+++ b/src/common/cuda_utils.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file cuda_utils.h
  * \brief CUDA debugging utilities.
  */
diff --git a/src/common/lazy_alloc_array.h b/src/common/lazy_alloc_array.h
index 61b81e5..aa2cd4a 100644
--- a/src/common/lazy_alloc_array.h
+++ b/src/common/lazy_alloc_array.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file lazy_alloc_array.h
  * \brief An array that lazily allocates its elements
  *   the first time a cell gets visited.
diff --git a/src/common/mxrtc.cc b/src/common/mxrtc.cc
index e808e11..e72ac0b 100644
--- a/src/common/mxrtc.cc
+++ b/src/common/mxrtc.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file mxrtc.cc
  * \brief Wrapper for NVRTC
  * \author Junyuan Xie
diff --git a/src/common/object_pool.h b/src/common/object_pool.h
index 5e22d49..6e11ce5 100644
--- a/src/common/object_pool.h
+++ b/src/common/object_pool.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  */
 #ifndef MXNET_COMMON_OBJECT_POOL_H_
 #define MXNET_COMMON_OBJECT_POOL_H_
diff --git a/src/common/utils.h b/src/common/utils.h
index 5f50aab..85e3097 100644
--- a/src/common/utils.h
+++ b/src/common/utils.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file utils.h
  * \brief Basic utility functions.
  */
diff --git a/src/engine/engine.cc b/src/engine/engine.cc
index ae72861..d619608 100644
--- a/src/engine/engine.cc
+++ b/src/engine/engine.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file engine.cc
  * \brief Implementation of engine.
  */
diff --git a/src/engine/engine_impl.h b/src/engine/engine_impl.h
index 9d3fc4c..cf72736 100644
--- a/src/engine/engine_impl.h
+++ b/src/engine/engine_impl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file engine_impl.h
  * \brief Internal implementation header of engine components.
  */
diff --git a/src/engine/naive_engine.cc b/src/engine/naive_engine.cc
index 11ff7c8..85ec3ae 100644
--- a/src/engine/naive_engine.cc
+++ b/src/engine/naive_engine.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file naive_engine.cc
  * \brief Implementation of NaiveEngine
  */
diff --git a/src/engine/profiler.cc b/src/engine/profiler.cc
index 44099c3..99504f6 100644
--- a/src/engine/profiler.cc
+++ b/src/engine/profiler.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file profiler.cc
  * \brief implements profiler
  */
diff --git a/src/engine/profiler.h b/src/engine/profiler.h
index f28d691..b7f8e0e 100644
--- a/src/engine/profiler.h
+++ b/src/engine/profiler.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file profiler.h
  * \brief implements profiler
  */
diff --git a/src/engine/stream_manager.h b/src/engine/stream_manager.h
index 2d684bb..1a66277 100644
--- a/src/engine/stream_manager.h
+++ b/src/engine/stream_manager.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  */
 #ifndef MXNET_ENGINE_STREAM_MANAGER_H_
 #define MXNET_ENGINE_STREAM_MANAGER_H_
diff --git a/src/engine/thread_pool.h b/src/engine/thread_pool.h
index 060f473..b6fe3c2 100644
--- a/src/engine/thread_pool.h
+++ b/src/engine/thread_pool.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  */
 #ifndef MXNET_ENGINE_THREAD_POOL_H_
 #define MXNET_ENGINE_THREAD_POOL_H_
diff --git a/src/engine/threaded_engine.cc b/src/engine/threaded_engine.cc
index 3632a46..5f348fb 100644
--- a/src/engine/threaded_engine.cc
+++ b/src/engine/threaded_engine.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file threaded_engine.cc
  * \brief implements base threaded engine.
  * \author Yutian Li
diff --git a/src/engine/threaded_engine.h b/src/engine/threaded_engine.h
index 4612cc6..9b7b74d 100644
--- a/src/engine/threaded_engine.h
+++ b/src/engine/threaded_engine.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file threaded_engine.h
  * \brief Implements base class of threaded engine
  *    that tracks the dependency and pushes actions to execute.
diff --git a/src/engine/threaded_engine_perdevice.cc b/src/engine/threaded_engine_perdevice.cc
index 97356ae..66cfc9d 100644
--- a/src/engine/threaded_engine_perdevice.cc
+++ b/src/engine/threaded_engine_perdevice.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file threaded_engine_perdevice.cc
  * \brief ThreadedEngine that uses fix amount of thread for each device.
  */
diff --git a/src/engine/threaded_engine_pooled.cc b/src/engine/threaded_engine_pooled.cc
index d806c38..6db7c4b 100644
--- a/src/engine/threaded_engine_pooled.cc
+++ b/src/engine/threaded_engine_pooled.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file threaded_engine_pooled.cc
  * \brief Pooled threaded engine
  * \author Yutian Li
diff --git a/src/executor/attach_op_execs_pass.cc b/src/executor/attach_op_execs_pass.cc
index c4b3a18..13b0018 100644
--- a/src/executor/attach_op_execs_pass.cc
+++ b/src/executor/attach_op_execs_pass.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file attach_op_execs_pass.cc
  * \brief Operator executor to execute each operator.
  */
diff --git a/src/executor/attach_op_resource_pass.cc b/src/executor/attach_op_resource_pass.cc
index 73c8f48..ef26a35 100644
--- a/src/executor/attach_op_resource_pass.cc
+++ b/src/executor/attach_op_resource_pass.cc
@@ -1,6 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 
 /*!
- * Copyright (c) 2016 by Contributors
  * \file attach_op_resource_pass.cc
  * \brief Pass to attach resource to OpExecVector of the graph.
  */
diff --git a/src/executor/exec_pass.h b/src/executor/exec_pass.h
index 76b02de..0eda71d 100644
--- a/src/executor/exec_pass.h
+++ b/src/executor/exec_pass.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file exec_pass.h
  * \brief All the execution related pass and data structures.
  */
diff --git a/src/executor/graph_executor.cc b/src/executor/graph_executor.cc
index a17f44a..6dc8cf3 100644
--- a/src/executor/graph_executor.cc
+++ b/src/executor/graph_executor.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file graph_executor.cc
  * \brief graph executor
  */
diff --git a/src/executor/graph_executor.h b/src/executor/graph_executor.h
index 0efb8ae..dc50bef 100644
--- a/src/executor/graph_executor.h
+++ b/src/executor/graph_executor.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file graph_executor.h
  * \brief Executor to execute the computation graph.
  */
diff --git a/src/executor/inplace_addto_detect_pass.cc b/src/executor/inplace_addto_detect_pass.cc
index 75a2608..26a91e3 100644
--- a/src/executor/inplace_addto_detect_pass.cc
+++ b/src/executor/inplace_addto_detect_pass.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file inplace_addto_detect_pass.cc
  * \brief Detect whether inplace addto operation is possible for certain op.
  */
diff --git a/src/initialize.cc b/src/initialize.cc
index c1e897f..092dacf 100644
--- a/src/initialize.cc
+++ b/src/initialize.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file initialize.cc
  * \brief initialize mxnet library
  */
diff --git a/src/io/image_aug_default.cc b/src/io/image_aug_default.cc
index f7a79d1..6db14bd 100644
--- a/src/io/image_aug_default.cc
+++ b/src/io/image_aug_default.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file image_aug_default.cc
  * \brief Default augmenter.
  */
diff --git a/src/io/image_augmenter.h b/src/io/image_augmenter.h
index 6c0ad98..5b6c4e9 100644
--- a/src/io/image_augmenter.h
+++ b/src/io/image_augmenter.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file image_augmenter.h
  * \brief Interface of opencv based image augmenter
  */
diff --git a/src/io/image_det_aug_default.cc b/src/io/image_det_aug_default.cc
index cb7966d..7d15fac 100644
--- a/src/io/image_det_aug_default.cc
+++ b/src/io/image_det_aug_default.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file image_det_aug_default.cc
  * \brief Default augmenter.
  */
diff --git a/src/io/image_io.cc b/src/io/image_io.cc
index 64fd2dd..f9d7f33 100644
--- a/src/io/image_io.cc
+++ b/src/io/image_io.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file optimizer_op-inl.h
  * \brief Optimizer operators
  * \author Junyuan Xie
diff --git a/src/io/image_iter_common.h b/src/io/image_iter_common.h
index 59916c9..f2f72dc 100644
--- a/src/io/image_iter_common.h
+++ b/src/io/image_iter_common.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2017 by Contributors
  * \file image_iter_common.h
  * \brief common types used by image data iterators
  */
diff --git a/src/io/image_recordio.h b/src/io/image_recordio.h
index 10674ec..a931539 100644
--- a/src/io/image_recordio.h
+++ b/src/io/image_recordio.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file image_recordio.h
  * \brief image recordio struct
  */
@@ -24,7 +42,7 @@ struct ImageRecordIO {
     /*!
      * \brief label field that returns label of images
      *  when image list was not presented,
-     * 
+     *
      * NOTE: user do not need to repack recordio just to
      * change label field, just supply a list file that
      * maps image id to new labels
@@ -58,9 +76,9 @@ struct ImageRecordIO {
     return header.image_id[0];
   }
   /*!
-   * \brief load header from a record content 
+   * \brief load header from a record content
    * \param buf the head of record
-   * \param size the size of the entire record   
+   * \param size the size of the entire record
    */
   inline void Load(void *buf, size_t size) {
     CHECK(size >= sizeof(header));
diff --git a/src/io/inst_vector.h b/src/io/inst_vector.h
index d82bd48..4bc2a6c 100644
--- a/src/io/inst_vector.h
+++ b/src/io/inst_vector.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file inst_vector.h
  * \brief holder of a sequence of DataInst in CPU
  *        that are not necessarily of same shape
diff --git a/src/io/io.cc b/src/io/io.cc
index 822f66f..e7c9284 100644
--- a/src/io/io.cc
+++ b/src/io/io.cc
@@ -1,4 +1,22 @@
-// Copyright (c) 2015 by Contributors
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 
 #include <mxnet/io.h>
 #include <dmlc/registry.h>
diff --git a/src/io/iter_batchloader.h b/src/io/iter_batchloader.h
index a51e245..c5ec106 100644
--- a/src/io/iter_batchloader.h
+++ b/src/io/iter_batchloader.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file iter_batchloader.h
  * \brief define a batch adapter to create tblob batch
  */
diff --git a/src/io/iter_csv.cc b/src/io/iter_csv.cc
index 9dcbcb8..a28b8d4 100644
--- a/src/io/iter_csv.cc
+++ b/src/io/iter_csv.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file iter_csv.cc
  * \brief define a CSV Reader to read in arrays
  */
diff --git a/src/io/iter_image_det_recordio.cc b/src/io/iter_image_det_recordio.cc
index 25e920d..4e80d5d 100644
--- a/src/io/iter_image_det_recordio.cc
+++ b/src/io/iter_image_det_recordio.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file iter_image_recordio-inl.hpp
  * \brief recordio data iterator
  */
diff --git a/src/io/iter_image_recordio.cc b/src/io/iter_image_recordio.cc
index 49694d0..64f31a6 100644
--- a/src/io/iter_image_recordio.cc
+++ b/src/io/iter_image_recordio.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file iter_image_recordio-inl.hpp
  * \brief recordio data iterator
  */
diff --git a/src/io/iter_image_recordio_2.cc b/src/io/iter_image_recordio_2.cc
index 9d4ebf4..c4d1e86 100644
--- a/src/io/iter_image_recordio_2.cc
+++ b/src/io/iter_image_recordio_2.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2017 by Contributors
  * \file iter_image_recordio_2.cc
  * \brief new version of recordio data iterator
  */
diff --git a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc
index 09799f2..055af52 100644
--- a/src/io/iter_mnist.cc
+++ b/src/io/iter_mnist.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file iter_mnist.cc
  * \brief register mnist iterator
 */
diff --git a/src/io/iter_normalize.h b/src/io/iter_normalize.h
index 2cebaaa..409231b 100644
--- a/src/io/iter_normalize.h
+++ b/src/io/iter_normalize.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file iter_normalize.h
  * \brief Iterator that subtracts mean and do a few augmentations.
  */
diff --git a/src/io/iter_prefetcher.h b/src/io/iter_prefetcher.h
index 9050ef2..89960c7 100644
--- a/src/io/iter_prefetcher.h
+++ b/src/io/iter_prefetcher.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file iter_prefetcher.h
  * \brief define a prefetcher using threaditer to keep k batch fetched
  */
diff --git a/src/kvstore/comm.h b/src/kvstore/comm.h
index 07f2d24..ade9c95 100644
--- a/src/kvstore/comm.h
+++ b/src/kvstore/comm.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /**
- * Copyright (c) 2015 by Contributors
  */
 #ifndef MXNET_KVSTORE_COMM_H_
 #define MXNET_KVSTORE_COMM_H_
diff --git a/src/kvstore/kvstore.cc b/src/kvstore/kvstore.cc
index 78d4958..a288676 100644
--- a/src/kvstore/kvstore.cc
+++ b/src/kvstore/kvstore.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file kvstore.cc
  * \brief implement kv_store
  */
diff --git a/src/kvstore/kvstore_dist.h b/src/kvstore/kvstore_dist.h
index 5f5a0cc..52c7c13 100644
--- a/src/kvstore/kvstore_dist.h
+++ b/src/kvstore/kvstore_dist.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /**
- * Copyright (c) 2015 by Contributors
  * @file   kvstore_dist.h
  * @brief  distributed implementation based on ps-lite
  */
diff --git a/src/kvstore/kvstore_dist_server.h b/src/kvstore/kvstore_dist_server.h
index 02d4a38..4e9f887 100644
--- a/src/kvstore/kvstore_dist_server.h
+++ b/src/kvstore/kvstore_dist_server.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file mxnet_node.h
  * \brief implement mxnet nodes
  */
diff --git a/src/kvstore/kvstore_local.h b/src/kvstore/kvstore_local.h
index dc5f7b7..536a89b 100644
--- a/src/kvstore/kvstore_local.h
+++ b/src/kvstore/kvstore_local.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /**
- * Copyright (c) 2015 by Contributors
  * @file   kvstore_local.h
  * @brief  local implementation
  */
diff --git a/src/ndarray/autograd.cc b/src/ndarray/autograd.cc
index efb6bc9..33d0d5d 100644
--- a/src/ndarray/autograd.cc
+++ b/src/ndarray/autograd.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2017 by Contributors
  * \file autograd.cc
  * \brief Implementation of AutogradRuntime module.
  */
diff --git a/src/ndarray/autograd.h b/src/ndarray/autograd.h
index 4748640..52e461d 100644
--- a/src/ndarray/autograd.h
+++ b/src/ndarray/autograd.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2017 by Contributors
  * \file autograd.h
  * \brief AutogradRuntime can automatically compute gradients
  */
diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc
index 48499fa..8e71df7 100644
--- a/src/ndarray/ndarray.cc
+++ b/src/ndarray/ndarray.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file ndarray.cc
  * \brief ndarry module of mxnet
  */
diff --git a/src/ndarray/ndarray_function-inl.h b/src/ndarray/ndarray_function-inl.h
index 28524b7..2be55f5 100644
--- a/src/ndarray/ndarray_function-inl.h
+++ b/src/ndarray/ndarray_function-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file ndarray_function-inl.h
  * \brief The real implementation of NDArray functions.
  */
diff --git a/src/ndarray/ndarray_function.cc b/src/ndarray/ndarray_function.cc
index a5ba266..e4af86d 100644
--- a/src/ndarray/ndarray_function.cc
+++ b/src/ndarray/ndarray_function.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file ndarray_function_cpu.cc
  * \brief CPU Implementation of ndarray function.
  */
diff --git a/src/ndarray/ndarray_function.cu b/src/ndarray/ndarray_function.cu
index 13d36a2..30d5326 100644
--- a/src/ndarray/ndarray_function.cu
+++ b/src/ndarray/ndarray_function.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file ndarray_function_cpu.cc
  * \brief GPU Implementation of ndarray function.
  */
diff --git a/src/ndarray/ndarray_function.h b/src/ndarray/ndarray_function.h
index 479f6f9..b1ed58d 100644
--- a/src/ndarray/ndarray_function.h
+++ b/src/ndarray/ndarray_function.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file ndarray_op.h
  * \brief the real execution functions of ndarray operations
  */
diff --git a/src/operator/activation-inl.h b/src/operator/activation-inl.h
index ead79be..6e6222b 100644
--- a/src/operator/activation-inl.h
+++ b/src/operator/activation-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file activation-inl.h
  * \brief Activation operator
  * \author Bing Xu
diff --git a/src/operator/activation.cc b/src/operator/activation.cc
index 7ef452f..a33c11c 100644
--- a/src/operator/activation.cc
+++ b/src/operator/activation.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file activation.cc
  * \brief activation op
  * \author Bing Xu
diff --git a/src/operator/activation.cu b/src/operator/activation.cu
index 9a55354..0ac51ad 100644
--- a/src/operator/activation.cu
+++ b/src/operator/activation.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file activation.cu
  * \brief
  * \author Bing Xu
diff --git a/src/operator/batch_norm-inl.h b/src/operator/batch_norm-inl.h
index 2d9c96b..b53acb4 100644
--- a/src/operator/batch_norm-inl.h
+++ b/src/operator/batch_norm-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file batch_norm-inl.h
  * \brief
  * \author Bing Xu, Chris Olivier
diff --git a/src/operator/batch_norm.cc b/src/operator/batch_norm.cc
index e56b306..86f47dd 100644
--- a/src/operator/batch_norm.cc
+++ b/src/operator/batch_norm.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file batch_norm.cc
  * \brief
  * \author Bing Xu, Chris Olivier
diff --git a/src/operator/batch_norm.cu b/src/operator/batch_norm.cu
index 9f7370f..64f7d93 100644
--- a/src/operator/batch_norm.cu
+++ b/src/operator/batch_norm.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file batch_norm.cu
  * \brief CUDA Batch Normalization code
  * \author Chris Olivier, Bing Xu
diff --git a/src/operator/batch_norm_v1-inl.h b/src/operator/batch_norm_v1-inl.h
index 19215c5..092c482 100644
--- a/src/operator/batch_norm_v1-inl.h
+++ b/src/operator/batch_norm_v1-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file batch_norm-inl_v1.h
  * \brief
  * \author Bing Xu
diff --git a/src/operator/batch_norm_v1.cc b/src/operator/batch_norm_v1.cc
index 32c5034..1abced8 100644
--- a/src/operator/batch_norm_v1.cc
+++ b/src/operator/batch_norm_v1.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file batch_norm_v1.cc
  * \brief
  * \author Bing Xu
diff --git a/src/operator/batch_norm_v1.cu b/src/operator/batch_norm_v1.cu
index 302dc47..8ed22a4 100644
--- a/src/operator/batch_norm_v1.cu
+++ b/src/operator/batch_norm_v1.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file batch_norm_v1.cu
  * \brief
  * \author Bing Xu
diff --git a/src/operator/bilinear_sampler-inl.h b/src/operator/bilinear_sampler-inl.h
index b4c9d99..2d68d78 100644
--- a/src/operator/bilinear_sampler-inl.h
+++ b/src/operator/bilinear_sampler-inl.h
@@ -1,219 +1,237 @@
-/*!
- * Copyright (c) 2017 by Contributors
- * \file bilinear_Sampler-inl.h
- * \brief
- * \author Xu Dong
-*/
-#ifndef MXNET_OPERATOR_BILINEAR_SAMPLER_INL_H_
-#define MXNET_OPERATOR_BILINEAR_SAMPLER_INL_H_
-
-#include <dmlc/logging.h>
-#include <dmlc/parameter.h>
-#include <mxnet/operator.h>
-#include <vector>
-#include <map>
-#include <string>
-#include <utility>
-#include "./operator_common.h"
-
-namespace mxnet {
-namespace op {
-
-namespace bs {
-enum BilinearSamplerOpInputs {kData, kGrid};
-enum BilinearSamplerOpOutputs {kOut, kTmp};
-}
-
-struct BilinearSamplerParam : public dmlc::Parameter<BilinearSamplerParam> {
-  DMLC_DECLARE_PARAMETER(BilinearSamplerParam) {
-  }
-};
-
-template<typename xpu, typename DType>
-class BilinearSamplerOp : public Operator {
- public:
-  explicit BilinearSamplerOp(BilinearSamplerParam p) {
-    this->param_ = p;
-  }
-
-  virtual void Forward(const OpContext &ctx,
-                       const std::vector<TBlob> &in_data,
-                       const std::vector<OpReqType> &req,
-                       const std::vector<TBlob> &out_data,
-                       const std::vector<TBlob> &aux_args) {
-    using namespace mshadow;
-    using namespace mshadow::expr;
-    CHECK_EQ(req[bs::kOut], kWriteTo);
-    CHECK_EQ(in_data.size(), 2U);
-    Stream<xpu> *s = ctx.get_stream<xpu>();
-
-    Tensor<xpu, 4, DType> data = in_data[bs::kData].get<xpu, 4, DType>(s);
-    Tensor<xpu, 4, DType> grid = in_data[bs::kGrid].get<xpu, 4, DType>(s);
-    Tensor<xpu, 4, DType> out = out_data[bs::kOut].get<xpu, 4, DType>(s);
-
-    BilinearSamplerForward(out, data, grid);
-  }
-
-  virtual void Backward(const OpContext &ctx,
-                        const std::vector<TBlob> &out_grad,
-                        const std::vector<TBlob> &in_data,
-                        const std::vector<TBlob> &out_data,
-                        const std::vector<OpReqType> &req,
-                        const std::vector<TBlob> &in_grad,
-                        const std::vector<TBlob> &aux_args) {
-    using namespace mshadow;
-    using namespace mshadow::expr;
-    CHECK_EQ(in_data.size(), 2U);
-    CHECK_NE(req[bs::kData], kWriteInplace);
-    CHECK_NE(req[bs::kGrid], kWriteInplace);
-    Stream<xpu> *s = ctx.get_stream<xpu>();
-
-    Tensor<xpu, 4, DType> data = in_data[bs::kData].get<xpu, 4, DType>(s);
-    Tensor<xpu, 4, DType> grid = in_data[bs::kGrid].get<xpu, 4, DType>(s);
-    Tensor<xpu, 4, DType> gdata = in_grad[bs::kData].get<xpu, 4, DType>(s);
-    Tensor<xpu, 4, DType> ggrid = in_grad[bs::kGrid].get<xpu, 4, DType>(s);
-    Tensor<xpu, 4, DType> grad = out_grad[bs::kOut].get<xpu, 4, DType>(s);
-    if (req[bs::kData] != kNullOp && req[bs::kGrid] != kNullOp) {
-      if (req[bs::kData] == kWriteTo) {
-        gdata = scalar<DType>(0.0f);
-      }
-      if (req[bs::kGrid] == kWriteTo) {
-        ggrid = scalar<DType>(0.0f);
-      }
-      BilinearSamplerBackward(gdata, ggrid, grad, data, grid);
-    } else if (req[bs::kData] == kNullOp && req[bs::kGrid] == kNullOp) {
-      return;
-    } else {
-      LOG(FATAL) << "Have not implemented the data req combinations! gdata_req="
-                 << req[bs::kData] << " ggrid_req=" << req[bs::kGrid];
-    }
-  }
-
- private:
-  BilinearSamplerParam param_;
-};  // class BilinearSamplerOp
-
-template<typename xpu>
-Operator* CreateOp(BilinearSamplerParam param, int dtype);
-
-#if DMLC_USE_CXX11
-class BilinearSamplerProp : public OperatorProperty {
- public:
-  int NumVisibleOutputs() const override {
-    return 1;
-  }
-
-  int NumOutputs() const override {
-    return 2;
-  }
-
-  std::vector<std::string> ListArguments() const override {
-    return {"data", "grid"};
-  }
-
-  std::vector<std::string> ListOutputs() const override {
-    return {"output", "tmp"};
-  }
-
-  void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) override {
-    param_.Init(kwargs);
-  }
-
-  std::map<std::string, std::string> GetParams() const override {
-    return param_.__DICT__();
-  }
-
-  bool InferShape(std::vector<TShape> *in_shape,
-                  std::vector<TShape> *out_shape,
-                  std::vector<TShape> *aux_shape) const override {
-    using namespace mshadow;
-    CHECK_EQ(in_shape->size(), 2U) << "Input:[data, grid]";
-    const TShape &dshape = (*in_shape)[bs::kData];
-    const TShape &lshape = (*in_shape)[bs::kGrid];
-    if (dshape.ndim() == 0) return false;
-    CHECK_EQ(dshape.ndim(), 4U) \
-        << "input data should be 4D in batch-num_filter-y-x";
-    if (lshape.ndim() ==  0) return false;
-    CHECK_EQ(lshape.ndim(), 4U) \
-      << "Sampler grid should be 4D in batch-2-y-x";
-    CHECK_EQ(dshape[0], lshape[0]);
-    CHECK_EQ(lshape[1], 2U) << "incorrect grid shape[1], should be 2";
-    // target height
-    CHECK_GT(lshape[2], 0U) \
-            << "incorrect grid_shape: " << lshape[2];
-    // target width
-    CHECK_GT(lshape[3], 0U) \
-        << "incorrect grid_shape: " << lshape[3];
-    out_shape->clear();
-    // output_shape : (data.shape[0], data.shape[1], grid.shape[2], grid.shape[3])
-    out_shape->push_back(dshape);
-    (*out_shape)[bs::kOut][2] = lshape[2];
-    (*out_shape)[bs::kOut][3] = lshape[3];
-    out_shape->push_back(Shape4(lshape[0], lshape[2], lshape[3], 2));
-    return true;
-  }
-
-  bool InferType(std::vector<int> *in_type,
-                   std::vector<int> *out_type,
-                   std::vector<int> *aux_type) const override {
-      int dtype = -1;
-      for (size_t i = 0; i < in_type->size(); ++i) {
-        if (dtype == -1) {
-          dtype = in_type->at(i);
-        } else {
-          CHECK(in_type->at(i) == dtype ||
-                in_type->at(i) == -1) <<
-                "Non-uniform data type in BilinearSampler";
-        }
-      }
-      if (dtype == -1) {
-        LOG(FATAL) << "Not enough information to infer type in BilinearSampler.";
-        return false;
-      }
-      size_t nin = this->ListArguments().size();
-      in_type->clear();
-      for (size_t i = 0; i < nin; ++i) in_type->push_back(dtype);
-      size_t naux = this->ListAuxiliaryStates().size();
-      aux_type->clear();
-      for (size_t i = 0; i < naux; ++i) aux_type->push_back(dtype);
-      size_t nout = this->ListOutputs().size();
-      out_type->clear();
-      for (size_t i = 0; i < nout; ++i) out_type->push_back(dtype);
-      return true;
-    }
-
-  OperatorProperty* Copy() const override {
-    auto ptr = new BilinearSamplerProp();
-    ptr->param_ = param_;
-    return ptr;
-  }
-
-  std::string TypeString() const override {
-    return "BilinearSampler";
-  }
-
-  std::vector<int> DeclareBackwardDependency(
-    const std::vector<int> &out_grad,
-    const std::vector<int> &in_data,
-    const std::vector<int> &out_data) const override {
-    return {out_grad[bs::kOut],
-            in_data[bs::kData],
-            out_data[bs::kTmp],
-            in_data[bs::kGrid]};
-  }
-
-  Operator* CreateOperator(Context ctx) const override {
-    LOG(FATAL) << "Not Implemented.";
-    return NULL;
-  }
-
-  Operator* CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
-                             std::vector<int> *in_type) const override;
-
- private:
-  BilinearSamplerParam param_;
-};  // class BilinearSamplerProp
-#endif  // DMLC_USE_CXX11
-}  // namespace op
-}  // namespace mxnet
-#endif  // MXNET_OPERATOR_BILINEAR_SAMPLER_INL_H_
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file bilinear_Sampler-inl.h
+ * \brief
+ * \author Xu Dong
+*/
+#ifndef MXNET_OPERATOR_BILINEAR_SAMPLER_INL_H_
+#define MXNET_OPERATOR_BILINEAR_SAMPLER_INL_H_
+
+#include <dmlc/logging.h>
+#include <dmlc/parameter.h>
+#include <mxnet/operator.h>
+#include <vector>
+#include <map>
+#include <string>
+#include <utility>
+#include "./operator_common.h"
+
+namespace mxnet {
+namespace op {
+
+namespace bs {
+enum BilinearSamplerOpInputs {kData, kGrid};
+enum BilinearSamplerOpOutputs {kOut, kTmp};
+}
+
+struct BilinearSamplerParam : public dmlc::Parameter<BilinearSamplerParam> {
+  DMLC_DECLARE_PARAMETER(BilinearSamplerParam) {
+  }
+};
+
+template<typename xpu, typename DType>
+class BilinearSamplerOp : public Operator {
+ public:
+  explicit BilinearSamplerOp(BilinearSamplerParam p) {
+    this->param_ = p;
+  }
+
+  virtual void Forward(const OpContext &ctx,
+                       const std::vector<TBlob> &in_data,
+                       const std::vector<OpReqType> &req,
+                       const std::vector<TBlob> &out_data,
+                       const std::vector<TBlob> &aux_args) {
+    using namespace mshadow;
+    using namespace mshadow::expr;
+    CHECK_EQ(req[bs::kOut], kWriteTo);
+    CHECK_EQ(in_data.size(), 2U);
+    Stream<xpu> *s = ctx.get_stream<xpu>();
+
+    Tensor<xpu, 4, DType> data = in_data[bs::kData].get<xpu, 4, DType>(s);
+    Tensor<xpu, 4, DType> grid = in_data[bs::kGrid].get<xpu, 4, DType>(s);
+    Tensor<xpu, 4, DType> out = out_data[bs::kOut].get<xpu, 4, DType>(s);
+
+    BilinearSamplerForward(out, data, grid);
+  }
+
+  virtual void Backward(const OpContext &ctx,
+                        const std::vector<TBlob> &out_grad,
+                        const std::vector<TBlob> &in_data,
+                        const std::vector<TBlob> &out_data,
+                        const std::vector<OpReqType> &req,
+                        const std::vector<TBlob> &in_grad,
+                        const std::vector<TBlob> &aux_args) {
+    using namespace mshadow;
+    using namespace mshadow::expr;
+    CHECK_EQ(in_data.size(), 2U);
+    CHECK_NE(req[bs::kData], kWriteInplace);
+    CHECK_NE(req[bs::kGrid], kWriteInplace);
+    Stream<xpu> *s = ctx.get_stream<xpu>();
+
+    Tensor<xpu, 4, DType> data = in_data[bs::kData].get<xpu, 4, DType>(s);
+    Tensor<xpu, 4, DType> grid = in_data[bs::kGrid].get<xpu, 4, DType>(s);
+    Tensor<xpu, 4, DType> gdata = in_grad[bs::kData].get<xpu, 4, DType>(s);
+    Tensor<xpu, 4, DType> ggrid = in_grad[bs::kGrid].get<xpu, 4, DType>(s);
+    Tensor<xpu, 4, DType> grad = out_grad[bs::kOut].get<xpu, 4, DType>(s);
+    if (req[bs::kData] != kNullOp && req[bs::kGrid] != kNullOp) {
+      if (req[bs::kData] == kWriteTo) {
+        gdata = scalar<DType>(0.0f);
+      }
+      if (req[bs::kGrid] == kWriteTo) {
+        ggrid = scalar<DType>(0.0f);
+      }
+      BilinearSamplerBackward(gdata, ggrid, grad, data, grid);
+    } else if (req[bs::kData] == kNullOp && req[bs::kGrid] == kNullOp) {
+      return;
+    } else {
+      LOG(FATAL) << "Have not implemented the data req combinations! gdata_req="
+                 << req[bs::kData] << " ggrid_req=" << req[bs::kGrid];
+    }
+  }
+
+ private:
+  BilinearSamplerParam param_;
+};  // class BilinearSamplerOp
+
+template<typename xpu>
+Operator* CreateOp(BilinearSamplerParam param, int dtype);
+
+#if DMLC_USE_CXX11
+class BilinearSamplerProp : public OperatorProperty {
+ public:
+  int NumVisibleOutputs() const override {
+    return 1;
+  }
+
+  int NumOutputs() const override {
+    return 2;
+  }
+
+  std::vector<std::string> ListArguments() const override {
+    return {"data", "grid"};
+  }
+
+  std::vector<std::string> ListOutputs() const override {
+    return {"output", "tmp"};
+  }
+
+  void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) override {
+    param_.Init(kwargs);
+  }
+
+  std::map<std::string, std::string> GetParams() const override {
+    return param_.__DICT__();
+  }
+
+  bool InferShape(std::vector<TShape> *in_shape,
+                  std::vector<TShape> *out_shape,
+                  std::vector<TShape> *aux_shape) const override {
+    using namespace mshadow;
+    CHECK_EQ(in_shape->size(), 2U) << "Input:[data, grid]";
+    const TShape &dshape = (*in_shape)[bs::kData];
+    const TShape &lshape = (*in_shape)[bs::kGrid];
+    if (dshape.ndim() == 0) return false;
+    CHECK_EQ(dshape.ndim(), 4U) \
+        << "input data should be 4D in batch-num_filter-y-x";
+    if (lshape.ndim() ==  0) return false;
+    CHECK_EQ(lshape.ndim(), 4U) \
+      << "Sampler grid should be 4D in batch-2-y-x";
+    CHECK_EQ(dshape[0], lshape[0]);
+    CHECK_EQ(lshape[1], 2U) << "incorrect grid shape[1], should be 2";
+    // target height
+    CHECK_GT(lshape[2], 0U) \
+            << "incorrect grid_shape: " << lshape[2];
+    // target width
+    CHECK_GT(lshape[3], 0U) \
+        << "incorrect grid_shape: " << lshape[3];
+    out_shape->clear();
+    // output_shape : (data.shape[0], data.shape[1], grid.shape[2], grid.shape[3])
+    out_shape->push_back(dshape);
+    (*out_shape)[bs::kOut][2] = lshape[2];
+    (*out_shape)[bs::kOut][3] = lshape[3];
+    out_shape->push_back(Shape4(lshape[0], lshape[2], lshape[3], 2));
+    return true;
+  }
+
+  bool InferType(std::vector<int> *in_type,
+                   std::vector<int> *out_type,
+                   std::vector<int> *aux_type) const override {
+      int dtype = -1;
+      for (size_t i = 0; i < in_type->size(); ++i) {
+        if (dtype == -1) {
+          dtype = in_type->at(i);
+        } else {
+          CHECK(in_type->at(i) == dtype ||
+                in_type->at(i) == -1) <<
+                "Non-uniform data type in BilinearSampler";
+        }
+      }
+      if (dtype == -1) {
+        LOG(FATAL) << "Not enough information to infer type in BilinearSampler.";
+        return false;
+      }
+      size_t nin = this->ListArguments().size();
+      in_type->clear();
+      for (size_t i = 0; i < nin; ++i) in_type->push_back(dtype);
+      size_t naux = this->ListAuxiliaryStates().size();
+      aux_type->clear();
+      for (size_t i = 0; i < naux; ++i) aux_type->push_back(dtype);
+      size_t nout = this->ListOutputs().size();
+      out_type->clear();
+      for (size_t i = 0; i < nout; ++i) out_type->push_back(dtype);
+      return true;
+    }
+
+  OperatorProperty* Copy() const override {
+    auto ptr = new BilinearSamplerProp();
+    ptr->param_ = param_;
+    return ptr;
+  }
+
+  std::string TypeString() const override {
+    return "BilinearSampler";
+  }
+
+  std::vector<int> DeclareBackwardDependency(
+    const std::vector<int> &out_grad,
+    const std::vector<int> &in_data,
+    const std::vector<int> &out_data) const override {
+    return {out_grad[bs::kOut],
+            in_data[bs::kData],
+            out_data[bs::kTmp],
+            in_data[bs::kGrid]};
+  }
+
+  Operator* CreateOperator(Context ctx) const override {
+    LOG(FATAL) << "Not Implemented.";
+    return NULL;
+  }
+
+  Operator* CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
+                             std::vector<int> *in_type) const override;
+
+ private:
+  BilinearSamplerParam param_;
+};  // class BilinearSamplerProp
+#endif  // DMLC_USE_CXX11
+}  // namespace op
+}  // namespace mxnet
+#endif  // MXNET_OPERATOR_BILINEAR_SAMPLER_INL_H_
diff --git a/src/operator/bilinear_sampler.cc b/src/operator/bilinear_sampler.cc
index ca83a43..d03f679 100644
--- a/src/operator/bilinear_sampler.cc
+++ b/src/operator/bilinear_sampler.cc
@@ -1,228 +1,246 @@
-/*!
- * Copyright (c) 2017 by Contributors
- * \file bilinear_sampler.cc
- * \brief
- * \author Xu Dong
-*/
-
-#include "./bilinear_sampler-inl.h"
-
-namespace mshadow {
-template<typename DType>
-bool between(DType value, int lowerBound, int upperBound) {
-  return (value >= lowerBound && value <= upperBound);
-}
-template<typename DType>
-inline void BilinearSamplerForward(const Tensor<cpu, 4, DType> &output,
-                                    const Tensor<cpu, 4, DType> &input,
-                                    const Tensor<cpu, 4, DType> &grid_src) {
-  DType *out = output.dptr_;
-  const DType *data = input.dptr_;
-  const DType *grid = grid_src.dptr_;
-  int o_n = output.size(0), o_c = output.size(1), o_h = output.size(2), o_w = output.size(3);
-  int i_c = input.size(1), i_h = input.size(2), i_w = input.size(3);
-  for (index_t n = 0; n < static_cast<index_t>(o_n); ++n) {
-    for (index_t c = 0; c < static_cast<index_t>(o_c); ++c) {
-      for (index_t h = 0; h < static_cast<index_t>(o_h); ++h) {
-        for (index_t w = 0; w < static_cast<index_t>(o_w); ++w) {
-          index_t out_index = n * o_c * o_h * o_w + c * o_h * o_w + h * o_w + w;
-          index_t grid_index = n * o_h * o_w * 2 + h * o_w + w;
-          DType y_real = (*(grid + grid_index + o_h * o_w) + 1) * (i_h - 1) / 2;
-          DType x_real = (*(grid + grid_index) + 1) * (i_w - 1) / 2;
-          int top_left_y = static_cast<int>(floor(y_real));
-          int top_left_x = static_cast<int>(floor(x_real));
-          DType top_left_y_w = 1.0 - (y_real - top_left_y);
-          DType top_left_x_w = 1.0 - (x_real - top_left_x);
-          int data_index = n * i_c * i_h * i_w + c * i_h * i_w +
-            top_left_y * i_w + top_left_x;
-          DType top_left_v = 0;
-          DType top_right_v = 0;
-          DType bottom_left_v = 0;
-          DType bottom_right_v = 0;
-          if (between(top_left_x, 0, i_w-1) && between(top_left_y, 0, i_h-1))
-            top_left_v = *(data + data_index);
-          if (between(top_left_x + 1, 0, i_w-1) && between(top_left_y, 0, i_h-1))
-            top_right_v = *(data + data_index + 1);
-          if (between(top_left_x, 0, i_w-1) && between(top_left_y + 1, 0, i_h-1))
-            bottom_left_v = *(data + data_index + i_w);
-          if (between(top_left_x+1, 0, i_w-1) && between(top_left_y + 1, 0, i_h-1))
-            bottom_right_v = *(data + data_index + i_w + 1);
-          *(out+out_index) = top_left_v * top_left_y_w * top_left_x_w +
-                              top_right_v * top_left_y_w * (1.0 - top_left_x_w) +
-                              bottom_left_v * (1.0 - top_left_y_w) * top_left_x_w +
-                              bottom_right_v * (1.0 - top_left_y_w) * (1.0 - top_left_x_w);
-        }
-      }
-    }
-  }
-}
-
-template<typename DType>
-inline void BilinearSamplerBackward(const Tensor<cpu, 4, DType> &gdata,
-                                     const Tensor<cpu, 4, DType> &ggrid,
-                                     const Tensor<cpu, 4, DType> &output_grad,
-                                     const Tensor<cpu, 4, DType> &input_data,
-                                     const Tensor<cpu, 4, DType> &grid) {
-  DType *g_input = gdata.dptr_;
-  DType *grad_grid = ggrid.dptr_;
-  const DType *grid_src = grid.dptr_;
-  const DType *grad = output_grad.dptr_;
-  const DType *data = input_data.dptr_;
-  int o_n = output_grad.size(0), o_c = output_grad.size(1),
-      o_h = output_grad.size(2), o_w = output_grad.size(3);
-  int i_c = input_data.size(1), i_h = input_data.size(2), i_w = input_data.size(3);
-  for (index_t n = 0; n < static_cast<index_t>(o_n); ++n) {
-     for (index_t h = 0; h < static_cast<index_t>(o_h); ++h) {
-        for (index_t w = 0; w < static_cast<index_t>(o_w); ++w) {
-          DType top_left_y_gw = 0.0;
-          DType top_left_x_gw = 0.0;
-          index_t grid_src_index = n * o_h * o_w * 2 + h * o_w + w;
-          DType y_real = (*(grid_src + grid_src_index + o_h * o_w) + 1) * (i_h - 1) / 2;
-          DType x_real = (*(grid_src + grid_src_index) + 1) * (i_w - 1) / 2;
-          int top_left_y = static_cast<int>(floor(y_real));
-          int top_left_x = static_cast<int>(floor(x_real));
-          DType top_left_y_w = 1.0 - (y_real - top_left_y);
-          DType top_left_x_w = 1.0 - (x_real - top_left_x);
-          for (index_t c = 0; c < static_cast<index_t>(o_c); ++c) {
-            index_t grad_index = n * o_c * o_h * o_w + c * o_h * o_w + h * o_w + w;
-            int data_index = n * i_c * i_h * i_w + c * i_h * i_w + top_left_y * i_w
-                                  + top_left_x;
-            // calc 4 vertex value in input data
-            DType top_left_v = 0;
-            DType top_right_v = 0;
-            DType bottom_left_v = 0;
-            DType bottom_right_v = 0;
-            // calc input grad
-            if (between(top_left_x, 0, i_w-1) && between(top_left_y, 0, i_h-1)) {
-              *(g_input + data_index) += *(grad + grad_index) * top_left_y_w * top_left_x_w;
-              top_left_v = *(data + data_index);
-            }
-            if (between(top_left_x+1, 0, i_w-1) && between(top_left_y, 0, i_h-1)) {
-              *(g_input + data_index + 1) += *(grad + grad_index) * top_left_y_w
-                                              * (1.0 - top_left_x_w);
-              top_right_v = *(data + data_index + 1);
-            }
-            if (between(top_left_x, 0, i_w-1) && between(top_left_y+1, 0, i_h-1)) {
-              *(g_input + data_index+ i_w) += *(grad + grad_index) * (1.0 - top_left_y_w)
-                                              * top_left_x_w;
-              bottom_left_v = *(data + data_index + i_w);
-            }
-            if (between(top_left_x+1, 0, i_w-1) && between(top_left_y+1, 0, i_h-1)) {
-              *(g_input + data_index+ i_w + 1) += *(grad + grad_index) * (1.0 - top_left_y_w)
-                                                  * (1.0 - top_left_x_w);
-              bottom_right_v = *(data + data_index + i_w + 1);
-            }
-            // calc weight grad of top_left_w, then multiple -1 is the grad of grid_src
-            top_left_y_gw -= *(grad + grad_index) * (top_right_v - bottom_right_v +
-                              (top_left_v - top_right_v - bottom_left_v + bottom_right_v)
-                              * top_left_x_w);
-            top_left_x_gw -= *(grad + grad_index) * (bottom_left_v - bottom_right_v +
-                              (top_left_v - top_right_v - bottom_left_v + bottom_right_v)
-                              * top_left_y_w);
-          }
-          // calc grad of grid
-          *(grad_grid + grid_src_index + o_h * o_w) += top_left_y_gw * (i_h - 1) / 2;
-          *(grad_grid + grid_src_index) += top_left_x_gw * (i_w - 1) / 2;
-        }
-      }
-    }
-  }
-}  // namespace mshadow
-
-namespace mxnet {
-namespace op {
-template<>
-Operator* CreateOp<cpu>(BilinearSamplerParam param, int dtype) {
-  Operator *op = NULL;
-  MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
-    op = new BilinearSamplerOp<cpu, DType>(param);
-  })
-  return op;
-}
-
-Operator *BilinearSamplerProp::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
-                                     std::vector<int> *in_type) const {
-  DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]);
-}
-
-DMLC_REGISTER_PARAMETER(BilinearSamplerParam);
-
-MXNET_REGISTER_OP_PROPERTY(BilinearSampler, BilinearSamplerProp)
-.add_argument("data", "NDArray-or-Symbol", "Input data to the BilinearsamplerOp.")
-.add_argument("grid", "NDArray-or-Symbol", "Input grid to the BilinearsamplerOp."
-                                "grid has two channels: x_src, y_src")
-.add_arguments(BilinearSamplerParam::__FIELDS__())
-.describe(R"code(Applies bilinear sampling to input feature map.
-
-Bilinear Sampling is the key of  [NIPS2015] \"Spatial Transformer Networks\". The usage of the operator is very similar to remap function in OpenCV, 
-except that the operator has the backward pass.
-
-Given :math:`data` and :math:`grid`, then the output is computed by 
-
-.. math::
-  x_{src} = grid[batch, 0, y_{dst}, x_{dst}] \\
-  y_{src} = grid[batch, 1, y_{dst}, x_{dst}] \\
-  output[batch, channel, y_{dst}, x_{dst}] = G(data[batch, channel, y_{src}, x_{src})
-
-:math:`x_{dst}`, :math:`y_{dst}` enumerate all spatial locations in :math:`output`, and :math:`G()` denotes the bilinear interpolation kernel.
-The out-boundary points will be padded with zeros.The shape of the output will be (data.shape[0], data.shape[1], grid.shape[2], grid.shape[3]). 
-
-The operator assumes that :math:`data` has 'NCHW' layout and :math:`grid` has been normalized to [-1, 1]. 
-
-BilinearSampler often cooperates with GridGenerator which generates sampling grids for BilinearSampler. 
-GridGenerator supports two kinds of transformation: ``affine`` and ``warp``.
-If users want to design a CustomOp to manipulate :math:`grid`, please firstly refer to the code of GridGenerator.
-
-Example 1::
-
-  ## Zoom out data two times
-  data = array([[[[1, 4, 3, 6],
-                  [1, 8, 8, 9],
-                  [0, 4, 1, 5],
-                  [1, 0, 1, 3]]]])
-  
-  affine_matrix = array([[2, 0, 0],
-                         [0, 2, 0]])
-
-  affine_matrix = reshape(affine_matrix, shape=(1, 6))
-
-  grid = GridGenerator(data=affine_matrix, transform_type='affine', target_shape=(4, 4))
-
-  out = BilinearSampler(data, grid)
-
-  out
-  [[[[ 0,   0,     0,   0],
-     [ 0,   3.5,   6.5, 0],
-     [ 0,   1.25,  2.5, 0],
-     [ 0,   0,     0,   0]]]
-
-
-Example 2::
-
-  ## shift data horizontally by -1 pixel
-
-  data = array([[[[1, 4, 3, 6],
-                  [1, 8, 8, 9],
-                  [0, 4, 1, 5],
-                  [1, 0, 1, 3]]]])
-
-  warp_maxtrix = array([[[[1, 1, 1, 1],
-                          [1, 1, 1, 1],
-                          [1, 1, 1, 1],
-                          [1, 1, 1, 1]],
-                         [[0, 0, 0, 0],
-                          [0, 0, 0, 0],
-                          [0, 0, 0, 0],
-                          [0, 0, 0, 0]]]])
-  
-  grid = GridGenerator(data=warp_matrix, transform_type='warp')
-  out = BilinearSampler(data, grid)
-
-  out
-  [[[[ 4,  3,  6,  0],
-     [ 8,  8,  9,  0],
-     [ 4,  1,  5,  0],
-     [ 0,  1,  3,  0]]]
-)code" ADD_FILELINE);
-}  // namespace op
-}  // namespace mxnet
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file bilinear_sampler.cc
+ * \brief
+ * \author Xu Dong
+*/
+
+#include "./bilinear_sampler-inl.h"
+
+namespace mshadow {
+template<typename DType>
+bool between(DType value, int lowerBound, int upperBound) {  // true if lower <= value <= upper
+  return (value >= lowerBound && value <= upperBound);
+}
+template<typename DType>  // CPU forward: samples input at grid_src positions into output
+inline void BilinearSamplerForward(const Tensor<cpu, 4, DType> &output,
+                                    const Tensor<cpu, 4, DType> &input,
+                                    const Tensor<cpu, 4, DType> &grid_src) {
+  DType *out = output.dptr_;  // every element is overwritten (plain assignment below)
+  const DType *data = input.dptr_;
+  const DType *grid = grid_src.dptr_;  // per sample: an x plane followed by a y plane
+  int o_n = output.size(0), o_c = output.size(1), o_h = output.size(2), o_w = output.size(3);
+  int i_c = input.size(1), i_h = input.size(2), i_w = input.size(3);
+  for (index_t n = 0; n < static_cast<index_t>(o_n); ++n) {
+    for (index_t c = 0; c < static_cast<index_t>(o_c); ++c) {
+      for (index_t h = 0; h < static_cast<index_t>(o_h); ++h) {
+        for (index_t w = 0; w < static_cast<index_t>(o_w); ++w) {
+          index_t out_index = n * o_c * o_h * o_w + c * o_h * o_w + h * o_w + w;
+          index_t grid_index = n * o_h * o_w * 2 + h * o_w + w;  // index into the x plane
+          DType y_real = (*(grid + grid_index + o_h * o_w) + 1) * (i_h - 1) / 2;
+          DType x_real = (*(grid + grid_index) + 1) * (i_w - 1) / 2;  // -1 -> 0, 1 -> i_w-1
+          int top_left_y = static_cast<int>(floor(y_real));
+          int top_left_x = static_cast<int>(floor(x_real));
+          DType top_left_y_w = 1.0 - (y_real - top_left_y);  // weight of the top row
+          DType top_left_x_w = 1.0 - (x_real - top_left_x);  // weight of the left column
+          int data_index = n * i_c * i_h * i_w + c * i_h * i_w +
+            top_left_y * i_w + top_left_x;  // offset of the top-left neighbour
+          DType top_left_v = 0;  // a neighbour outside the input keeps the value 0
+          DType top_right_v = 0;
+          DType bottom_left_v = 0;
+          DType bottom_right_v = 0;
+          if (between(top_left_x, 0, i_w-1) && between(top_left_y, 0, i_h-1))
+            top_left_v = *(data + data_index);
+          if (between(top_left_x + 1, 0, i_w-1) && between(top_left_y, 0, i_h-1))
+            top_right_v = *(data + data_index + 1);  // +1: next column
+          if (between(top_left_x, 0, i_w-1) && between(top_left_y + 1, 0, i_h-1))
+            bottom_left_v = *(data + data_index + i_w);  // +i_w: next row
+          if (between(top_left_x+1, 0, i_w-1) && between(top_left_y + 1, 0, i_h-1))
+            bottom_right_v = *(data + data_index + i_w + 1);
+          *(out+out_index) = top_left_v * top_left_y_w * top_left_x_w +
+                              top_right_v * top_left_y_w * (1.0 - top_left_x_w) +
+                              bottom_left_v * (1.0 - top_left_y_w) * top_left_x_w +
+                              bottom_right_v * (1.0 - top_left_y_w) * (1.0 - top_left_x_w);
+        }
+      }
+    }
+  }
+}
+
+template<typename DType>  // CPU backward: adds gradients into gdata (input) and ggrid (grid)
+inline void BilinearSamplerBackward(const Tensor<cpu, 4, DType> &gdata,
+                                     const Tensor<cpu, 4, DType> &ggrid,
+                                     const Tensor<cpu, 4, DType> &output_grad,
+                                     const Tensor<cpu, 4, DType> &input_data,
+                                     const Tensor<cpu, 4, DType> &grid) {
+  DType *g_input = gdata.dptr_;  // only updated with +=; never zeroed in this function
+  DType *grad_grid = ggrid.dptr_;  // likewise only updated with +=
+  const DType *grid_src = grid.dptr_;
+  const DType *grad = output_grad.dptr_;
+  const DType *data = input_data.dptr_;
+  int o_n = output_grad.size(0), o_c = output_grad.size(1),
+      o_h = output_grad.size(2), o_w = output_grad.size(3);
+  int i_c = input_data.size(1), i_h = input_data.size(2), i_w = input_data.size(3);
+  for (index_t n = 0; n < static_cast<index_t>(o_n); ++n) {
+     for (index_t h = 0; h < static_cast<index_t>(o_h); ++h) {
+        for (index_t w = 0; w < static_cast<index_t>(o_w); ++w) {
+          DType top_left_y_gw = 0.0;  // summed over channels by the c loop below
+          DType top_left_x_gw = 0.0;
+          index_t grid_src_index = n * o_h * o_w * 2 + h * o_w + w;  // index into the x plane
+          DType y_real = (*(grid_src + grid_src_index + o_h * o_w) + 1) * (i_h - 1) / 2;
+          DType x_real = (*(grid_src + grid_src_index) + 1) * (i_w - 1) / 2;
+          int top_left_y = static_cast<int>(floor(y_real));
+          int top_left_x = static_cast<int>(floor(x_real));
+          DType top_left_y_w = 1.0 - (y_real - top_left_y);  // weight of the top row
+          DType top_left_x_w = 1.0 - (x_real - top_left_x);  // weight of the left column
+          for (index_t c = 0; c < static_cast<index_t>(o_c); ++c) {
+            index_t grad_index = n * o_c * o_h * o_w + c * o_h * o_w + h * o_w + w;
+            int data_index = n * i_c * i_h * i_w + c * i_h * i_w + top_left_y * i_w
+                                  + top_left_x;
+            // values of the 4 neighbouring input pixels; one outside the input stays 0
+            DType top_left_v = 0;
+            DType top_right_v = 0;
+            DType bottom_left_v = 0;
+            DType bottom_right_v = 0;
+            // add each in-range neighbour's share of the output gradient to the input gradient
+            if (between(top_left_x, 0, i_w-1) && between(top_left_y, 0, i_h-1)) {
+              *(g_input + data_index) += *(grad + grad_index) * top_left_y_w * top_left_x_w;
+              top_left_v = *(data + data_index);
+            }
+            if (between(top_left_x+1, 0, i_w-1) && between(top_left_y, 0, i_h-1)) {
+              *(g_input + data_index + 1) += *(grad + grad_index) * top_left_y_w
+                                              * (1.0 - top_left_x_w);
+              top_right_v = *(data + data_index + 1);
+            }
+            if (between(top_left_x, 0, i_w-1) && between(top_left_y+1, 0, i_h-1)) {
+              *(g_input + data_index+ i_w) += *(grad + grad_index) * (1.0 - top_left_y_w)
+                                              * top_left_x_w;
+              bottom_left_v = *(data + data_index + i_w);
+            }
+            if (between(top_left_x+1, 0, i_w-1) && between(top_left_y+1, 0, i_h-1)) {
+              *(g_input + data_index+ i_w + 1) += *(grad + grad_index) * (1.0 - top_left_y_w)
+                                                  * (1.0 - top_left_x_w);
+              bottom_right_v = *(data + data_index + i_w + 1);
+            }
+            // gradient w.r.t. the top-left weights; the -= supplies the factor -1 (w = 1 - frac)
+            top_left_y_gw -= *(grad + grad_index) * (top_right_v - bottom_right_v +
+                              (top_left_v - top_right_v - bottom_left_v + bottom_right_v)
+                              * top_left_x_w);
+            top_left_x_gw -= *(grad + grad_index) * (bottom_left_v - bottom_right_v +
+                              (top_left_v - top_right_v - bottom_left_v + bottom_right_v)
+                              * top_left_y_w);
+          }  // for c
+          // scale by the slope of the grid -> pixel mapping and add to the grid gradient
+          *(grad_grid + grid_src_index + o_h * o_w) += top_left_y_gw * (i_h - 1) / 2;
+          *(grad_grid + grid_src_index) += top_left_x_gw * (i_w - 1) / 2;
+        }  // for w
+      }  // for h
+    }  // for n
+  }  // end of BilinearSamplerBackward (brace indentation above is off by one level)
+}  // namespace mshadow
+
+namespace mxnet {
+namespace op {
+template<>
+Operator* CreateOp<cpu>(BilinearSamplerParam param, int dtype) {  // CPU specialization
+  Operator *op = NULL;  // stays NULL unless the switch body below runs
+  MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
+    op = new BilinearSamplerOp<cpu, DType>(param);  // DType is bound by the switch macro
+  })
+  return op;
+}
+
+Operator *BilinearSamplerProp::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
+                                     std::vector<int> *in_type) const {  // in_shape is unused
+  DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]);  // dtype comes from the first input
+}
+
+DMLC_REGISTER_PARAMETER(BilinearSamplerParam);  // parameter struct used by the operator
+
+MXNET_REGISTER_OP_PROPERTY(BilinearSampler, BilinearSamplerProp)  // op name, property class
+.add_argument("data", "NDArray-or-Symbol", "Input data to the BilinearsamplerOp.")
+.add_argument("grid", "NDArray-or-Symbol", "Input grid to the BilinearsamplerOp. "
+                                "grid has two channels: x_src, y_src")  // joined to line above
+.add_arguments(BilinearSamplerParam::__FIELDS__())
+.describe(R"code(Applies bilinear sampling to input feature map.
+
+Bilinear Sampling is the key of  [NIPS2015] \"Spatial Transformer Networks\". The usage of the operator is very similar to remap function in OpenCV,
+except that the operator has the backward pass.
+
+Given :math:`data` and :math:`grid`, then the output is computed by
+
+.. math::
+  x_{src} = grid[batch, 0, y_{dst}, x_{dst}] \\
+  y_{src} = grid[batch, 1, y_{dst}, x_{dst}] \\
+  output[batch, channel, y_{dst}, x_{dst}] = G(data[batch, channel, y_{src}, x_{src}])
+
+:math:`x_{dst}`, :math:`y_{dst}` enumerate all spatial locations in :math:`output`, and :math:`G()` denotes the bilinear interpolation kernel.
+The out-boundary points will be padded with zeros. The shape of the output will be (data.shape[0], data.shape[1], grid.shape[2], grid.shape[3]).
+
+The operator assumes that :math:`data` has 'NCHW' layout and :math:`grid` has been normalized to [-1, 1].
+
+BilinearSampler often cooperates with GridGenerator which generates sampling grids for BilinearSampler.
+GridGenerator supports two kinds of transformation: ``affine`` and ``warp``.
+If users want to design a CustomOp to manipulate :math:`grid`, please firstly refer to the code of GridGenerator.
+
+Example 1::
+
+  ## Zoom out data two times
+  data = array([[[[1, 4, 3, 6],
+                  [1, 8, 8, 9],
+                  [0, 4, 1, 5],
+                  [1, 0, 1, 3]]]])
+
+  affine_matrix = array([[2, 0, 0],
+                         [0, 2, 0]])
+
+  affine_matrix = reshape(affine_matrix, shape=(1, 6))
+
+  grid = GridGenerator(data=affine_matrix, transform_type='affine', target_shape=(4, 4))
+
+  out = BilinearSampler(data, grid)
+
+  out
+  [[[[ 0,   0,     0,   0],
+     [ 0,   3.5,   6.5, 0],
+     [ 0,   1.25,  2.5, 0],
+     [ 0,   0,     0,   0]]]]
+
+
+Example 2::
+
+  ## shift data horizontally by -1 pixel
+
+  data = array([[[[1, 4, 3, 6],
+                  [1, 8, 8, 9],
+                  [0, 4, 1, 5],
+                  [1, 0, 1, 3]]]])
+
+  warp_matrix = array([[[[1, 1, 1, 1],
+                         [1, 1, 1, 1],
+                         [1, 1, 1, 1],
+                         [1, 1, 1, 1]],
+                        [[0, 0, 0, 0],
+                         [0, 0, 0, 0],
+                         [0, 0, 0, 0],
+                         [0, 0, 0, 0]]]])
+
+  grid = GridGenerator(data=warp_matrix, transform_type='warp')
+  out = BilinearSampler(data, grid)
+
+  out
+  [[[[ 4,  3,  6,  0],
+     [ 8,  8,  9,  0],
+     [ 4,  1,  5,  0],
+     [ 0,  1,  3,  0]]]]
+)code" ADD_FILELINE);
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/bilinear_sampler.cu b/src/operator/bilinear_sampler.cu
index dc394f1..14b5cd2 100644
--- a/src/operator/bilinear_sampler.cu
+++ b/src/operator/bilinear_sampler.cu
@@ -1,207 +1,225 @@
-/*!
- * Copyright (c) 2017 by Contributors
- * \file bilinear_sampler.cu
- * \brief
- * \author Xu Dong
-*/
-
-#include "./bilinear_sampler-inl.h"
-#include <algorithm>
-#include "../common/cuda_utils.h"
-#if MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 5
-#include "./cudnn_bilinear_sampler-inl.h"
-#endif  // MXNET_USE_CUDNN && CUDNN_MAJOR
-
-namespace mshadow {
-namespace cuda {
-template<typename DType>
-__device__ bool between(DType value, int lowerBound, int upperBound) {
-  return (value >= lowerBound && value <= upperBound);
-}
-template<typename DType>
-__global__ void BilinearSamplerForwardKernel(const int i_c, const int i_h,
-                                              const int i_w, const DType* data,
-                                              const DType* grid, const int o_n,
-                                              const int o_c, const int o_h,
-                                              const int o_w, DType* out) {
-  for (int index = (blockIdx.x + blockIdx.y * gridDim.x) * blockDim.x + threadIdx.x;
-       index < o_n * o_c * o_h * o_w;
-       index += blockDim.x * gridDim.x * gridDim.y) {
-    // (n, c, h, w) is the element in out
-    int w = index % o_w;
-    int h = (index / o_w) % o_h;
-    int c = (index / o_w / o_h) % o_c;
-    int n = index / o_w / o_h / o_c;
-    index_t out_index = n * o_c * o_h * o_w + c * o_h * o_w + h * o_w + w;
-    index_t grid_index = n * o_h * o_w * 2 + h * o_w + w;
-    DType y_real = (*(grid + grid_index + o_h * o_w) + 1) * (i_h - 1) / 2;
-    DType x_real = (*(grid + grid_index) + 1) * (i_w - 1) / 2;
-    int top_left_y = static_cast<int>(floor(y_real));
-    int top_left_x = static_cast<int>(floor(x_real));
-    DType top_left_y_w = 1.0 - (y_real - top_left_y);
-    DType top_left_x_w = 1.0 - (x_real - top_left_x);
-    int data_index = n * i_c * i_h * i_w + c * i_h * i_w + top_left_y * i_w + top_left_x;
-    DType top_left_v = 0;
-    DType top_right_v = 0;
-    DType bottom_left_v = 0;
-    DType bottom_right_v = 0;
-    if (between(top_left_x, 0, i_w-1) && between(top_left_y, 0, i_h-1))
-      top_left_v = *(data + data_index);
-    if (between(top_left_x + 1, 0, i_w-1) && between(top_left_y, 0, i_h-1))
-      top_right_v = *(data + data_index + 1);
-    if (between(top_left_x, 0, i_w-1) && between(top_left_y + 1, 0, i_h-1))
-      bottom_left_v = *(data + data_index + i_w);
-    if (between(top_left_x+1, 0, i_w-1) && between(top_left_y + 1, 0, i_h-1))
-      bottom_right_v = *(data + data_index + i_w + 1);
-    *(out+out_index) = top_left_v * top_left_y_w * top_left_x_w +
-                        top_right_v * top_left_y_w * (1.0 - top_left_x_w) +
-                        bottom_left_v * (1.0 - top_left_y_w) * top_left_x_w +
-                        bottom_right_v * (1.0 - top_left_y_w) * (1.0 - top_left_x_w);
-  }
-}
-
-template<typename DType>
-__global__ void BilinearSamplerBackwardKernel(const int i_c, const int i_h,
-                                              const int i_w, const DType* grad,
-                                              const DType* data, const int o_n,
-                                              const int o_c, const int o_h,
-                                              const int o_w, DType* g_input,
-                                              const DType* grid_src,
-                                              DType* grad_grid) {
-  for (int index = (blockIdx.x + blockIdx.y * gridDim.x) * blockDim.x + threadIdx.x;
-       index < o_n * o_h * o_w;
-       index += blockDim.x * gridDim.x * gridDim.y) {
-    // (n, c, h, w) is the element in grad
-    int w = index % o_w;
-    int h = (index / o_w) % o_h;
-    int n = index / o_w / o_h;
-    DType top_left_y_gw = 0.0;
-    DType top_left_x_gw = 0.0;
-    index_t grid_src_index = n * o_h * o_w * 2 + h * o_w + w;
-    DType y_real = (*(grid_src + grid_src_index + o_h * o_w) + 1) * (i_h - 1) / 2;
-    DType x_real = (*(grid_src + grid_src_index) + 1) * (i_w - 1) / 2;
-
-    int top_left_y = static_cast<int>(floor(y_real));
-    int top_left_x = static_cast<int>(floor(x_real));
-    DType top_left_y_w = 1.0 - (y_real - top_left_y);
-    DType top_left_x_w = 1.0 - (x_real - top_left_x);
-    for (index_t c = 0; c < o_c; ++c) {
-      index_t grad_index = n * o_c * o_h * o_w + c * o_h * o_w + h * o_w + w;
-      int data_index = n * i_c * i_h * i_w + c * i_h * i_w + top_left_y * i_w + top_left_x;
-      // calc 4 vertex value in input data
-      DType top_left_v = 0;
-      DType top_right_v = 0;
-      DType bottom_left_v = 0;
-      DType bottom_right_v = 0;
-      // calc input grad
-      if (between(top_left_x, 0, i_w-1) && between(top_left_y, 0, i_h-1)) {
-        atomicAdd(&g_input[data_index], *(grad + grad_index) * top_left_y_w * top_left_x_w);
-        top_left_v = *(data + data_index);
-      }
-      if (between(top_left_x+1, 0, i_w-1) && between(top_left_y, 0, i_h-1)) {
-        atomicAdd(&g_input[data_index + 1], *(grad + grad_index) * top_left_y_w
-                                        * (1.0 - top_left_x_w));
-        top_right_v = *(data + data_index + 1);
-      }
-      if (between(top_left_x, 0, i_w-1) && between(top_left_y+1, 0, i_h-1)) {
-        atomicAdd(&g_input[data_index+ i_w], *(grad + grad_index) * (1.0 - top_left_y_w)
-                                        * top_left_x_w);
-        bottom_left_v = *(data + data_index + i_w);
-      }
-      if (between(top_left_x+1, 0, i_w-1) && between(top_left_y+1, 0, i_h-1)) {
-        atomicAdd(&g_input[data_index+ i_w + 1], *(grad + grad_index) * (1.0 - top_left_y_w)
-                                            * (1.0 - top_left_x_w));
-        bottom_right_v = *(data + data_index + i_w + 1);
-      }
-      // calc weight grad of top_left_w, then multiple -1 is the grad of grid_src
-      top_left_y_gw -= *(grad + grad_index) * (top_right_v - bottom_right_v +
-                        (top_left_v - top_right_v - bottom_left_v + bottom_right_v)
-                        * top_left_x_w);
-      top_left_x_gw -= *(grad + grad_index) * (bottom_left_v - bottom_right_v +
-                        (top_left_v - top_right_v - bottom_left_v + bottom_right_v)
-                        * top_left_y_w);
-    }
-    // calc grad of grid
-    *(grad_grid + grid_src_index + o_h * o_w) += top_left_y_gw * (i_h - 1) / 2;
-    *(grad_grid + grid_src_index) += top_left_x_gw * (i_w - 1) / 2;
-  }
-}
-}  // namespace cuda
-
-template<typename DType>
-inline void BilinearSamplerForward(const Tensor<gpu, 4, DType> &output,
-                                    const Tensor<gpu, 4, DType> &input,
-                                    const Tensor<gpu, 4, DType> &grid_src) {
-    DType *out = output.dptr_;
-    const DType *data = input.dptr_;
-    const DType *grid = grid_src.dptr_;
-    int o_n = output.size(0), o_c = output.size(1), o_h = output.size(2), o_w = output.size(3);
-    int i_c = input.size(1), i_h = input.size(2), i_w = input.size(3);
-    using namespace cuda;
-    const int max_block = (output.shape_.Size() + kMaxThreadsPerBlock - 1) / kMaxThreadsPerBlock;
-    const int grid_dim_x = (max_block > kMaxGridDim) ? kMaxGridDim : max_block;
-    const int grid_dim_y =
-      (max_block > kMaxGridDim) ? (max_block + kMaxGridDim - 1) / kMaxGridDim : 1;
-    dim3 num_blocks(grid_dim_x, grid_dim_y);
-    dim3 threads_per_block(kMaxThreadsPerBlock);
-    CheckLaunchParam(num_blocks, threads_per_block, "bilinear sampler forward");
-    cudaStream_t stream = Stream<gpu>::GetStream(output.stream_);
-    cuda::BilinearSamplerForwardKernel<DType> << <num_blocks, threads_per_block, 0, stream >> >(
-      i_c, i_h, i_w, data, grid, o_n, o_c, o_h, o_w, out);
-    // post kernel check
-    cudaError err = cudaPeekAtLastError();
-    CHECK_EQ(err, cudaSuccess) << cudaGetErrorString(err);
-}
-
-template<typename DType>
-inline void BilinearSamplerBackward(const Tensor<gpu, 4, DType> &input_grad,
-                                     const Tensor<gpu, 4, DType> &ggrid,
-                                     const Tensor<gpu, 4, DType> &output_grad,
-                                     const Tensor<gpu, 4, DType> &input_data,
-                                     const Tensor<gpu, 4, DType> &grid) {
-  DType *g_input = input_grad.dptr_;
-  DType *grad_grid = ggrid.dptr_;
-  const DType *grid_src = grid.dptr_;
-  const DType *grad = output_grad.dptr_;
-  const DType *data = input_data.dptr_;
-  int o_n = output_grad.size(0), o_c = output_grad.size(1),
-      o_h = output_grad.size(2), o_w = output_grad.size(3);
-  int i_c = input_data.size(1), i_h = input_data.size(2), i_w = input_data.size(3);
-  using namespace cuda;
-  const int max_block = (output_grad.shape_.Size() / o_c + kMaxThreadsPerBlock - 1)
-                        / kMaxThreadsPerBlock;
-  const int grid_dim_x = (max_block > kMaxGridDim) ? kMaxGridDim : max_block;
-  const int grid_dim_y =
-    (max_block > kMaxGridDim) ? (max_block + kMaxGridDim - 1) / kMaxGridDim : 1;
-  dim3 num_blocks(grid_dim_x, grid_dim_y);
-  dim3 threads_per_block(kMaxThreadsPerBlock);
-  CheckLaunchParam(num_blocks, threads_per_block, "bilinear sampler backward");
-  cudaStream_t stream = Stream<gpu>::GetStream(input_grad.stream_);
-  cuda::BilinearSamplerBackwardKernel<DType> << <num_blocks, threads_per_block, 0, stream >> >(
-    i_c, i_h, i_w, grad, data, o_n, o_c, o_h, o_w, g_input, grid_src, grad_grid);
-  // post kernel check
-  cudaError err = cudaPeekAtLastError();
-  CHECK_EQ(err, cudaSuccess) << cudaGetErrorString(err);
-}
-
-}  // namespace mshadow
-
-namespace mxnet {
-namespace op {
-template<>
-Operator* CreateOp<gpu>(BilinearSamplerParam param, int dtype) {
-  Operator *op = NULL;
-#if MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 5
-  MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
-    op = new CuDNNBilinearSamplerOp<DType>(param);
-  })
-#else
-  MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
-    op = new BilinearSamplerOp<gpu, DType>(param);
-  })
-#endif  // MXNET_USE_CUDNN && CUDNN_MAJOR
-  return op;
-}
-
-}  // namespace op
-}  // namespace mxnet
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file bilinear_sampler.cu
+ * \brief
+ * \author Xu Dong
+*/
+
+#include "./bilinear_sampler-inl.h"
+#include <algorithm>
+#include "../common/cuda_utils.h"
+#if MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 5
+#include "./cudnn_bilinear_sampler-inl.h"
+#endif  // MXNET_USE_CUDNN && CUDNN_MAJOR
+
+namespace mshadow {
+namespace cuda {
+template<typename DType>
+__device__ bool between(DType value, int lowerBound, int upperBound) {
+  return (value >= lowerBound && value <= upperBound);
+}
+template<typename DType>
+__global__ void BilinearSamplerForwardKernel(const int i_c, const int i_h,
+                                              const int i_w, const DType* data,
+                                              const DType* grid, const int o_n,
+                                              const int o_c, const int o_h,
+                                              const int o_w, DType* out) {
+  for (int index = (blockIdx.x + blockIdx.y * gridDim.x) * blockDim.x + threadIdx.x;
+       index < o_n * o_c * o_h * o_w;
+       index += blockDim.x * gridDim.x * gridDim.y) {
+    // (n, c, h, w) is the element in out
+    int w = index % o_w;
+    int h = (index / o_w) % o_h;
+    int c = (index / o_w / o_h) % o_c;
+    int n = index / o_w / o_h / o_c;
+    index_t out_index = n * o_c * o_h * o_w + c * o_h * o_w + h * o_w + w;
+    index_t grid_index = n * o_h * o_w * 2 + h * o_w + w;
+    DType y_real = (*(grid + grid_index + o_h * o_w) + 1) * (i_h - 1) / 2;
+    DType x_real = (*(grid + grid_index) + 1) * (i_w - 1) / 2;
+    int top_left_y = static_cast<int>(floor(y_real));
+    int top_left_x = static_cast<int>(floor(x_real));
+    DType top_left_y_w = 1.0 - (y_real - top_left_y);
+    DType top_left_x_w = 1.0 - (x_real - top_left_x);
+    int data_index = n * i_c * i_h * i_w + c * i_h * i_w + top_left_y * i_w + top_left_x;
+    DType top_left_v = 0;
+    DType top_right_v = 0;
+    DType bottom_left_v = 0;
+    DType bottom_right_v = 0;
+    if (between(top_left_x, 0, i_w-1) && between(top_left_y, 0, i_h-1))
+      top_left_v = *(data + data_index);
+    if (between(top_left_x + 1, 0, i_w-1) && between(top_left_y, 0, i_h-1))
+      top_right_v = *(data + data_index + 1);
+    if (between(top_left_x, 0, i_w-1) && between(top_left_y + 1, 0, i_h-1))
+      bottom_left_v = *(data + data_index + i_w);
+    if (between(top_left_x+1, 0, i_w-1) && between(top_left_y + 1, 0, i_h-1))
+      bottom_right_v = *(data + data_index + i_w + 1);
+    *(out+out_index) = top_left_v * top_left_y_w * top_left_x_w +
+                        top_right_v * top_left_y_w * (1.0 - top_left_x_w) +
+                        bottom_left_v * (1.0 - top_left_y_w) * top_left_x_w +
+                        bottom_right_v * (1.0 - top_left_y_w) * (1.0 - top_left_x_w);
+  }
+}
+
+template<typename DType>
+__global__ void BilinearSamplerBackwardKernel(const int i_c, const int i_h,
+                                              const int i_w, const DType* grad,
+                                              const DType* data, const int o_n,
+                                              const int o_c, const int o_h,
+                                              const int o_w, DType* g_input,
+                                              const DType* grid_src,
+                                              DType* grad_grid) {
+  for (int index = (blockIdx.x + blockIdx.y * gridDim.x) * blockDim.x + threadIdx.x;
+       index < o_n * o_h * o_w;
+       index += blockDim.x * gridDim.x * gridDim.y) {
+    // (n, h, w) is the spatial position in grad; channels are looped over below
+    int w = index % o_w;
+    int h = (index / o_w) % o_h;
+    int n = index / o_w / o_h;
+    DType top_left_y_gw = 0.0;
+    DType top_left_x_gw = 0.0;
+    index_t grid_src_index = n * o_h * o_w * 2 + h * o_w + w;
+    DType y_real = (*(grid_src + grid_src_index + o_h * o_w) + 1) * (i_h - 1) / 2;
+    DType x_real = (*(grid_src + grid_src_index) + 1) * (i_w - 1) / 2;
+
+    int top_left_y = static_cast<int>(floor(y_real));
+    int top_left_x = static_cast<int>(floor(x_real));
+    DType top_left_y_w = 1.0 - (y_real - top_left_y);
+    DType top_left_x_w = 1.0 - (x_real - top_left_x);
+    for (index_t c = 0; c < o_c; ++c) {
+      index_t grad_index = n * o_c * o_h * o_w + c * o_h * o_w + h * o_w + w;
+      int data_index = n * i_c * i_h * i_w + c * i_h * i_w + top_left_y * i_w + top_left_x;
+      // values of the 4 neighboring vertices in the input data
+      DType top_left_v = 0;
+      DType top_right_v = 0;
+      DType bottom_left_v = 0;
+      DType bottom_right_v = 0;
+      // calc input grad
+      if (between(top_left_x, 0, i_w-1) && between(top_left_y, 0, i_h-1)) {
+        atomicAdd(&g_input[data_index], *(grad + grad_index) * top_left_y_w * top_left_x_w);
+        top_left_v = *(data + data_index);
+      }
+      if (between(top_left_x+1, 0, i_w-1) && between(top_left_y, 0, i_h-1)) {
+        atomicAdd(&g_input[data_index + 1], *(grad + grad_index) * top_left_y_w
+                                        * (1.0 - top_left_x_w));
+        top_right_v = *(data + data_index + 1);
+      }
+      if (between(top_left_x, 0, i_w-1) && between(top_left_y+1, 0, i_h-1)) {
+        atomicAdd(&g_input[data_index+ i_w], *(grad + grad_index) * (1.0 - top_left_y_w)
+                                        * top_left_x_w);
+        bottom_left_v = *(data + data_index + i_w);
+      }
+      if (between(top_left_x+1, 0, i_w-1) && between(top_left_y+1, 0, i_h-1)) {
+        atomicAdd(&g_input[data_index+ i_w + 1], *(grad + grad_index) * (1.0 - top_left_y_w)
+                                            * (1.0 - top_left_x_w));
+        bottom_right_v = *(data + data_index + i_w + 1);
+      }
+      // calc weight grad of top_left_w; multiplying it by -1 gives the grad of grid_src
+      top_left_y_gw -= *(grad + grad_index) * (top_right_v - bottom_right_v +
+                        (top_left_v - top_right_v - bottom_left_v + bottom_right_v)
+                        * top_left_x_w);
+      top_left_x_gw -= *(grad + grad_index) * (bottom_left_v - bottom_right_v +
+                        (top_left_v - top_right_v - bottom_left_v + bottom_right_v)
+                        * top_left_y_w);
+    }
+    // calc grad of grid
+    *(grad_grid + grid_src_index + o_h * o_w) += top_left_y_gw * (i_h - 1) / 2;
+    *(grad_grid + grid_src_index) += top_left_x_gw * (i_w - 1) / 2;
+  }
+}
+}  // namespace cuda
+
+template<typename DType>
+inline void BilinearSamplerForward(const Tensor<gpu, 4, DType> &output,
+                                    const Tensor<gpu, 4, DType> &input,
+                                    const Tensor<gpu, 4, DType> &grid_src) {
+    DType *out = output.dptr_;
+    const DType *data = input.dptr_;
+    const DType *grid = grid_src.dptr_;
+    int o_n = output.size(0), o_c = output.size(1), o_h = output.size(2), o_w = output.size(3);
+    int i_c = input.size(1), i_h = input.size(2), i_w = input.size(3);
+    using namespace cuda;
+    const int max_block = (output.shape_.Size() + kMaxThreadsPerBlock - 1) / kMaxThreadsPerBlock;
+    const int grid_dim_x = (max_block > kMaxGridDim) ? kMaxGridDim : max_block;
+    const int grid_dim_y =
+      (max_block > kMaxGridDim) ? (max_block + kMaxGridDim - 1) / kMaxGridDim : 1;
+    dim3 num_blocks(grid_dim_x, grid_dim_y);
+    dim3 threads_per_block(kMaxThreadsPerBlock);
+    CheckLaunchParam(num_blocks, threads_per_block, "bilinear sampler forward");
+    cudaStream_t stream = Stream<gpu>::GetStream(output.stream_);
+    cuda::BilinearSamplerForwardKernel<DType> << <num_blocks, threads_per_block, 0, stream >> >(
+      i_c, i_h, i_w, data, grid, o_n, o_c, o_h, o_w, out);
+    // post kernel check
+    cudaError err = cudaPeekAtLastError();
+    CHECK_EQ(err, cudaSuccess) << cudaGetErrorString(err);
+}
+
+template<typename DType>
+inline void BilinearSamplerBackward(const Tensor<gpu, 4, DType> &input_grad,
+                                     const Tensor<gpu, 4, DType> &ggrid,
+                                     const Tensor<gpu, 4, DType> &output_grad,
+                                     const Tensor<gpu, 4, DType> &input_data,
+                                     const Tensor<gpu, 4, DType> &grid) {
+  DType *g_input = input_grad.dptr_;
+  DType *grad_grid = ggrid.dptr_;
+  const DType *grid_src = grid.dptr_;
+  const DType *grad = output_grad.dptr_;
+  const DType *data = input_data.dptr_;
+  int o_n = output_grad.size(0), o_c = output_grad.size(1),
+      o_h = output_grad.size(2), o_w = output_grad.size(3);
+  int i_c = input_data.size(1), i_h = input_data.size(2), i_w = input_data.size(3);
+  using namespace cuda;
+  const int max_block = (output_grad.shape_.Size() / o_c + kMaxThreadsPerBlock - 1)
+                        / kMaxThreadsPerBlock;
+  const int grid_dim_x = (max_block > kMaxGridDim) ? kMaxGridDim : max_block;
+  const int grid_dim_y =
+    (max_block > kMaxGridDim) ? (max_block + kMaxGridDim - 1) / kMaxGridDim : 1;
+  dim3 num_blocks(grid_dim_x, grid_dim_y);
+  dim3 threads_per_block(kMaxThreadsPerBlock);
+  CheckLaunchParam(num_blocks, threads_per_block, "bilinear sampler backward");
+  cudaStream_t stream = Stream<gpu>::GetStream(input_grad.stream_);
+  cuda::BilinearSamplerBackwardKernel<DType> << <num_blocks, threads_per_block, 0, stream >> >(
+    i_c, i_h, i_w, grad, data, o_n, o_c, o_h, o_w, g_input, grid_src, grad_grid);
+  // post kernel check
+  cudaError err = cudaPeekAtLastError();
+  CHECK_EQ(err, cudaSuccess) << cudaGetErrorString(err);
+}
+
+}  // namespace mshadow
+
+namespace mxnet {
+namespace op {
+template<>
+Operator* CreateOp<gpu>(BilinearSamplerParam param, int dtype) {
+  Operator *op = NULL;
+#if MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 5
+  MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
+    op = new CuDNNBilinearSamplerOp<DType>(param);
+  })
+#else
+  MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
+    op = new BilinearSamplerOp<gpu, DType>(param);
+  })
+#endif  // MXNET_USE_CUDNN && CUDNN_MAJOR
+  return op;
+}
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/channel_op_common.h b/src/operator/channel_op_common.h
index 9ae6a66..113da9b 100644
--- a/src/operator/channel_op_common.h
+++ b/src/operator/channel_op_common.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file channel_op_common.h
  * \brief common function used for concat and split channel
  * \author Bing Xu
diff --git a/src/operator/concat-inl.h b/src/operator/concat-inl.h
index 09b0c4b..ed553c8 100644
--- a/src/operator/concat-inl.h
+++ b/src/operator/concat-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file concat-inl.h
  * \brief
  * \author Bing Xu
diff --git a/src/operator/concat.cc b/src/operator/concat.cc
index 49fa03e..1bee4b4 100644
--- a/src/operator/concat.cc
+++ b/src/operator/concat.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file concat.cc
  * \brief
  * \author Bing Xu
diff --git a/src/operator/concat.cu b/src/operator/concat.cu
index a410e19..06828fc 100644
--- a/src/operator/concat.cu
+++ b/src/operator/concat.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file concat.cu
  * \brief
  * \author Bing Xu
diff --git a/src/operator/contrib/count_sketch-inl.h b/src/operator/contrib/count_sketch-inl.h
index 566327e..5df0096 100644
--- a/src/operator/contrib/count_sketch-inl.h
+++ b/src/operator/contrib/count_sketch-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file count_sketch-inl.h
  * \brief count_sketch operator and symbol
  * \author Chen Zhu
diff --git a/src/operator/contrib/count_sketch.cc b/src/operator/contrib/count_sketch.cc
index cf89c97..6aba8f4 100644
--- a/src/operator/contrib/count_sketch.cc
+++ b/src/operator/contrib/count_sketch.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file count_sketch.cc
  * \brief count_sketch op
  * \author Chen Zhu
@@ -30,7 +48,7 @@ MXNET_REGISTER_OP_PROPERTY(_contrib_count_sketch, CountSketchProp)
 
 Assume input data has shape (N, d), sign hash table s has shape (N, d),
 index hash table h has shape (N, d) and mapping dimension out_dim = k,
-each element in s is either +1 or -1, each element in h is random integer from 0 to k-1. 
+each element in s is either +1 or -1, each element in h is random integer from 0 to k-1.
 Then the operator computs:
 
 .. math::
diff --git a/src/operator/contrib/count_sketch.cu b/src/operator/contrib/count_sketch.cu
index 7cf13e8..0f3d295 100644
--- a/src/operator/contrib/count_sketch.cu
+++ b/src/operator/contrib/count_sketch.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file count_sketch.cu
  * \brief count_sketch op
  * \author Chen Zhu, Yang Shi
diff --git a/src/operator/contrib/ctc_loss-inl.h b/src/operator/contrib/ctc_loss-inl.h
index 8431f65..0d0c0bf 100644
--- a/src/operator/contrib/ctc_loss-inl.h
+++ b/src/operator/contrib/ctc_loss-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file ctc_loss-inl.h
  * \brief
  * \author Sebastian Bodenstien
diff --git a/src/operator/contrib/ctc_loss.cc b/src/operator/contrib/ctc_loss.cc
index c3f3fe1..3727cee 100644
--- a/src/operator/contrib/ctc_loss.cc
+++ b/src/operator/contrib/ctc_loss.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file ctc_loss.cc
  * \brief
  * \author Sebastian Bodenstein
@@ -58,24 +76,24 @@ The shapes of the inputs and outputs:
 - **label**: *(batch_size, label_sequence_length)*
 - **out**: *(batch_size)*.
 
-``label`` is a tensor of integers between 1 and *alphabet_size*. If a 
-sequence of labels is shorter than *label_sequence_length*, use the special 
-padding character 0 at the end of the sequence to conform it to the correct 
-length. For example, if *label_sequence_length* = 4, and one has two sequences 
-of labels [2, 1] and [3, 2, 2], the resulting ```label``` tensor should be 
+``label`` is a tensor of integers between 1 and *alphabet_size*. If a
+sequence of labels is shorter than *label_sequence_length*, use the special
+padding character 0 at the end of the sequence to conform it to the correct
+length. For example, if *label_sequence_length* = 4, and one has two sequences
+of labels [2, 1] and [3, 2, 2], the resulting ```label``` tensor should be
 padded to be::
 
   [[2, 1, 0, 0], [3, 2, 2, 0]]
 
-The ``data`` tensor consists of sequences of activation vectors. The layer 
-applies a softmax to each vector, which then becomes a vector of probabilities 
-over the alphabet. Note that the 0th element of this vector is reserved for the 
+The ``data`` tensor consists of sequences of activation vectors. The layer
+applies a softmax to each vector, which then becomes a vector of probabilities
+over the alphabet. Note that the 0th element of this vector is reserved for the
 special blank character.
 
 ``out`` is a list of CTC loss values, one per example in the batch.
 
-See *Connectionist Temporal Classification: Labelling Unsegmented 
-Sequence Data with Recurrent Neural Networks*, A. Graves *et al*. for more 
+See *Connectionist Temporal Classification: Labelling Unsegmented
+Sequence Data with Recurrent Neural Networks*, A. Graves *et al*. for more
 information.
 
 )code" ADD_FILELINE)
diff --git a/src/operator/contrib/ctc_loss.cu b/src/operator/contrib/ctc_loss.cu
index ed80eb7..4bdef75 100644
--- a/src/operator/contrib/ctc_loss.cu
+++ b/src/operator/contrib/ctc_loss.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file ctc_loss.cu
  * \brief
  * \author Sebastian Bodenstein
diff --git a/src/operator/contrib/deformable_convolution-inl.h b/src/operator/contrib/deformable_convolution-inl.h
index d8397cf..a8dc6b8 100644
--- a/src/operator/contrib/deformable_convolution-inl.h
+++ b/src/operator/contrib/deformable_convolution-inl.h
@@ -1,3 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
  * Copyright (c) 2017 Microsoft
  * Licensed under The Apache-2.0 License [see LICENSE for details]
diff --git a/src/operator/contrib/deformable_convolution.cc b/src/operator/contrib/deformable_convolution.cc
index 5af91a0..352baa1 100644
--- a/src/operator/contrib/deformable_convolution.cc
+++ b/src/operator/contrib/deformable_convolution.cc
@@ -1,3 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
  * Copyright (c) 2017 Microsoft
  * Licensed under The Apache-2.0 License [see LICENSE for details]
@@ -60,7 +79,7 @@ then we have::
 If ``no_bias`` is set to be true, then the ``bias`` term is ignored.
 
 The default data ``layout`` is *NCHW*, namely *(batch_size, channle, height,
-width)*. 
+width)*.
 
 If ``num_group`` is larger than 1, denoted by *g*, then split the input ``data``
 evenly into *g* parts along the channel axis, and also evenly split ``weight``
diff --git a/src/operator/contrib/deformable_convolution.cu b/src/operator/contrib/deformable_convolution.cu
index f690cc1..f2200a9 100644
--- a/src/operator/contrib/deformable_convolution.cu
+++ b/src/operator/contrib/deformable_convolution.cu
@@ -1,3 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
  * Copyright (c) 2017 Microsoft
  * Licensed under The Apache-2.0 License [see LICENSE for details]
diff --git a/src/operator/contrib/deformable_psroi_pooling-inl.h b/src/operator/contrib/deformable_psroi_pooling-inl.h
index 16a98f7..d391f04 100644
--- a/src/operator/contrib/deformable_psroi_pooling-inl.h
+++ b/src/operator/contrib/deformable_psroi_pooling-inl.h
@@ -1,3 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
 * Copyright (c) 2017 Microsoft
 * Licensed under The Apache-2.0 License [see LICENSE for details]
diff --git a/src/operator/contrib/deformable_psroi_pooling.cc b/src/operator/contrib/deformable_psroi_pooling.cc
index 290bad2..93bb64d 100644
--- a/src/operator/contrib/deformable_psroi_pooling.cc
+++ b/src/operator/contrib/deformable_psroi_pooling.cc
@@ -1,8 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
  * Copyright (c) 2017 Microsoft
  * Licensed under The Apache-2.0 License [see LICENSE for details]
  * \file deformable_psroi_pooling.cc
- * \brief 
+ * \brief
  * \author Yi Li, Guodong Zhang, Jifeng Dai
 */
 #include "./deformable_psroi_pooling-inl.h"
diff --git a/src/operator/contrib/deformable_psroi_pooling.cu b/src/operator/contrib/deformable_psroi_pooling.cu
index f9eb01a..71bbd4c 100644
--- a/src/operator/contrib/deformable_psroi_pooling.cu
+++ b/src/operator/contrib/deformable_psroi_pooling.cu
@@ -1,3 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
  * Copyright (c) 2017 Microsoft
  * Licensed under The Apache-2.0 License [see LICENSE for details]
diff --git a/src/operator/contrib/dequantize-inl.h b/src/operator/contrib/dequantize-inl.h
index ecd0cb4..61940c0 100644
--- a/src/operator/contrib/dequantize-inl.h
+++ b/src/operator/contrib/dequantize-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2017 by Contributors
  * \file dequantize-inl.h
  * \brief Implementation of dequantize operation
  */
diff --git a/src/operator/contrib/dequantize.cc b/src/operator/contrib/dequantize.cc
index 46e36fa..422a955 100644
--- a/src/operator/contrib/dequantize.cc
+++ b/src/operator/contrib/dequantize.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2017 by Contributors
  * \file dequantize.cc
  * \brief
  */
diff --git a/src/operator/contrib/dequantize.cu b/src/operator/contrib/dequantize.cu
index be09b79..7081c27 100644
--- a/src/operator/contrib/dequantize.cu
+++ b/src/operator/contrib/dequantize.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2017 by Contributors
  * \file dequantize.cu
  * \brief
  */
diff --git a/src/operator/contrib/fft-inl.h b/src/operator/contrib/fft-inl.h
index 5996322..5092f58 100644
--- a/src/operator/contrib/fft-inl.h
+++ b/src/operator/contrib/fft-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file fft-inl.h
  * \brief
  * \author Chen Zhu
diff --git a/src/operator/contrib/fft.cc b/src/operator/contrib/fft.cc
index e2094b3..11f8425 100644
--- a/src/operator/contrib/fft.cc
+++ b/src/operator/contrib/fft.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file fft-inl.h
  * \brief
  * \author Chen Zhu
diff --git a/src/operator/contrib/fft.cu b/src/operator/contrib/fft.cu
index 5dbd00c..3017ce7 100644
--- a/src/operator/contrib/fft.cu
+++ b/src/operator/contrib/fft.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file fft-inl.h
  * \brief
  * \author Chen Zhu
diff --git a/src/operator/contrib/ifft-inl.h b/src/operator/contrib/ifft-inl.h
index 98b601e..abd5bb2 100644
--- a/src/operator/contrib/ifft-inl.h
+++ b/src/operator/contrib/ifft-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file Ifft-inl.h
  * \brief
  * \author Chen Zhu
diff --git a/src/operator/contrib/ifft.cc b/src/operator/contrib/ifft.cc
index b2afd46..0ea3a7e 100644
--- a/src/operator/contrib/ifft.cc
+++ b/src/operator/contrib/ifft.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file Ifft-inl.h
  * \brief
  * \author Chen Zhu
diff --git a/src/operator/contrib/ifft.cu b/src/operator/contrib/ifft.cu
index 93ec1e6..79795d8 100644
--- a/src/operator/contrib/ifft.cu
+++ b/src/operator/contrib/ifft.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file Ifft-inl.h
  * \brief
  * \author Chen Zhu
diff --git a/src/operator/contrib/krprod.h b/src/operator/contrib/krprod.h
index a713f1e..6ce94c6 100644
--- a/src/operator/contrib/krprod.h
+++ b/src/operator/contrib/krprod.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2017 by Contributors
  *  \file krprod.h
  *  \brief Core function for Khatri-Rao product
  *  \author Jencir Lee
diff --git a/src/operator/contrib/multi_proposal-inl.h b/src/operator/contrib/multi_proposal-inl.h
index 48f3535..7cd465e 100644
--- a/src/operator/contrib/multi_proposal-inl.h
+++ b/src/operator/contrib/multi_proposal-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * Copyright (c) 2017 Microsoft
  * Licensed under The Apache-2.0 License [see LICENSE for details]
  * \file multi_proposal-inl.h
diff --git a/src/operator/contrib/multi_proposal.cc b/src/operator/contrib/multi_proposal.cc
index c8f75ea..cd00e87 100644
--- a/src/operator/contrib/multi_proposal.cc
+++ b/src/operator/contrib/multi_proposal.cc
@@ -1,3 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
  * Copyright (c) 2017 Microsoft
  * Licensed under The Apache-2.0 License [see LICENSE for details]
diff --git a/src/operator/contrib/multi_proposal.cu b/src/operator/contrib/multi_proposal.cu
index 052d777..cb99963 100644
--- a/src/operator/contrib/multi_proposal.cu
+++ b/src/operator/contrib/multi_proposal.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * Copyright (c) 2017 Microsoft
  * Licensed under The Apache-2.0 License [see LICENSE for details]
  * \file multi_proposal.cu
diff --git a/src/operator/contrib/multibox_detection-inl.h b/src/operator/contrib/multibox_detection-inl.h
index 3507281..34099a3 100644
--- a/src/operator/contrib/multibox_detection-inl.h
+++ b/src/operator/contrib/multibox_detection-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file multibox_detection-inl.h
  * \brief post-process multibox detection predictions
  * \author Joshua Zhang
diff --git a/src/operator/contrib/multibox_detection.cc b/src/operator/contrib/multibox_detection.cc
index 2bf49f3..0f69828 100644
--- a/src/operator/contrib/multibox_detection.cc
+++ b/src/operator/contrib/multibox_detection.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file multibox_detection.cc
  * \brief MultiBoxDetection op
  * \author Joshua Zhang
diff --git a/src/operator/contrib/multibox_detection.cu b/src/operator/contrib/multibox_detection.cu
index dab11ff..56a1e88 100644
--- a/src/operator/contrib/multibox_detection.cu
+++ b/src/operator/contrib/multibox_detection.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file multibox_detection.cu
  * \brief MultiBoxDetection op
  * \author Joshua Zhang
diff --git a/src/operator/contrib/multibox_prior-inl.h b/src/operator/contrib/multibox_prior-inl.h
index ee83fe4..88ca3dc 100644
--- a/src/operator/contrib/multibox_prior-inl.h
+++ b/src/operator/contrib/multibox_prior-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file multibox_prior-inl.h
  * \brief generate multibox prior boxes
  * \author Joshua Zhang
diff --git a/src/operator/contrib/multibox_prior.cc b/src/operator/contrib/multibox_prior.cc
index a9c747e..af77fda 100644
--- a/src/operator/contrib/multibox_prior.cc
+++ b/src/operator/contrib/multibox_prior.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file multibox_prior.cc
  * \brief generate multibox prior boxes cpu implementation
  * \author Joshua Zhang
diff --git a/src/operator/contrib/multibox_prior.cu b/src/operator/contrib/multibox_prior.cu
index a3f2cc2..b041b90 100644
--- a/src/operator/contrib/multibox_prior.cu
+++ b/src/operator/contrib/multibox_prior.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file multibox_prior.cu
  * \brief generate multibox prior boxes cuda kernels
  * \author Joshua Zhang
diff --git a/src/operator/contrib/multibox_target-inl.h b/src/operator/contrib/multibox_target-inl.h
index 7185c9a..f76df35 100644
--- a/src/operator/contrib/multibox_target-inl.h
+++ b/src/operator/contrib/multibox_target-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file multibox_target-inl.h
  * \brief
  * \author Joshua Zhang
diff --git a/src/operator/contrib/multibox_target.cc b/src/operator/contrib/multibox_target.cc
index 56c6cee..095613d 100644
--- a/src/operator/contrib/multibox_target.cc
+++ b/src/operator/contrib/multibox_target.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file multibox_target.cc
  * \brief MultiBoxTarget op
  * \author Joshua Zhang
diff --git a/src/operator/contrib/multibox_target.cu b/src/operator/contrib/multibox_target.cu
index adcfcf2..3d0da6c 100644
--- a/src/operator/contrib/multibox_target.cu
+++ b/src/operator/contrib/multibox_target.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file multibox_target.cu
  * \brief MultiBoxTarget op
  * \author Joshua Zhang
diff --git a/src/operator/contrib/nn/deformable_im2col.cuh b/src/operator/contrib/nn/deformable_im2col.cuh
index d9e7b97..0238921 100644
--- a/src/operator/contrib/nn/deformable_im2col.cuh
+++ b/src/operator/contrib/nn/deformable_im2col.cuh
@@ -1,34 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
  ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************
  *
  * COPYRIGHT
- * 
+ *
  * All contributions by the University of California:
  * Copyright (c) 2014-2017 The Regents of the University of California (Regents)
  * All rights reserved.
- * 
+ *
  * All other contributions:
  * Copyright (c) 2014-2017, the respective contributors
  * All rights reserved.
- * 
+ *
  * Caffe uses a shared copyright model: each contributor holds copyright over
  * their contributions to Caffe. The project versioning records all such
  * contribution and copyright details. If a contributor wants to further mark
  * their specific copyright on a particular contribution, they should indicate
  * their copyright solely in the commit message of the change when it is
  * committed.
- * 
+ *
  * LICENSE
- * 
+ *
  * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met: 
- * 
+ * modification, are permitted provided that the following conditions are met:
+ *
  * 1. Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer. 
+ * list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright notice,
  * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution. 
- * 
+ * and/or other materials provided with the distribution.
+ *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -39,9 +58,9 @@
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- * 
+ *
  * CONTRIBUTION AGREEMENT
- * 
+ *
  * By contributing to the BVLC/caffe repository through pull-request, comment,
  * or otherwise, the contributor releases their content to the
  * license and copyright terms herein.
@@ -75,7 +94,7 @@ namespace mxnet {
 namespace op {
 
 template <typename DType>
-__device__ DType deformable_im2col_bilinear(const DType* bottom_data, const int data_width, 
+__device__ DType deformable_im2col_bilinear(const DType* bottom_data, const int data_width,
   const int height, const int width, DType h, DType w) {
 
   int h_low = floor(h);
@@ -114,7 +133,7 @@ __device__ DType deformable_im2col_bilinear(const DType* bottom_data, const int
 
 
 template <typename DType>
-__device__ DType get_gradient_weight(DType argmax_h, DType argmax_w, 
+__device__ DType get_gradient_weight(DType argmax_h, DType argmax_w,
   const int h, const int w, const int height, const int width) {
 
   if (argmax_h < 0 || argmax_h > height || argmax_w < 0 || argmax_w > width) {
@@ -282,9 +301,9 @@ __global__ void deformable_im2col_gpu_kernel(const int n, const DType* data_im,
  */
 template <typename DType>
 inline void deformable_im2col(mshadow::Stream<gpu>* s,
-  const DType* data_im, const DType* data_offset, 
+  const DType* data_im, const DType* data_offset,
   const TShape& im_shape, const TShape& col_shape, const TShape& kernel_shape,
-  const TShape& pad, const TShape& stride, const TShape& dilation, 
+  const TShape& pad, const TShape& stride, const TShape& dilation,
   const uint32_t deformable_group, DType* data_col) {
   // num_axes should be smaller than block size
   index_t num_spatial_axes = kernel_shape.ndim();
@@ -416,7 +435,7 @@ inline void deformable_col2im(mshadow::Stream<gpu>* s,
  * \brief DO NOT call this directly. Use wrapper function deformable_col2im_coord() instead;
  */
 template <typename DType>
-__global__ void deformable_col2im_coord_gpu_kernel(const int n, const DType* data_col, 
+__global__ void deformable_col2im_coord_gpu_kernel(const int n, const DType* data_col,
   const DType* data_im, const DType* data_offset,
   const int channels, const int height, const int width,
   const int kernel_h, const int kernel_w,
diff --git a/src/operator/contrib/nn/deformable_im2col.h b/src/operator/contrib/nn/deformable_im2col.h
index 9d61800..b477acb 100644
--- a/src/operator/contrib/nn/deformable_im2col.h
+++ b/src/operator/contrib/nn/deformable_im2col.h
@@ -1,34 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
  ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************
  *
  * COPYRIGHT
- * 
+ *
  * All contributions by the University of California:
  * Copyright (c) 2014-2017 The Regents of the University of California (Regents)
  * All rights reserved.
- * 
+ *
  * All other contributions:
  * Copyright (c) 2014-2017, the respective contributors
  * All rights reserved.
- * 
+ *
  * Caffe uses a shared copyright model: each contributor holds copyright over
  * their contributions to Caffe. The project versioning records all such
  * contribution and copyright details. If a contributor wants to further mark
  * their specific copyright on a particular contribution, they should indicate
  * their copyright solely in the commit message of the change when it is
  * committed.
- * 
+ *
  * LICENSE
- * 
+ *
  * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met: 
- * 
+ * modification, are permitted provided that the following conditions are met:
+ *
  * 1. Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer. 
+ * list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright notice,
  * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution. 
- * 
+ * and/or other materials provided with the distribution.
+ *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -39,9 +58,9 @@
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- * 
+ *
  * CONTRIBUTION AGREEMENT
- * 
+ *
  * By contributing to the BVLC/caffe repository through pull-request, comment,
  * or otherwise, the contributor releases their content to the
  * license and copyright terms herein.
@@ -70,7 +89,7 @@
 namespace mxnet {
 namespace op {
 
-/*!\brief 
+/*!\brief
  * cpu function of deformable_im2col algorithm
  * \param s device stream
  * \param data_im pointer of an image (C, H, W, ...) in the image batch
diff --git a/src/operator/contrib/proposal-inl.h b/src/operator/contrib/proposal-inl.h
index 686a8a3..3d1851c 100644
--- a/src/operator/contrib/proposal-inl.h
+++ b/src/operator/contrib/proposal-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file proposal-inl.h
  * \brief Proposal Operator
  * \author Piotr Teterwak, Bing Xu, Jian Guo
diff --git a/src/operator/contrib/proposal.cc b/src/operator/contrib/proposal.cc
index fe4fe98..ec53900 100644
--- a/src/operator/contrib/proposal.cc
+++ b/src/operator/contrib/proposal.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file proposal.cc
  * \brief
  * \author Piotr Teterwak, Bing Xu, Jian Guo
diff --git a/src/operator/contrib/proposal.cu b/src/operator/contrib/proposal.cu
index ce1e9e5..209ef79 100644
--- a/src/operator/contrib/proposal.cu
+++ b/src/operator/contrib/proposal.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file proposal.cu
  * \brief Proposal Operator
  * \author Shaoqing Ren, Jian Guo
diff --git a/src/operator/contrib/psroi_pooling-inl.h b/src/operator/contrib/psroi_pooling-inl.h
index 3a3a9c3..b492972 100644
--- a/src/operator/contrib/psroi_pooling-inl.h
+++ b/src/operator/contrib/psroi_pooling-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * Copyright (c) 2017 Microsoft
  * Licensed under The Apache-2.0 License [see LICENSE for details]
  * \file psroi_pooling-inl.h
diff --git a/src/operator/contrib/psroi_pooling.cc b/src/operator/contrib/psroi_pooling.cc
index ad25aec..dd3a9e0 100644
--- a/src/operator/contrib/psroi_pooling.cc
+++ b/src/operator/contrib/psroi_pooling.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * Copyright (c) 2017 Microsoft
  * Licensed under The Apache-2.0 License [see LICENSE for details]
  * \file psroi_pooling.cc
diff --git a/src/operator/contrib/psroi_pooling.cu b/src/operator/contrib/psroi_pooling.cu
index 962c874..6df64a1 100644
--- a/src/operator/contrib/psroi_pooling.cu
+++ b/src/operator/contrib/psroi_pooling.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * Copyright (c) 2017 Microsoft
  * Licensed under The Apache-2.0 License [see LICENSE for details]
  * \file psroi_pooling.cu
diff --git a/src/operator/contrib/quantize-inl.h b/src/operator/contrib/quantize-inl.h
index e005762..1274a7d 100644
--- a/src/operator/contrib/quantize-inl.h
+++ b/src/operator/contrib/quantize-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2017 by Contributors
  * \file quantize-inl.h
  * \brief implementation of quantize operation
  */
diff --git a/src/operator/contrib/quantize.cc b/src/operator/contrib/quantize.cc
index 86f35e1..dbb8985 100644
--- a/src/operator/contrib/quantize.cc
+++ b/src/operator/contrib/quantize.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2017 by Contributors
  * \file quantize.cc
  * \brief
  */
diff --git a/src/operator/contrib/quantize.cu b/src/operator/contrib/quantize.cu
index c6d9035..6c9db9a 100644
--- a/src/operator/contrib/quantize.cu
+++ b/src/operator/contrib/quantize.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2017 by Contributors
  * \file quantize.cu
  * \brief
  */
diff --git a/src/operator/convolution-inl.h b/src/operator/convolution-inl.h
index a97d53b..0a2522c 100644
--- a/src/operator/convolution-inl.h
+++ b/src/operator/convolution-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file convolution-inl.h
  * \brief
  * \ref: https://github.com/Yangqing/caffe/wiki/Convolution-in-Caffe:-a-memo
diff --git a/src/operator/convolution.cc b/src/operator/convolution.cc
index fd604d9..35ab5f0 100644
--- a/src/operator/convolution.cc
+++ b/src/operator/convolution.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file convolution.cc
  * \brief
  * \author Bing Xu, Jun Wu
diff --git a/src/operator/convolution.cu b/src/operator/convolution.cu
index 34ae42f..bf5f305 100644
--- a/src/operator/convolution.cu
+++ b/src/operator/convolution.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file convolution.cu
  * \brief
  * \author Bing Xu, Jun Wu
diff --git a/src/operator/convolution_v1-inl.h b/src/operator/convolution_v1-inl.h
index ee8c8c0..f39d8e0 100644
--- a/src/operator/convolution_v1-inl.h
+++ b/src/operator/convolution_v1-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file convolution_v1-inl.h
  * \brief
  * \author Bing Xu
diff --git a/src/operator/convolution_v1.cc b/src/operator/convolution_v1.cc
index a1d115f..cb47ed1 100644
--- a/src/operator/convolution_v1.cc
+++ b/src/operator/convolution_v1.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file convolution_v1.cc
  * \brief
  * \author Bing Xu
diff --git a/src/operator/convolution_v1.cu b/src/operator/convolution_v1.cu
index 83a0f1d..b20b4b2 100644
--- a/src/operator/convolution_v1.cu
+++ b/src/operator/convolution_v1.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file convolution_v1.cu
  * \brief
  * \author Bing Xu
diff --git a/src/operator/correlation-inl.h b/src/operator/correlation-inl.h
index 6ba209b..02507cb 100644
--- a/src/operator/correlation-inl.h
+++ b/src/operator/correlation-inl.h
@@ -1,236 +1,254 @@
-/*!
- * Copyright (c) 2015 by Contributors
- * \file correlation-inl.h
- * \brief correlation operator and symbol
- * \author Xu Dong
-*/
-#ifndef MXNET_OPERATOR_CORRELATION_INL_H_
-#define MXNET_OPERATOR_CORRELATION_INL_H_
-#include <dmlc/logging.h>
-#include <dmlc/parameter.h>
-#include <mxnet/operator.h>
-#include <map>
-#include <vector>
-#include <string>
-#include <utility>
-#include "./mshadow_op.h"
-#include "./operator_common.h"
-namespace mxnet {
-namespace op {
-//  Declare enumeration of input order to make code more intuitive.
-//  These enums are only visible within this header
-namespace Correlation {
-enum  CorrelationOpInputs{kData1, kData2};
-enum  CorrelationOpOutputs{kOut, kTemp1, kTemp2};
-}  //  namespace Correlation
-struct CorrelationParam : public dmlc::Parameter<CorrelationParam> {
-  uint32_t max_displacement;
-  uint32_t kernel_size;
-  uint32_t pad_size;
-  uint32_t stride1;
-  uint32_t stride2;
-  bool is_multiply;
-  DMLC_DECLARE_PARAMETER(CorrelationParam) {
-    DMLC_DECLARE_FIELD(kernel_size).set_default(1)
-    .describe("kernel size for Correlation must be an odd number");
-    DMLC_DECLARE_FIELD(max_displacement).set_default(1)
-    .describe("Max displacement of Correlation ");
-    DMLC_DECLARE_FIELD(stride1).set_default(1)
-    .describe("stride1 quantize data1 globally");
-    DMLC_DECLARE_FIELD(stride2).set_default(1)
-    .describe("stride2 quantize data2 within the neighborhood centered around data1");
-    DMLC_DECLARE_FIELD(pad_size).set_default(0)
-    .describe("pad for Correlation");
-    DMLC_DECLARE_FIELD(is_multiply).set_default(true)
-    .describe("operation type is either multiplication or subduction");
-  }
-};
-template<typename xpu>
-class CorrelationOp : public Operator {
- public:
-  explicit CorrelationOp(CorrelationParam param) {
-    this->param_ = param;
-  }
-  virtual void Forward(const OpContext &ctx,
-                       const std::vector<TBlob> &in_data,
-                       const std::vector<OpReqType> &req,
-                       const std::vector<TBlob> &out_data,
-                       const std::vector<TBlob> &aux_args) {
-    using namespace mshadow;
-    CHECK_EQ(in_data.size(), 2U);
-    CHECK_EQ(out_data.size(), 3U);
-    Stream<xpu> *s = ctx.get_stream<xpu>();
-    Tensor<xpu, 4> data1 = in_data[Correlation::kData1].get<xpu, 4, real_t>(s);
-    Tensor<xpu, 4> data2 = in_data[Correlation::kData2].get<xpu, 4, real_t>(s);
-    Tensor<xpu, 4> out   = out_data[Correlation::kOut].get<xpu, 4, real_t>(s);
-    Tensor<xpu, 4> tmp1  = out_data[Correlation::kTemp1].get<xpu, 4, real_t>(s);
-    Tensor<xpu, 4> tmp2  = out_data[Correlation::kTemp2].get<xpu, 4, real_t>(s);
-    tmp1 = 0.0f;
-    tmp2 = 0.0f;
-    out = 0.0f;
-    CHECK_EQ(data1.CheckContiguous(), true);
-    CHECK_EQ(data2.CheckContiguous(), true);
-    CHECK_EQ(out.CheckContiguous(), true);
-    CHECK_EQ(tmp1.CheckContiguous(), true);
-    CHECK_EQ(tmp2.CheckContiguous(), true);
-    paddedbottomheight = data1.shape_[2] + 2 * param_.pad_size;
-    paddedbottomwidth  = data1.shape_[3] + 2 * param_.pad_size;
-    kernel_radius_ = (param_.kernel_size - 1) / 2;
-    border_size_ = param_.max_displacement + kernel_radius_;
-    stride1 = param_.stride1;
-    stride2 = param_.stride2;
-    top_width_ = ceil(static_cast<float>(paddedbottomwidth - border_size_ * 2)\
-     / static_cast<float>(stride1));
-    top_height_ = ceil(static_cast<float>(paddedbottomheight - border_size_ * 2)\
-     / static_cast<float>(stride1));
-    neighborhood_grid_radius_ = param_.max_displacement / stride2;
-    neighborhood_grid_width_ = neighborhood_grid_radius_ * 2 + 1;
-    top_channels_ = neighborhood_grid_width_ * neighborhood_grid_width_;
-    num =  data1.shape_[0];
-    channels = data1.shape_[1];
-    height = data1.shape_[2];
-    width = data1.shape_[3];
-    CorrelationForward(out, data1, data2, tmp1, tmp2, top_channels_, top_height_, top_width_,
-                       param_.pad_size, param_.is_multiply,
-                       param_.max_displacement, param_.kernel_size,
-                       neighborhood_grid_radius_, neighborhood_grid_width_,
-                       kernel_radius_, param_.stride1, param_.stride2);
-  }
-  virtual void Backward(const OpContext &ctx,
-                        const std::vector<TBlob> &out_grad,
-                        const std::vector<TBlob> &in_data,
-                        const std::vector<TBlob> &out_data,
-                        const std::vector<OpReqType> &req,
-                        const std::vector<TBlob> &in_grad,
-                        const std::vector<TBlob> &aux_args) {
-    using namespace mshadow;
-    Stream<xpu> *s = ctx.get_stream<xpu>();
-    Tensor<xpu, 4> grad_data1 = in_grad[Correlation::kData1].get<xpu, 4, real_t>(s);
-    Tensor<xpu, 4> grad_data2 = in_grad[Correlation::kData2].get<xpu, 4, real_t>(s);
-    Tensor<xpu, 4> out_g = out_grad[Correlation::kOut].get<xpu, 4, real_t>(s);
-    Tensor<xpu, 4> tmp1 = out_data[Correlation::kTemp1].get<xpu, 4, real_t>(s);
-    Tensor<xpu, 4> tmp2 = out_data[Correlation::kTemp2].get<xpu, 4, real_t>(s);
-    if (req[0] != kAddTo) grad_data1 = 0.0f;
-    if (req[1] != kAddTo) grad_data2 = 0.0f;
-    CHECK_EQ(grad_data1.CheckContiguous(), true);
-    CHECK_EQ(grad_data2.CheckContiguous(), true);
-    CHECK_EQ(out_g.CheckContiguous(), true);
-    CHECK_EQ(tmp1.CheckContiguous(), true);
-    CHECK_EQ(tmp2.CheckContiguous(), true);
-    CorrelationBackward(out_g, grad_data1, grad_data2, tmp1, tmp2, top_channels_,
-                        top_height_, top_width_, param_.pad_size, param_.is_multiply,
-                        param_.max_displacement, param_.kernel_size, neighborhood_grid_radius_,
-                        neighborhood_grid_width_, kernel_radius_, param_.stride1, param_.stride2,
-                        num, channels, height, width);
-  }
-
- private:
-    CorrelationParam param_;
-    int paddedbottomheight;
-    int paddedbottomwidth;
-    uint32_t kernel_radius_;
-    uint32_t border_size_;
-    uint32_t stride1;
-    uint32_t stride2;
-    uint32_t top_width_;
-    uint32_t top_height_;
-    uint32_t neighborhood_grid_radius_;
-    uint32_t neighborhood_grid_width_;
-    uint32_t top_channels_;
-    int  num;
-    int  channels;
-    int  height;
-    int  width;
-};   //  class CorrelationOp
-//  Decalre Factory function
-template<typename xpu>
-Operator* CreateOp(CorrelationParam param);
-#if DMLC_USE_CXX11
-class CorrelationProp : public OperatorProperty {
- public:
-  std::vector<std::string> ListArguments() const override {
-    return {"data1", "data2"};
-  }
-  std::vector<std::string> ListOutputs() const override {
-    return {"output", "tmp1", "tmp2"};
-  }
-  int NumOutputs() const override {
-    return 3;
-  }
-  int NumVisibleOutputs() const override {
-    return 1;
-  }
-void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) override {
-    param_.Init(kwargs);
-  }
-  std::map<std::string, std::string> GetParams() const override {
-    return param_.__DICT__();
-  }
-  bool InferShape(std::vector<TShape> *in_shape,
-                  std::vector<TShape> *out_shape,
-                  std::vector<TShape> *aux_shape) const override {
-    using namespace mshadow;
-    CHECK_EQ(in_shape->size(), 2U) << "Input:[data1, data2]";
-    TShape dshape1 = in_shape->at(Correlation::kData1);
-    TShape dshape2 = in_shape->at(Correlation::kData2);
-    CHECK_EQ(dshape1.ndim(), 4U) << "data should be a 4D tensor";
-    CHECK_EQ(dshape2.ndim(), 4U) << "data should be a 4D tensor";
-    int paddedbottomheight;
-    int paddedbottomwidth;
-    uint32_t kernel_radius_;
-    uint32_t stride1;
-    uint32_t stride2;
-    uint32_t top_width_;
-    uint32_t top_height_;
-    uint32_t neighborhood_grid_radius_;
-    uint32_t neighborhood_grid_width_;
-    uint32_t top_channels_;
-    uint32_t border_size_;
-    paddedbottomheight = dshape1[2] + 2*param_.pad_size;
-    paddedbottomwidth  = dshape1[3] + 2*param_.pad_size;
-    kernel_radius_ = (param_.kernel_size -1)/2;
-    border_size_ = param_.max_displacement + kernel_radius_;
-    stride1 = param_.stride1;
-    stride2 = param_.stride2;
-    top_width_ = ceil(static_cast<float>(paddedbottomwidth - border_size_ * 2)\
-     / static_cast<float>(stride1));
-    top_height_ = ceil(static_cast<float>(paddedbottomheight - border_size_ * 2)\
-     / static_cast<float>(stride1));
-    neighborhood_grid_radius_ = param_.max_displacement / stride2;
-    neighborhood_grid_width_ = neighborhood_grid_radius_ * 2 + 1;
-    top_channels_ = neighborhood_grid_width_ * neighborhood_grid_width_;
-    CHECK_GE(top_width_, 1U) <<
-    "Correlation cannot be done with current settings.Neighborhood and kernel don't fit in blob";
-    CHECK_GE(top_height_, 1U) <<
-    "Correlation cannot be done with current settings.Neighborhood and kernel don't fit in blob";
-    out_shape->clear();
-    out_shape->push_back(Shape4(dshape1[0], top_channels_, top_height_, top_width_));
-    out_shape->push_back(Shape4(dshape1[0], paddedbottomheight, paddedbottomwidth, dshape1[1]));
-    out_shape->push_back(Shape4(dshape1[0], paddedbottomheight, paddedbottomwidth, dshape1[1]));
-    return true;
-  }
-  OperatorProperty* Copy() const override {
-    CorrelationProp* Correlation_sym = new CorrelationProp();
-    Correlation_sym->param_ = this->param_;
-    return Correlation_sym;
-  }
-  std::string TypeString() const override {
-    return "Correlation";
-  }
-  //  decalre dependency and inplace optimization options
-  std::vector<int> DeclareBackwardDependency(
-    const std::vector<int> &out_grad,
-    const std::vector<int> &in_data,
-    const std::vector<int> &out_data) const override {
-     return {out_grad[Correlation::kOut],
-     out_data[Correlation::kTemp1], out_data[Correlation::kTemp2]};
-}
-  Operator* CreateOperator(Context ctx) const override;
-
- private:
-  CorrelationParam param_;
-};  //  class CorrelationProp
-#endif
-}  //  namespace op
-}  //  namespace mxnet
-#endif  //  MXNET_OPERATOR_CORRELATION_INL_H_
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file correlation-inl.h
+ * \brief correlation operator and symbol
+ * \author Xu Dong
+*/
+#ifndef MXNET_OPERATOR_CORRELATION_INL_H_
+#define MXNET_OPERATOR_CORRELATION_INL_H_
+#include <dmlc/logging.h>
+#include <dmlc/parameter.h>
+#include <mxnet/operator.h>
+#include <map>
+#include <vector>
+#include <string>
+#include <utility>
+#include "./mshadow_op.h"
+#include "./operator_common.h"
+namespace mxnet {
+namespace op {
+//  Declare enumeration of input order to make code more intuitive.
+//  These enums are only visible within this header
+namespace Correlation {
+enum  CorrelationOpInputs{kData1, kData2};
+enum  CorrelationOpOutputs{kOut, kTemp1, kTemp2};
+}  //  namespace Correlation
+struct CorrelationParam : public dmlc::Parameter<CorrelationParam> {
+  uint32_t max_displacement;
+  uint32_t kernel_size;
+  uint32_t pad_size;
+  uint32_t stride1;
+  uint32_t stride2;
+  bool is_multiply;
+  DMLC_DECLARE_PARAMETER(CorrelationParam) {
+    DMLC_DECLARE_FIELD(kernel_size).set_default(1)
+    .describe("kernel size for Correlation must be an odd number");
+    DMLC_DECLARE_FIELD(max_displacement).set_default(1)
+    .describe("Max displacement of Correlation ");
+    DMLC_DECLARE_FIELD(stride1).set_default(1)
+    .describe("stride1 quantize data1 globally");
+    DMLC_DECLARE_FIELD(stride2).set_default(1)
+    .describe("stride2 quantize data2 within the neighborhood centered around data1");
+    DMLC_DECLARE_FIELD(pad_size).set_default(0)
+    .describe("pad for Correlation");
+    DMLC_DECLARE_FIELD(is_multiply).set_default(true)
+    .describe("operation type is either multiplication or subduction");
+  }
+};
+template<typename xpu>
+class CorrelationOp : public Operator {
+ public:
+  explicit CorrelationOp(CorrelationParam param) {
+    this->param_ = param;
+  }
+  virtual void Forward(const OpContext &ctx,
+                       const std::vector<TBlob> &in_data,
+                       const std::vector<OpReqType> &req,
+                       const std::vector<TBlob> &out_data,
+                       const std::vector<TBlob> &aux_args) {
+    using namespace mshadow;
+    CHECK_EQ(in_data.size(), 2U);
+    CHECK_EQ(out_data.size(), 3U);
+    Stream<xpu> *s = ctx.get_stream<xpu>();
+    Tensor<xpu, 4> data1 = in_data[Correlation::kData1].get<xpu, 4, real_t>(s);
+    Tensor<xpu, 4> data2 = in_data[Correlation::kData2].get<xpu, 4, real_t>(s);
+    Tensor<xpu, 4> out   = out_data[Correlation::kOut].get<xpu, 4, real_t>(s);
+    Tensor<xpu, 4> tmp1  = out_data[Correlation::kTemp1].get<xpu, 4, real_t>(s);
+    Tensor<xpu, 4> tmp2  = out_data[Correlation::kTemp2].get<xpu, 4, real_t>(s);
+    tmp1 = 0.0f;
+    tmp2 = 0.0f;
+    out = 0.0f;
+    CHECK_EQ(data1.CheckContiguous(), true);
+    CHECK_EQ(data2.CheckContiguous(), true);
+    CHECK_EQ(out.CheckContiguous(), true);
+    CHECK_EQ(tmp1.CheckContiguous(), true);
+    CHECK_EQ(tmp2.CheckContiguous(), true);
+    paddedbottomheight = data1.shape_[2] + 2 * param_.pad_size;
+    paddedbottomwidth  = data1.shape_[3] + 2 * param_.pad_size;
+    kernel_radius_ = (param_.kernel_size - 1) / 2;
+    border_size_ = param_.max_displacement + kernel_radius_;
+    stride1 = param_.stride1;
+    stride2 = param_.stride2;
+    top_width_ = ceil(static_cast<float>(paddedbottomwidth - border_size_ * 2)\
+     / static_cast<float>(stride1));
+    top_height_ = ceil(static_cast<float>(paddedbottomheight - border_size_ * 2)\
+     / static_cast<float>(stride1));
+    neighborhood_grid_radius_ = param_.max_displacement / stride2;
+    neighborhood_grid_width_ = neighborhood_grid_radius_ * 2 + 1;
+    top_channels_ = neighborhood_grid_width_ * neighborhood_grid_width_;
+    num =  data1.shape_[0];
+    channels = data1.shape_[1];
+    height = data1.shape_[2];
+    width = data1.shape_[3];
+    CorrelationForward(out, data1, data2, tmp1, tmp2, top_channels_, top_height_, top_width_,
+                       param_.pad_size, param_.is_multiply,
+                       param_.max_displacement, param_.kernel_size,
+                       neighborhood_grid_radius_, neighborhood_grid_width_,
+                       kernel_radius_, param_.stride1, param_.stride2);
+  }
+  virtual void Backward(const OpContext &ctx,
+                        const std::vector<TBlob> &out_grad,
+                        const std::vector<TBlob> &in_data,
+                        const std::vector<TBlob> &out_data,
+                        const std::vector<OpReqType> &req,
+                        const std::vector<TBlob> &in_grad,
+                        const std::vector<TBlob> &aux_args) {
+    using namespace mshadow;
+    Stream<xpu> *s = ctx.get_stream<xpu>();
+    Tensor<xpu, 4> grad_data1 = in_grad[Correlation::kData1].get<xpu, 4, real_t>(s);
+    Tensor<xpu, 4> grad_data2 = in_grad[Correlation::kData2].get<xpu, 4, real_t>(s);
+    Tensor<xpu, 4> out_g = out_grad[Correlation::kOut].get<xpu, 4, real_t>(s);
+    Tensor<xpu, 4> tmp1 = out_data[Correlation::kTemp1].get<xpu, 4, real_t>(s);
+    Tensor<xpu, 4> tmp2 = out_data[Correlation::kTemp2].get<xpu, 4, real_t>(s);
+    if (req[0] != kAddTo) grad_data1 = 0.0f;
+    if (req[1] != kAddTo) grad_data2 = 0.0f;
+    CHECK_EQ(grad_data1.CheckContiguous(), true);
+    CHECK_EQ(grad_data2.CheckContiguous(), true);
+    CHECK_EQ(out_g.CheckContiguous(), true);
+    CHECK_EQ(tmp1.CheckContiguous(), true);
+    CHECK_EQ(tmp2.CheckContiguous(), true);
+    CorrelationBackward(out_g, grad_data1, grad_data2, tmp1, tmp2, top_channels_,
+                        top_height_, top_width_, param_.pad_size, param_.is_multiply,
+                        param_.max_displacement, param_.kernel_size, neighborhood_grid_radius_,
+                        neighborhood_grid_width_, kernel_radius_, param_.stride1, param_.stride2,
+                        num, channels, height, width);
+  }
+
+ private:
+    CorrelationParam param_;
+    int paddedbottomheight;
+    int paddedbottomwidth;
+    uint32_t kernel_radius_;
+    uint32_t border_size_;
+    uint32_t stride1;
+    uint32_t stride2;
+    uint32_t top_width_;
+    uint32_t top_height_;
+    uint32_t neighborhood_grid_radius_;
+    uint32_t neighborhood_grid_width_;
+    uint32_t top_channels_;
+    int  num;
+    int  channels;
+    int  height;
+    int  width;
+};   //  class CorrelationOp
+//  Decalre Factory function
+template<typename xpu>
+Operator* CreateOp(CorrelationParam param);
+#if DMLC_USE_CXX11
+class CorrelationProp : public OperatorProperty {
+ public:
+  std::vector<std::string> ListArguments() const override {
+    return {"data1", "data2"};
+  }
+  std::vector<std::string> ListOutputs() const override {
+    return {"output", "tmp1", "tmp2"};
+  }
+  int NumOutputs() const override {
+    return 3;
+  }
+  int NumVisibleOutputs() const override {
+    return 1;
+  }
+void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) override {
+    param_.Init(kwargs);
+  }
+  std::map<std::string, std::string> GetParams() const override {
+    return param_.__DICT__();
+  }
+  bool InferShape(std::vector<TShape> *in_shape,
+                  std::vector<TShape> *out_shape,
+                  std::vector<TShape> *aux_shape) const override {
+    using namespace mshadow;
+    CHECK_EQ(in_shape->size(), 2U) << "Input:[data1, data2]";
+    TShape dshape1 = in_shape->at(Correlation::kData1);
+    TShape dshape2 = in_shape->at(Correlation::kData2);
+    CHECK_EQ(dshape1.ndim(), 4U) << "data should be a 4D tensor";
+    CHECK_EQ(dshape2.ndim(), 4U) << "data should be a 4D tensor";
+    int paddedbottomheight;
+    int paddedbottomwidth;
+    uint32_t kernel_radius_;
+    uint32_t stride1;
+    uint32_t stride2;
+    uint32_t top_width_;
+    uint32_t top_height_;
+    uint32_t neighborhood_grid_radius_;
+    uint32_t neighborhood_grid_width_;
+    uint32_t top_channels_;
+    uint32_t border_size_;
+    paddedbottomheight = dshape1[2] + 2*param_.pad_size;
+    paddedbottomwidth  = dshape1[3] + 2*param_.pad_size;
+    kernel_radius_ = (param_.kernel_size -1)/2;
+    border_size_ = param_.max_displacement + kernel_radius_;
+    stride1 = param_.stride1;
+    stride2 = param_.stride2;
+    top_width_ = ceil(static_cast<float>(paddedbottomwidth - border_size_ * 2)\
+     / static_cast<float>(stride1));
+    top_height_ = ceil(static_cast<float>(paddedbottomheight - border_size_ * 2)\
+     / static_cast<float>(stride1));
+    neighborhood_grid_radius_ = param_.max_displacement / stride2;
+    neighborhood_grid_width_ = neighborhood_grid_radius_ * 2 + 1;
+    top_channels_ = neighborhood_grid_width_ * neighborhood_grid_width_;
+    CHECK_GE(top_width_, 1U) <<
+    "Correlation cannot be done with current settings.Neighborhood and kernel don't fit in blob";
+    CHECK_GE(top_height_, 1U) <<
+    "Correlation cannot be done with current settings.Neighborhood and kernel don't fit in blob";
+    out_shape->clear();
+    out_shape->push_back(Shape4(dshape1[0], top_channels_, top_height_, top_width_));
+    out_shape->push_back(Shape4(dshape1[0], paddedbottomheight, paddedbottomwidth, dshape1[1]));
+    out_shape->push_back(Shape4(dshape1[0], paddedbottomheight, paddedbottomwidth, dshape1[1]));
+    return true;
+  }
+  OperatorProperty* Copy() const override {
+    CorrelationProp* Correlation_sym = new CorrelationProp();
+    Correlation_sym->param_ = this->param_;
+    return Correlation_sym;
+  }
+  std::string TypeString() const override {
+    return "Correlation";
+  }
+  //  decalre dependency and inplace optimization options
+  std::vector<int> DeclareBackwardDependency(
+    const std::vector<int> &out_grad,
+    const std::vector<int> &in_data,
+    const std::vector<int> &out_data) const override {
+     return {out_grad[Correlation::kOut],
+     out_data[Correlation::kTemp1], out_data[Correlation::kTemp2]};
+}
+  Operator* CreateOperator(Context ctx) const override;
+
+ private:
+  CorrelationParam param_;
+};  //  class CorrelationProp
+#endif
+}  //  namespace op
+}  //  namespace mxnet
+#endif  //  MXNET_OPERATOR_CORRELATION_INL_H_
diff --git a/src/operator/correlation.cc b/src/operator/correlation.cc
index 18a3e1c..2522cd4 100644
--- a/src/operator/correlation.cc
+++ b/src/operator/correlation.cc
@@ -1,175 +1,193 @@
-/*!
- * Copyright (c) 2015 by Contributors
- * \file correlation.cc
- * \brief correlation op
- * \author Xu Dong
-*/
-#include "./correlation-inl.h"
-#include "./mshadow_op.h"
-
-namespace mshadow {
-template<typename Dtype>
-void AddPad(const Tensor<cpu, 4, Dtype> &original,
-            const Tensor<cpu, 4, Dtype> &out,
-            int pad_size)
-{ for (index_t nbatch = 0 ; nbatch < original.size(0) ; nbatch++)
-  for (index_t channel = 0 ; channel < original.size(1) ; channel++)
-    for (index_t h = 0 ; h < original.size(2) ; h++)
-      for (index_t w = 0 ; w < original.size(3) ; w++)
-         out[nbatch][h+pad_size][w+pad_size][channel] = original[nbatch][channel][h][w];
-}
-template<typename Dtype>
-inline void CorrelationForward(const Tensor<cpu, 4, Dtype> &out,
-                               const Tensor<cpu, 4, Dtype> &data1,
-                               const Tensor<cpu, 4, Dtype> &data2,
-                               const Tensor<cpu, 4, Dtype> &tmp1,
-                               const Tensor<cpu, 4, Dtype> &tmp2,
-                               int top_channels_, int top_height_, int top_width_,
-                               int pad_size_, bool is_multiply,
-                               int max_displacement_, int kernel_size_,
-                               int neighborhood_grid_radius_, int neighborhood_grid_width_,
-                               int  kernel_radius_, int stride1_, int stride2_) {
-  const index_t bnum = data1.size(0);
-  const int bchannels = data1.size(1);
-  const int sumelems = kernel_size_ * kernel_size_ * bchannels;
-  AddPad<Dtype>(data1, tmp1, pad_size_);
-  index_t top_channels_unsigned_ = static_cast<index_t>(top_channels_);
-  AddPad<Dtype>(data2, tmp2, pad_size_);
-  for (index_t i = 0 ; i < static_cast<index_t>(top_height_) ; i++)
-      for (index_t j = 0 ; j < static_cast<index_t>(top_width_); j++)
-        for (index_t nbatch = 0 ; nbatch < bnum ; nbatch++) {
-            int x1 = j*stride1_+max_displacement_;
-            int y1 = i*stride1_+max_displacement_;
-            for (index_t top_channel = 0 ; top_channel < top_channels_unsigned_ ; top_channel++) {
-              int s2o = (top_channel % neighborhood_grid_width_ -\
-                         neighborhood_grid_radius_) * stride2_;
-              int s2p = (top_channel / neighborhood_grid_width_ -\
-                         neighborhood_grid_radius_) * stride2_;
-              int x2 = x1 + s2o;
-              int y2 = y1 + s2p;
-              for (index_t h = 0; h < static_cast<index_t>(kernel_size_); h++)
-                for (index_t w = 0; w < static_cast<index_t>(kernel_size_); w++)
-                  for (index_t channel = 0; channel < static_cast<index_t>(bchannels); channel++) {
-                    if (is_multiply == true)
-                        out[nbatch][top_channel][i][j] += \
-                        tmp1[nbatch][y1+h][x1+w][channel]*tmp2[nbatch][y2+h][x2+w][channel];
-                    else
-                        out[nbatch][top_channel][i][j] += \
-                        fabsf(tmp1[nbatch][y1+h][x1+w][channel]-tmp2[nbatch][y2+h][x2+w][channel]);
-                  }
-              out[nbatch][top_channel][i][j] /= sumelems;
-            }
-        }
-}
-template<typename Dtype>
-inline void CorrelationBackward(const Tensor<cpu, 4, Dtype> &out_grad,
-                                const Tensor<cpu, 4, Dtype> &in_grad1,
-                                const Tensor<cpu, 4, Dtype> &in_grad2,
-                                const Tensor<cpu, 4, Dtype> &tmp1,
-                                const Tensor<cpu, 4, Dtype> &tmp2,
-                                int top_channels_, int top_height_,
-                                int top_width_, int pad_size_,
-                                bool is_multiply, int max_displacement_,
-                                int kernel_size_, int neighborhood_grid_radius_,
-                                int neighborhood_grid_width_,
-                                int  kernel_radius_, int stride1_,
-                                int stride2_, int num,
-                                int channels, int height, int width
-                            ) {
-  const float sumelems = kernel_size_ * kernel_size_ * channels;
-  for (index_t i = 0 ; i < static_cast<index_t>(top_height_) ; i++)
-     for (index_t j = 0 ; j < static_cast<index_t>(top_width_); j++)
-        for (index_t nbatch = 0 ; nbatch < static_cast<index_t>(num) ; nbatch++) {
-            int x1 = j*stride1_+max_displacement_;
-            int y1 = i*stride1_+max_displacement_;
-            for (int top_channel = 0 ; top_channel < top_channels_ ; top_channel++) {
-              int s2o = (top_channel % neighborhood_grid_width_ - \
-              neighborhood_grid_radius_) * stride2_;
-              int s2p = (top_channel / neighborhood_grid_width_ - \
-              neighborhood_grid_radius_) * stride2_;
-              int x2 = x1 + s2o;
-              int y2 = y1 + s2p;
-              for (int h = 0; h < kernel_size_; h++)
-                for (int w = 0; w < kernel_size_; w++)
-                  for (int channel = 0 ; channel < channels; channel++) {
-                    if (is_multiply == true) {
-                      if ((y1 +  h - pad_size_ >= 0) && (x1 + w - pad_size_ >= 0) && \
-                      (y1 + h < height +pad_size_) && (x1 + w < width + pad_size_)) {
-                        in_grad1[nbatch][channel][y1+h-pad_size_][x1+w-pad_size_] += \
-                        out_grad[nbatch][top_channel][i][j] * \
-                        tmp2[nbatch][y2+h][x2+w][channel]/sumelems;
-                       }
-                       if ((y2 +  h - pad_size_ >= 0) && (x2 + w -pad_size_ >=0) && \
-                       (y2 + h < height +pad_size_) && (x2 + w < width + pad_size_)) {
-                       in_grad2[nbatch][channel][y2+h-pad_size_][x2+w-pad_size_] += \
-                       out_grad[nbatch][top_channel][i][j] * \
-                       tmp1[nbatch][y1+h][x1+w][channel]/sumelems;
-                       }
-                    } else {
-                      if ((y1 +  h - pad_size_ >= 0) && (x1 + w -pad_size_ >=0) && \
-                      (y1 + h < height + pad_size_) && (x1 + w < width + pad_size_)) {
-                        Dtype sign  = (tmp1[nbatch][y1+h][x1+w][channel] >= \
-                        tmp2[nbatch][y2+h][x2+w][channel])? Dtype(1.0) : Dtype(-1.0);
-                        in_grad1[nbatch][channel][y1+h-pad_size_][x1+w-pad_size_] +=\
-                        out_grad[nbatch][top_channel][i][j]*sign/sumelems;
-                      }
-                      if ((y2 +  h - pad_size_ >= 0) && (x2 + w - pad_size_ >=0) && \
-                      (y2 + h < height + pad_size_) && (x2 + w < width + pad_size_)) {
-                        Dtype sign  = (tmp1[nbatch][y1+h][x1+w][channel] >= \
-                        tmp2[nbatch][y2+h][x2+w][channel])? Dtype(-1.0) : Dtype(1.0);
-                        in_grad2[nbatch][channel][y2+h-pad_size_][x2+w-pad_size_] +=\
-                        out_grad[nbatch][top_channel][i][j]*sign/sumelems;
-                       }
-                    }
-                  }
-               }
-         }
-}
-}  // namespace mshadow
-namespace mxnet {
-namespace op {
-template<>
-Operator *CreateOp<cpu>(CorrelationParam param) {
-  return new CorrelationOp<cpu>(param);
-}
-Operator* CorrelationProp::CreateOperator(Context ctx) const {
-  DO_BIND_DISPATCH(CreateOp, param_);
-}
-DMLC_REGISTER_PARAMETER(CorrelationParam);
-MXNET_REGISTER_OP_PROPERTY(Correlation, CorrelationProp)
-.add_argument("data1", "NDArray-or-Symbol", "Input data1 to the correlation.")
-.add_argument("data2", "NDArray-or-Symbol", "Input data2 to the correlation.")
-.add_arguments(CorrelationParam::__FIELDS__())
-.describe(R"code(Applies correlation to inputs.
-
-The correlation layer performs multiplicative patch comparisons between two feature maps.
-
-Given two multi-channel feature maps :math:`f_{1}, f_{2}`, with :math:`w`, :math:`h`, and :math:`c` being their width, height, and number of channels,
-the correlation layer lets the network compare each patch from :math:`f_{1}` with each patch from :math:`f_{2}`.
-
-For now we consider only a single comparison of two patches. The 'correlation' of two patches centered at :math:`x_{1}` in the first map and
-:math:`x_{2}` in the second map is then defined as:
-
-.. math::
-   c(x_{1}, x_{2}) = \sum_{o \in [-k,k] \times [-k,k]} <f_{1}(x_{1} + o), f_{2}(x_{2} + o)>
-
-for a square patch of size :math:`K:=2k+1`.
-
-Note that the equation above is identical to one step of a convolution in neural networks, but instead of convolving data with a filter, it convolves data with other
-data. For this reason, it has no training weights.
-
-Computing :math:`c(x_{1}, x_{2})` involves :math:`c * K^{2}` multiplications. Comparing all patch combinations involves :math:`w^{2}*h^{2}` such computations.
-
-Given a maximum displacement :math:`d`, for each location :math:`x_{1}` it computes correlations :math:`c(x_{1}, x_{2})` only in a neighborhood of size :math:`D:=2d+1`,
-by limiting the range of :math:`x_{2}`. We use strides :math:`s_{1}, s_{2}`, to quantize :math:`x_{1}` globally and to quantize :math:`x_{2}` within the neighborhood
-centered around :math:`x_{1}`.
-
-The final output is defined by the following expression:
-
-.. math::
-  out[n, q, i, j] = c(x_{i, j}, x_{q})
-
-where :math:`i` and :math:`j` enumerate spatial locations in :math:`f_{1}`, and :math:`q` denotes the :math:`q^{th}` neighborhood of :math:`x_{i,j}`.
-)code" ADD_FILELINE);
-}  // namespace op
-}  // namespace mxnet
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file correlation.cc
+ * \brief correlation op
+ * \author Xu Dong
+*/
+#include "./correlation-inl.h"
+#include "./mshadow_op.h"
+
+namespace mshadow {
+template<typename Dtype>
+void AddPad(const Tensor<cpu, 4, Dtype> &original,
+            const Tensor<cpu, 4, Dtype> &out,
+            int pad_size)
+{ for (index_t nbatch = 0 ; nbatch < original.size(0) ; nbatch++)
+  for (index_t channel = 0 ; channel < original.size(1) ; channel++)
+    for (index_t h = 0 ; h < original.size(2) ; h++)
+      for (index_t w = 0 ; w < original.size(3) ; w++)
+         out[nbatch][h+pad_size][w+pad_size][channel] = original[nbatch][channel][h][w];
+}
+template<typename Dtype>
+inline void CorrelationForward(const Tensor<cpu, 4, Dtype> &out,
+                               const Tensor<cpu, 4, Dtype> &data1,
+                               const Tensor<cpu, 4, Dtype> &data2,
+                               const Tensor<cpu, 4, Dtype> &tmp1,
+                               const Tensor<cpu, 4, Dtype> &tmp2,
+                               int top_channels_, int top_height_, int top_width_,
+                               int pad_size_, bool is_multiply,
+                               int max_displacement_, int kernel_size_,
+                               int neighborhood_grid_radius_, int neighborhood_grid_width_,
+                               int  kernel_radius_, int stride1_, int stride2_) {
+  const index_t bnum = data1.size(0);
+  const int bchannels = data1.size(1);
+  const int sumelems = kernel_size_ * kernel_size_ * bchannels;
+  AddPad<Dtype>(data1, tmp1, pad_size_);
+  index_t top_channels_unsigned_ = static_cast<index_t>(top_channels_);
+  AddPad<Dtype>(data2, tmp2, pad_size_);
+  for (index_t i = 0 ; i < static_cast<index_t>(top_height_) ; i++)
+      for (index_t j = 0 ; j < static_cast<index_t>(top_width_); j++)
+        for (index_t nbatch = 0 ; nbatch < bnum ; nbatch++) {
+            int x1 = j*stride1_+max_displacement_;
+            int y1 = i*stride1_+max_displacement_;
+            for (index_t top_channel = 0 ; top_channel < top_channels_unsigned_ ; top_channel++) {
+              int s2o = (top_channel % neighborhood_grid_width_ -\
+                         neighborhood_grid_radius_) * stride2_;
+              int s2p = (top_channel / neighborhood_grid_width_ -\
+                         neighborhood_grid_radius_) * stride2_;
+              int x2 = x1 + s2o;
+              int y2 = y1 + s2p;
+              for (index_t h = 0; h < static_cast<index_t>(kernel_size_); h++)
+                for (index_t w = 0; w < static_cast<index_t>(kernel_size_); w++)
+                  for (index_t channel = 0; channel < static_cast<index_t>(bchannels); channel++) {
+                    if (is_multiply == true)
+                        out[nbatch][top_channel][i][j] += \
+                        tmp1[nbatch][y1+h][x1+w][channel]*tmp2[nbatch][y2+h][x2+w][channel];
+                    else
+                        out[nbatch][top_channel][i][j] += \
+                        fabsf(tmp1[nbatch][y1+h][x1+w][channel]-tmp2[nbatch][y2+h][x2+w][channel]);
+                  }
+              out[nbatch][top_channel][i][j] /= sumelems;
+            }
+        }
+}
+template<typename Dtype>
+inline void CorrelationBackward(const Tensor<cpu, 4, Dtype> &out_grad,
+                                const Tensor<cpu, 4, Dtype> &in_grad1,
+                                const Tensor<cpu, 4, Dtype> &in_grad2,
+                                const Tensor<cpu, 4, Dtype> &tmp1,
+                                const Tensor<cpu, 4, Dtype> &tmp2,
+                                int top_channels_, int top_height_,
+                                int top_width_, int pad_size_,
+                                bool is_multiply, int max_displacement_,
+                                int kernel_size_, int neighborhood_grid_radius_,
+                                int neighborhood_grid_width_,
+                                int  kernel_radius_, int stride1_,
+                                int stride2_, int num,
+                                int channels, int height, int width
+                            ) {
+  const float sumelems = kernel_size_ * kernel_size_ * channels;
+  for (index_t i = 0 ; i < static_cast<index_t>(top_height_) ; i++)
+     for (index_t j = 0 ; j < static_cast<index_t>(top_width_); j++)
+        for (index_t nbatch = 0 ; nbatch < static_cast<index_t>(num) ; nbatch++) {
+            int x1 = j*stride1_+max_displacement_;
+            int y1 = i*stride1_+max_displacement_;
+            for (int top_channel = 0 ; top_channel < top_channels_ ; top_channel++) {
+              int s2o = (top_channel % neighborhood_grid_width_ - \
+              neighborhood_grid_radius_) * stride2_;
+              int s2p = (top_channel / neighborhood_grid_width_ - \
+              neighborhood_grid_radius_) * stride2_;
+              int x2 = x1 + s2o;
+              int y2 = y1 + s2p;
+              for (int h = 0; h < kernel_size_; h++)
+                for (int w = 0; w < kernel_size_; w++)
+                  for (int channel = 0 ; channel < channels; channel++) {
+                    if (is_multiply == true) {
+                      if ((y1 +  h - pad_size_ >= 0) && (x1 + w - pad_size_ >= 0) && \
+                      (y1 + h < height +pad_size_) && (x1 + w < width + pad_size_)) {
+                        in_grad1[nbatch][channel][y1+h-pad_size_][x1+w-pad_size_] += \
+                        out_grad[nbatch][top_channel][i][j] * \
+                        tmp2[nbatch][y2+h][x2+w][channel]/sumelems;
+                       }
+                       if ((y2 +  h - pad_size_ >= 0) && (x2 + w -pad_size_ >=0) && \
+                       (y2 + h < height +pad_size_) && (x2 + w < width + pad_size_)) {
+                       in_grad2[nbatch][channel][y2+h-pad_size_][x2+w-pad_size_] += \
+                       out_grad[nbatch][top_channel][i][j] * \
+                       tmp1[nbatch][y1+h][x1+w][channel]/sumelems;
+                       }
+                    } else {
+                      if ((y1 +  h - pad_size_ >= 0) && (x1 + w -pad_size_ >=0) && \
+                      (y1 + h < height + pad_size_) && (x1 + w < width + pad_size_)) {
+                        Dtype sign  = (tmp1[nbatch][y1+h][x1+w][channel] >= \
+                        tmp2[nbatch][y2+h][x2+w][channel])? Dtype(1.0) : Dtype(-1.0);
+                        in_grad1[nbatch][channel][y1+h-pad_size_][x1+w-pad_size_] +=\
+                        out_grad[nbatch][top_channel][i][j]*sign/sumelems;
+                      }
+                      if ((y2 +  h - pad_size_ >= 0) && (x2 + w - pad_size_ >=0) && \
+                      (y2 + h < height + pad_size_) && (x2 + w < width + pad_size_)) {
+                        Dtype sign  = (tmp1[nbatch][y1+h][x1+w][channel] >= \
+                        tmp2[nbatch][y2+h][x2+w][channel])? Dtype(-1.0) : Dtype(1.0);
+                        in_grad2[nbatch][channel][y2+h-pad_size_][x2+w-pad_size_] +=\
+                        out_grad[nbatch][top_channel][i][j]*sign/sumelems;
+                       }
+                    }
+                  }
+               }
+         }
+}
+}  // namespace mshadow
+namespace mxnet {
+namespace op {
+template<>
+Operator *CreateOp<cpu>(CorrelationParam param) {
+  return new CorrelationOp<cpu>(param);
+}
+Operator* CorrelationProp::CreateOperator(Context ctx) const {
+  DO_BIND_DISPATCH(CreateOp, param_);
+}
+DMLC_REGISTER_PARAMETER(CorrelationParam);
+MXNET_REGISTER_OP_PROPERTY(Correlation, CorrelationProp)
+.add_argument("data1", "NDArray-or-Symbol", "Input data1 to the correlation.")
+.add_argument("data2", "NDArray-or-Symbol", "Input data2 to the correlation.")
+.add_arguments(CorrelationParam::__FIELDS__())
+.describe(R"code(Applies correlation to inputs.
+
+The correlation layer performs multiplicative patch comparisons between two feature maps.
+
+Given two multi-channel feature maps :math:`f_{1}, f_{2}`, with :math:`w`, :math:`h`, and :math:`c` being their width, height, and number of channels,
+the correlation layer lets the network compare each patch from :math:`f_{1}` with each patch from :math:`f_{2}`.
+
+For now we consider only a single comparison of two patches. The 'correlation' of two patches centered at :math:`x_{1}` in the first map and
+:math:`x_{2}` in the second map is then defined as:
+
+.. math::
+   c(x_{1}, x_{2}) = \sum_{o \in [-k,k] \times [-k,k]} <f_{1}(x_{1} + o), f_{2}(x_{2} + o)>
+
+for a square patch of size :math:`K:=2k+1`.
+
+Note that the equation above is identical to one step of a convolution in neural networks, but instead of convolving data with a filter, it convolves data with other
+data. For this reason, it has no training weights.
+
+Computing :math:`c(x_{1}, x_{2})` involves :math:`c * K^{2}` multiplications. Comparing all patch combinations involves :math:`w^{2}*h^{2}` such computations.
+
+Given a maximum displacement :math:`d`, for each location :math:`x_{1}` it computes correlations :math:`c(x_{1}, x_{2})` only in a neighborhood of size :math:`D:=2d+1`,
+by limiting the range of :math:`x_{2}`. We use strides :math:`s_{1}, s_{2}`, to quantize :math:`x_{1}` globally and to quantize :math:`x_{2}` within the neighborhood
+centered around :math:`x_{1}`.
+
+The final output is defined by the following expression:
+
+.. math::
+  out[n, q, i, j] = c(x_{i, j}, x_{q})
+
+where :math:`i` and :math:`j` enumerate spatial locations in :math:`f_{1}`, and :math:`q` denotes the :math:`q^{th}` neighborhood of :math:`x_{i,j}`.
+)code" ADD_FILELINE);
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/correlation.cu b/src/operator/correlation.cu
index b26ae04..149d73f 100644
--- a/src/operator/correlation.cu
+++ b/src/operator/correlation.cu
@@ -1,609 +1,628 @@
-/*!
- * Copyright [2016] <Contributors>
- * \file Correation.cu
- * \brief  Correlation operator
- * \author Xu Dong
-*/
-#include "./correlation-inl.h"
-#include <mshadow/tensor.h>
-#include <mshadow/cuda/reduce.cuh>
-#include <algorithm>
-#include <vector>
-
-#define ROUND_OFF 50000
-#define WARPS_PER_BLOCK 1
-#define THREADS_PER_WARP 32
-#define CORRELATION_CUDA_CHECK(condition) \
-  /* Code block avoids redefinition of cudaError_t error */ \
-  do { \
-    cudaError_t error = condition; \
-    CHECK_EQ(error, cudaSuccess) << " " << cudaGetErrorString(error); \
-  } while (0)
-#define CUDA_KERNEL_LOOP(i, n) \
-for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
-      i < (n); \
-      i += blockDim.x * gridDim.x)
-namespace mshadow {
-namespace cuda {
-// == Correlation Kernel
-template <typename Dtype>
-__global__ void CorrelateData(const int nthreads, int num, int topwidth,
-  int topheight, int topchannels, int topcount,
-  int max_displacement, int neighborhood_grid_radius,
-  int neighborhood_grid_width, int kernel_radius, int kernel_size, int stride1, int stride2,
-  int bottomwidth, int bottomheight, int bottomchannels,
-  const Dtype *bottom0, const Dtype *bottom1, Dtype *top) {
-  extern __shared__ char patch_data_char[];
-  Dtype *patch_data = reinterpret_cast<Dtype *>(patch_data_char);
-  //  First (upper left) position of kernel upper-left corner
-  //  in current center position of neighborhood in image 1
-  int x1 = blockIdx.x * stride1 + max_displacement;
-  int y1 = blockIdx.y * stride1 + max_displacement;
-  int item = blockIdx.z;
-  int ch_off = threadIdx.x;
-  //  Load 3D patch into shared shared memory
-  for (int j = 0; j < kernel_size; j++) {  //  HEIGHT
-    for (int i = 0; i < kernel_size; i++) {  //  WIDTH
-      int ji_off = ((j * kernel_size) + i) * bottomchannels;
-      for (int ch = ch_off; ch < bottomchannels; ch += (THREADS_PER_WARP * WARPS_PER_BLOCK))  {
-          //  CHANNELS
-          int idx1 = ((item * bottomheight + y1+j) * bottomwidth + x1+i) * bottomchannels + ch;
-          int idxPatchData = ji_off + ch;
-          patch_data[idxPatchData] = bottom0[idx1];
-      }
-    }
-  }
-  __syncthreads();
-  __shared__ Dtype sum[THREADS_PER_WARP * WARPS_PER_BLOCK];
-  //  Compute correlation
-  for (int top_channel = 0; top_channel < topchannels; top_channel++) {
-    sum[ch_off] = 0;
-    int s2o = (top_channel % neighborhood_grid_width - neighborhood_grid_radius) * stride2;
-    int s2p = (top_channel / neighborhood_grid_width - neighborhood_grid_radius) * stride2;
-    for (int j = 0; j < kernel_size; j++) {  //  HEIGHT
-      for (int i = 0; i < kernel_size; i++) {  //  WIDTH
-        int ji_off = ((j * kernel_size) + i) * bottomchannels;
-        for (int ch = ch_off; ch < bottomchannels; ch += (THREADS_PER_WARP * WARPS_PER_BLOCK)) {
-          //  CHANNELS
-          int x2 = x1 + s2o;
-          int y2 = y1 + s2p;
-          int idxPatchData = ji_off + ch;
-          int idx2 = ((item * bottomheight + y2 + j) * bottomwidth + x2 + i) * bottomchannels + ch;
-          sum[ch_off] += patch_data[idxPatchData] * bottom1[idx2];
-        }
-      }
-    }
-    __syncthreads();
-    if (ch_off == 0) {
-        Dtype total_sum = 0;
-        for (int idx = 0; idx < THREADS_PER_WARP * WARPS_PER_BLOCK; idx++) {
-            total_sum += sum[idx];
-        }
-        const int sumelems = kernel_size * kernel_size * bottomchannels;
-        const int index = ((top_channel * topheight + blockIdx.y) * topwidth) + blockIdx.x;
-        top[index + item*topcount] = total_sum / static_cast<float>(sumelems);
-    }  //  Aggregate result of  different threads
-  }
-}
-//  == Correlation Backward Pass Kernel (For data1)
-template <typename Dtype>
-__global__ void CorrelateDataBackward0(const int nthreads, int num, int item,
-  int topwidth, int topheight, int topchannels,
-  int max_displacement, int neighborhood_grid_radius,
-  int neighborhood_grid_width, int kernel_radius, int stride1, int stride2,
-  int bottomwidth, int bottomheight, int pbottomwidth, int pbottomheight,
-  int bottomchannels, int bottomcount, int pad_size,
-  Dtype *bottom0diff, const Dtype *bottom1, const Dtype *topdiff) {
-  CUDA_KERNEL_LOOP(index, nthreads) {
-    int n = index % bottomchannels;  //  channels
-    int l = (index / bottomchannels) % bottomwidth + pad_size;  //  w-pos
-    int m = (index / bottomchannels / bottomwidth) % bottomheight + pad_size;  //  h-pos
-    //  Get X,Y ranges and clamp
-    //  round_off is a trick to enable integer division with ceil, even for negative numbers
-    //  We use a large offset, for the inner part not to become negative.
-    const int round_off = ROUND_OFF;
-    const int round_off_s1 = stride1 * round_off;
-    //  We add round_off before_s1 the int division and subtract round_off after it,
-    //  to ensure the formula matches ceil behavior:
-    int xmin = (l - 2*kernel_radius - max_displacement + round_off_s1 - 1)\
-     / stride1 + 1 - round_off;  //  ceil (l - 2*kernel_radius - max_displacement) / stride1
-    int ymin = (m - 2*kernel_radius - max_displacement + round_off_s1 - 1)\
-     / stride1 + 1 - round_off;  //  ceil (l - 2*kernel_radius - max_displacement) / stride1
-    //  Same here:
-    int xmax = (l - max_displacement + round_off_s1) / stride1 - round_off;
-    //  floor (l - max_displacement) / stride1
-    int ymax = (m - max_displacement + round_off_s1) / stride1 - round_off;
-    //  floor (m - max_displacement) / stride1
-    Dtype sum = 0;
-    if (xmax >= 0 && ymax >= 0 && (xmin <= topwidth-1) && (ymin <= topheight-1)) {
-        xmin = max(0, xmin);
-        xmax = min(topwidth-1, xmax);
-        ymin = max(0, ymin);
-        ymax = min(topheight-1, ymax);
-        for (int p = -neighborhood_grid_radius; p <= neighborhood_grid_radius; p++) {
-          for (int o = -neighborhood_grid_radius; o <= neighborhood_grid_radius; o++) {
-            //  Get bottom1 data:
-            int s2o = stride2 * o;
-            int s2p = stride2 * p;
-            int idxbot1 = ((item * pbottomheight + (m + s2p)) * pbottomwidth + (l + s2o))\
-             * bottomchannels + n;
-            Dtype bot1tmp = bottom1[idxbot1];  // bottom1[l+s2o,m+s2p,n]
-            //  Index offset for topdiff in following loops:
-            int op = (p+neighborhood_grid_radius) * neighborhood_grid_width\
-             + (o + neighborhood_grid_radius);  //  index [o,p]
-            int idxopoffset = (item * topchannels + op);
-            for (int y = ymin; y <= ymax; y++) {
-              for (int x = xmin; x <= xmax; x++) {
-                int idxtopdiff = (idxopoffset * topheight + y) * topwidth + x;  //  topdiff[x,y,o,p]
-                sum += topdiff[idxtopdiff] * bot1tmp;
-              }
-            }
-          }
-        }
-    }
-    const int sumelems = (kernel_radius * 2 + 1) * (kernel_radius * 2+1) * bottomchannels;
-    const int bot0index = ((n * bottomheight) + (m-pad_size)) * bottomwidth + (l-pad_size);
-    bottom0diff[bot0index + item * bottomcount] = sum / static_cast<float>(sumelems);
-  }
-}
-// == Correlation Backward Pass Kernel (For Blob 1)
-template <typename Dtype>
-__global__ void CorrelateDataBackward1(const int nthreads,
-  int num, int item, int topwidth, int topheight, int topchannels,
-  int max_displacement, int neighborhood_grid_radius,
-  int neighborhood_grid_width, int kernel_radius, int stride1, int stride2,
-  int bottomwidth, int bottomheight, int pbottomwidth, int pbottomheight,
-  int bottomchannels, int bottomcount, int pad_size,
-  const Dtype *bottom0, Dtype *bottom1diff, const Dtype *topdiff) {
-  CUDA_KERNEL_LOOP(index, nthreads) {
-    //  int l = index % bottomwidth + pad_size; //w-pos
-    //  int m = (index / bottomwidth) % bottomheight + pad_size; //  h-pos
-    //  int n = (index / bottomwidth / bottomheight) % bottomchannels; //  channels
-    int n = index % bottomchannels;  //  channels
-    int l = (index / bottomchannels) % bottomwidth + pad_size;  //  w-pos
-    int m = (index / bottomchannels / bottomwidth) % bottomheight + pad_size;  //  h-pos
-    //  round_off is a trick to enable integer division with ceil, even for negative numbers
-    //  We use a large offset, for the inner part not to become negative.
-    const int round_off = ROUND_OFF;
-    const int round_off_s1 = stride1 * round_off;
-    Dtype sum = 0;
-    for (int p = -neighborhood_grid_radius; p <= neighborhood_grid_radius; p++) {
-      for (int o = -neighborhood_grid_radius; o <= neighborhood_grid_radius; o++) {
-        int s2o = stride2 * o;
-        int s2p = stride2 * p;
-        //  Get X,Y ranges and clamp
-        //  We add round_off before_s1 the int division and subtract round_off after it,
-        //  to ensure the formula matches ceil behavior:
-        int xmin = (l - 2*kernel_radius - max_displacement - s2o + round_off_s1 - 1)\
-         / stride1 + 1 - round_off;
-         // ceil (l - 2*kernel_radius - max_displacement - s2o) / stride1
-        int ymin = (m - 2*kernel_radius - max_displacement - s2p + round_off_s1 - 1)\
-         / stride1 + 1 - round_off;
-        // ceil (l - 2*kernel_radius - max_displacement - s2o) / stride1
-        //  Same here:
-        int xmax = (l - max_displacement - s2o + round_off_s1) / stride1 - round_off;
-        //  floor (l - max_displacement - s2o) / stride1
-        int ymax = (m - max_displacement - s2p + round_off_s1) / stride1 - round_off;
-        //  floor (m - max_displacement - s2p) / stride1
-        if (xmax >= 0 && ymax >= 0 && (xmin <= topwidth - 1) && (ymin <= topheight - 1)) {
-            xmin = max(0, xmin);
-            xmax = min(topwidth-1, xmax);
-            ymin = max(0, ymin);
-            ymax = min(topheight-1, ymax);
-            //  Get bottom0 data:
-            int idxbot0 = ((item * pbottomheight + (m - s2p)) \
-            * pbottomwidth + (l - s2o)) * bottomchannels + n;
-            Dtype bot0tmp = bottom0[idxbot0];  //  bottom1[l+s2o,m+s2p,n]
-            //  Index offset for topdiff in following loops:
-            int op = (p+neighborhood_grid_radius) * \
-            neighborhood_grid_width + (o+neighborhood_grid_radius);  //  index [o,p]
-            int idxOpOffset = (item * topchannels + op);
-            for (int y = ymin; y <= ymax; y++) {
-              for (int x = xmin; x <= xmax; x++) {
-                int idxtopdiff = (idxOpOffset * topheight + y)\
-                 * topwidth + x;  //  topdiff[x,y,o,p]
-                sum += topdiff[idxtopdiff] * bot0tmp;
-              }
-            }
-        }
-      }
-    }
-    const int sumelems = (kernel_radius*2+1)*(kernel_radius*2+1)*bottomchannels;
-    const int bot1index = ((n * bottomheight) + (m - pad_size)) * bottomwidth + (l - pad_size);
-    bottom1diff[bot1index + item * bottomcount] = sum / static_cast<float>(sumelems);
-  }
-}
-// == Correlation Kernel Subtraction
-template <typename Dtype>
-__global__ void CorrelateDataSubtract(const int nthreads, int num, int item,
-  int topwidth, int topheight, int topchannels, int topcount,
-  int max_displacement, int neighborhood_grid_radius,
-  int neighborhood_grid_width, int kernel_radius, int stride1, int stride2,
-  int bottomwidth, int bottomheight, int bottomchannels,
-  const Dtype *bottom0, const Dtype *bottom1, Dtype *top) {
-  CUDA_KERNEL_LOOP(index, nthreads) {
-    int x = index % topwidth;  //  w-pos
-    int y = (index / topwidth) % topheight;  //  h-pos
-    int c = (index / topwidth / topheight) % topchannels;  //  channels
-    //  Offset of patch in image 2
-    int s2o = (c % neighborhood_grid_width - neighborhood_grid_radius) * stride2;
-    int s2p = (c / neighborhood_grid_width - neighborhood_grid_radius) * stride2;
-    //  First (upper left) position of kernel center in current neighborhood in image 1
-    int x1 = x*stride1 + kernel_radius + max_displacement;
-    int y1 = y*stride1 + kernel_radius + max_displacement;
-    //  Iterate through 3D patch
-    Dtype sum = 0;
-    for (int j = -kernel_radius; j <= kernel_radius; j++) {  //  HEIGHT
-      for (int i = -kernel_radius; i <= kernel_radius; i++) {  //  WIDTH
-        for (int l = 0; l < bottomchannels; l++) {  //  CHANNELS
-          //  Calculate position in image 2
-          int x2 = x1 + s2o;
-          int y2 = y1 + s2p;
-          //  Indices in bottom data: (CH=l,W=x2,H=y2,N)
-          int idx1 = ((item * bottomheight + y1 + j) * bottomwidth + x1 + i) \
-          * bottomchannels + l;
-          int idx2 = ((item * bottomheight + y2 + j) * bottomwidth + x2 + i) \
-          * bottomchannels + l;
-          //  Do the correlation:
-          sum += fabsf(bottom0[idx1] - bottom1[idx2]);
-        }
-      }
-    }
-    const int sumelems = (kernel_radius * 2 + 1) * (kernel_radius * 2 + 1) * bottomchannels;
-    top[index + item * topcount] = sum / static_cast<float>(sumelems);
-  }
-}
-//  == Correlation Backward Pass Kernel (For Blob 0)
-template <typename Dtype>
-__global__ void CorrelateDataBackward0Subtract(const int nthreads, int num,
-  int item, int topwidth, int topheight, int topchannels,
-  int max_displacement, int neighborhood_grid_radius,
-  int neighborhood_grid_width, int kernel_radius,
-  int stride1, int stride2, int bottomwidth, int bottomheight,
-  int pbottomwidth, int pbottomheight,
-  int bottomchannels, int bottomcount, int pad_size,
-  Dtype *bottom0diff, const Dtype *bottom0, const Dtype *bottom1, const Dtype *topdiff) {
-  CUDA_KERNEL_LOOP(index, nthreads) {
-    int n = index % bottomchannels;  //  channels
-    int l = (index / bottomchannels) % bottomwidth + pad_size;  //  w-pos
-    int m = (index / bottomchannels / bottomwidth) % bottomheight + pad_size;  //  h-pos
-    //  Get X,Y ranges and clamp
-    //  round_off is a trick to enable integer division with ceil, even for negative numbers
-    //  We use a large offset, for the inner part not to become negative.
-    const int round_off = ROUND_OFF;
-    const int round_off_s1 = stride1 * round_off;
-    int idxbot0 = ((item * pbottomheight + m) * pbottomwidth + l)\
-             * bottomchannels + n;
-    //  We add round_off before_s1 the int division and subtract round_off after it,
-    //  to ensure the formula matches ceil behavior:
-    int xmin = (l - 2*kernel_radius - max_displacement + round_off_s1 - 1)\
-     / stride1 + 1 - round_off;  //  ceil (l - 2*kernel_radius - max_displacement) / stride1
-    int ymin = (m - 2*kernel_radius - max_displacement + round_off_s1 - 1)\
-     / stride1 + 1 - round_off;  //  ceil (l - 2*kernel_radius - max_displacement) / stride1
-    //  Same here:
-    int xmax = (l - max_displacement + round_off_s1) / stride1 - round_off;
-    //  floor (l - max_displacement) / stride1
-    int ymax = (m - max_displacement + round_off_s1) / stride1 - round_off;
-    //  floor (m - max_displacement) / stride1
-    Dtype sum = 0;
-    if (xmax >= 0 && ymax >= 0 && (xmin <= topwidth-1) && (ymin <= topheight-1)) {
-        xmin = max(0, xmin);
-        xmax = min(topwidth-1, xmax);
-        ymin = max(0, ymin);
-        ymax = min(topheight-1, ymax);
-        for (int p = -neighborhood_grid_radius; p <= neighborhood_grid_radius; p++) {
-          for (int o = -neighborhood_grid_radius; o <= neighborhood_grid_radius; o++) {
-            //  Get bottom1 data:
-            int s2o = stride2 * o;
-            int s2p = stride2 * p;
-            int idxbot1 = ((item * pbottomheight + (m+s2p)) * pbottomwidth\
-             + (l+s2o)) * bottomchannels + n;
-            Dtype bot0tmp = bottom0[idxbot0];
-            Dtype bot1tmp = bottom1[idxbot1];
-            Dtype sign = (bot0tmp >= bot1tmp) ? Dtype(1.0) : Dtype(-1.0);
-            //  Index offset for topdiff in following loops:
-            int op = (p+neighborhood_grid_radius) * neighborhood_grid_width\
-             + (o + neighborhood_grid_radius);  //  index [o,p]
-            int idxopoffset = (item * topchannels + op);
-            for (int y = ymin; y <= ymax; y++) {
-              for (int x = xmin; x <= xmax; x++) {
-                int idxtopdiff = (idxopoffset * topheight + y) * topwidth + x;  //  topdiff[x,y,o,p]
-                sum += topdiff[idxtopdiff] * sign;
-              }
-            }
-          }
-        }
-    }
-    const int sumelems = (kernel_radius * 2 + 1) * (kernel_radius * 2+1) * bottomchannels;
-    const int bot0index = ((n * bottomheight) + (m-pad_size)) * bottomwidth + (l-pad_size);
-    bottom0diff[bot0index + item * bottomcount] = sum / static_cast<float>(sumelems);
-  }
-}
-//  == Correlation Backward Pass Kernel (For Blob 1)
-template <typename Dtype>
-__global__ void CorrelateDataBackward1Subtract(const int nthreads, int num,
-  int item, int topwidth, int topheight, int topchannels,
-  int max_displacement, int neighborhood_grid_radius,
-  int neighborhood_grid_width, int kernel_radius,
-  int stride1, int stride2, int bottomwidth, int bottomheight,
-  int pbottomwidth, int pbottomheight, int bottomchannels,
-  int bottomcount, int pad_size, const Dtype *bottom0,
-  const Dtype *bottom1, Dtype *bottom1diff, const Dtype *topdiff) {
-    CUDA_KERNEL_LOOP(index, nthreads) {
-    //  int l = index % bottomwidth + pad_size; //w-pos
-    //  int m = (index / bottomwidth) % bottomheight + pad_size; //  h-pos
-    //  int n = (index / bottomwidth / bottomheight) % bottomchannels; //  channels
-    int n = index % bottomchannels;  //  channels
-    int l = (index / bottomchannels) % bottomwidth + pad_size;  //  w-pos
-    int m = (index / bottomchannels / bottomwidth) % bottomheight + pad_size;  //  h-pos
-    //  round_off is a trick to enable integer division with ceil, even for negative numbers
-    //  We use a large offset, for the inner part not to become negative.
-    const int round_off = ROUND_OFF;
-    const int round_off_s1 = stride1 * round_off;
-    Dtype sum = 0;
-    int idxbot1 = ((item * pbottomheight + m) * pbottomwidth + l)\
-             * bottomchannels + n;
-    for (int p = -neighborhood_grid_radius; p <= neighborhood_grid_radius; p++) {
-      for (int o = -neighborhood_grid_radius; o <= neighborhood_grid_radius; o++) {
-        int s2o = stride2 * o;
-        int s2p = stride2 * p;
-        //  Get X,Y ranges and clamp
-        //  We add round_off before_s1 the int division and subtract round_off after it,
-        //  to ensure the formula matches ceil behavior:
-        int xmin = (l - 2*kernel_radius - max_displacement - s2o + round_off_s1 - 1)\
-         / stride1 + 1 - round_off;
-         // ceil (l - 2*kernel_radius - max_displacement - s2o) / stride1
-        int ymin = (m - 2*kernel_radius - max_displacement - s2p + round_off_s1 - 1)\
-         / stride1 + 1 - round_off;
-        // ceil (l - 2*kernel_radius - max_displacement - s2o) / stride1
-        //  Same here:
-        int xmax = (l - max_displacement - s2o + round_off_s1) / stride1 - round_off;
-        //  floor (l - max_displacement - s2o) / stride1
-        int ymax = (m - max_displacement - s2p + round_off_s1) / stride1 - round_off;
-        //  floor (m - max_displacement - s2p) / stride1
-        if (xmax >= 0 && ymax >= 0 && (xmin <= topwidth - 1) && (ymin <= topheight - 1)) {
-            xmin = max(0, xmin);
-            xmax = min(topwidth-1, xmax);
-            ymin = max(0, ymin);
-            ymax = min(topheight-1, ymax);
-            //  Get bottom0 data:
-            int idxbot0 = ((item * pbottomheight + (m - s2p)) * pbottomwidth + (l - s2o))\
-             * bottomchannels + n;
-            //  bottom0[l+s2o,m+s2p,n]
-            Dtype bot0tmp = bottom0[idxbot0];
-            Dtype bot1tmp = bottom1[idxbot1];
-            Dtype sign = (bot0tmp >= bot1tmp) ? Dtype(-1.0) : Dtype(1.0);
-            //  Index offset for topdiff in following loops:
-            int op = (p+neighborhood_grid_radius) * \
-            neighborhood_grid_width + (o+neighborhood_grid_radius);  //  index [o,p]
-            int idxOpOffset = (item * topchannels + op);
-            for (int y = ymin; y <= ymax; y++) {
-              for (int x = xmin; x <= xmax; x++) {
-                int idxtopdiff = (idxOpOffset * topheight + y)\
-                 * topwidth + x;  //  topdiff[x,y,o,p]
-                sum += topdiff[idxtopdiff] * sign;
-              }
-            }
-        }
-      }
-    }
-    const int sumelems = (kernel_radius*2+1)*(kernel_radius*2+1)*bottomchannels;
-    const int bot1index = ((n * bottomheight) + (m - pad_size)) * bottomwidth + (l - pad_size);
-    bottom1diff[bot1index + item * bottomcount] = sum / static_cast<float>(sumelems);
-  }
-}
-//  == Forward
-//  == Dimension rearrangement Kernel
-template <typename Dtype>
-__global__ void blob_rearrange_kernel2(const Dtype* in, Dtype* out, int num,
-int channels, int width, int height, int widthheight, int padding, int pwidthheight) {
-    //  change shape from [batchsize,channel,y,x] to [batchsize,y,x,channel]
-    int xy = blockIdx.x * blockDim.x + threadIdx.x;
-    if (xy >= widthheight )
-        return;
-    int ch = blockIdx.y;
-    int n  = blockIdx.z;
-    Dtype value = in[(n * channels + ch) * widthheight + xy];
-    __syncthreads();
-    int xpad  = (xy % width + padding);
-    int ypad  = (xy / width + padding);
-    int xypad = ypad * (width + 2 * padding) + xpad;
-    out[(n * pwidthheight + xypad) * channels + ch] = value;
-}
-template <typename Dtype>
-void Forward_gpu(
-      const Tensor<gpu, 4, Dtype> &out,
-      const Tensor<gpu, 4, Dtype> &data1,
-      const Tensor<gpu, 4, Dtype> &data2,
-      const Tensor<gpu, 4, Dtype> &tmp1,
-      const Tensor<gpu, 4, Dtype> &tmp2,
-      int top_channels_, int top_height_, int top_width_, int pad_size_,
-      bool is_multiply, int max_displacement_, int kernel_size_,
-      int neighborhood_grid_radius_, int neighborhood_grid_width_,
-      int  kernel_radius_, int stride1_, int stride2_, cudaStream_t stream,
-      cudaStream_t stream_tmp1, cudaStream_t stream_tmp2) {
-    const Dtype *bottom_data1 = data1.dptr_;
-    const Dtype *bottom_data2 = data2.dptr_;
-    Dtype *rbot1 = tmp1.dptr_;
-    Dtype *rbot2 = tmp2.dptr_;
-    Dtype *top = out.dptr_;
-    const int bnum = data1.size(0);
-    const int bchannels = data1.size(1);
-    const int bheight = data1.size(2);
-    const int bwidth = data1.size(3);
-    const int bwidthheight = bwidth * bheight;
-    const int topcount = top_width_ * top_height_ * top_channels_;
-    dim3 threadsPerBlock(THREADS_PER_WARP * WARPS_PER_BLOCK);
-    int threads_per_block = 16;
-    dim3 totalBlocksRearr((bwidthheight - 1) / threads_per_block + 1, bchannels, bnum);
-    const int pwidthheight = (bwidth + 2 * pad_size_) * (bheight + 2 * pad_size_);
-    blob_rearrange_kernel2<Dtype><<<totalBlocksRearr, threads_per_block, 0, stream_tmp1>>>
-    (bottom_data1, rbot1, bnum, bchannels, bwidth, bheight, bwidthheight, pad_size_, pwidthheight);
-    blob_rearrange_kernel2<Dtype><<<totalBlocksRearr, threads_per_block, 0, stream_tmp2>>>
-    (bottom_data2, rbot2, bnum, bchannels, bwidth, bheight, bwidthheight, pad_size_, pwidthheight);
-    const int num = bnum;
-    const int channels = bchannels;
-    const int height = bheight + 2 * pad_size_;
-    const int width = bwidth + 2 * pad_size_;
-    const int shared_memory_per_block = (kernel_size_ * kernel_size_) * bchannels;
-    if (is_multiply == true) {
-        //  CorrelationLayer
-        int topThreadCount = topcount;
-        dim3 totalBlocksCorr(top_width_, top_height_, num);
-        CorrelateData<Dtype><<<totalBlocksCorr, threadsPerBlock,
-        shared_memory_per_block * sizeof(Dtype), stream>>>(
-            topThreadCount,
-            num, top_width_, top_height_, top_channels_, topcount,
-            max_displacement_, neighborhood_grid_radius_,
-            neighborhood_grid_width_, kernel_radius_, kernel_size_,
-            stride1_, stride2_,
-            width, height, channels,
-            rbot1, rbot2, top);
-        CORRELATION_CUDA_CHECK(cudaPeekAtLastError());
-    } else {
-        //  CorrelationLayer
-        for (int n = 0; n < num; n++) {
-            int topThreadCount = topcount;
-            const int gridSize = (topThreadCount + kMaxThreadsPerBlock - 1)\
-             / kMaxThreadsPerBlock;
-            CorrelateDataSubtract<Dtype><<<gridSize, kMaxThreadsPerBlock, 0, stream>>>(
-                topThreadCount,
-                num, n, top_width_, top_height_, top_channels_, topcount,
-                max_displacement_, neighborhood_grid_radius_,
-                neighborhood_grid_width_, kernel_radius_,
-                stride1_, stride2_, width, height, channels, rbot1, rbot2, top);
-         CORRELATION_CUDA_CHECK(cudaPeekAtLastError());
-        }
-    }
-}
-template <typename Dtype>
-void Backward_gpu(
-       const Tensor<gpu, 4, Dtype> &out_grad,
-      const Tensor<gpu, 4, Dtype> &in_grad1,
-      const Tensor<gpu, 4, Dtype> &in_grad2,
-      const Tensor<gpu, 4, Dtype> &tmp1,
-      const Tensor<gpu, 4, Dtype> &tmp2,
-      int top_channels_, int top_height_,
-      int top_width_, int pad_size_, bool is_multiply,
-      int max_displacement_, int kernel_size_,
-      int neighborhood_grid_radius_, int neighborhood_grid_width_,
-      int  kernel_radius_, int stride1_, int stride2_,
-      cudaStream_t stream0, cudaStream_t stream1,
-      int num, int channels, int height, int width) {
-    //  Get top diff, compute bottom diff
-    const Dtype* top_diff = out_grad.dptr_;
-    Dtype* bottom0_diff = in_grad1.dptr_;
-    Dtype* bottom1_diff = in_grad2.dptr_;
-    const Dtype* rbot1 = tmp1.dptr_;
-    const Dtype* rbot2 = tmp2.dptr_;
-    const int paddedheight = height + 2 * pad_size_;
-    const int paddedwidth = width + 2 * pad_size_;
-    const int bottomcount = channels * height * width;
-    int botThreadCount = bottomcount;
-    const int gridSize = (botThreadCount + kMaxThreadsPerBlock - 1) / kMaxThreadsPerBlock;
-    //  CorrelationLayerBackward
-    if (is_multiply == true) {
-        //  == Run kernel Backward 0
-        dim3 totalBlocksBackward0(width, height, channels * num);  //  First dim is fastest
-        const int buffer_size_backw0 = \
-        (static_cast<int>(ceil(static_cast<float>(2 * kernel_radius_)\
-         / static_cast<float>(stride1_))) + 1) * top_channels_;
-        //  == Run kernel Backward 0
-        for (int n = 0; n < num; n++) {
-        CorrelateDataBackward0<Dtype><<<gridSize, kMaxThreadsPerBlock, 0, stream0>>>(
-            botThreadCount,
-            num, n, top_width_, top_height_, top_channels_,
-            max_displacement_, neighborhood_grid_radius_, neighborhood_grid_width_, kernel_radius_,
-            stride1_, stride2_,
-            width, height, paddedwidth, paddedheight, channels, bottomcount, pad_size_,
-            bottom0_diff, rbot2, top_diff);
-        CORRELATION_CUDA_CHECK(cudaPeekAtLastError());
-        }
-        //  == Run kernel Backward 1
-        for (int n = 0; n < num; n++) {
-        CorrelateDataBackward1<Dtype><<<gridSize, kMaxThreadsPerBlock, 0, stream1>>>(
-            botThreadCount,
-            num, n, top_width_, top_height_, top_channels_,
-            max_displacement_, neighborhood_grid_radius_, neighborhood_grid_width_, kernel_radius_,
-            stride1_, stride2_,
-            width, height, paddedwidth, paddedheight, channels, bottomcount, pad_size_,
-            rbot1, bottom1_diff, top_diff);
-       CORRELATION_CUDA_CHECK(cudaPeekAtLastError());
-        }
-    } else  {
-        for (int n = 0; n < num; n++) {
-        //  Bottom0:
-        CorrelateDataBackward0Subtract<Dtype><<<gridSize, kMaxThreadsPerBlock, 0, stream0>>>(
-            botThreadCount,
-            num, n, top_width_, top_height_, top_channels_,
-            max_displacement_, neighborhood_grid_radius_, neighborhood_grid_width_, kernel_radius_,
-            stride1_, stride2_,
-            width, height, paddedwidth, paddedheight, channels, bottomcount, pad_size_,
-            bottom0_diff, rbot1, rbot2, top_diff);
-        CORRELATION_CUDA_CHECK(cudaPeekAtLastError());
-        }
-        for (int n = 0; n < num; n++) {
-        //  Bottom1:
-        CorrelateDataBackward1Subtract<Dtype><<<gridSize, kMaxThreadsPerBlock, 0, stream1>>>(
-            botThreadCount,
-            num, n, top_width_, top_height_, top_channels_,
-            max_displacement_, neighborhood_grid_radius_, neighborhood_grid_width_, kernel_radius_,
-            stride1_, stride2_,
-            width, height, paddedwidth, paddedheight, channels, bottomcount, pad_size_,
-            rbot1, rbot2, bottom1_diff, top_diff);
-        CORRELATION_CUDA_CHECK(cudaPeekAtLastError());
-        }
-    }
-}
-}  // namespace cuda
-template<typename Dtype>
-inline void CorrelationForward(const Tensor<gpu, 4, Dtype> &out,
-                               const Tensor<gpu, 4, Dtype> &data1,
-                               const Tensor<gpu, 4, Dtype> &data2,
-                               const Tensor<gpu, 4, Dtype> &tmp1,
-                               const Tensor<gpu, 4, Dtype> &tmp2,
-                               int top_channels_, int top_height_,
-                               int top_width_, int pad_size_, bool is_multiply,
-                               int max_displacement_, int kernel_size_,
-                               int neighborhood_grid_radius_, int neighborhood_grid_width_,
-                               int kernel_radius_, int stride1_, int stride2_
-                           ) {
-  cudaStream_t stream = Stream<gpu>::GetStream(out.stream_);
-  cudaStream_t stream_tmp1 = Stream<gpu>::GetStream(tmp1.stream_);
-  cudaStream_t stream_tmp2 = Stream<gpu>::GetStream(tmp2.stream_);
-  cuda::Forward_gpu(out, data1, data2, tmp1, tmp2, top_channels_, top_height_,
-                    top_width_, pad_size_, is_multiply, max_displacement_, kernel_size_,
-                    neighborhood_grid_radius_, neighborhood_grid_width_, kernel_radius_,
-                    stride1_, stride2_, stream, stream_tmp1, stream_tmp2);
-}
-
-template<typename Dtype>
-inline void CorrelationBackward(const Tensor<gpu, 4, Dtype> &out_grad,
-                            const Tensor<gpu, 4, Dtype> &in_grad1,
-                            const Tensor<gpu, 4, Dtype> &in_grad2,
-                            const Tensor<gpu, 4, Dtype> &tmp1,
-                            const Tensor<gpu, 4, Dtype> &tmp2,
-                            int top_channels_, int top_height_,
-                            int top_width_, int pad_size_, bool is_multiply,
-                            int max_displacement_, int kernel_size_,
-                            int neighborhood_grid_radius_, int neighborhood_grid_width_,
-                            int  kernel_radius_, int stride1_,
-                            int stride2_, int num, int channels, int height, int width
-                            ) {
-  cudaStream_t stream0 = Stream<gpu>::GetStream(in_grad1.stream_);
-  cudaStream_t stream1 = Stream<gpu>::GetStream(in_grad2.stream_);
-  cuda::Backward_gpu(out_grad, in_grad1, in_grad2, tmp1, tmp2, top_channels_,
-                      top_height_, top_width_, pad_size_, is_multiply,
-                      max_displacement_, kernel_size_, neighborhood_grid_radius_,
-                      neighborhood_grid_width_, kernel_radius_, stride1_, stride2_,
-                      stream0, stream1, num, channels, height, width);
-}
-}  // namespace mshadow
-namespace mxnet {
-namespace op {
-template<>
-Operator* CreateOp<gpu>(CorrelationParam param) {
-  return new CorrelationOp<gpu>(param);
-}
-}  // namespace op
-}  // namespace mxnet
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright [2016] <Contributors>
+ * \file Correlation.cu
+ * \brief  Correlation operator
+ * \author Xu Dong
+*/
+#include "./correlation-inl.h"
+#include <mshadow/tensor.h>
+#include <mshadow/cuda/reduce.cuh>
+#include <algorithm>
+#include <vector>
+
+//  Offset used by the backward kernels: stride1 * ROUND_OFF is added before an integer
+//  division and ROUND_OFF is subtracted afterwards, so the dividend stays non-negative.
+#define ROUND_OFF 50000
+//  WARPS_PER_BLOCK * THREADS_PER_WARP is the block size CorrelateData is launched with; the
+//  same product is the per-thread channel stride and the length of its shared sum[] array.
+#define WARPS_PER_BLOCK 1
+#define THREADS_PER_WARP 32
+//  Evaluates `condition` once and raises a CHECK_EQ failure, with the CUDA error string
+//  appended, unless the result is cudaSuccess.
+#define CORRELATION_CUDA_CHECK(condition) \
+  /* Code block avoids redefinition of cudaError_t error */ \
+  do { \
+    cudaError_t error = condition; \
+    CHECK_EQ(error, cudaSuccess) << " " << cudaGetErrorString(error); \
+  } while (0)
+//  Grid-stride loop header: `i` starts at the thread's global 1-D index and advances by the
+//  total number of launched threads (blockDim.x * gridDim.x) while it is below `n`.
+#define CUDA_KERNEL_LOOP(i, n) \
+for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
+      i < (n); \
+      i += blockDim.x * gridDim.x)
+namespace mshadow {
+namespace cuda {
+// == Correlation Kernel
+/*!
+ * \brief Forward kernel of the multiplicative correlation.
+ *
+ * Launch layout, as read from the indexing below: blockIdx.x / blockIdx.y select the output
+ * column / row, blockIdx.z selects the batch item, and the threads of a block split the
+ * input channels among themselves in steps of THREADS_PER_WARP * WARPS_PER_BLOCK. The block
+ * size must equal that product because thread 0 sums exactly that many partial results.
+ *
+ * bottom0 / bottom1 are indexed as [item][y][x][channel]; top is indexed as
+ * [item][top_channel][y][x] with topcount elements per item. The dynamic shared memory
+ * must hold kernel_size * kernel_size * bottomchannels values of Dtype: it caches the
+ * bottom0 patch that is reused for every displacement.
+ *
+ * nthreads, num and kernel_radius are accepted but not used by this kernel.
+ */
+template <typename Dtype>
+__global__ void CorrelateData(const int nthreads, int num, int topwidth,
+  int topheight, int topchannels, int topcount,
+  int max_displacement, int neighborhood_grid_radius,
+  int neighborhood_grid_width, int kernel_radius, int kernel_size, int stride1, int stride2,
+  int bottomwidth, int bottomheight, int bottomchannels,
+  const Dtype *bottom0, const Dtype *bottom1, Dtype *top) {
+  extern __shared__ char patch_data_char[];
+  Dtype *patch_data = reinterpret_cast<Dtype *>(patch_data_char);
+  //  First (upper left) position of kernel upper-left corner
+  //  in current center position of neighborhood in image 1
+  int x1 = blockIdx.x * stride1 + max_displacement;
+  int y1 = blockIdx.y * stride1 + max_displacement;
+  int item = blockIdx.z;
+  int ch_off = threadIdx.x;
+  //  Load 3D patch into shared memory
+  for (int j = 0; j < kernel_size; j++) {  //  HEIGHT
+    for (int i = 0; i < kernel_size; i++) {  //  WIDTH
+      int ji_off = ((j * kernel_size) + i) * bottomchannels;
+      for (int ch = ch_off; ch < bottomchannels; ch += (THREADS_PER_WARP * WARPS_PER_BLOCK)) {
+          //  CHANNELS
+          int idx1 = ((item * bottomheight + y1+j) * bottomwidth + x1+i) * bottomchannels + ch;
+          int idxPatchData = ji_off + ch;
+          patch_data[idxPatchData] = bottom0[idx1];
+      }
+    }
+  }
+  //  The whole patch must be visible to every thread before it is read below.
+  __syncthreads();
+  __shared__ Dtype sum[THREADS_PER_WARP * WARPS_PER_BLOCK];
+  //  Compute correlation
+  for (int top_channel = 0; top_channel < topchannels; top_channel++) {
+    sum[ch_off] = 0;
+    //  Displacement of the image 2 patch encoded by this output channel
+    int s2o = (top_channel % neighborhood_grid_width - neighborhood_grid_radius) * stride2;
+    int s2p = (top_channel / neighborhood_grid_width - neighborhood_grid_radius) * stride2;
+    for (int j = 0; j < kernel_size; j++) {  //  HEIGHT
+      for (int i = 0; i < kernel_size; i++) {  //  WIDTH
+        int ji_off = ((j * kernel_size) + i) * bottomchannels;
+        for (int ch = ch_off; ch < bottomchannels; ch += (THREADS_PER_WARP * WARPS_PER_BLOCK)) {
+          //  CHANNELS
+          int x2 = x1 + s2o;
+          int y2 = y1 + s2p;
+          int idxPatchData = ji_off + ch;
+          int idx2 = ((item * bottomheight + y2 + j) * bottomwidth + x2 + i) * bottomchannels + ch;
+          sum[ch_off] += patch_data[idxPatchData] * bottom1[idx2];
+        }
+      }
+    }
+    //  All partial sums must be written before thread 0 reduces them.
+    __syncthreads();
+    if (ch_off == 0) {
+        Dtype total_sum = 0;
+        for (int idx = 0; idx < THREADS_PER_WARP * WARPS_PER_BLOCK; idx++) {
+            total_sum += sum[idx];
+        }
+        const int sumelems = kernel_size * kernel_size * bottomchannels;
+        const int index = ((top_channel * topheight + blockIdx.y) * topwidth) + blockIdx.x;
+        top[index + item*topcount] = total_sum / static_cast<float>(sumelems);
+    }  //  Aggregate result of  different threads
+    //  Second barrier: without it the other threads could already reset their sum[] slot
+    //  for the next top_channel while thread 0 is still reading it. The loop bound is the
+    //  same for every thread, so the barrier is reached uniformly.
+    __syncthreads();
+  }
+}
+//  == Correlation Backward Pass Kernel (For data1)
+/*!
+ * \brief Backward kernel of the multiplicative correlation for the first input.
+ *
+ * Processes one batch item (item) per launch. Each loop index addresses one element of
+ * the unpadded gradient; it is decomposed into channel n and padded coordinates (l, m).
+ * For every displacement (o, p) the kernel reads bottom1 at (l + s2o, m + s2p) from the
+ * padded [item][y][x][channel] buffer and multiplies it with the sum of topdiff over all
+ * output positions whose kernel window covers (l, m). The result is divided by
+ * (2 * kernel_radius + 1)^2 * bottomchannels and stored (overwriting) in bottom0diff,
+ * which is indexed as [item][channel][y][x] with bottomcount elements per item.
+ *
+ * num is accepted but not used.
+ * NOTE(review): the bottom1 read at (l + s2o, m + s2p) is not bounds-checked here;
+ * presumably the padding covers the largest displacement - confirm against the caller.
+ */
+template <typename Dtype>
+__global__ void CorrelateDataBackward0(const int nthreads, int num, int item,
+  int topwidth, int topheight, int topchannels,
+  int max_displacement, int neighborhood_grid_radius,
+  int neighborhood_grid_width, int kernel_radius, int stride1, int stride2,
+  int bottomwidth, int bottomheight, int pbottomwidth, int pbottomheight,
+  int bottomchannels, int bottomcount, int pad_size,
+  Dtype *bottom0diff, const Dtype *bottom1, const Dtype *topdiff) {
+  CUDA_KERNEL_LOOP(index, nthreads) {
+    int n = index % bottomchannels;  //  channels
+    int l = (index / bottomchannels) % bottomwidth + pad_size;  //  w-pos
+    int m = (index / bottomchannels / bottomwidth) % bottomheight + pad_size;  //  h-pos
+    //  Get X,Y ranges and clamp
+    //  round_off is a trick to enable integer division with ceil, even for negative numbers
+    //  We use a large offset, for the inner part not to become negative.
+    const int round_off = ROUND_OFF;
+    const int round_off_s1 = stride1 * round_off;
+    //  We add round_off_s1 before the int division and subtract round_off after it,
+    //  to ensure the formula matches ceil behavior:
+    int xmin = (l - 2*kernel_radius - max_displacement + round_off_s1 - 1)\
+     / stride1 + 1 - round_off;  //  ceil (l - 2*kernel_radius - max_displacement) / stride1
+    int ymin = (m - 2*kernel_radius - max_displacement + round_off_s1 - 1)\
+     / stride1 + 1 - round_off;  //  ceil (m - 2*kernel_radius - max_displacement) / stride1
+    //  Same here:
+    int xmax = (l - max_displacement + round_off_s1) / stride1 - round_off;
+    //  floor (l - max_displacement) / stride1
+    int ymax = (m - max_displacement + round_off_s1) / stride1 - round_off;
+    //  floor (m - max_displacement) / stride1
+    Dtype sum = 0;
+    //  Skip the accumulation when the window range lies completely outside the output.
+    if (xmax >= 0 && ymax >= 0 && (xmin <= topwidth-1) && (ymin <= topheight-1)) {
+        xmin = max(0, xmin);
+        xmax = min(topwidth-1, xmax);
+        ymin = max(0, ymin);
+        ymax = min(topheight-1, ymax);
+        for (int p = -neighborhood_grid_radius; p <= neighborhood_grid_radius; p++) {
+          for (int o = -neighborhood_grid_radius; o <= neighborhood_grid_radius; o++) {
+            //  Get bottom1 data:
+            int s2o = stride2 * o;
+            int s2p = stride2 * p;
+            int idxbot1 = ((item * pbottomheight + (m + s2p)) * pbottomwidth + (l + s2o))\
+             * bottomchannels + n;
+            Dtype bot1tmp = bottom1[idxbot1];  // bottom1[l+s2o,m+s2p,n]
+            //  Index offset for topdiff in following loops:
+            int op = (p+neighborhood_grid_radius) * neighborhood_grid_width\
+             + (o + neighborhood_grid_radius);  //  index [o,p]
+            int idxopoffset = (item * topchannels + op);
+            for (int y = ymin; y <= ymax; y++) {
+              for (int x = xmin; x <= xmax; x++) {
+                int idxtopdiff = (idxopoffset * topheight + y) * topwidth + x;  //  topdiff[x,y,o,p]
+                sum += topdiff[idxtopdiff] * bot1tmp;
+              }
+            }
+          }
+        }
+    }
+    //  Same normalisation as the forward pass: elements per correlation window.
+    const int sumelems = (kernel_radius * 2 + 1) * (kernel_radius * 2+1) * bottomchannels;
+    //  Output uses unpadded coordinates, hence the pad_size subtraction.
+    const int bot0index = ((n * bottomheight) + (m-pad_size)) * bottomwidth + (l-pad_size);
+    bottom0diff[bot0index + item * bottomcount] = sum / static_cast<float>(sumelems);
+  }
+}
+// == Correlation Backward Pass Kernel (For Blob 1)
+/*!
+ * \brief Backward kernel of the multiplicative correlation for the second input.
+ *
+ * Processes one batch item (item) per launch. Each loop index addresses one element of
+ * the unpadded gradient; it is decomposed into channel n and padded coordinates (l, m).
+ * For every displacement (o, p) the range of contributing output positions is shifted by
+ * (-s2o, -s2p), bottom0 is read at (l - s2o, m - s2p) from the padded
+ * [item][y][x][channel] buffer, and the product with the summed topdiff is accumulated.
+ * The result is divided by (2 * kernel_radius + 1)^2 * bottomchannels and stored
+ * (overwriting) in bottom1diff, indexed as [item][channel][y][x].
+ *
+ * num is accepted but not used.
+ */
+template <typename Dtype>
+__global__ void CorrelateDataBackward1(const int nthreads,
+  int num, int item, int topwidth, int topheight, int topchannels,
+  int max_displacement, int neighborhood_grid_radius,
+  int neighborhood_grid_width, int kernel_radius, int stride1, int stride2,
+  int bottomwidth, int bottomheight, int pbottomwidth, int pbottomheight,
+  int bottomchannels, int bottomcount, int pad_size,
+  const Dtype *bottom0, Dtype *bottom1diff, const Dtype *topdiff) {
+  CUDA_KERNEL_LOOP(index, nthreads) {
+    //  int l = index % bottomwidth + pad_size; //w-pos
+    //  int m = (index / bottomwidth) % bottomheight + pad_size; //  h-pos
+    //  int n = (index / bottomwidth / bottomheight) % bottomchannels; //  channels
+    int n = index % bottomchannels;  //  channels
+    int l = (index / bottomchannels) % bottomwidth + pad_size;  //  w-pos
+    int m = (index / bottomchannels / bottomwidth) % bottomheight + pad_size;  //  h-pos
+    //  round_off is a trick to enable integer division with ceil, even for negative numbers
+    //  We use a large offset, for the inner part not to become negative.
+    const int round_off = ROUND_OFF;
+    const int round_off_s1 = stride1 * round_off;
+    Dtype sum = 0;
+    for (int p = -neighborhood_grid_radius; p <= neighborhood_grid_radius; p++) {
+      for (int o = -neighborhood_grid_radius; o <= neighborhood_grid_radius; o++) {
+        int s2o = stride2 * o;
+        int s2p = stride2 * p;
+        //  Get X,Y ranges and clamp
+        //  We add round_off_s1 before the int division and subtract round_off after it,
+        //  to ensure the formula matches ceil behavior:
+        int xmin = (l - 2*kernel_radius - max_displacement - s2o + round_off_s1 - 1)\
+         / stride1 + 1 - round_off;
+         // ceil (l - 2*kernel_radius - max_displacement - s2o) / stride1
+        int ymin = (m - 2*kernel_radius - max_displacement - s2p + round_off_s1 - 1)\
+         / stride1 + 1 - round_off;
+        // ceil (m - 2*kernel_radius - max_displacement - s2p) / stride1
+        //  Same here:
+        int xmax = (l - max_displacement - s2o + round_off_s1) / stride1 - round_off;
+        //  floor (l - max_displacement - s2o) / stride1
+        int ymax = (m - max_displacement - s2p + round_off_s1) / stride1 - round_off;
+        //  floor (m - max_displacement - s2p) / stride1
+        //  bottom0 is only read when the range overlaps the output.
+        if (xmax >= 0 && ymax >= 0 && (xmin <= topwidth - 1) && (ymin <= topheight - 1)) {
+            xmin = max(0, xmin);
+            xmax = min(topwidth-1, xmax);
+            ymin = max(0, ymin);
+            ymax = min(topheight-1, ymax);
+            //  Get bottom0 data:
+            int idxbot0 = ((item * pbottomheight + (m - s2p)) \
+            * pbottomwidth + (l - s2o)) * bottomchannels + n;
+            Dtype bot0tmp = bottom0[idxbot0];  //  bottom0[l-s2o,m-s2p,n]
+            //  Index offset for topdiff in following loops:
+            int op = (p+neighborhood_grid_radius) * \
+            neighborhood_grid_width + (o+neighborhood_grid_radius);  //  index [o,p]
+            int idxOpOffset = (item * topchannels + op);
+            for (int y = ymin; y <= ymax; y++) {
+              for (int x = xmin; x <= xmax; x++) {
+                int idxtopdiff = (idxOpOffset * topheight + y)\
+                 * topwidth + x;  //  topdiff[x,y,o,p]
+                sum += topdiff[idxtopdiff] * bot0tmp;
+              }
+            }
+        }
+      }
+    }
+    //  Same normalisation as the forward pass: elements per correlation window.
+    const int sumelems = (kernel_radius*2+1)*(kernel_radius*2+1)*bottomchannels;
+    //  Output uses unpadded coordinates, hence the pad_size subtraction.
+    const int bot1index = ((n * bottomheight) + (m - pad_size)) * bottomwidth + (l - pad_size);
+    bottom1diff[bot1index + item * bottomcount] = sum / static_cast<float>(sumelems);
+  }
+}
+// == Correlation Kernel Subtraction
+/*!
+ * \brief Forward kernel of the subtractive correlation (sum of absolute differences).
+ *
+ * Processes one batch item (item) per launch. Each loop index addresses one output element
+ * and is decomposed into output column x, row y and channel c; c encodes the displacement
+ * (s2o, s2p) of the patch in image 2. The absolute differences between the bottom0 patch
+ * centred at (x1, y1) and the bottom1 patch centred at (x1 + s2o, y1 + s2p) are summed over
+ * the kernel window and all channels, divided by the number of summed elements and stored
+ * at top[index + item * topcount]. bottom0 / bottom1 are indexed as [item][y][x][channel].
+ *
+ * num is accepted but not used.
+ * NOTE(review): fabsf works in single precision; if Dtype can be double the difference is
+ * narrowed to float first - confirm which Dtype values this is instantiated with.
+ */
+template <typename Dtype>
+__global__ void CorrelateDataSubtract(const int nthreads, int num, int item,
+  int topwidth, int topheight, int topchannels, int topcount,
+  int max_displacement, int neighborhood_grid_radius,
+  int neighborhood_grid_width, int kernel_radius, int stride1, int stride2,
+  int bottomwidth, int bottomheight, int bottomchannels,
+  const Dtype *bottom0, const Dtype *bottom1, Dtype *top) {
+  CUDA_KERNEL_LOOP(index, nthreads) {
+    int x = index % topwidth;  //  w-pos
+    int y = (index / topwidth) % topheight;  //  h-pos
+    int c = (index / topwidth / topheight) % topchannels;  //  channels
+    //  Offset of patch in image 2
+    int s2o = (c % neighborhood_grid_width - neighborhood_grid_radius) * stride2;
+    int s2p = (c / neighborhood_grid_width - neighborhood_grid_radius) * stride2;
+    //  First (upper left) position of kernel center in current neighborhood in image 1
+    int x1 = x*stride1 + kernel_radius + max_displacement;
+    int y1 = y*stride1 + kernel_radius + max_displacement;
+    //  Iterate through 3D patch
+    Dtype sum = 0;
+    for (int j = -kernel_radius; j <= kernel_radius; j++) {  //  HEIGHT
+      for (int i = -kernel_radius; i <= kernel_radius; i++) {  //  WIDTH
+        for (int l = 0; l < bottomchannels; l++) {  //  CHANNELS
+          //  Calculate position in image 2
+          int x2 = x1 + s2o;
+          int y2 = y1 + s2p;
+          //  Indices in bottom data: (CH=l,W=x2,H=y2,N)
+          int idx1 = ((item * bottomheight + y1 + j) * bottomwidth + x1 + i) \
+          * bottomchannels + l;
+          int idx2 = ((item * bottomheight + y2 + j) * bottomwidth + x2 + i) \
+          * bottomchannels + l;
+          //  Accumulate the absolute difference:
+          sum += fabsf(bottom0[idx1] - bottom1[idx2]);
+        }
+      }
+    }
+    //  Normalise by the number of elements in one correlation window.
+    const int sumelems = (kernel_radius * 2 + 1) * (kernel_radius * 2 + 1) * bottomchannels;
+    top[index + item * topcount] = sum / static_cast<float>(sumelems);
+  }
+}
+//  == Correlation Backward Pass Kernel (For Blob 0)
+/*!
+ * \brief Backward kernel of the subtractive correlation for the first input.
+ *
+ * Processes one batch item (item) per launch. Each loop index addresses one element of
+ * the unpadded gradient; it is decomposed into channel n and padded coordinates (l, m).
+ * For every displacement (o, p) the kernel compares bottom0 at (l, m) with bottom1 at
+ * (l + s2o, m + s2p) and accumulates topdiff times +1 when bottom0 >= bottom1, and times -1
+ * otherwise, over all output positions whose kernel window covers (l, m). The result is
+ * divided by (2 * kernel_radius + 1)^2 * bottomchannels and stored (overwriting) in
+ * bottom0diff, indexed as [item][channel][y][x].
+ *
+ * num is accepted but not used.
+ */
+template <typename Dtype>
+__global__ void CorrelateDataBackward0Subtract(const int nthreads, int num,
+  int item, int topwidth, int topheight, int topchannels,
+  int max_displacement, int neighborhood_grid_radius,
+  int neighborhood_grid_width, int kernel_radius,
+  int stride1, int stride2, int bottomwidth, int bottomheight,
+  int pbottomwidth, int pbottomheight,
+  int bottomchannels, int bottomcount, int pad_size,
+  Dtype *bottom0diff, const Dtype *bottom0, const Dtype *bottom1, const Dtype *topdiff) {
+  CUDA_KERNEL_LOOP(index, nthreads) {
+    int n = index % bottomchannels;  //  channels
+    int l = (index / bottomchannels) % bottomwidth + pad_size;  //  w-pos
+    int m = (index / bottomchannels / bottomwidth) % bottomheight + pad_size;  //  h-pos
+    //  Get X,Y ranges and clamp
+    //  round_off is a trick to enable integer division with ceil, even for negative numbers
+    //  We use a large offset, for the inner part not to become negative.
+    const int round_off = ROUND_OFF;
+    const int round_off_s1 = stride1 * round_off;
+    //  Position of this element inside the padded bottom0 buffer.
+    int idxbot0 = ((item * pbottomheight + m) * pbottomwidth + l)\
+             * bottomchannels + n;
+    //  We add round_off_s1 before the int division and subtract round_off after it,
+    //  to ensure the formula matches ceil behavior:
+    int xmin = (l - 2*kernel_radius - max_displacement + round_off_s1 - 1)\
+     / stride1 + 1 - round_off;  //  ceil (l - 2*kernel_radius - max_displacement) / stride1
+    int ymin = (m - 2*kernel_radius - max_displacement + round_off_s1 - 1)\
+     / stride1 + 1 - round_off;  //  ceil (m - 2*kernel_radius - max_displacement) / stride1
+    //  Same here:
+    int xmax = (l - max_displacement + round_off_s1) / stride1 - round_off;
+    //  floor (l - max_displacement) / stride1
+    int ymax = (m - max_displacement + round_off_s1) / stride1 - round_off;
+    //  floor (m - max_displacement) / stride1
+    Dtype sum = 0;
+    //  Skip the accumulation when the window range lies completely outside the output.
+    if (xmax >= 0 && ymax >= 0 && (xmin <= topwidth-1) && (ymin <= topheight-1)) {
+        xmin = max(0, xmin);
+        xmax = min(topwidth-1, xmax);
+        ymin = max(0, ymin);
+        ymax = min(topheight-1, ymax);
+        for (int p = -neighborhood_grid_radius; p <= neighborhood_grid_radius; p++) {
+          for (int o = -neighborhood_grid_radius; o <= neighborhood_grid_radius; o++) {
+            //  Get bottom1 data:
+            int s2o = stride2 * o;
+            int s2p = stride2 * p;
+            int idxbot1 = ((item * pbottomheight + (m+s2p)) * pbottomwidth\
+             + (l+s2o)) * bottomchannels + n;
+            Dtype bot0tmp = bottom0[idxbot0];
+            Dtype bot1tmp = bottom1[idxbot1];
+            //  +1 when bottom0 >= bottom1, -1 otherwise (ties count as positive).
+            Dtype sign = (bot0tmp >= bot1tmp) ? Dtype(1.0) : Dtype(-1.0);
+            //  Index offset for topdiff in following loops:
+            int op = (p+neighborhood_grid_radius) * neighborhood_grid_width\
+             + (o + neighborhood_grid_radius);  //  index [o,p]
+            int idxopoffset = (item * topchannels + op);
+            for (int y = ymin; y <= ymax; y++) {
+              for (int x = xmin; x <= xmax; x++) {
+                int idxtopdiff = (idxopoffset * topheight + y) * topwidth + x;  //  topdiff[x,y,o,p]
+                sum += topdiff[idxtopdiff] * sign;
+              }
+            }
+          }
+        }
+    }
+    //  Same normalisation as the forward pass: elements per correlation window.
+    const int sumelems = (kernel_radius * 2 + 1) * (kernel_radius * 2+1) * bottomchannels;
+    //  Output uses unpadded coordinates, hence the pad_size subtraction.
+    const int bot0index = ((n * bottomheight) + (m-pad_size)) * bottomwidth + (l-pad_size);
+    bottom0diff[bot0index + item * bottomcount] = sum / static_cast<float>(sumelems);
+  }
+}
+//  == Correlation Backward Pass Kernel (For Blob 1)
+/*!
+ * \brief Backward kernel of the subtractive correlation for the second input.
+ *
+ * Processes one batch item (item) per launch. Each loop index addresses one element of
+ * the unpadded gradient; it is decomposed into channel n and padded coordinates (l, m).
+ * For every displacement (o, p) the kernel compares bottom0 at (l - s2o, m - s2p) with
+ * bottom1 at (l, m) and accumulates topdiff times -1 when bottom0 >= bottom1, and times +1
+ * otherwise, over the contributing output positions. The result is divided by
+ * (2 * kernel_radius + 1)^2 * bottomchannels and stored (overwriting) in bottom1diff,
+ * indexed as [item][channel][y][x].
+ *
+ * num is accepted but not used.
+ */
+template <typename Dtype>
+__global__ void CorrelateDataBackward1Subtract(const int nthreads, int num,
+  int item, int topwidth, int topheight, int topchannels,
+  int max_displacement, int neighborhood_grid_radius,
+  int neighborhood_grid_width, int kernel_radius,
+  int stride1, int stride2, int bottomwidth, int bottomheight,
+  int pbottomwidth, int pbottomheight, int bottomchannels,
+  int bottomcount, int pad_size, const Dtype *bottom0,
+  const Dtype *bottom1, Dtype *bottom1diff, const Dtype *topdiff) {
+    CUDA_KERNEL_LOOP(index, nthreads) {
+    //  int l = index % bottomwidth + pad_size; //w-pos
+    //  int m = (index / bottomwidth) % bottomheight + pad_size; //  h-pos
+    //  int n = (index / bottomwidth / bottomheight) % bottomchannels; //  channels
+    int n = index % bottomchannels;  //  channels
+    int l = (index / bottomchannels) % bottomwidth + pad_size;  //  w-pos
+    int m = (index / bottomchannels / bottomwidth) % bottomheight + pad_size;  //  h-pos
+    //  round_off is a trick to enable integer division with ceil, even for negative numbers
+    //  We use a large offset, for the inner part not to become negative.
+    const int round_off = ROUND_OFF;
+    const int round_off_s1 = stride1 * round_off;
+    Dtype sum = 0;
+    //  Position of this element inside the padded bottom1 buffer.
+    int idxbot1 = ((item * pbottomheight + m) * pbottomwidth + l)\
+             * bottomchannels + n;
+    for (int p = -neighborhood_grid_radius; p <= neighborhood_grid_radius; p++) {
+      for (int o = -neighborhood_grid_radius; o <= neighborhood_grid_radius; o++) {
+        int s2o = stride2 * o;
+        int s2p = stride2 * p;
+        //  Get X,Y ranges and clamp
+        //  We add round_off_s1 before the int division and subtract round_off after it,
+        //  to ensure the formula matches ceil behavior:
+        int xmin = (l - 2*kernel_radius - max_displacement - s2o + round_off_s1 - 1)\
+         / stride1 + 1 - round_off;
+         // ceil (l - 2*kernel_radius - max_displacement - s2o) / stride1
+        int ymin = (m - 2*kernel_radius - max_displacement - s2p + round_off_s1 - 1)\
+         / stride1 + 1 - round_off;
+        // ceil (m - 2*kernel_radius - max_displacement - s2p) / stride1
+        //  Same here:
+        int xmax = (l - max_displacement - s2o + round_off_s1) / stride1 - round_off;
+        //  floor (l - max_displacement - s2o) / stride1
+        int ymax = (m - max_displacement - s2p + round_off_s1) / stride1 - round_off;
+        //  floor (m - max_displacement - s2p) / stride1
+        //  The inputs are only read when the range overlaps the output.
+        if (xmax >= 0 && ymax >= 0 && (xmin <= topwidth - 1) && (ymin <= topheight - 1)) {
+            xmin = max(0, xmin);
+            xmax = min(topwidth-1, xmax);
+            ymin = max(0, ymin);
+            ymax = min(topheight-1, ymax);
+            //  Get bottom0 data:
+            int idxbot0 = ((item * pbottomheight + (m - s2p)) * pbottomwidth + (l - s2o))\
+             * bottomchannels + n;
+            //  bottom0[l-s2o,m-s2p,n]
+            Dtype bot0tmp = bottom0[idxbot0];
+            Dtype bot1tmp = bottom1[idxbot1];
+            //  -1 when bottom0 >= bottom1, +1 otherwise (opposite of the bottom0 kernel).
+            Dtype sign = (bot0tmp >= bot1tmp) ? Dtype(-1.0) : Dtype(1.0);
+            //  Index offset for topdiff in following loops:
+            int op = (p+neighborhood_grid_radius) * \
+            neighborhood_grid_width + (o+neighborhood_grid_radius);  //  index [o,p]
+            int idxOpOffset = (item * topchannels + op);
+            for (int y = ymin; y <= ymax; y++) {
+              for (int x = xmin; x <= xmax; x++) {
+                int idxtopdiff = (idxOpOffset * topheight + y)\
+                 * topwidth + x;  //  topdiff[x,y,o,p]
+                sum += topdiff[idxtopdiff] * sign;
+              }
+            }
+        }
+      }
+    }
+    //  Same normalisation as the forward pass: elements per correlation window.
+    const int sumelems = (kernel_radius*2+1)*(kernel_radius*2+1)*bottomchannels;
+    //  Output uses unpadded coordinates, hence the pad_size subtraction.
+    const int bot1index = ((n * bottomheight) + (m - pad_size)) * bottomwidth + (l - pad_size);
+    bottom1diff[bot1index + item * bottomcount] = sum / static_cast<float>(sumelems);
+  }
+}
+//  == Forward
+//  == Dimension rearrangement Kernel
+/*!
+ * \brief Copies a [num][channels][height][width] blob into the interior of a padded
+ *        [num][height + 2 * padding][width + 2 * padding][channels] blob.
+ *
+ * Launch layout, as read from the indexing below: the x dimension of the grid covers the
+ * widthheight (= width * height) pixels, blockIdx.y is the channel and blockIdx.z the batch
+ * item. Only interior positions of `out` are written; the padding border is left as is.
+ * \param in            source blob
+ * \param out           padded destination blob, pwidthheight pixels per item
+ * \param num           batch size (not used by the kernel itself)
+ * \param channels      number of channels
+ * \param width         unpadded width
+ * \param height        unpadded height (not used by the kernel itself)
+ * \param widthheight   width * height
+ * \param padding       padding added on every side
+ * \param pwidthheight  padded width * padded height
+ */
+template <typename Dtype>
+__global__ void blob_rearrange_kernel2(const Dtype* in, Dtype* out, int num,
+int channels, int width, int height, int widthheight, int padding, int pwidthheight) {
+    //  change shape from [batchsize,channel,y,x] to [batchsize,y,x,channel]
+    int xy = blockIdx.x * blockDim.x + threadIdx.x;
+    if (xy >= widthheight )
+        return;
+    int ch = blockIdx.y;
+    int n  = blockIdx.z;
+    //  No barrier here: the kernel uses no shared memory, and a __syncthreads() placed
+    //  after the divergent early return above would not be reached by every thread.
+    Dtype value = in[(n * channels + ch) * widthheight + xy];
+    int xpad  = (xy % width + padding);
+    int ypad  = (xy / width + padding);
+    int xypad = ypad * (width + 2 * padding) + xpad;
+    out[(n * pwidthheight + xypad) * channels + ch] = value;
+}
+/*!
+ * \brief Launches the GPU kernels of the correlation forward pass.
+ *
+ * First rearranges data1 / data2 into the padded channel-last buffers tmp1 / tmp2, then
+ * runs either CorrelateData (is_multiply) for the whole batch in one launch or
+ * CorrelateDataSubtract once per batch item. Every correlation launch is followed by a
+ * cudaPeekAtLastError check.
+ *
+ * NOTE(review): the rearrangement runs on stream_tmp1 / stream_tmp2 while the correlation
+ * runs on `stream`, and no synchronisation between them is visible here - presumably all
+ * three are the same stream; confirm against the caller.
+ * NOTE(review): the rearrangement writes only the interior of tmp1 / tmp2; presumably the
+ * caller clears the padding border beforehand - confirm.
+ */
+template <typename Dtype>
+void Forward_gpu(
+      const Tensor<gpu, 4, Dtype> &out,
+      const Tensor<gpu, 4, Dtype> &data1,
+      const Tensor<gpu, 4, Dtype> &data2,
+      const Tensor<gpu, 4, Dtype> &tmp1,
+      const Tensor<gpu, 4, Dtype> &tmp2,
+      int top_channels_, int top_height_, int top_width_, int pad_size_,
+      bool is_multiply, int max_displacement_, int kernel_size_,
+      int neighborhood_grid_radius_, int neighborhood_grid_width_,
+      int  kernel_radius_, int stride1_, int stride2_, cudaStream_t stream,
+      cudaStream_t stream_tmp1, cudaStream_t stream_tmp2) {
+    const Dtype *bottom_data1 = data1.dptr_;
+    const Dtype *bottom_data2 = data2.dptr_;
+    Dtype *rbot1 = tmp1.dptr_;
+    Dtype *rbot2 = tmp2.dptr_;
+    Dtype *top = out.dptr_;
+    //  Unpadded input dimensions, taken from data1.
+    const int bnum = data1.size(0);
+    const int bchannels = data1.size(1);
+    const int bheight = data1.size(2);
+    const int bwidth = data1.size(3);
+    const int bwidthheight = bwidth * bheight;
+    //  Number of output elements per batch item.
+    const int topcount = top_width_ * top_height_ * top_channels_;
+    dim3 threadsPerBlock(THREADS_PER_WARP * WARPS_PER_BLOCK);
+    //  Rearrangement grid: x covers the pixels, y the channels, z the batch items.
+    int threads_per_block = 16;
+    dim3 totalBlocksRearr((bwidthheight - 1) / threads_per_block + 1, bchannels, bnum);
+    const int pwidthheight = (bwidth + 2 * pad_size_) * (bheight + 2 * pad_size_);
+    blob_rearrange_kernel2<Dtype><<<totalBlocksRearr, threads_per_block, 0, stream_tmp1>>>
+    (bottom_data1, rbot1, bnum, bchannels, bwidth, bheight, bwidthheight, pad_size_, pwidthheight);
+    blob_rearrange_kernel2<Dtype><<<totalBlocksRearr, threads_per_block, 0, stream_tmp2>>>
+    (bottom_data2, rbot2, bnum, bchannels, bwidth, bheight, bwidthheight, pad_size_, pwidthheight);
+    //  From here on height / width refer to the padded buffers.
+    const int num = bnum;
+    const int channels = bchannels;
+    const int height = bheight + 2 * pad_size_;
+    const int width = bwidth + 2 * pad_size_;
+    //  Element count of the patch CorrelateData caches in dynamic shared memory.
+    const int shared_memory_per_block = (kernel_size_ * kernel_size_) * bchannels;
+    if (is_multiply == true) {
+        //  CorrelationLayer
+        int topThreadCount = topcount;
+        //  One block per output position and batch item.
+        dim3 totalBlocksCorr(top_width_, top_height_, num);
+        CorrelateData<Dtype><<<totalBlocksCorr, threadsPerBlock,
+        shared_memory_per_block * sizeof(Dtype), stream>>>(
+            topThreadCount,
+            num, top_width_, top_height_, top_channels_, topcount,
+            max_displacement_, neighborhood_grid_radius_,
+            neighborhood_grid_width_, kernel_radius_, kernel_size_,
+            stride1_, stride2_,
+            width, height, channels,
+            rbot1, rbot2, top);
+        CORRELATION_CUDA_CHECK(cudaPeekAtLastError());
+    } else {
+        //  CorrelationLayer
+        //  One launch per batch item; n is passed as the kernel's `item` argument.
+        for (int n = 0; n < num; n++) {
+            int topThreadCount = topcount;
+            const int gridSize = (topThreadCount + kMaxThreadsPerBlock - 1)\
+             / kMaxThreadsPerBlock;
+            CorrelateDataSubtract<Dtype><<<gridSize, kMaxThreadsPerBlock, 0, stream>>>(
+                topThreadCount,
+                num, n, top_width_, top_height_, top_channels_, topcount,
+                max_displacement_, neighborhood_grid_radius_,
+                neighborhood_grid_width_, kernel_radius_,
+                stride1_, stride2_, width, height, channels, rbot1, rbot2, top);
+         CORRELATION_CUDA_CHECK(cudaPeekAtLastError());
+        }
+    }
+}
+/*!
+ * \brief Launches the GPU kernels of the correlation backward pass.
+ *
+ * Fills in_grad1 / in_grad2 from out_grad using the padded, rearranged inputs tmp1 / tmp2.
+ * Each kernel handles one batch item, so it is launched `num` times. The kernels
+ * for the first input run on stream0, those for the second input on stream1, and every
+ * launch is followed by a cudaPeekAtLastError check.
+ *
+ * \param num, channels, height, width  dimensions of the unpadded inputs
+ * kernel_size_ is accepted for interface symmetry with Forward_gpu but is not used here.
+ */
+template <typename Dtype>
+void Backward_gpu(
+       const Tensor<gpu, 4, Dtype> &out_grad,
+      const Tensor<gpu, 4, Dtype> &in_grad1,
+      const Tensor<gpu, 4, Dtype> &in_grad2,
+      const Tensor<gpu, 4, Dtype> &tmp1,
+      const Tensor<gpu, 4, Dtype> &tmp2,
+      int top_channels_, int top_height_,
+      int top_width_, int pad_size_, bool is_multiply,
+      int max_displacement_, int kernel_size_,
+      int neighborhood_grid_radius_, int neighborhood_grid_width_,
+      int  kernel_radius_, int stride1_, int stride2_,
+      cudaStream_t stream0, cudaStream_t stream1,
+      int num, int channels, int height, int width) {
+    //  Get top diff, compute bottom diff
+    const Dtype* top_diff = out_grad.dptr_;
+    Dtype* bottom0_diff = in_grad1.dptr_;
+    Dtype* bottom1_diff = in_grad2.dptr_;
+    const Dtype* rbot1 = tmp1.dptr_;
+    const Dtype* rbot2 = tmp2.dptr_;
+    const int paddedheight = height + 2 * pad_size_;
+    const int paddedwidth = width + 2 * pad_size_;
+    //  Number of gradient elements per batch item; one loop index per element.
+    const int bottomcount = channels * height * width;
+    int botThreadCount = bottomcount;
+    const int gridSize = (botThreadCount + kMaxThreadsPerBlock - 1) / kMaxThreadsPerBlock;
+    //  CorrelationLayerBackward
+    if (is_multiply) {
+        //  == Run kernel Backward 0
+        for (int n = 0; n < num; n++) {
+            CorrelateDataBackward0<Dtype><<<gridSize, kMaxThreadsPerBlock, 0, stream0>>>(
+                botThreadCount,
+                num, n, top_width_, top_height_, top_channels_,
+                max_displacement_, neighborhood_grid_radius_, neighborhood_grid_width_,
+                kernel_radius_,
+                stride1_, stride2_,
+                width, height, paddedwidth, paddedheight, channels, bottomcount, pad_size_,
+                bottom0_diff, rbot2, top_diff);
+            CORRELATION_CUDA_CHECK(cudaPeekAtLastError());
+        }
+        //  == Run kernel Backward 1
+        for (int n = 0; n < num; n++) {
+            CorrelateDataBackward1<Dtype><<<gridSize, kMaxThreadsPerBlock, 0, stream1>>>(
+                botThreadCount,
+                num, n, top_width_, top_height_, top_channels_,
+                max_displacement_, neighborhood_grid_radius_, neighborhood_grid_width_,
+                kernel_radius_,
+                stride1_, stride2_,
+                width, height, paddedwidth, paddedheight, channels, bottomcount, pad_size_,
+                rbot1, bottom1_diff, top_diff);
+            CORRELATION_CUDA_CHECK(cudaPeekAtLastError());
+        }
+    } else {
+        //  Bottom0:
+        for (int n = 0; n < num; n++) {
+            CorrelateDataBackward0Subtract<Dtype><<<gridSize, kMaxThreadsPerBlock, 0, stream0>>>(
+                botThreadCount,
+                num, n, top_width_, top_height_, top_channels_,
+                max_displacement_, neighborhood_grid_radius_, neighborhood_grid_width_,
+                kernel_radius_,
+                stride1_, stride2_,
+                width, height, paddedwidth, paddedheight, channels, bottomcount, pad_size_,
+                bottom0_diff, rbot1, rbot2, top_diff);
+            CORRELATION_CUDA_CHECK(cudaPeekAtLastError());
+        }
+        //  Bottom1:
+        for (int n = 0; n < num; n++) {
+            CorrelateDataBackward1Subtract<Dtype><<<gridSize, kMaxThreadsPerBlock, 0, stream1>>>(
+                botThreadCount,
+                num, n, top_width_, top_height_, top_channels_,
+                max_displacement_, neighborhood_grid_radius_, neighborhood_grid_width_,
+                kernel_radius_,
+                stride1_, stride2_,
+                width, height, paddedwidth, paddedheight, channels, bottomcount, pad_size_,
+                rbot1, rbot2, bottom1_diff, top_diff);
+            CORRELATION_CUDA_CHECK(cudaPeekAtLastError());
+        }
+    }
+}
+}  // namespace cuda
+/*!
+ * \brief GPU entry point of the correlation forward pass.
+ *
+ * Looks up the CUDA streams attached to out, tmp1 and tmp2 and hands them, together with
+ * all other arguments unchanged, to cuda::Forward_gpu.
+ */
+template<typename Dtype>
+inline void CorrelationForward(const Tensor<gpu, 4, Dtype> &out,
+                               const Tensor<gpu, 4, Dtype> &data1,
+                               const Tensor<gpu, 4, Dtype> &data2,
+                               const Tensor<gpu, 4, Dtype> &tmp1,
+                               const Tensor<gpu, 4, Dtype> &tmp2,
+                               int top_channels_, int top_height_,
+                               int top_width_, int pad_size_, bool is_multiply,
+                               int max_displacement_, int kernel_size_,
+                               int neighborhood_grid_radius_, int neighborhood_grid_width_,
+                               int kernel_radius_, int stride1_, int stride2_
+                           ) {
+  //  Streams are resolved in the same order as the tensors appear: out, tmp1, tmp2.
+  cudaStream_t out_stream = Stream<gpu>::GetStream(out.stream_);
+  cudaStream_t rearrange_stream1 = Stream<gpu>::GetStream(tmp1.stream_);
+  cudaStream_t rearrange_stream2 = Stream<gpu>::GetStream(tmp2.stream_);
+  cuda::Forward_gpu(
+      out, data1, data2, tmp1, tmp2,
+      top_channels_, top_height_, top_width_, pad_size_,
+      is_multiply, max_displacement_, kernel_size_,
+      neighborhood_grid_radius_, neighborhood_grid_width_,
+      kernel_radius_, stride1_, stride2_,
+      out_stream, rearrange_stream1, rearrange_stream2);
+}
+
+/*!
+ * \brief GPU entry point of the correlation backward pass.
+ *
+ * Looks up the CUDA streams attached to in_grad1 and in_grad2 and hands them, together
+ * with all other arguments unchanged, to cuda::Backward_gpu.
+ */
+template<typename Dtype>
+inline void CorrelationBackward(const Tensor<gpu, 4, Dtype> &out_grad,
+                            const Tensor<gpu, 4, Dtype> &in_grad1,
+                            const Tensor<gpu, 4, Dtype> &in_grad2,
+                            const Tensor<gpu, 4, Dtype> &tmp1,
+                            const Tensor<gpu, 4, Dtype> &tmp2,
+                            int top_channels_, int top_height_,
+                            int top_width_, int pad_size_, bool is_multiply,
+                            int max_displacement_, int kernel_size_,
+                            int neighborhood_grid_radius_, int neighborhood_grid_width_,
+                            int  kernel_radius_, int stride1_,
+                            int stride2_, int num, int channels, int height, int width
+                            ) {
+  //  One stream per input gradient: in_grad1 first, then in_grad2.
+  cudaStream_t grad1_stream = Stream<gpu>::GetStream(in_grad1.stream_);
+  cudaStream_t grad2_stream = Stream<gpu>::GetStream(in_grad2.stream_);
+  cuda::Backward_gpu(
+      out_grad, in_grad1, in_grad2, tmp1, tmp2,
+      top_channels_, top_height_, top_width_, pad_size_,
+      is_multiply, max_displacement_, kernel_size_,
+      neighborhood_grid_radius_, neighborhood_grid_width_,
+      kernel_radius_, stride1_, stride2_,
+      grad1_stream, grad2_stream,
+      num, channels, height, width);
+}
+}  // namespace mshadow
+namespace mxnet {
+namespace op {
+//  GPU specialisation of the operator factory: returns a heap-allocated
+//  CorrelationOp<gpu> built from `param`; the caller receives the raw pointer.
+template<>
+Operator* CreateOp<gpu>(CorrelationParam param) {
+  return new CorrelationOp<gpu>(param);
+}
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/crop-inl.h b/src/operator/crop-inl.h
index 5b5adbf..5a87096 100644
--- a/src/operator/crop-inl.h
+++ b/src/operator/crop-inl.h
@@ -1,214 +1,232 @@
-/*!
- * Copyright (c) 2015 by Contributors
- * \file crop-inl.h
- * \brief
- * \author Wei Wu
-*/
-#ifndef MXNET_OPERATOR_CROP_INL_H_
-#define MXNET_OPERATOR_CROP_INL_H_
-#include <dmlc/logging.h>
-#include <dmlc/parameter.h>
-#include <mxnet/operator.h>
-#include <cstring>
-#include <map>
-#include <string>
-#include <vector>
-#include <utility>
-#include "./operator_common.h"
-
-namespace mxnet {
-namespace op {
-
-namespace crop_enum {
-enum CropOpInputs {kData, kCropLike};
-enum CropOpOutputs {kOut};
-}  // namespace crop_enum
-
-struct CropParam : public dmlc::Parameter<CropParam> {
-  int num_args;
-  TShape offset;
-  TShape h_w;
-  bool center_crop;
-  DMLC_DECLARE_PARAMETER(CropParam) {
-    DMLC_DECLARE_FIELD(num_args).set_range(1, 3)
-    .describe("Number of inputs for crop, if equals one, then we will use the h_w"
-      "for crop height and width, else if equals two, then we will use the height"
-      "and width of the second input symbol, we name crop_like here");
-    int shape[] = {0, 0};
-    DMLC_DECLARE_FIELD(offset).set_default(TShape(shape, shape + 2))
-    .describe("crop offset coordinate: (y, x)");
-    DMLC_DECLARE_FIELD(h_w).set_default(TShape(shape, shape + 2))
-    .describe("crop height and width: (h, w)");
-    DMLC_DECLARE_FIELD(center_crop).set_default(false)
-    .describe("If set to true, then it will use be the center_crop,"
-      "or it will crop using the shape of crop_like");
-  }
-};  // struct CropParam
-
-template<typename xpu>
-class CropOp : public Operator {
- public:
-  explicit CropOp(CropParam param) {
-    this->param_ = param;
-  }
-
-  virtual void Forward(const OpContext &ctx,
-                       const std::vector<TBlob> &in_data,
-                       const std::vector<OpReqType> &req,
-                       const std::vector<TBlob> &out_data,
-                       const std::vector<TBlob> &aux_args) {
-    using namespace mshadow;
-    using namespace mshadow::expr;
-    CHECK_EQ(static_cast<int>(in_data.size()), param_.num_args);
-    CHECK_EQ(out_data.size(), 1U);
-    CHECK_EQ(req[crop_enum::kOut], kWriteTo);
-    Stream<xpu> *s = ctx.get_stream<xpu>();
-    Tensor<xpu, 4> data = in_data[crop_enum::kData].get<xpu, 4, real_t>(s);
-    Tensor<xpu, 4> out = out_data[crop_enum::kOut].get<xpu, 4, real_t>(s);
-    offset_hw_ = InferCropOfferset(data.shape_, out.shape_);
-    out = crop(data, Shape2(out.size(2), out.size(3)), offset_hw_[0], offset_hw_[1]);
-  }
-
-  // because the crop_like input is only used with it's shape, so we should be
-  // careful setting its backwrd grad value to zeros, so that it will not hurt
-  // the connection of crop_like.
-  virtual void Backward(const OpContext &ctx,
-                        const std::vector<TBlob> &out_grad,
-                        const std::vector<TBlob> &in_data,
-                        const std::vector<TBlob> &out_data,
-                        const std::vector<OpReqType> &req,
-                        const std::vector<TBlob> &in_grad,
-                        const std::vector<TBlob> &aux_states) {
-    using namespace mshadow;
-    using namespace mshadow::expr;
-    CHECK_EQ(in_grad.size(), static_cast<size_t>(param_.num_args)) << in_grad.size();
-    CHECK_EQ(out_grad.size(), 1U) << out_grad.size();
-    Stream<xpu> *s = ctx.get_stream<xpu>();
-    Tensor<xpu, 4> grad = out_grad[crop_enum::kOut].get<xpu, 4, real_t>(s);
-    Tensor<xpu, 4> gdata = in_grad[crop_enum::kData].get<xpu, 4, real_t>(s);
-    if (param_.num_args > 1) {
-      // here backward grad is set to zero for crop_like
-      // however, this should only be done when num_args > 1, i.e., crop_like exists
-      Tensor<xpu, 4> gcrop_like = in_grad[crop_enum::kCropLike].get<xpu, 4, real_t>(s);
-      gcrop_like = (real_t)0.0f;
-    }
-    offset_hw_ = InferCropOfferset(gdata.shape_, grad.shape_);
-    gdata = (real_t)0.0f;
-    slice<3>(slice<2>(gdata, offset_hw_[0], offset_hw_[0]+grad.size(2)),
-             offset_hw_[1], offset_hw_[1]+grad.size(3)) = grad;
-  }
-
- private:
-  CropParam param_;
-  std::vector<int> offset_hw_;
-  std::vector<int> InferCropOfferset(const mshadow::Shape<4> &data_shape,
-                                 const mshadow::Shape<4> &out_shape) {
-      std::vector<int> offset_hw;
-      CHECK_GE(data_shape[2], out_shape[2]) <<
-          "data_shape'height should be larger than that of out_shape";
-      CHECK_GE(data_shape[3], out_shape[3]) <<
-          "data_shape'weight should be larger than that of out_shape";
-      if (param_.center_crop) {
-        offset_hw.push_back(static_cast<int>((data_shape[2]-out_shape[2])/2));
-        offset_hw.push_back(static_cast<int>((data_shape[3]-out_shape[3])/2));
-      } else {
-        CHECK_GE(static_cast<int>(param_.offset[0]), 0) <<
-            "offset[0] should be larger than 0";
-        CHECK_LE(param_.offset[0], data_shape[2]-out_shape[2]) <<
-            "offset[0] should be less than the residual space of height";
-        CHECK_GE(static_cast<int>(param_.offset[1]), 0) <<
-            "offset[1] should be larger than 0";
-        CHECK_LE(param_.offset[1], data_shape[3]-out_shape[3]) <<
-            "offset[1] should be less than the residual space of width";
-        offset_hw.push_back(static_cast<int>(param_.offset[0]));
-        offset_hw.push_back(static_cast<int>(param_.offset[1]));
-      }
-      return offset_hw;
-  }
-};  // class CropOp
-
-template<typename xpu>
-Operator *CreateOp(CropParam param);
-
-#if DMLC_USE_CXX11
-class CropProp : public OperatorProperty {
- public:
-  void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) override {
-    param_.Init(kwargs);
-  }
-
-  std::map<std::string, std::string> GetParams() const override {
-    return param_.__DICT__();
-  }
-
-  std::vector<std::string> ListArguments() const override {
-    // return {"data", "crop_like"};
-    std::vector<std::string> ret;
-    for (int i = 0; i < param_.num_args; ++i) {
-      ret.push_back(std::string("arg") + std::to_string(i));
-    }
-    return ret;
-  }
-
-  bool InferShape(std::vector<TShape> *in_shape,
-                  std::vector<TShape> *out_shape,
-                  std::vector<TShape> *aux_shape) const override {
-    using namespace mshadow;
-    CHECK_EQ(in_shape->size(), static_cast<size_t>(param_.num_args));
-    TShape data_shape = in_shape->at(crop_enum::kData);
-    if (data_shape.ndim() == 0) return false;
-    CHECK_EQ(data_shape.ndim(), 4U) << \
-        "Input data should be 4D in batch-num_filter-y-x";
-    std::vector<int> crop_shape;
-    if (param_.num_args == 1) {
-      CHECK_GE(static_cast<int>(param_.h_w[0]), 1) <<
-          "the crop height(h_w[0]) should be larger than 1";
-      CHECK_LE(static_cast<int>(param_.h_w[0]), static_cast<int>(data_shape[2])) <<
-          "the crop height(h_w[0]) should be less than the input data's height";
-      CHECK_GE(static_cast<int>(param_.h_w[1]), 1) <<
-          "the crop width(h_w[1]) should be larger than 1";
-      CHECK_LE(static_cast<int>(param_.h_w[1]), static_cast<int>(data_shape[3])) <<
-          "the crop width(h_w[1]) should be less than the input data's width";
-      crop_shape.push_back(param_.h_w[0]);
-      crop_shape.push_back(param_.h_w[1]);
-    } else if (param_.num_args == 2) {
-      TShape crop_like_shape = in_shape->at(crop_enum::kCropLike);
-      crop_shape.push_back(crop_like_shape[2]);
-      crop_shape.push_back(crop_like_shape[3]);
-    }
-    if (crop_shape.size() == 0) return false;
-    CHECK_EQ(crop_shape.size(), 2U) << \
-        "Input crop_like should be 2D in height-width";
-    out_shape->clear();
-    data_shape[2] = crop_shape[0];
-    data_shape[3] = crop_shape[1];
-    out_shape->push_back(data_shape);
-    return true;
-  }
-
-  OperatorProperty* Copy() const override {
-    auto ptr = new CropProp();
-    ptr->param_ = param_;
-    return ptr;
-  }
-
-  std::string TypeString() const override {
-    return "Crop";
-  }
-
-  std::vector<int> DeclareBackwardDependency(
-    const std::vector<int> &out_grad,
-    const std::vector<int> &in_data,
-    const std::vector<int> &out_data) const override {
-    return out_grad;
-  }
-
-  Operator* CreateOperator(Context ctx) const override;
-
- private:
-  CropParam param_;
-};  // class CropProp
-#endif  // DMLC_USE_CXX11
-}  // namespace op
-}  // namespace mxnet
-#endif  // MXNET_OPERATOR_CROP_INL_H_
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file crop-inl.h
+ * \brief
+ * \author Wei Wu
+*/
+#ifndef MXNET_OPERATOR_CROP_INL_H_
+#define MXNET_OPERATOR_CROP_INL_H_
+#include <dmlc/logging.h>
+#include <dmlc/parameter.h>
+#include <mxnet/operator.h>
+#include <cstring>
+#include <map>
+#include <string>
+#include <vector>
+#include <utility>
+#include "./operator_common.h"
+
+namespace mxnet {
+namespace op {
+
+namespace crop_enum {
+enum CropOpInputs {kData, kCropLike};  // input slots: data to crop, optional shape reference
+enum CropOpOutputs {kOut};  // the single output slot
+}  // namespace crop_enum
+
+struct CropParam : public dmlc::Parameter<CropParam> {  // parameters of the Crop operator
+  int num_args;      // number of inputs; shape inference handles 1 (h_w) and 2 (crop_like)
+  TShape offset;     // (y, x) origin of the crop window, used when center_crop is false
+  TShape h_w;        // (h, w) of the crop window, read when num_args == 1
+  bool center_crop;  // if true the window is centered and offset is not consulted
+  DMLC_DECLARE_PARAMETER(CropParam) {
+    DMLC_DECLARE_FIELD(num_args).set_range(1, 3)  // NOTE(review): confirm upper bound
+    .describe("Number of inputs for crop, if equals one, then we will use the h_w "
+      "for crop height and width, else if equals two, then we will use the height "
+      "and width of the second input symbol, we name crop_like here");
+    int shape[] = {0, 0};  // shared default for offset and h_w
+    DMLC_DECLARE_FIELD(offset).set_default(TShape(shape, shape + 2))
+    .describe("crop offset coordinate: (y, x)");
+    DMLC_DECLARE_FIELD(h_w).set_default(TShape(shape, shape + 2))
+    .describe("crop height and width: (h, w)");
+    DMLC_DECLARE_FIELD(center_crop).set_default(false)
+    .describe("If set to true, then it will use be the center_crop, "
+      "or it will crop using the shape of crop_like");
+  }
+};  // struct CropParam
+
+// Crops dims 2 and 3 of a 4-D input. The window size is read from the output shape;
+// the window origin is centered or taken from param_.offset (see InferCropOfferset).
+template<typename xpu>
+class CropOp : public Operator {
+ public:
+  explicit CropOp(CropParam param) : param_(param) {}
+
+  // Writes the cropped window of in_data[kData] to out_data[kOut]; requires kWriteTo.
+  virtual void Forward(const OpContext &ctx, const std::vector<TBlob> &in_data,
+                       const std::vector<OpReqType> &req,
+                       const std::vector<TBlob> &out_data,
+                       const std::vector<TBlob> &aux_args) {
+    using namespace mshadow;
+    using namespace mshadow::expr;
+    CHECK_EQ(static_cast<int>(in_data.size()), param_.num_args);
+    CHECK_EQ(out_data.size(), 1U);
+    CHECK_EQ(req[crop_enum::kOut], kWriteTo);
+    Stream<xpu> *s = ctx.get_stream<xpu>();
+    Tensor<xpu, 4> data = in_data[crop_enum::kData].get<xpu, 4, real_t>(s);
+    Tensor<xpu, 4> out = out_data[crop_enum::kOut].get<xpu, 4, real_t>(s);
+    offset_hw_ = InferCropOfferset(data.shape_, out.shape_);
+    out = crop(data, Shape2(out.size(2), out.size(3)), offset_hw_[0], offset_hw_[1]);
+  }
+
+  // The crop_like input contributes only its shape, so its gradient is filled
+  // with zeros. The data gradient is zero everywhere except inside the crop
+  // window, which receives out_grad.
+  virtual void Backward(const OpContext &ctx, const std::vector<TBlob> &out_grad,
+                        const std::vector<TBlob> &in_data,
+                        const std::vector<TBlob> &out_data,
+                        const std::vector<OpReqType> &req,
+                        const std::vector<TBlob> &in_grad,
+                        const std::vector<TBlob> &aux_states) {
+    using namespace mshadow;
+    using namespace mshadow::expr;
+    CHECK_EQ(in_grad.size(), static_cast<size_t>(param_.num_args)) << in_grad.size();
+    CHECK_EQ(out_grad.size(), 1U) << out_grad.size();
+    Stream<xpu> *s = ctx.get_stream<xpu>();
+    Tensor<xpu, 4> grad = out_grad[crop_enum::kOut].get<xpu, 4, real_t>(s);
+    Tensor<xpu, 4> gdata = in_grad[crop_enum::kData].get<xpu, 4, real_t>(s);
+    if (param_.num_args > 1) {
+      // crop_like exists only when num_args > 1; its gradient is all zeros
+      // because only its shape, not its values, is used in Forward.
+      Tensor<xpu, 4> gcrop_like = in_grad[crop_enum::kCropLike].get<xpu, 4, real_t>(s);
+      gcrop_like = (real_t)0.0f;
+    }
+    offset_hw_ = InferCropOfferset(gdata.shape_, grad.shape_);
+    gdata = (real_t)0.0f;
+    slice<3>(slice<2>(gdata, offset_hw_[0], offset_hw_[0]+grad.size(2)),
+             offset_hw_[1], offset_hw_[1]+grad.size(3)) = grad;
+  }
+
+ private:
+  CropParam param_;
+  std::vector<int> offset_hw_;
+  // Returns {offset_h, offset_w} of the crop window within data_shape; CHECKs that it fits.
+  std::vector<int> InferCropOfferset(const mshadow::Shape<4> &data_shape,
+                                     const mshadow::Shape<4> &out_shape) {
+    std::vector<int> offset_hw;
+    CHECK_GE(data_shape[2], out_shape[2]) <<
+        "height of data_shape should not be smaller than that of out_shape";
+    CHECK_GE(data_shape[3], out_shape[3]) <<
+        "width of data_shape should not be smaller than that of out_shape";
+    if (param_.center_crop) {
+      offset_hw.push_back(static_cast<int>((data_shape[2]-out_shape[2])/2));
+      offset_hw.push_back(static_cast<int>((data_shape[3]-out_shape[3])/2));
+    } else {
+      CHECK_GE(static_cast<int>(param_.offset[0]), 0) <<
+          "offset[0] should not be negative";
+      CHECK_LE(param_.offset[0], data_shape[2]-out_shape[2]) <<
+          "offset[0] should not exceed the residual space of height";
+      CHECK_GE(static_cast<int>(param_.offset[1]), 0) <<
+          "offset[1] should not be negative";
+      CHECK_LE(param_.offset[1], data_shape[3]-out_shape[3]) <<
+          "offset[1] should not exceed the residual space of width";
+      offset_hw.push_back(static_cast<int>(param_.offset[0]));
+      offset_hw.push_back(static_cast<int>(param_.offset[1]));
+    }
+    return offset_hw;
+  }
+};  // class CropOp
+
+template<typename xpu>  // declaration only; no definition appears in this header
+Operator *CreateOp(CropParam param);
+
+#if DMLC_USE_CXX11
+// Operator property of "Crop": lists arguments, infers shapes and creates the operator.
+class CropProp : public OperatorProperty {
+ public:
+  void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) override {
+    param_.Init(kwargs);
+  }
+
+  std::map<std::string, std::string> GetParams() const override {
+    return param_.__DICT__();
+  }
+
+  std::vector<std::string> ListArguments() const override {
+    // Inputs are named by position: "arg0", "arg1", ... (num_args entries).
+    std::vector<std::string> ret;
+    for (int i = 0; i < param_.num_args; ++i) {
+      ret.push_back(std::string("arg") + std::to_string(i));
+    }
+    return ret;
+  }
+
+  // Output is the data shape with dims 2 and 3 replaced by the crop height and width.
+  bool InferShape(std::vector<TShape> *in_shape, std::vector<TShape> *out_shape,
+                  std::vector<TShape> *aux_shape) const override {
+    CHECK_EQ(in_shape->size(), static_cast<size_t>(param_.num_args));
+    TShape data_shape = in_shape->at(crop_enum::kData);
+    if (data_shape.ndim() == 0) return false;  // data shape not known yet
+    CHECK_EQ(data_shape.ndim(), 4U) <<
+        "Input data should be 4D in batch-num_filter-y-x";
+    std::vector<int> crop_shape;
+    if (param_.num_args == 1) {
+      CHECK_GE(static_cast<int>(param_.h_w[0]), 1) <<
+          "the crop height(h_w[0]) should be at least 1";
+      CHECK_LE(static_cast<int>(param_.h_w[0]), static_cast<int>(data_shape[2])) <<
+          "the crop height(h_w[0]) should not exceed the input data's height";
+      CHECK_GE(static_cast<int>(param_.h_w[1]), 1) <<
+          "the crop width(h_w[1]) should be at least 1";
+      CHECK_LE(static_cast<int>(param_.h_w[1]), static_cast<int>(data_shape[3])) <<
+          "the crop width(h_w[1]) should not exceed the input data's width";
+      crop_shape.push_back(param_.h_w[0]);
+      crop_shape.push_back(param_.h_w[1]);
+    } else if (param_.num_args == 2) {
+      TShape crop_like_shape = in_shape->at(crop_enum::kCropLike);
+      if (crop_like_shape.ndim() == 0) return false;  // crop_like shape not known yet
+      CHECK_GE(crop_like_shape.ndim(), 4U) << "Input crop_like should have at least 4 dims";
+      crop_shape.push_back(crop_like_shape[2]);
+      crop_shape.push_back(crop_like_shape[3]);
+    }
+    if (crop_shape.size() == 0) return false;  // num_args is neither 1 nor 2
+    out_shape->clear();
+    data_shape[2] = crop_shape[0];
+    data_shape[3] = crop_shape[1];
+    out_shape->push_back(data_shape);
+    return true;
+  }
+
+  OperatorProperty* Copy() const override {
+    auto ptr = new CropProp();
+    ptr->param_ = param_;
+    return ptr;
+  }
+
+  std::string TypeString() const override {
+    return "Crop";
+  }
+
+  // Backward depends only on out_grad; neither in_data nor out_data is requested.
+  std::vector<int> DeclareBackwardDependency(
+    const std::vector<int> &out_grad, const std::vector<int> &in_data,
+    const std::vector<int> &out_data) const override {
+    return out_grad;
+  }
+
+  Operator* CreateOperator(Context ctx) const override;
+
+ private:
+  CropParam param_;
+};  // class CropProp
+#endif  // DMLC_USE_CXX11
+}  // namespace op
+}  // namespace mxnet
+#endif  // MXNET_OPERATOR_CROP_INL_H_
diff --git a/src/operator/crop.cc b/src/operator/crop.cc
index f1233ba..8465819 100644
--- a/src/operator/crop.cc
+++ b/src/operator/crop.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file crop.cc
  * \brief
  * \author Wei Wu
diff --git a/src/operator/crop.cu b/src/operator/crop.cu
index 64f8cb2..0b51b14 100644
--- a/src/operator/crop.cu
+++ b/src/operator/crop.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file concat.cu
  * \brief
  * \author Wei Wu
diff --git a/src/operator/cross_device_copy.cc b/src/operator/cross_device_copy.cc
index a9a5f47..b32a68d 100644
--- a/src/operator/cross_device_copy.cc
+++ b/src/operator/cross_device_copy.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file cross_device_copy.cc
  * \brief Special operator that copys NDArray
 */
diff --git a/src/operator/cudnn_activation-inl.h b/src/operator/cudnn_activation-inl.h
index 68f68b6..317ef47 100644
--- a/src/operator/cudnn_activation-inl.h
+++ b/src/operator/cudnn_activation-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file cudnn_activation-inl.h
  * \brief
  * \author Bing Xu
diff --git a/src/operator/cudnn_algoreg-inl.h b/src/operator/cudnn_algoreg-inl.h
index 0d1c394..1078d65 100644
--- a/src/operator/cudnn_algoreg-inl.h
+++ b/src/operator/cudnn_algoreg-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file cudnn_algoreg-inl.h
  * \brief
  * \author Bing Xu
diff --git a/src/operator/cudnn_algoreg.cc b/src/operator/cudnn_algoreg.cc
index 103c481..5aa8688 100644
--- a/src/operator/cudnn_algoreg.cc
+++ b/src/operator/cudnn_algoreg.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file cudnn_algoreg.cc
  * \brief
  * \author Junyuan Xie
diff --git a/src/operator/cudnn_batch_norm-inl.h b/src/operator/cudnn_batch_norm-inl.h
index 6005b0f..b0c5f43 100644
--- a/src/operator/cudnn_batch_norm-inl.h
+++ b/src/operator/cudnn_batch_norm-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file cudnn_batch_norm-inl.h
  * \brief
  * \author Junyuan Xie
diff --git a/src/operator/cudnn_batch_norm.cc b/src/operator/cudnn_batch_norm.cc
index 424299d..28c592b 100644
--- a/src/operator/cudnn_batch_norm.cc
+++ b/src/operator/cudnn_batch_norm.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file cudnn_batch_norm.cc
  * \brief
  * \author Junyuan Xie
diff --git a/src/operator/cudnn_batch_norm.cu b/src/operator/cudnn_batch_norm.cu
index 3ab43ca..c16fc0c 100644
--- a/src/operator/cudnn_batch_norm.cu
+++ b/src/operator/cudnn_batch_norm.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file cudnn_batch_norm.cu
  * \brief
  * \author Junyuan Xie
diff --git a/src/operator/cudnn_bilinear_sampler-inl.h b/src/operator/cudnn_bilinear_sampler-inl.h
index 8b012b7..57592da 100644
--- a/src/operator/cudnn_bilinear_sampler-inl.h
+++ b/src/operator/cudnn_bilinear_sampler-inl.h
@@ -1,167 +1,185 @@
-/*!
- * Copyright (c) 2016 by Contributors
- * \file cudnn_bilinear_sampler-inl.h
- * \brief
- * \author Xu Dong
-*/
-#ifndef MXNET_OPERATOR_CUDNN_BILINEAR_SAMPLER_INL_H_
-#define MXNET_OPERATOR_CUDNN_BILINEAR_SAMPLER_INL_H_
-
-#include <algorithm>
-#include <vector>
-#include "./bilinear_sampler-inl.h"
-namespace mxnet {
-namespace op {
-#if defined(__CUDACC__) && MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 5
-template<typename DType>
-class CuDNNBilinearSamplerOp : public Operator {
- public:
-  explicit CuDNNBilinearSamplerOp(BilinearSamplerParam param) {
-    this->param_ = param;
-    init_cudnn_ = false;
-    dtype_ = mshadow::DataType<DType>::kCudnnFlag;
-    sampler_ = CUDNN_SAMPLER_BILINEAR;
-  }
-
-  ~CuDNNBilinearSamplerOp() {
-    if (init_cudnn_) {
-      CUDNN_CALL(cudnnDestroySpatialTransformerDescriptor(st_desc_));
-      CUDNN_CALL(cudnnDestroyTensorDescriptor(in_desc_));
-      CUDNN_CALL(cudnnDestroyTensorDescriptor(out_desc_));
-    }
-  }
-
-  virtual void Forward(const OpContext &ctx,
-                       const std::vector<TBlob> &in_data,
-                       const std::vector<OpReqType> &req,
-                       const std::vector<TBlob> &out_data,
-                       const std::vector<TBlob> &aux_args) {
-    using namespace mshadow;
-    CHECK_EQ(req[bs::kOut], kWriteTo);
-    CHECK_EQ(in_data.size(), 2U);
-    CHECK_EQ(out_data.size(), 2U);
-    Stream<gpu> *s = ctx.get_stream<gpu>();
-
-    Tensor<gpu, 4, DType> data = in_data[bs::kData].get<gpu, 4, DType>(s);
-    Tensor<gpu, 4, DType> grid = in_data[bs::kGrid].get<gpu, 4, DType>(s);
-    Tensor<gpu, 4, DType> grid_tmp = out_data[bs::kTmp].get<gpu, 4, DType>(s);
-    Tensor<gpu, 4, DType> out = out_data[bs::kOut].get<gpu, 4, DType>(s);
-    // grid_tmp : (batch, h, w, 2)
-    grid_tmp = transpose(grid, Shape4(0, 2, 3, 1));
-    if (!init_cudnn_) {
-     Init(s, in_data, out_data);
-    }
-    CHECK_EQ(data.CheckContiguous(), true);
-    CHECK_EQ(out.CheckContiguous(), true);
-    CHECK_EQ(grid_tmp.CheckContiguous(), true);
-    typename DataType<DType>::ScaleType alpha = 1.0f;
-    typename DataType<DType>::ScaleType beta = 0.0f;
-    CUDNN_CALL(cudnnSpatialTfSamplerForward(s->dnn_handle_,
-                                            st_desc_,
-                                            &alpha,
-                                            in_desc_,
-                                            data.dptr_,
-                                            grid_tmp.dptr_,
-                                            &beta,
-                                            out_desc_,
-                                            out.dptr_));
-  }
-
-  virtual void Backward(const OpContext &ctx,
-                        const std::vector<TBlob> &out_grad,
-                        const std::vector<TBlob> &in_data,
-                        const std::vector<TBlob> &out_data,
-                        const std::vector<OpReqType> &req,
-                        const std::vector<TBlob> &in_grad,
-                        const std::vector<TBlob> &aux_args) {
-    using namespace mshadow;
-    CHECK_NE(req[bs::kData], kWriteInplace);
-    CHECK_NE(req[bs::kGrid], kWriteInplace);
-    CHECK_EQ(in_data.size(), 2U);
-    CHECK_EQ(out_data.size(), 2U);
-    CHECK_EQ(out_grad.size(), 1U);
-    Stream<gpu> *s = ctx.get_stream<gpu>();
-    Tensor<gpu, 4, DType> data = in_data[bs::kData].get<gpu, 4, DType>(s);
-    Tensor<gpu, 4, DType> grid_tmp = out_data[bs::kTmp].get<gpu, 4, DType>(s);
-    Tensor<gpu, 4, DType> gdata = in_grad[bs::kData].get<gpu, 4, DType>(s);
-    Tensor<gpu, 4, DType> ggrid = in_grad[bs::kGrid].get<gpu, 4, DType>(s);
-    Tensor<gpu, 4, DType> grad = out_grad[bs::kOut].get<gpu, 4, DType>(s);
-
-    typename DataType<DType>::ScaleType alpha = (req[bs::kData] == kNullOp) ? 0.0f : 1.0f;
-    typename DataType<DType>::ScaleType beta = (req[bs::kData] == kAddTo) ? 1.0f : 0.0f;
-    typename DataType<DType>::ScaleType alpha_dgrid = 1.0f;
-    typename DataType<DType>::ScaleType beta_dgrid = 0.0f;
-    CUDNN_CALL(cudnnSpatialTfSamplerBackward(s->dnn_handle_,
-                                             st_desc_,
-                                             &alpha,
-                                             in_desc_,
-                                             data.dptr_,
-                                             &beta,
-                                             in_desc_/*reuse in_desc_*/,
-                                             gdata.dptr_/*output*/,
-                                             &alpha_dgrid,
-                                             out_desc_/*reuse out_desc_*/,
-                                             grad.dptr_,
-                                             grid_tmp.dptr_,
-                                             &beta_dgrid,
-                                             grid_tmp.dptr_));
-    Assign(ggrid, req[bs::kGrid], transpose(grid_tmp, Shape4(0, 3, 1, 2)));
-  }
-
- private:
-  inline void Init(mshadow::Stream<gpu> *s,
-                   const std::vector<TBlob> &in_data,
-                   const std::vector<TBlob> &out_data) {
-    using namespace mshadow;
-    #if CUDNN_MAJOR >= 5
-    format_ = CUDNN_TENSOR_NCHW;
-    #endif
-    CHECK_EQ(in_data.size(), 2U);
-    CHECK_EQ(out_data.size(), 2U);
-    if (!init_cudnn_) {
-      init_cudnn_ = true;
-      Tensor<gpu, 4, DType> data = in_data[bs::kData].get<gpu, 4, DType>(s);
-      Tensor<gpu, 4, DType> out = out_data[bs::kOut].get<gpu, 4, DType>(s);
-      CUDNN_CALL(cudnnCreateSpatialTransformerDescriptor(&st_desc_));
-      CUDNN_CALL(cudnnCreateTensorDescriptor(&in_desc_));
-      CUDNN_CALL(cudnnCreateTensorDescriptor(&out_desc_));
-      CUDNN_CALL(cudnnSetTensor4dDescriptor(in_desc_,
-                                            format_,
-                                            dtype_,
-                                            data.size(0),
-                                            data.size(1),
-                                            data.size(2),
-                                            data.size(3)));
-      CUDNN_CALL(cudnnSetTensor4dDescriptor(out_desc_,
-                                            format_,
-                                            dtype_,
-                                            out.size(0),
-                                            out.size(1),
-                                            out.size(2),
-                                            out.size(3)));
-      int dim[] = {static_cast<int>(out.size(0)), static_cast<int>(out.size(1)),
-                   static_cast<int>(out.size(2)), static_cast<int>(out.size(3))};
-      CUDNN_CALL(cudnnSetSpatialTransformerNdDescriptor(st_desc_,
-                                                        sampler_,
-                                                        dtype_,
-                                                        4,
-                                                        dim));
-    }
-  }
-
-  bool init_cudnn_;
-  cudnnDataType_t dtype_;
-  cudnnSpatialTransformerDescriptor_t st_desc_;
-  cudnnTensorDescriptor_t in_desc_;
-  cudnnTensorDescriptor_t out_desc_;
-  cudnnSamplerType_t sampler_;
-  #if CUDNN_MAJOR >= 5
-  cudnnTensorFormat_t format_;
-  #endif
-  BilinearSamplerParam param_;
-};
-#endif  // __CUDACC__ && CUDNN
-}  // namespace op
-}  // namespace mxnet
-
-#endif  // MXNET_OPERATOR_CUDNN_BILINEAR_SAMPLER_INL_H_
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file cudnn_bilinear_sampler-inl.h
+ * \brief
+ * \author Xu Dong
+*/
+#ifndef MXNET_OPERATOR_CUDNN_BILINEAR_SAMPLER_INL_H_
+#define MXNET_OPERATOR_CUDNN_BILINEAR_SAMPLER_INL_H_
+
+#include <algorithm>
+#include <vector>
+#include "./bilinear_sampler-inl.h"
+namespace mxnet {
+namespace op {
+#if defined(__CUDACC__) && MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 5
+template<typename DType>
+class CuDNNBilinearSamplerOp : public Operator {
+ public:
+  explicit CuDNNBilinearSamplerOp(BilinearSamplerParam param) {
+    this->param_ = param;
+    init_cudnn_ = false;
+    dtype_ = mshadow::DataType<DType>::kCudnnFlag;
+    sampler_ = CUDNN_SAMPLER_BILINEAR;
+  }
+
+  ~CuDNNBilinearSamplerOp() {
+    if (init_cudnn_) {
+      CUDNN_CALL(cudnnDestroySpatialTransformerDescriptor(st_desc_));
+      CUDNN_CALL(cudnnDestroyTensorDescriptor(in_desc_));
+      CUDNN_CALL(cudnnDestroyTensorDescriptor(out_desc_));
+    }
+  }
+
+  virtual void Forward(const OpContext &ctx,
+                       const std::vector<TBlob> &in_data,
+                       const std::vector<OpReqType> &req,
+                       const std::vector<TBlob> &out_data,
+                       const std::vector<TBlob> &aux_args) {
+    using namespace mshadow;
+    CHECK_EQ(req[bs::kOut], kWriteTo);
+    CHECK_EQ(in_data.size(), 2U);
+    CHECK_EQ(out_data.size(), 2U);
+    Stream<gpu> *s = ctx.get_stream<gpu>();
+
+    Tensor<gpu, 4, DType> data = in_data[bs::kData].get<gpu, 4, DType>(s);
+    Tensor<gpu, 4, DType> grid = in_data[bs::kGrid].get<gpu, 4, DType>(s);
+    Tensor<gpu, 4, DType> grid_tmp = out_data[bs::kTmp].get<gpu, 4, DType>(s);
+    Tensor<gpu, 4, DType> out = out_data[bs::kOut].get<gpu, 4, DType>(s);
+    // grid_tmp : (batch, h, w, 2)
+    grid_tmp = transpose(grid, Shape4(0, 2, 3, 1));
+    if (!init_cudnn_) {
+     Init(s, in_data, out_data);
+    }
+    CHECK_EQ(data.CheckContiguous(), true);
+    CHECK_EQ(out.CheckContiguous(), true);
+    CHECK_EQ(grid_tmp.CheckContiguous(), true);
+    typename DataType<DType>::ScaleType alpha = 1.0f;
+    typename DataType<DType>::ScaleType beta = 0.0f;
+    CUDNN_CALL(cudnnSpatialTfSamplerForward(s->dnn_handle_,
+                                            st_desc_,
+                                            &alpha,
+                                            in_desc_,
+                                            data.dptr_,
+                                            grid_tmp.dptr_,
+                                            &beta,
+                                            out_desc_,
+                                            out.dptr_));
+  }
+
+  virtual void Backward(const OpContext &ctx,
+                        const std::vector<TBlob> &out_grad,
+                        const std::vector<TBlob> &in_data,
+                        const std::vector<TBlob> &out_data,
+                        const std::vector<OpReqType> &req,
+                        const std::vector<TBlob> &in_grad,
+                        const std::vector<TBlob> &aux_args) {
+    using namespace mshadow;
+    CHECK_NE(req[bs::kData], kWriteInplace);
+    CHECK_NE(req[bs::kGrid], kWriteInplace);
+    CHECK_EQ(in_data.size(), 2U);
+    CHECK_EQ(out_data.size(), 2U);
+    CHECK_EQ(out_grad.size(), 1U);
+    Stream<gpu> *s = ctx.get_stream<gpu>();
+    Tensor<gpu, 4, DType> data = in_data[bs::kData].get<gpu, 4, DType>(s);
+    Tensor<gpu, 4, DType> grid_tmp = out_data[bs::kTmp].get<gpu, 4, DType>(s);
+    Tensor<gpu, 4, DType> gdata = in_grad[bs::kData].get<gpu, 4, DType>(s);
+    Tensor<gpu, 4, DType> ggrid = in_grad[bs::kGrid].get<gpu, 4, DType>(s);
+    Tensor<gpu, 4, DType> grad = out_grad[bs::kOut].get<gpu, 4, DType>(s);
+
+    typename DataType<DType>::ScaleType alpha = (req[bs::kData] == kNullOp) ? 0.0f : 1.0f;
+    typename DataType<DType>::ScaleType beta = (req[bs::kData] == kAddTo) ? 1.0f : 0.0f;
+    typename DataType<DType>::ScaleType alpha_dgrid = 1.0f;
+    typename DataType<DType>::ScaleType beta_dgrid = 0.0f;
+    CUDNN_CALL(cudnnSpatialTfSamplerBackward(s->dnn_handle_,
+                                             st_desc_,
+                                             &alpha,
+                                             in_desc_,
+                                             data.dptr_,
+                                             &beta,
+                                             in_desc_/*reuse in_desc_*/,
+                                             gdata.dptr_/*output*/,
+                                             &alpha_dgrid,
+                                             out_desc_/*reuse out_desc_*/,
+                                             grad.dptr_,
+                                             grid_tmp.dptr_,
+                                             &beta_dgrid,
+                                             grid_tmp.dptr_));
+    Assign(ggrid, req[bs::kGrid], transpose(grid_tmp, Shape4(0, 3, 1, 2)));
+  }
+
+ private:
+  inline void Init(mshadow::Stream<gpu> *s,
+                   const std::vector<TBlob> &in_data,
+                   const std::vector<TBlob> &out_data) {
+    using namespace mshadow;
+    #if CUDNN_MAJOR >= 5
+    format_ = CUDNN_TENSOR_NCHW;
+    #endif
+    CHECK_EQ(in_data.size(), 2U);
+    CHECK_EQ(out_data.size(), 2U);
+    if (!init_cudnn_) {
+      init_cudnn_ = true;
+      Tensor<gpu, 4, DType> data = in_data[bs::kData].get<gpu, 4, DType>(s);
+      Tensor<gpu, 4, DType> out = out_data[bs::kOut].get<gpu, 4, DType>(s);
+      CUDNN_CALL(cudnnCreateSpatialTransformerDescriptor(&st_desc_));
+      CUDNN_CALL(cudnnCreateTensorDescriptor(&in_desc_));
+      CUDNN_CALL(cudnnCreateTensorDescriptor(&out_desc_));
+      CUDNN_CALL(cudnnSetTensor4dDescriptor(in_desc_,
+                                            format_,
+                                            dtype_,
+                                            data.size(0),
+                                            data.size(1),
+                                            data.size(2),
+                                            data.size(3)));
+      CUDNN_CALL(cudnnSetTensor4dDescriptor(out_desc_,
+                                            format_,
+                                            dtype_,
+                                            out.size(0),
+                                            out.size(1),
+                                            out.size(2),
+                                            out.size(3)));
+      int dim[] = {static_cast<int>(out.size(0)), static_cast<int>(out.size(1)),
+                   static_cast<int>(out.size(2)), static_cast<int>(out.size(3))};
+      CUDNN_CALL(cudnnSetSpatialTransformerNdDescriptor(st_desc_,
+                                                        sampler_,
+                                                        dtype_,
+                                                        4,
+                                                        dim));
+    }
+  }
+
+  bool init_cudnn_;
+  cudnnDataType_t dtype_;
+  cudnnSpatialTransformerDescriptor_t st_desc_;
+  cudnnTensorDescriptor_t in_desc_;
+  cudnnTensorDescriptor_t out_desc_;
+  cudnnSamplerType_t sampler_;
+  #if CUDNN_MAJOR >= 5
+  cudnnTensorFormat_t format_;
+  #endif
+  BilinearSamplerParam param_;
+};
+#endif  // __CUDACC__ && CUDNN
+}  // namespace op
+}  // namespace mxnet
+
+#endif  // MXNET_OPERATOR_CUDNN_BILINEAR_SAMPLER_INL_H_
diff --git a/src/operator/cudnn_convolution-inl.h b/src/operator/cudnn_convolution-inl.h
index 06887a9..e966b56 100644
--- a/src/operator/cudnn_convolution-inl.h
+++ b/src/operator/cudnn_convolution-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file cudnn_convolution-inl.h
  * \brief
  * \author Bing Xu
diff --git a/src/operator/cudnn_deconvolution-inl.h b/src/operator/cudnn_deconvolution-inl.h
index 2e2ae3a..8c8f055 100644
--- a/src/operator/cudnn_deconvolution-inl.h
+++ b/src/operator/cudnn_deconvolution-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file cudnn_deconvolution-inl.h
  * \brief
  * \author Wei Wu, Leonard Lausen
diff --git a/src/operator/cudnn_lrn-inl.h b/src/operator/cudnn_lrn-inl.h
index d65a678..241ec70 100644
--- a/src/operator/cudnn_lrn-inl.h
+++ b/src/operator/cudnn_lrn-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file cudnn_lrn-inl.h
  * \brief
  * \author Bing Xu
diff --git a/src/operator/cudnn_pooling-inl.h b/src/operator/cudnn_pooling-inl.h
index 3c9344e..5b03fe5 100644
--- a/src/operator/cudnn_pooling-inl.h
+++ b/src/operator/cudnn_pooling-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file cudnn_pooling-inl.h
  * \brief
  * \author Bing Xu
diff --git a/src/operator/cudnn_rnn-inl.h b/src/operator/cudnn_rnn-inl.h
index 17acf4a..1122aff 100644
--- a/src/operator/cudnn_rnn-inl.h
+++ b/src/operator/cudnn_rnn-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file cudnn_rnn-inl.h
  * \brief
  * \author Sebastian Bodenstein
diff --git a/src/operator/cudnn_softmax_activation-inl.h b/src/operator/cudnn_softmax_activation-inl.h
index 86c2731..c604a8f 100644
--- a/src/operator/cudnn_softmax_activation-inl.h
+++ b/src/operator/cudnn_softmax_activation-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file cudnn_activation-inl.h
  * \brief
  * \author Bing Xu
diff --git a/src/operator/cudnn_spatial_transformer-inl.h b/src/operator/cudnn_spatial_transformer-inl.h
index b25e8ce..fc76784 100644
--- a/src/operator/cudnn_spatial_transformer-inl.h
+++ b/src/operator/cudnn_spatial_transformer-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file cudnn_spatial_transformer-inl.h
  * \brief
  * \author Wei Wu
diff --git a/src/operator/custom/custom-inl.h b/src/operator/custom/custom-inl.h
index 3c688fe..4b2d620 100644
--- a/src/operator/custom/custom-inl.h
+++ b/src/operator/custom/custom-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file native_op-inl.h
  * \brief
  * \author Junyuan Xie
diff --git a/src/operator/custom/custom.cc b/src/operator/custom/custom.cc
index 5a40be9..7b257ba 100644
--- a/src/operator/custom/custom.cc
+++ b/src/operator/custom/custom.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file custom.cc
  * \brief
  * \author Junyuan Xie
diff --git a/src/operator/custom/native_op-inl.h b/src/operator/custom/native_op-inl.h
index 780b0ae..ebce186 100644
--- a/src/operator/custom/native_op-inl.h
+++ b/src/operator/custom/native_op-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file native_op-inl.h
  * \brief
  * \author Junyuan Xie
diff --git a/src/operator/custom/native_op.cc b/src/operator/custom/native_op.cc
index 2ccd286..5dd3504 100644
--- a/src/operator/custom/native_op.cc
+++ b/src/operator/custom/native_op.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file native_op.cc
  * \brief
  * \author Junyuan Xie
diff --git a/src/operator/custom/native_op.cu b/src/operator/custom/native_op.cu
index 8075926..ad8d65e 100644
--- a/src/operator/custom/native_op.cu
+++ b/src/operator/custom/native_op.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file native_op.cu
  * \brief
  * \author Junyuan Xie
diff --git a/src/operator/custom/ndarray_op-inl.h b/src/operator/custom/ndarray_op-inl.h
index fa4208f..b3a4662 100644
--- a/src/operator/custom/ndarray_op-inl.h
+++ b/src/operator/custom/ndarray_op-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file native_op-inl.h
  * \brief
  * \author Junyuan Xie
diff --git a/src/operator/custom/ndarray_op.cc b/src/operator/custom/ndarray_op.cc
index 9815f88..48426ba 100644
--- a/src/operator/custom/ndarray_op.cc
+++ b/src/operator/custom/ndarray_op.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file ndarray_op.cc
  * \brief
  * \author Junyuan Xie
diff --git a/src/operator/deconvolution-inl.h b/src/operator/deconvolution-inl.h
index 909a6fd..4353013 100644
--- a/src/operator/deconvolution-inl.h
+++ b/src/operator/deconvolution-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file deconvolution-inl.h
  * \brief
  * \author Wei Wu
diff --git a/src/operator/deconvolution.cc b/src/operator/deconvolution.cc
index 397bd00..6a59ff6 100644
--- a/src/operator/deconvolution.cc
+++ b/src/operator/deconvolution.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file deconvolution.cc
  * \brief
  * \author Wei Wu
diff --git a/src/operator/deconvolution.cu b/src/operator/deconvolution.cu
index 60cf0ad..b9dd1c1 100644
--- a/src/operator/deconvolution.cu
+++ b/src/operator/deconvolution.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file deconvolution.cu
  * \brief
  * \author Wei Wu
diff --git a/src/operator/dropout-inl.h b/src/operator/dropout-inl.h
index 57d7814..b2fb782 100644
--- a/src/operator/dropout-inl.h
+++ b/src/operator/dropout-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file dropout-inl.h
  * \brief
  * \author Bing Xu
diff --git a/src/operator/dropout.cc b/src/operator/dropout.cc
index e206214..af65578 100644
--- a/src/operator/dropout.cc
+++ b/src/operator/dropout.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file dropout.cc
  * \brief
  * \author Bing Xu
diff --git a/src/operator/dropout.cu b/src/operator/dropout.cu
index ea9eb7d..5265d80 100644
--- a/src/operator/dropout.cu
+++ b/src/operator/dropout.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file dropout.cc
  * \brief
  * \author Bing Xu
diff --git a/src/operator/elemwise_op_common.h b/src/operator/elemwise_op_common.h
index 228303c..9b398f9 100644
--- a/src/operator/elemwise_op_common.h
+++ b/src/operator/elemwise_op_common.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
-* Copyright (c) 2016 by Contributors
 * \file elemwise_op_common.h
 * \brief common function used for broadcasting and reducing
 * \author Xingjian Shi
diff --git a/src/operator/fully_connected-inl.h b/src/operator/fully_connected-inl.h
index 94616bc..e2fab9f 100644
--- a/src/operator/fully_connected-inl.h
+++ b/src/operator/fully_connected-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file fully_connect_op-inl.h
  * \brief fully connect operator and symbol
 */
diff --git a/src/operator/fully_connected.cc b/src/operator/fully_connected.cc
index 56cf4f6..5dbaf8c 100644
--- a/src/operator/fully_connected.cc
+++ b/src/operator/fully_connected.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file fully_connected.cc
  * \brief fully connect operator
 */
diff --git a/src/operator/fully_connected.cu b/src/operator/fully_connected.cu
index 7b834a3..28a0307 100644
--- a/src/operator/fully_connected.cu
+++ b/src/operator/fully_connected.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file fully_connected.cu
  * \brief fully connect operator
 */
diff --git a/src/operator/grid_generator-inl.h b/src/operator/grid_generator-inl.h
index 04fe7ec..65fb8cc 100644
--- a/src/operator/grid_generator-inl.h
+++ b/src/operator/grid_generator-inl.h
@@ -1,318 +1,336 @@
-/*!
- * Copyright (c) 2017 by Contributors
- * \file grid_generator-inl.h
- * \brief
- * The operator generate sampling grid
- * \author Xu Dong
-*/
-#ifndef MXNET_OPERATOR_GRID_GENERATOR_INL_H_
-#define MXNET_OPERATOR_GRID_GENERATOR_INL_H_
-
-#include <dmlc/logging.h>
-#include <dmlc/parameter.h>
-#include <mxnet/operator.h>
-#include <vector>
-#include <map>
-#include <utility>
-#include <string>
-#include "./mshadow_op.h"
-#include "./operator_common.h"
-
-namespace mxnet {
-namespace op {
-
-namespace grid {
-enum GridGeneratorOpInputs {kData};
-enum GridGeneratorOpOutputs {kOut, kGridDst};
-enum GridGeneratorOpResource {kTempSpace};
-enum GridGeneratorTransformType {kAffine, kWarp};
-}
-
-struct GridGeneratorParam : public dmlc::Parameter<GridGeneratorParam> {
-  int transform_type;
-  TShape target_shape;
-  DMLC_DECLARE_PARAMETER(GridGeneratorParam) {
-    int shape[] = {0, 0};
-    DMLC_DECLARE_FIELD(transform_type)
-    .add_enum("affine", grid::kAffine)
-    .add_enum("warp", grid::kWarp)
-    .describe("The type of transformation. For `affine`, input data should be an affine matrix "
-              "of size (batch, 6). For `warp`, input data should be an optical flow of size "
-              "(batch, 2, h, w).");
-    DMLC_DECLARE_FIELD(target_shape).set_default(TShape(shape, shape + 2))
-    .describe("Specifies the output shape (H, W). This is required if transformation type is "
-              "`affine`. If transformation type is `warp`, this parameter is ignored.");
-  }
-};
-
-template<typename xpu, typename DType>
-class GridGeneratorOp : public Operator {
- public:
-  explicit GridGeneratorOp(GridGeneratorParam p) {
-    this->param_ = p;
-  }
-
-  virtual void Forward(const OpContext &ctx,
-                       const std::vector<TBlob> &in_data,
-                       const std::vector<OpReqType> &req,
-                       const std::vector<TBlob> &out_data,
-                       const std::vector<TBlob> &aux_args) {
-    using namespace mshadow;
-    using namespace mshadow::expr;
-    CHECK_EQ(req[grid::kOut], kWriteTo);
-    CHECK_EQ(in_data.size(), 1U);
-    CHECK_EQ(out_data.size(), 2U);
-    Stream<xpu> *s = ctx.get_stream<xpu>();
-    switch (param_.transform_type) {
-      case grid::kAffine: {
-        // if transform_type is affine, data is affine matrix, input shape : (batch, 2, 3)
-        Tensor<xpu, 2, DType> out = out_data[grid::kOut].
-          get_with_shape<xpu, 2, DType>(Shape2(out_data[grid::kOut].shape_[0] * 2,
-                            out_data[grid::kOut].shape_[2] * out_data[grid::kOut].shape_[3]), s);
-        Tensor<xpu, 2, DType> grid_dst = out_data[grid::kGridDst].get<xpu, 2, DType>(s);
-        Shape<2> data_shape = Shape2(out_data[grid::kOut].shape_[0] * 2, 3);
-        Tensor<xpu, 2, DType> data = in_data[grid::kData]
-          .get_with_shape<xpu, 2, DType>(data_shape, s);
-        // x, y, 1
-        grid_dst[0] = range<DType>(0, grid_dst.shape_[1]);
-        grid_dst[0] = grid_dst[0] - tcast<DType>(tcast<int>(grid_dst[0] /
-          scalar<DType>(param_.target_shape[1]))) * scalar<DType>(param_.target_shape[1]);
-        grid_dst[0] = scalar<DType>(-1.0) + grid_dst[0] *
-          scalar<DType>(2.0 / (param_.target_shape[1] - 1));
-        grid_dst[1] = range<DType>(0, grid_dst.shape_[1]);
-        grid_dst[1] = scalar<DType>(-1.0) + tcast<DType>(tcast<int>(grid_dst[1] /
-          scalar<DType>(param_.target_shape[1]))) * scalar<DType>(2.0/(param_.target_shape[0] - 1));
-        grid_dst[2] = scalar<DType>(1.0);
-        Assign(out, req[grid::kOut], dot(data, grid_dst));
-        break;
-      }
-      // Warping transformation
-      case grid::kWarp: {
-        // if transform_type is warp, data is optical flow, input shape : (batch, 2, height, width)
-        // grid_src = grid_dst + optical flow
-        Tensor<xpu, 4, DType> data = in_data[grid::kData].get<xpu, 4, DType>(s);
-        Tensor<xpu, 4, DType> out = out_data[grid::kOut].get<xpu, 4, DType>(s);
-        // grid_dst : (2, H, W)
-        Tensor<xpu, 3, DType> grid_dst = out_data[grid::kGridDst].get<xpu, 3, DType>(s);
-        Tensor<xpu, 2, DType> workspace = ctx.requested[grid::kTempSpace]
-          .get_space_typed<xpu, 2, DType>(Shape2(2, 1), s);
-        grid_dst[0] = repmat(range<DType>(0, data.size(3)), data.size(2));
-        grid_dst[1] = reshape(range<DType>(0, data.size(2), 1, data.size(3)),
-                              Shape2(data.size(2), data.size(3)));
-        workspace[0] = scalar<DType>((DType(data.size(3)) - 1.0) / 2.0);
-        workspace[1] = scalar<DType>((DType(data.size(2)) - 1.0) / 2.0);
-        Assign(out, req[grid::kOut],
-               (data + broadcast_with_axis(grid_dst, -1, data.shape_[0])) /
-                 broadcast_to(reshape(workspace, Shape4(1, 2, 1, 1)),
-                              TShape(data.shape_)) - scalar<DType>(1));
-        break;
-      }
-    }
-  }
-
-  virtual void Backward(const OpContext &ctx,
-                        const std::vector<TBlob> &out_grad,
-                        const std::vector<TBlob> &in_data,
-                        const std::vector<TBlob> &out_data,
-                        const std::vector<OpReqType> &req,
-                        const std::vector<TBlob> &in_grad,
-                        const std::vector<TBlob> &aux_args) {
-    using namespace mshadow;
-    using namespace mshadow::expr;
-    CHECK_EQ(in_data.size(), 1U);
-    CHECK_EQ(out_data.size(), 2U);
-    Stream<xpu> *s = ctx.get_stream<xpu>();
-    switch (param_.transform_type) {
-      case grid::kAffine: {
-        Tensor<xpu, 2, DType> grid_dst = out_data[grid::kGridDst].get<xpu, 2, DType>(s);
-        Shape<2> data_shape = Shape2(in_grad[grid::kData].shape_[0] * 2, 3);
-        Tensor<xpu, 2, DType> gdata = in_grad[grid::kData]
-          .get_with_shape<xpu, 2, DType>(data_shape, s);
-        Shape<2> grad_shape = Shape2(out_grad[grid::kOut].shape_[0] * 2,
-          param_.target_shape[0] * param_.target_shape[1]);
-        Tensor<xpu, 2, DType> grad = out_grad[grid::kOut]
-          .get_with_shape<xpu, 2, DType>(grad_shape, s);
-        // grad : (batch * 2, H * W)   grid_dst.T : (H * W, 3)
-        Assign(gdata, req[grid::kData] , dot(grad, grid_dst.T()));
-        break;
-      }
-      case grid::kWarp: {
-        Tensor<xpu, 4, DType> grad = out_grad[grid::kOut].get<xpu, 4, DType>(s);
-        Tensor<xpu, 4, DType> gdata = in_grad[grid::kData].get<xpu, 4, DType>(s);
-        Tensor<xpu, 2, DType> workspace = ctx.requested[grid::kTempSpace]
-          .get_space_typed<xpu, 2, DType>(Shape2(2, 1), s);
-        workspace[0] = scalar<DType>((DType(gdata.size(3)) - 1.0) / 2.0);
-        workspace[1] = scalar<DType>((DType(gdata.size(2)) - 1.0) / 2.0);
-        Assign(gdata, req[grid::kData],
-               grad / broadcast_to(reshape(workspace, Shape4(1, 2, 1, 1)),
-                                   TShape(gdata.shape_)));
-        break;
-      }
-    }
-  }
-
- private:
-  GridGeneratorParam param_;
-};  // class GridGeneratorOp
-
-template<typename xpu>
-Operator* CreateOp(GridGeneratorParam param, int dtype);
-
-#if DMLC_USE_CXX11
-class GridGeneratorProp : public OperatorProperty {
- public:
-  int NumVisibleOutputs() const override {
-    return 1;
-  }
-
-  int NumOutputs() const override {
-    return 2;
-  }
-
-  std::vector<std::string> ListArguments() const override {
-    return {"data"};
-  }
-
-  std::vector<std::string> ListOutputs() const override {
-    return {"output", "grid_dst"};
-  }
-
-  void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) override {
-    param_.Init(kwargs);
-  }
-
-  std::map<std::string, std::string> GetParams() const override {
-    return param_.__DICT__();
-  }
-
-  bool InferShape(std::vector<TShape> *in_shape,
-                  std::vector<TShape> *out_shape,
-                  std::vector<TShape> *aux_shape) const override {
-    using namespace mshadow;
-    CHECK_EQ(in_shape->size(), 1U) << "Input:[data]";
-    const TShape &lshape = (*in_shape)[grid::kData];
-    if (lshape.ndim() ==  0) return false;
-    out_shape->clear();
-    switch (param_.transform_type) {
-      case grid::kAffine: {
-        CHECK_EQ(lshape.ndim(), 2U) \
-          << "if transform_type is affine, data is affine matrix"
-          "affine matrix should be 2D in batch-num_hidden";
-        CHECK_EQ(lshape[1], 6U) << "incorrect data shape[1], should be 6";
-        CHECK_GT(param_.target_shape[0], 0U) \
-            << "incorrect target_shape: " << param_.target_shape[0];
-        CHECK_GT(param_.target_shape[1], 0U) \
-            << "incorrect target_shape: " << param_.target_shape[1];
-        out_shape->push_back(Shape4(lshape[0], 2, param_.target_shape[0], param_.target_shape[1]));
-        out_shape->push_back(Shape2(3, param_.target_shape[0] * param_.target_shape[1]));
-        break;
-      }
-      case grid::kWarp: {
-        CHECK_EQ(lshape.ndim(), 4U) \
-          << "if transform_type is warp, data is optical flow"
-             "optical flow should be 4D in batch-num_hidden-y-x";
-        CHECK_EQ(lshape[1], 2U) << "incorrect data shape[1], should be 2";
-        out_shape->push_back(lshape);
-        out_shape->push_back(Shape3(2, lshape[2], lshape[3]));
-        break;
-      }
-    }
-    return true;
-  }
-
-  bool InferType(std::vector<int> *in_type,
-                   std::vector<int> *out_type,
-                   std::vector<int> *aux_type) const override {
-      int dtype = -1;
-      for (size_t i = 0; i < in_type->size(); ++i) {
-        if (dtype == -1) {
-          dtype = in_type->at(i);
-        } else {
-          CHECK(in_type->at(i) == dtype ||
-                in_type->at(i) == -1) <<
-                "Non-uniform data type in GridGenerator";
-        }
-      }
-      if (dtype == -1) {
-        LOG(FATAL) << "Not enough information to infer type in GridGenerator.";
-        return false;
-      }
-      size_t nin = this->ListArguments().size();
-      in_type->clear();
-      for (size_t i = 0; i < nin; ++i) in_type->push_back(dtype);
-      size_t naux = this->ListAuxiliaryStates().size();
-      aux_type->clear();
-      for (size_t i = 0; i < naux; ++i) aux_type->push_back(dtype);
-      size_t nout = this->ListOutputs().size();
-      out_type->clear();
-      for (size_t i = 0; i < nout; ++i) out_type->push_back(dtype);
-      return true;
-    }
-
-  OperatorProperty* Copy() const override {
-    auto ptr = new GridGeneratorProp();
-    ptr->param_ = param_;
-    return ptr;
-  }
-
-  std::string TypeString() const override {
-    return "GridGenerator";
-  }
-
-  std::vector<int> DeclareBackwardDependency(
-    const std::vector<int> &out_grad,
-    const std::vector<int> &in_data,
-    const std::vector<int> &out_data) const override {
-    switch (param_.transform_type) {
-      case grid::kAffine: {
-        return {out_grad[grid::kOut],
-                out_data[grid::kGridDst]};
-      }
-      case grid::kWarp: {
-        return {out_grad[grid::kOut]};
-      }
-    }
-    return {};
-  }
-
-  std::vector<ResourceRequest> ForwardResource(
-    const std::vector<TShape> &in_shape) const override {
-    switch (param_.transform_type) {
-    case grid::kAffine: {
-      return{};
-    }
-    case grid::kWarp: {
-      return{ ResourceRequest::kTempSpace };
-    }
-    }
-    return{};
-  }
-
-  std::vector<ResourceRequest> BackwardResource(
-      const std::vector<TShape> &in_shape) const override {
-    switch (param_.transform_type) {
-      case grid::kAffine: {
-        return {};
-      }
-      case grid::kWarp: {
-        return {ResourceRequest::kTempSpace};
-      }
-    }
-    return {};
-  }
-
-  Operator* CreateOperator(Context ctx) const override {
-    LOG(FATAL) << "Not Implemented.";
-    return NULL;
-  }
-
-  Operator* CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
-                             std::vector<int> *in_type) const override;
-
- private:
-  GridGeneratorParam param_;
-};  // class GridGeneratorProp
-#endif  // DMLC_USE_CXX11
-}  // namespace op
-}  // namespace mxnet
-#endif  // MXNET_OPERATOR_GRID_GENERATOR_INL_H_
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file grid_generator-inl.h
+ * \brief
+ * The operator generate sampling grid
+ * \author Xu Dong
+*/
+#ifndef MXNET_OPERATOR_GRID_GENERATOR_INL_H_
+#define MXNET_OPERATOR_GRID_GENERATOR_INL_H_
+
+#include <dmlc/logging.h>
+#include <dmlc/parameter.h>
+#include <mxnet/operator.h>
+#include <vector>
+#include <map>
+#include <utility>
+#include <string>
+#include "./mshadow_op.h"
+#include "./operator_common.h"
+
+namespace mxnet {
+namespace op {
+
+namespace grid {
+enum GridGeneratorOpInputs {kData};
+enum GridGeneratorOpOutputs {kOut, kGridDst};
+enum GridGeneratorOpResource {kTempSpace};
+enum GridGeneratorTransformType {kAffine, kWarp};
+}
+
+struct GridGeneratorParam : public dmlc::Parameter<GridGeneratorParam> {
+  int transform_type;
+  TShape target_shape;
+  DMLC_DECLARE_PARAMETER(GridGeneratorParam) {
+    int shape[] = {0, 0};
+    DMLC_DECLARE_FIELD(transform_type)
+    .add_enum("affine", grid::kAffine)
+    .add_enum("warp", grid::kWarp)
+    .describe("The type of transformation. For `affine`, input data should be an affine matrix "
+              "of size (batch, 6). For `warp`, input data should be an optical flow of size "
+              "(batch, 2, h, w).");
+    DMLC_DECLARE_FIELD(target_shape).set_default(TShape(shape, shape + 2))
+    .describe("Specifies the output shape (H, W). This is required if transformation type is "
+              "`affine`. If transformation type is `warp`, this parameter is ignored.");
+  }
+};
+
+template<typename xpu, typename DType>
+class GridGeneratorOp : public Operator {
+ public:
+  explicit GridGeneratorOp(GridGeneratorParam p) {
+    this->param_ = p;
+  }
+
+  virtual void Forward(const OpContext &ctx,
+                       const std::vector<TBlob> &in_data,
+                       const std::vector<OpReqType> &req,
+                       const std::vector<TBlob> &out_data,
+                       const std::vector<TBlob> &aux_args) {
+    using namespace mshadow;
+    using namespace mshadow::expr;
+    CHECK_EQ(req[grid::kOut], kWriteTo);
+    CHECK_EQ(in_data.size(), 1U);
+    CHECK_EQ(out_data.size(), 2U);
+    Stream<xpu> *s = ctx.get_stream<xpu>();
+    switch (param_.transform_type) {
+      case grid::kAffine: {
+        // if transform_type is affine, data is affine matrix, input shape : (batch, 2, 3)
+        Tensor<xpu, 2, DType> out = out_data[grid::kOut].
+          get_with_shape<xpu, 2, DType>(Shape2(out_data[grid::kOut].shape_[0] * 2,
+                            out_data[grid::kOut].shape_[2] * out_data[grid::kOut].shape_[3]), s);
+        Tensor<xpu, 2, DType> grid_dst = out_data[grid::kGridDst].get<xpu, 2, DType>(s);
+        Shape<2> data_shape = Shape2(out_data[grid::kOut].shape_[0] * 2, 3);
+        Tensor<xpu, 2, DType> data = in_data[grid::kData]
+          .get_with_shape<xpu, 2, DType>(data_shape, s);
+        // x, y, 1
+        grid_dst[0] = range<DType>(0, grid_dst.shape_[1]);
+        grid_dst[0] = grid_dst[0] - tcast<DType>(tcast<int>(grid_dst[0] /
+          scalar<DType>(param_.target_shape[1]))) * scalar<DType>(param_.target_shape[1]);
+        grid_dst[0] = scalar<DType>(-1.0) + grid_dst[0] *
+          scalar<DType>(2.0 / (param_.target_shape[1] - 1));
+        grid_dst[1] = range<DType>(0, grid_dst.shape_[1]);
+        grid_dst[1] = scalar<DType>(-1.0) + tcast<DType>(tcast<int>(grid_dst[1] /
+          scalar<DType>(param_.target_shape[1]))) * scalar<DType>(2.0/(param_.target_shape[0] - 1));
+        grid_dst[2] = scalar<DType>(1.0);
+        Assign(out, req[grid::kOut], dot(data, grid_dst));
+        break;
+      }
+      // Warping transformation
+      case grid::kWarp: {
+        // if transform_type is warp, data is optical flow, input shape : (batch, 2, height, width)
+        // grid_src = grid_dst + optical flow
+        Tensor<xpu, 4, DType> data = in_data[grid::kData].get<xpu, 4, DType>(s);
+        Tensor<xpu, 4, DType> out = out_data[grid::kOut].get<xpu, 4, DType>(s);
+        // grid_dst : (2, H, W)
+        Tensor<xpu, 3, DType> grid_dst = out_data[grid::kGridDst].get<xpu, 3, DType>(s);
+        Tensor<xpu, 2, DType> workspace = ctx.requested[grid::kTempSpace]
+          .get_space_typed<xpu, 2, DType>(Shape2(2, 1), s);
+        grid_dst[0] = repmat(range<DType>(0, data.size(3)), data.size(2));
+        grid_dst[1] = reshape(range<DType>(0, data.size(2), 1, data.size(3)),
+                              Shape2(data.size(2), data.size(3)));
+        workspace[0] = scalar<DType>((DType(data.size(3)) - 1.0) / 2.0);
+        workspace[1] = scalar<DType>((DType(data.size(2)) - 1.0) / 2.0);
+        Assign(out, req[grid::kOut],
+               (data + broadcast_with_axis(grid_dst, -1, data.shape_[0])) /
+                 broadcast_to(reshape(workspace, Shape4(1, 2, 1, 1)),
+                              TShape(data.shape_)) - scalar<DType>(1));
+        break;
+      }
+    }
+  }
+
+  virtual void Backward(const OpContext &ctx,
+                        const std::vector<TBlob> &out_grad,
+                        const std::vector<TBlob> &in_data,
+                        const std::vector<TBlob> &out_data,
+                        const std::vector<OpReqType> &req,
+                        const std::vector<TBlob> &in_grad,
+                        const std::vector<TBlob> &aux_args) {
+    using namespace mshadow;
+    using namespace mshadow::expr;
+    CHECK_EQ(in_data.size(), 1U);
+    CHECK_EQ(out_data.size(), 2U);
+    Stream<xpu> *s = ctx.get_stream<xpu>();
+    switch (param_.transform_type) {
+      case grid::kAffine: {
+        Tensor<xpu, 2, DType> grid_dst = out_data[grid::kGridDst].get<xpu, 2, DType>(s);
+        Shape<2> data_shape = Shape2(in_grad[grid::kData].shape_[0] * 2, 3);
+        Tensor<xpu, 2, DType> gdata = in_grad[grid::kData]
+          .get_with_shape<xpu, 2, DType>(data_shape, s);
+        Shape<2> grad_shape = Shape2(out_grad[grid::kOut].shape_[0] * 2,
+          param_.target_shape[0] * param_.target_shape[1]);
+        Tensor<xpu, 2, DType> grad = out_grad[grid::kOut]
+          .get_with_shape<xpu, 2, DType>(grad_shape, s);
+        // grad : (batch * 2, H * W)   grid_dst.T : (H * W, 3)
+        Assign(gdata, req[grid::kData] , dot(grad, grid_dst.T()));
+        break;
+      }
+      case grid::kWarp: {
+        Tensor<xpu, 4, DType> grad = out_grad[grid::kOut].get<xpu, 4, DType>(s);
+        Tensor<xpu, 4, DType> gdata = in_grad[grid::kData].get<xpu, 4, DType>(s);
+        Tensor<xpu, 2, DType> workspace = ctx.requested[grid::kTempSpace]
+          .get_space_typed<xpu, 2, DType>(Shape2(2, 1), s);
+        workspace[0] = scalar<DType>((DType(gdata.size(3)) - 1.0) / 2.0);
+        workspace[1] = scalar<DType>((DType(gdata.size(2)) - 1.0) / 2.0);
+        Assign(gdata, req[grid::kData],
+               grad / broadcast_to(reshape(workspace, Shape4(1, 2, 1, 1)),
+                                   TShape(gdata.shape_)));
+        break;
+      }
+    }
+  }
+
+ private:
+  GridGeneratorParam param_;
+};  // class GridGeneratorOp
+
+template<typename xpu>
+Operator* CreateOp(GridGeneratorParam param, int dtype);
+
+#if DMLC_USE_CXX11
+class GridGeneratorProp : public OperatorProperty {  // static metadata for GridGenerator
+ public:
+  int NumVisibleOutputs() const override {
+    return 1;  // of the two outputs in ListOutputs(), one is reported as visible
+  }
+
+  int NumOutputs() const override {
+    return 2;
+  }
+
+  std::vector<std::string> ListArguments() const override {
+    return {"data"};
+  }
+
+  std::vector<std::string> ListOutputs() const override {
+    return {"output", "grid_dst"};
+  }
+
+  void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) override {
+    param_.Init(kwargs);
+  }
+
+  std::map<std::string, std::string> GetParams() const override {
+    return param_.__DICT__();
+  }
+
+  bool InferShape(std::vector<TShape> *in_shape,
+                  std::vector<TShape> *out_shape,
+                  std::vector<TShape> *aux_shape) const override {
+    using namespace mshadow;
+    CHECK_EQ(in_shape->size(), 1U) << "Input:[data]";
+    const TShape &lshape = (*in_shape)[grid::kData];
+    if (lshape.ndim() ==  0) return false;
+    out_shape->clear();
+    switch (param_.transform_type) {
+      case grid::kAffine: {  // data (batch, 6) -> output (batch, 2, H, W), grid_dst (3, H * W)
+        CHECK_EQ(lshape.ndim(), 2U) \
+          << "if transform_type is affine, data is affine matrix. "
+          "affine matrix should be 2D in batch-num_hidden";
+        CHECK_EQ(lshape[1], 6U) << "incorrect data shape[1], should be 6";
+        CHECK_GT(param_.target_shape[0], 0U) \
+            << "incorrect target_shape: " << param_.target_shape[0];
+        CHECK_GT(param_.target_shape[1], 0U) \
+            << "incorrect target_shape: " << param_.target_shape[1];
+        out_shape->push_back(Shape4(lshape[0], 2, param_.target_shape[0], param_.target_shape[1]));
+        out_shape->push_back(Shape2(3, param_.target_shape[0] * param_.target_shape[1]));
+        break;
+      }
+      case grid::kWarp: {  // data (batch, 2, y, x) -> output same shape, grid_dst (2, y, x)
+        CHECK_EQ(lshape.ndim(), 4U) \
+          << "if transform_type is warp, data is optical flow. "
+             "optical flow should be 4D in batch-num_hidden-y-x";
+        CHECK_EQ(lshape[1], 2U) << "incorrect data shape[1], should be 2";
+        out_shape->push_back(lshape);
+        out_shape->push_back(Shape3(2, lshape[2], lshape[3]));
+        break;
+      }
+    }
+    return true;
+  }
+
+  bool InferType(std::vector<int> *in_type,
+                   std::vector<int> *out_type,
+                   std::vector<int> *aux_type) const override {
+      int dtype = -1;  // -1 stands for a type that is not known yet
+      for (size_t i = 0; i < in_type->size(); ++i) {
+        if (dtype == -1) {
+          dtype = in_type->at(i);
+        } else {
+          CHECK(in_type->at(i) == dtype ||
+                in_type->at(i) == -1) <<
+                "Non-uniform data type in GridGenerator";
+        }
+      }
+      if (dtype == -1) {
+        LOG(FATAL) << "Not enough information to infer type in GridGenerator.";
+        return false;
+      }
+      size_t nin = this->ListArguments().size();  // args, aux states and outputs all get dtype
+      in_type->clear();
+      for (size_t i = 0; i < nin; ++i) in_type->push_back(dtype);
+      size_t naux = this->ListAuxiliaryStates().size();
+      aux_type->clear();
+      for (size_t i = 0; i < naux; ++i) aux_type->push_back(dtype);
+      size_t nout = this->ListOutputs().size();
+      out_type->clear();
+      for (size_t i = 0; i < nout; ++i) out_type->push_back(dtype);
+      return true;
+    }
+
+  OperatorProperty* Copy() const override {
+    auto ptr = new GridGeneratorProp();
+    ptr->param_ = param_;
+    return ptr;
+  }
+
+  std::string TypeString() const override {
+    return "GridGenerator";
+  }
+
+  std::vector<int> DeclareBackwardDependency(
+    const std::vector<int> &out_grad,
+    const std::vector<int> &in_data,
+    const std::vector<int> &out_data) const override {
+    switch (param_.transform_type) {
+      case grid::kAffine: {  // Backward reads grid_dst as well as the output gradient
+        return {out_grad[grid::kOut],
+                out_data[grid::kGridDst]};
+      }
+      case grid::kWarp: {
+        return {out_grad[grid::kOut]};
+      }
+    }
+    return {};
+  }
+
+  std::vector<ResourceRequest> ForwardResource(
+    const std::vector<TShape> &in_shape) const override {
+    switch (param_.transform_type) {
+    case grid::kAffine: {
+      return{};
+    }
+    case grid::kWarp: {
+      return{ ResourceRequest::kTempSpace };
+    }
+    }
+    return{};
+  }
+
+  std::vector<ResourceRequest> BackwardResource(
+      const std::vector<TShape> &in_shape) const override {
+    switch (param_.transform_type) {
+      case grid::kAffine: {
+        return {};
+      }
+      case grid::kWarp: {  // Backward allocates a (2, 1) scaling workspace
+        return {ResourceRequest::kTempSpace};
+      }
+    }
+    return {};
+  }
+
+  Operator* CreateOperator(Context ctx) const override {
+    LOG(FATAL) << "Not Implemented.";  // creation goes through CreateOperatorEx (needs dtype)
+    return NULL;
+  }
+
+  Operator* CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
+                             std::vector<int> *in_type) const override;
+
+ private:
+  GridGeneratorParam param_;
+};  // class GridGeneratorProp
+#endif  // DMLC_USE_CXX11
+}  // namespace op
+}  // namespace mxnet
+#endif  // MXNET_OPERATOR_GRID_GENERATOR_INL_H_
diff --git a/src/operator/grid_generator.cc b/src/operator/grid_generator.cc
index 62ff75a..411f856 100644
--- a/src/operator/grid_generator.cc
+++ b/src/operator/grid_generator.cc
@@ -1,36 +1,54 @@
-/*!
- * Copyright (c) 2017 by Contributors
- * \file grid_generator.cc
- * \brief
- * \author Xu Dong
-*/
-
-#include "./grid_generator-inl.h"
-
-namespace mxnet {
-namespace op {
-template<>
-Operator* CreateOp<cpu>(GridGeneratorParam param, int dtype) {
-  Operator *op = NULL;
-  if (dtype == mshadow::kFloat32) {
-    op = new GridGeneratorOp<cpu, float>(param);
-  } else {
-    LOG(FATAL) << "Other DTypes are not supported!";
-  }
-  return op;
-}
-
-Operator *GridGeneratorProp::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
-                                     std::vector<int> *in_type) const {
-  DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]);
-}
-
-DMLC_REGISTER_PARAMETER(GridGeneratorParam);
-
-MXNET_REGISTER_OP_PROPERTY(GridGenerator, GridGeneratorProp)
-.add_argument("data", "NDArray-or-Symbol", "Input data to the function.")
-.add_arguments(GridGeneratorParam::__FIELDS__())
-.describe("Generates 2D sampling grid for bilinear sampling.");
-
-}  // namespace op
-}  // namespace mxnet
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file grid_generator.cc
+ * \brief CPU factory and operator registration for GridGenerator
+ * \author Xu Dong
+*/
+
+#include "./grid_generator-inl.h"
+
+namespace mxnet {
+namespace op {
+// CPU factory for GridGeneratorOp. Only mshadow::kFloat32 is accepted here;
+// any other dtype is reported through LOG(FATAL).
+template<>
+Operator* CreateOp<cpu>(GridGeneratorParam param, int dtype) {
+  if (dtype != mshadow::kFloat32) {
+    LOG(FATAL) << "Other DTypes are not supported!";
+    return NULL;
+  }
+  return new GridGeneratorOp<cpu, float>(param);
+}
+
+Operator *GridGeneratorProp::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
+                                     std::vector<int> *in_type) const {  // in_shape is unused
+  DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]);  // dtype comes from the first input
+}
+
+DMLC_REGISTER_PARAMETER(GridGeneratorParam);
+
+MXNET_REGISTER_OP_PROPERTY(GridGenerator, GridGeneratorProp)
+.add_argument("data", "NDArray-or-Symbol", "Input data to the function.")
+.add_arguments(GridGeneratorParam::__FIELDS__())  // fields declared by GridGeneratorParam
+.describe("Generates 2D sampling grid for bilinear sampling.");
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/grid_generator.cu b/src/operator/grid_generator.cu
index 991948c..7c0a802 100644
--- a/src/operator/grid_generator.cu
+++ b/src/operator/grid_generator.cu
@@ -1,21 +1,39 @@
-/*!
- * Copyright (c) 2017 by Contributors
- * \file grid_generator.cu
- * \brief
- * \author Xu Dong
-*/
-
-#include "./grid_generator-inl.h"
-
-namespace mxnet {
-namespace op {
-template<>
-Operator* CreateOp<gpu>(GridGeneratorParam param, int dtype) {
-  Operator *op = NULL;
-  MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
-    op = new GridGeneratorOp<gpu, DType>(param);
-  })
-  return op;
-}
-}  // namespace op
-}  // namespace mxnet
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file grid_generator.cu
+ * \brief GPU factory for the GridGenerator operator
+ * \author Xu Dong
+*/
+
+#include "./grid_generator-inl.h"
+
+namespace mxnet {
+namespace op {
+template<>
+Operator* CreateOp<gpu>(GridGeneratorParam param, int dtype) {  // GPU factory
+  Operator *op = NULL;  // stays NULL unless the switch body below assigns it
+  MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
+    op = new GridGeneratorOp<gpu, DType>(param);
+  })
+  return op;
+}
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/identity_attach_KL_sparse_reg-inl.h b/src/operator/identity_attach_KL_sparse_reg-inl.h
index 413bac9..2307914 100644
--- a/src/operator/identity_attach_KL_sparse_reg-inl.h
+++ b/src/operator/identity_attach_KL_sparse_reg-inl.h
@@ -1,177 +1,195 @@
-/*!
- * Copyright (c) 2015 by Contributors
- * \file sparse_reg-inl.h
- * \brief
-*/
-#ifndef MXNET_OPERATOR_IDENTITY_ATTACH_KL_SPARSE_REG_INL_H_
-#define MXNET_OPERATOR_IDENTITY_ATTACH_KL_SPARSE_REG_INL_H_
-#include <dmlc/logging.h>
-#include <mxnet/operator.h>
-#include <cstring>
-#include <map>
-#include <string>
-#include <vector>
-#include <utility>
-#include "./mshadow_op.h"
-#include "./operator_common.h"
-
-namespace mxnet {
-namespace op {
-
-namespace sparsereg {
-enum IdentityAttachKLSparseRegOpInputs {kData};
-enum IdentityAttachKLSparseRegOpOutputs {kOut};
-enum IdentityAttachKLSparseRegOpAuxiliary {kMovingAvg};
-enum IdentityAttachKLSparseRegBackResource {kTempSpace};
-}  // namespace sparsereg
-
-struct IdentityAttachKLSparseRegParam : public dmlc::Parameter<IdentityAttachKLSparseRegParam> {
-  float penalty;
-  float sparseness_target;
-  float momentum;
-  DMLC_DECLARE_PARAMETER(IdentityAttachKLSparseRegParam) {
-    DMLC_DECLARE_FIELD(sparseness_target).set_default(0.1)
-    .set_range(0, 1)
-    .describe("The sparseness target");
-    DMLC_DECLARE_FIELD(penalty).set_default(0.001)
-    .describe("The tradeoff parameter for the sparseness penalty");
-    DMLC_DECLARE_FIELD(momentum).set_default(0.9)
-    .set_range(0, 1)
-    .describe("The momentum for running average");
-  }
-};  // struct IdentityAttachKLSparseRegParam
-
-// This op regularizes the output of a sigmoid activation function.
-// In forward, it simply copies the input.
-// In backward, it attaches sparseness penalty to the gradient.
-// The regularization is based on the KL divergence of mean activation and target.
-// More details: P11 of https://www.cs.toronto.edu/~hinton/absps/guideTR.pdf
-// Please make sure that it is only paired with sigmoid activation, otherwise NaN may occur.
-template<typename xpu>
-class IdentityAttachKLSparseRegOp : public Operator {
- public:
-  explicit IdentityAttachKLSparseRegOp(IdentityAttachKLSparseRegParam param) {
-    this->param_ = param;
-  }
-  virtual void Forward(const OpContext &ctx,
-                       const std::vector<TBlob> &in_data,
-                       const std::vector<OpReqType> &req,
-                       const std::vector<TBlob> &out_data,
-                       const std::vector<TBlob> &aux_args) {
-    using namespace mshadow;
-    using namespace mshadow::expr;
-    CHECK_EQ(in_data.size(), 1U);
-    CHECK_EQ(out_data.size(), 1U);
-    Stream<xpu> *s = ctx.get_stream<xpu>();
-    Tensor<xpu, 2> data = in_data[sparsereg::kData].FlatTo2D<xpu, real_t>(s);
-    Tensor<xpu, 2> out = out_data[sparsereg::kOut].FlatTo2D<xpu, real_t>(s);
-    Assign(out, req[sparsereg::kData], F<mshadow_op::identity>(data));
-  }
-
-  virtual void Backward(const OpContext &ctx,
-                        const std::vector<TBlob> &out_grad,
-                        const std::vector<TBlob> &in_data,
-                        const std::vector<TBlob> &out_data,
-                        const std::vector<OpReqType> &req,
-                        const std::vector<TBlob> &in_grad,
-                        const std::vector<TBlob> &aux_args) {
-    using namespace mshadow;
-    using namespace mshadow::expr;
-    Stream<xpu> *s = ctx.get_stream<xpu>();
-    Tensor<xpu, 2> grad_in = in_grad[sparsereg::kData].FlatTo2D<xpu, real_t>(s);
-    Tensor<xpu, 2> data_in = in_data[sparsereg::kData].FlatTo2D<xpu, real_t>(s);
-    Tensor<xpu, 2> grad_out = out_grad[sparsereg::kOut].FlatTo2D<xpu, real_t>(s);
-    Tensor<xpu, 1> moving_avg = aux_args[sparsereg::kMovingAvg].get<xpu, 1, real_t>(s);
-    Tensor<xpu, 1> avg = ctx.requested[sparsereg::kTempSpace].get_space<xpu>(
-        mshadow::Shape1(moving_avg.shape_[0]), s);
-    avg = sumall_except_dim<1>(data_in);
-    avg /= data_in.shape_[0];
-    moving_avg = param_.momentum * moving_avg + (1 - param_.momentum) * avg;
-    Assign(grad_in, req[sparsereg::kData], grad_out + param_.penalty *
-      (-param_.sparseness_target / broadcast<1>(moving_avg, data_in.shape_) +
-      ((1 - param_.sparseness_target) / (1 - broadcast<1>(moving_avg, data_in.shape_)))));
-  }
-
- private:
-  IdentityAttachKLSparseRegParam param_;
-};  // class IdentityAttachKLSparseRegOp
-
-template<typename xpu>
-Operator *CreateOp(IdentityAttachKLSparseRegParam param);
-
-#if DMLC_USE_CXX11
-class IdentityAttachKLSparseRegProp : public OperatorProperty {
- public:
-  void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) override {
-    param_.Init(kwargs);
-  }
-
-  std::map<std::string, std::string> GetParams() const override {
-    return param_.__DICT__();
-  }
-
-  bool InferShape(std::vector<TShape> *in_shape,
-                  std::vector<TShape> *out_shape,
-                  std::vector<TShape> *aux_shape) const override {
-    using namespace mshadow;
-    CHECK_EQ(in_shape->size(), 1U);
-    const TShape &dshape = in_shape->at(sparsereg::kData);
-    if (dshape.ndim() == 0) return false;
-    out_shape->clear();
-    out_shape->push_back(dshape);
-    aux_shape->clear();
-    aux_shape->push_back(Shape1(dshape[1]));
-    return true;
-  }
-
-  OperatorProperty* Copy() const override {
-    auto ptr = new IdentityAttachKLSparseRegProp();
-    ptr->param_ = param_;
-    return ptr;
-  }
-
-  std::string TypeString() const override {
-    return "IdentityAttachKLSparseReg";
-  }
-
-  std::vector<int> DeclareBackwardDependency(
-      const std::vector<int> &out_grad,
-      const std::vector<int> &in_data,
-      const std::vector<int> &out_data) const override {
-    return {out_grad[sparsereg::kOut], in_data[sparsereg::kData]};
-  }
-
-  std::vector<std::pair<int, void*> > ForwardInplaceOption(
-      const std::vector<int> &in_data,
-      const std::vector<void*> &out_data) const override {
-    return {{in_data[sparsereg::kData], out_data[sparsereg::kOut]}};
-  }
-
-  std::vector<std::pair<int, void*> > BackwardInplaceOption(
-      const std::vector<int> &out_grad,
-      const std::vector<int> &in_data,
-      const std::vector<int> &out_data,
-      const std::vector<void*> &in_grad) const override {
-    return { {out_grad[sparsereg::kOut], in_grad[sparsereg::kData]} };
-  }
-
-  std::vector<std::string> ListAuxiliaryStates() const override {
-    return {"moving_avg"};
-  }
-
-  std::vector<ResourceRequest> BackwardResource(
-      const std::vector<TShape> &in_shape) const override {
-    return {ResourceRequest::kTempSpace};
-  }
-
-  Operator* CreateOperator(Context ctx) const override;
-
- private:
-  IdentityAttachKLSparseRegParam param_;
-};  // class IdentityAttachKLSparseRegProperty
-
-#endif  // DMLC_USE_CXX11
-}  // namespace op
-}  // namespace mxnet
-
-#endif  // MXNET_OPERATOR_IDENTITY_ATTACH_KL_SPARSE_REG_INL_H_
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file identity_attach_KL_sparse_reg-inl.h
+ * \brief Identity operator that attaches a KL-divergence sparseness penalty in backward
+*/
+#ifndef MXNET_OPERATOR_IDENTITY_ATTACH_KL_SPARSE_REG_INL_H_
+#define MXNET_OPERATOR_IDENTITY_ATTACH_KL_SPARSE_REG_INL_H_
+#include <dmlc/logging.h>
+#include <mxnet/operator.h>
+#include <cstring>
+#include <map>
+#include <string>
+#include <vector>
+#include <utility>
+#include "./mshadow_op.h"
+#include "./operator_common.h"
+
+namespace mxnet {
+namespace op {
+
+namespace sparsereg {  // index constants used by IdentityAttachKLSparseReg
+enum IdentityAttachKLSparseRegOpInputs {kData};  // index into in_data / in_grad
+enum IdentityAttachKLSparseRegOpOutputs {kOut};  // index into out_data / out_grad
+enum IdentityAttachKLSparseRegOpAuxiliary {kMovingAvg};  // index into aux_args
+enum IdentityAttachKLSparseRegBackResource {kTempSpace};  // index into ctx.requested (Backward)
+}  // namespace sparsereg
+
+struct IdentityAttachKLSparseRegParam : public dmlc::Parameter<IdentityAttachKLSparseRegParam> {
+  float penalty;  // multiplies the sparseness term added to the gradient in Backward
+  float sparseness_target;  // target value the moving average is compared against
+  float momentum;  // weight of the previous moving average in the running update
+  DMLC_DECLARE_PARAMETER(IdentityAttachKLSparseRegParam) {
+    DMLC_DECLARE_FIELD(sparseness_target).set_default(0.1)
+    .set_range(0, 1)
+    .describe("The sparseness target");
+    DMLC_DECLARE_FIELD(penalty).set_default(0.001)
+    .describe("The tradeoff parameter for the sparseness penalty");
+    DMLC_DECLARE_FIELD(momentum).set_default(0.9)
+    .set_range(0, 1)
+    .describe("The momentum for running average");
+  }
+};  // struct IdentityAttachKLSparseRegParam
+
+// This op regularizes the output of a sigmoid activation function.
+// In forward, it simply copies the input.
+// In backward, it attaches sparseness penalty to the gradient.
+// The regularization is based on the KL divergence of mean activation and target.
+// More details: P11 of https://www.cs.toronto.edu/~hinton/absps/guideTR.pdf
+// Please make sure that it is only paired with sigmoid activation, otherwise NaN may occur.
+template<typename xpu>
+class IdentityAttachKLSparseRegOp : public Operator {
+ public:
+  explicit IdentityAttachKLSparseRegOp(IdentityAttachKLSparseRegParam param) {
+    this->param_ = param;
+  }
+  virtual void Forward(const OpContext &ctx,
+                       const std::vector<TBlob> &in_data,
+                       const std::vector<OpReqType> &req,
+                       const std::vector<TBlob> &out_data,
+                       const std::vector<TBlob> &aux_args) {
+    using namespace mshadow;
+    using namespace mshadow::expr;
+    CHECK_EQ(in_data.size(), 1U);
+    CHECK_EQ(out_data.size(), 1U);
+    Stream<xpu> *s = ctx.get_stream<xpu>();
+    Tensor<xpu, 2> data = in_data[sparsereg::kData].FlatTo2D<xpu, real_t>(s);
+    Tensor<xpu, 2> out = out_data[sparsereg::kOut].FlatTo2D<xpu, real_t>(s);
+    Assign(out, req[sparsereg::kOut], F<mshadow_op::identity>(data));  // identity copy
+  }
+
+  virtual void Backward(const OpContext &ctx,
+                        const std::vector<TBlob> &out_grad,
+                        const std::vector<TBlob> &in_data,
+                        const std::vector<TBlob> &out_data,
+                        const std::vector<OpReqType> &req,
+                        const std::vector<TBlob> &in_grad,
+                        const std::vector<TBlob> &aux_args) {
+    using namespace mshadow;
+    using namespace mshadow::expr;
+    Stream<xpu> *s = ctx.get_stream<xpu>();
+    Tensor<xpu, 2> grad_in = in_grad[sparsereg::kData].FlatTo2D<xpu, real_t>(s);
+    Tensor<xpu, 2> data_in = in_data[sparsereg::kData].FlatTo2D<xpu, real_t>(s);
+    Tensor<xpu, 2> grad_out = out_grad[sparsereg::kOut].FlatTo2D<xpu, real_t>(s);
+    Tensor<xpu, 1> moving_avg = aux_args[sparsereg::kMovingAvg].get<xpu, 1, real_t>(s);
+    Tensor<xpu, 1> avg = ctx.requested[sparsereg::kTempSpace].get_space<xpu>(
+        mshadow::Shape1(moving_avg.shape_[0]), s);
+    avg = sumall_except_dim<1>(data_in);  // sum over every dimension except dim 1
+    avg /= data_in.shape_[0];  // divide by the size of dim 0 to get the mean
+    moving_avg = param_.momentum * moving_avg + (1 - param_.momentum) * avg;  // updates aux state
+    Assign(grad_in, req[sparsereg::kData], grad_out + param_.penalty *
+      (-param_.sparseness_target / broadcast<1>(moving_avg, data_in.shape_) +
+      ((1 - param_.sparseness_target) / (1 - broadcast<1>(moving_avg, data_in.shape_)))));
+  }
+
+ private:
+  IdentityAttachKLSparseRegParam param_;
+};  // class IdentityAttachKLSparseRegOp
+
+template<typename xpu>  // specialized for cpu in the .cc file and for gpu in the .cu file
+Operator *CreateOp(IdentityAttachKLSparseRegParam param);
+
+#if DMLC_USE_CXX11
+class IdentityAttachKLSparseRegProp : public OperatorProperty {
+ public:
+  void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) override {
+    param_.Init(kwargs);
+  }
+
+  std::map<std::string, std::string> GetParams() const override {
+    return param_.__DICT__();
+  }
+
+  bool InferShape(std::vector<TShape> *in_shape,
+                  std::vector<TShape> *out_shape,
+                  std::vector<TShape> *aux_shape) const override {
+    CHECK_EQ(in_shape->size(), 1U);
+    const TShape &dshape = in_shape->at(sparsereg::kData);
+    if (dshape.ndim() == 0) return false;
+    CHECK_GE(dshape.ndim(), 2U) << "data should have at least 2 dimensions";  // dshape[1] below
+    out_shape->clear();
+    out_shape->push_back(dshape);  // output has the same shape as data
+    aux_shape->clear();
+    aux_shape->push_back(mshadow::Shape1(dshape[1]));  // moving_avg: one entry per index of dim 1
+    return true;
+  }
+
+  OperatorProperty* Copy() const override {
+    auto ptr = new IdentityAttachKLSparseRegProp();
+    ptr->param_ = param_;
+    return ptr;
+  }
+
+  std::string TypeString() const override {
+    return "IdentityAttachKLSparseReg";
+  }
+
+  std::vector<int> DeclareBackwardDependency(
+      const std::vector<int> &out_grad,
+      const std::vector<int> &in_data,
+      const std::vector<int> &out_data) const override {
+    return {out_grad[sparsereg::kOut], in_data[sparsereg::kData]};  // out_data is not needed
+  }
+
+  std::vector<std::pair<int, void*> > ForwardInplaceOption(
+      const std::vector<int> &in_data,
+      const std::vector<void*> &out_data) const override {
+    return {{in_data[sparsereg::kData], out_data[sparsereg::kOut]}};
+  }
+
+  std::vector<std::pair<int, void*> > BackwardInplaceOption(
+      const std::vector<int> &out_grad,
+      const std::vector<int> &in_data,
+      const std::vector<int> &out_data,
+      const std::vector<void*> &in_grad) const override {
+    return { {out_grad[sparsereg::kOut], in_grad[sparsereg::kData]} };
+  }
+
+  std::vector<std::string> ListAuxiliaryStates() const override {
+    return {"moving_avg"};
+  }
+
+  std::vector<ResourceRequest> BackwardResource(
+      const std::vector<TShape> &in_shape) const override {
+    return {ResourceRequest::kTempSpace};  // Backward keeps the per-call mean in temp space
+  }
+
+  Operator* CreateOperator(Context ctx) const override;
+
+ private:
+  IdentityAttachKLSparseRegParam param_;
+};  // class IdentityAttachKLSparseRegProp
+
+#endif  // DMLC_USE_CXX11
+}  // namespace op
+}  // namespace mxnet
+
+#endif  // MXNET_OPERATOR_IDENTITY_ATTACH_KL_SPARSE_REG_INL_H_
diff --git a/src/operator/identity_attach_KL_sparse_reg.cc b/src/operator/identity_attach_KL_sparse_reg.cc
index 51e6772..5e77677 100644
--- a/src/operator/identity_attach_KL_sparse_reg.cc
+++ b/src/operator/identity_attach_KL_sparse_reg.cc
@@ -1,37 +1,55 @@
-/*!
- * Copyright (c) 2015 by Contributors
- * \file identity_attach_KL_sparse_reg.cc
- * \brief\
-*/
-#include "./identity_attach_KL_sparse_reg-inl.h"
-#include <nnvm/op_attr_types.h>
-
-namespace mxnet {
-namespace op {
-template<>
-Operator *CreateOp<cpu>(IdentityAttachKLSparseRegParam param) {
-  return new IdentityAttachKLSparseRegOp<cpu>(param);
-}
-
-Operator *IdentityAttachKLSparseRegProp::CreateOperator(Context ctx) const {
-  DO_BIND_DISPATCH(CreateOp, param_);
-}
-
-DMLC_REGISTER_PARAMETER(IdentityAttachKLSparseRegParam);
-
-MXNET_REGISTER_OP_PROPERTY(IdentityAttachKLSparseReg, IdentityAttachKLSparseRegProp)
-.describe("Apply a sparse regularization to the output a sigmoid activation function.")
-.add_argument("data", "NDArray-or-Symbol", "Input data.")
-.add_arguments(IdentityAttachKLSparseRegParam::__FIELDS__());
-
-NNVM_REGISTER_OP(IdentityAttachKLSparseReg)
-.set_attr<nnvm::FSetInputVarAttrOnCompose>("FSetInputVarAttrOnCompose",
-    [](const nnvm::NodeAttrs& attrs, nnvm::NodePtr var, const int index) {
-      if (var->attrs.dict.find("__init__") != var->attrs.dict.end()) return;
-      if (index == 1) {
-        var->attrs.dict["__init__"] = "[\"zero\", {}]";
-      }
-    });
-}  // namespace op
-}  // namespace mxnet
-
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file identity_attach_KL_sparse_reg.cc
+ * \brief Registration and CPU factory for the IdentityAttachKLSparseReg operator
+*/
+#include "./identity_attach_KL_sparse_reg-inl.h"
+#include <nnvm/op_attr_types.h>
+
+namespace mxnet {
+namespace op {
+template<>
+Operator *CreateOp<cpu>(IdentityAttachKLSparseRegParam param) {  // CPU factory
+  return new IdentityAttachKLSparseRegOp<cpu>(param);  // caller receives a raw owning pointer
+}
+
+Operator *IdentityAttachKLSparseRegProp::CreateOperator(Context ctx) const {
+  DO_BIND_DISPATCH(CreateOp, param_);  // NOTE(review): presumably picks cpu/gpu from ctx -- confirm
+}
+
+DMLC_REGISTER_PARAMETER(IdentityAttachKLSparseRegParam);
+
+MXNET_REGISTER_OP_PROPERTY(IdentityAttachKLSparseReg, IdentityAttachKLSparseRegProp)
+.describe("Apply a sparse regularization to the output of a sigmoid activation function.")
+.add_argument("data", "NDArray-or-Symbol", "Input data.")
+.add_arguments(IdentityAttachKLSparseRegParam::__FIELDS__());  // fields declared by the param
+
+NNVM_REGISTER_OP(IdentityAttachKLSparseReg)
+.set_attr<nnvm::FSetInputVarAttrOnCompose>("FSetInputVarAttrOnCompose",
+    [](const nnvm::NodeAttrs& attrs, nnvm::NodePtr var, const int index) {
+      if (var->attrs.dict.find("__init__") != var->attrs.dict.end()) return;  // keep existing
+      if (index == 1) {  // NOTE(review): index 1 is presumably the moving_avg aux input -- confirm
+        var->attrs.dict["__init__"] = "[\"zero\", {}]";
+      }
+    });
+}  // namespace op
+}  // namespace mxnet
+
diff --git a/src/operator/identity_attach_KL_sparse_reg.cu b/src/operator/identity_attach_KL_sparse_reg.cu
index 6188fb9..0a11fb1 100644
--- a/src/operator/identity_attach_KL_sparse_reg.cu
+++ b/src/operator/identity_attach_KL_sparse_reg.cu
@@ -1,16 +1,34 @@
-/*!
- * Copyright (c) 2015 by Contributors
- * \file identity_attach_KL_sparse_reg.cu
- * \brief
-*/
-#include "./identity_attach_KL_sparse_reg-inl.h"
-
-namespace mxnet {
-namespace op {
-template<>
-Operator *CreateOp<gpu>(IdentityAttachKLSparseRegParam param) {
-  return new IdentityAttachKLSparseRegOp<gpu>(param);
-}
-
-}  // namespace op
-}  // namespace mxnet
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file identity_attach_KL_sparse_reg.cu
+ * \brief GPU factory for the IdentityAttachKLSparseReg operator
+*/
+#include "./identity_attach_KL_sparse_reg-inl.h"
+
+namespace mxnet {
+namespace op {
+template<>
+Operator *CreateOp<gpu>(IdentityAttachKLSparseRegParam param) {  // GPU factory
+  return new IdentityAttachKLSparseRegOp<gpu>(param);  // caller receives a raw owning pointer
+}
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/instance_norm-inl.h b/src/operator/instance_norm-inl.h
index 4a4f515..6e78f76 100644
--- a/src/operator/instance_norm-inl.h
+++ b/src/operator/instance_norm-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file instance_norm-inl.h
  * \brief Reproducing paper Instance Normalization: The Missing Ingredient for
  * Fast Stylization, D. Ulyanov, A. Vedaldi, V. Lempitsky, 2016
diff --git a/src/operator/instance_norm.cc b/src/operator/instance_norm.cc
index cc2bd6b..0666b4b 100644
--- a/src/operator/instance_norm.cc
+++ b/src/operator/instance_norm.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file instance_norm.cc
  * \brief
  * \author Sebastian Bodenstein
diff --git a/src/operator/instance_norm.cu b/src/operator/instance_norm.cu
index 0960084..9f8cbea 100644
--- a/src/operator/instance_norm.cu
+++ b/src/operator/instance_norm.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file instance_norm.cu
  * \brief
  * \author Sebastian Bodenstein
diff --git a/src/operator/l2_normalization-inl.h b/src/operator/l2_normalization-inl.h
index a49c836..c1f17ac 100644
--- a/src/operator/l2_normalization-inl.h
+++ b/src/operator/l2_normalization-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file l2_normalization_op-inl.h
  * \brief instance l2 Normalization op
 */
diff --git a/src/operator/l2_normalization.cc b/src/operator/l2_normalization.cc
index 8a4112d..6995a0d 100644
--- a/src/operator/l2_normalization.cc
+++ b/src/operator/l2_normalization.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file l2_normalization.cc
  * \brief l2 normalization operator
 */
diff --git a/src/operator/l2_normalization.cu b/src/operator/l2_normalization.cu
index d7bab35..ae76278 100644
--- a/src/operator/l2_normalization.cu
+++ b/src/operator/l2_normalization.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file l2_normalization.cu
  * \brief l2 normalization operator
 */
diff --git a/src/operator/leaky_relu-inl.h b/src/operator/leaky_relu-inl.h
index b0a5c0e..828930a 100644
--- a/src/operator/leaky_relu-inl.h
+++ b/src/operator/leaky_relu-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file leaky_relu-inl.h
  * \brief leaky relu family operator
  * \author Bing Xu
diff --git a/src/operator/leaky_relu.cc b/src/operator/leaky_relu.cc
index 25fe17e..da58bd9 100644
--- a/src/operator/leaky_relu.cc
+++ b/src/operator/leaky_relu.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file leaky_relu.cc
  * \brief
  * \author Bing Xu
@@ -24,7 +42,7 @@ DMLC_REGISTER_PARAMETER(LeakyReLUParam);
 MXNET_REGISTER_OP_PROPERTY(LeakyReLU, LeakyReLUProp)
 .describe(R"code(Applies Leaky rectified linear unit activation element-wise to the input.
 
-Leaky ReLUs attempt to fix the "dying ReLU" problem by allowing a small `slope` 
+Leaky ReLUs attempt to fix the "dying ReLU" problem by allowing a small `slope`
 when the input is negative and has a slope of one when input is positive.
 
 The following modified ReLU Activation functions are supported:
diff --git a/src/operator/leaky_relu.cu b/src/operator/leaky_relu.cu
index c9af119..b9b3a7b 100644
--- a/src/operator/leaky_relu.cu
+++ b/src/operator/leaky_relu.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file leaky_relu.cc
  * \brief
  * \author Bing Xu
diff --git a/src/operator/loss_binary_op-inl.h b/src/operator/loss_binary_op-inl.h
index a61cee7..8add827 100644
--- a/src/operator/loss_binary_op-inl.h
+++ b/src/operator/loss_binary_op-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file loss_binary_op-inl.h
  * \brief Loss functions
  */
diff --git a/src/operator/loss_binary_op.cc b/src/operator/loss_binary_op.cc
index 43bf694..d0a7794 100644
--- a/src/operator/loss_binary_op.cc
+++ b/src/operator/loss_binary_op.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file loss_binary_op.cc
  * \brief loss function that takes a data and label
 */
diff --git a/src/operator/loss_binary_op.cu b/src/operator/loss_binary_op.cu
index 66700e7..8694b9f 100644
--- a/src/operator/loss_binary_op.cu
+++ b/src/operator/loss_binary_op.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file loss_binary_op.cu
  * \brief loss function that takes a data and label
 */
diff --git a/src/operator/lrn-inl.h b/src/operator/lrn-inl.h
index 66be9ac..a320a26 100644
--- a/src/operator/lrn-inl.h
+++ b/src/operator/lrn-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file lrn-inl.h
  * \brief
  * \author Bing Xu
diff --git a/src/operator/lrn.cc b/src/operator/lrn.cc
index ac4a309..46f4fca 100644
--- a/src/operator/lrn.cc
+++ b/src/operator/lrn.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file lrn.cc
  * \brief
  * \author Bing Xu
diff --git a/src/operator/lrn.cu b/src/operator/lrn.cu
index 681de80..702f4b2 100644
--- a/src/operator/lrn.cu
+++ b/src/operator/lrn.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file lrn.cu
  * \brief
  * \author Bing Xu
diff --git a/src/operator/make_loss-inl.h b/src/operator/make_loss-inl.h
index 65af627..3f4a993 100644
--- a/src/operator/make_loss-inl.h
+++ b/src/operator/make_loss-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file make_loss-inl.h
  * \brief special layer for propagating loss
 */
diff --git a/src/operator/make_loss.cc b/src/operator/make_loss.cc
index 1be93de..748357d 100644
--- a/src/operator/make_loss.cc
+++ b/src/operator/make_loss.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file make_loss.cc
  * \brief special layer for propagating loss
 */
diff --git a/src/operator/make_loss.cu b/src/operator/make_loss.cu
index 5f5fad6..7f50850 100644
--- a/src/operator/make_loss.cu
+++ b/src/operator/make_loss.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file make_loss.cu
  * \brief special layer for propagating loss
 */
diff --git a/src/operator/mshadow_op.h b/src/operator/mshadow_op.h
index 8d86704..f7815d2 100644
--- a/src/operator/mshadow_op.h
+++ b/src/operator/mshadow_op.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file mshadow_op.h
  * \brief
  * \author Bing Xu
diff --git a/src/operator/mxnet_op.h b/src/operator/mxnet_op.h
index 9b5dcfe..0af7d02 100644
--- a/src/operator/mxnet_op.h
+++ b/src/operator/mxnet_op.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file mxnet_op.h
  * \brief
  * \author Junyuan Xie
diff --git a/src/operator/nn/im2col.cuh b/src/operator/nn/im2col.cuh
index 786fd22..edd5b0d 100644
--- a/src/operator/nn/im2col.cuh
+++ b/src/operator/nn/im2col.cuh
@@ -1,34 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
  ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************
  *
  * COPYRIGHT
- * 
+ *
  * All contributions by the University of California:
  * Copyright (c) 2014-2017 The Regents of the University of California (Regents)
  * All rights reserved.
- * 
+ *
  * All other contributions:
  * Copyright (c) 2014-2017, the respective contributors
  * All rights reserved.
- * 
+ *
  * Caffe uses a shared copyright model: each contributor holds copyright over
  * their contributions to Caffe. The project versioning records all such
  * contribution and copyright details. If a contributor wants to further mark
  * their specific copyright on a particular contribution, they should indicate
  * their copyright solely in the commit message of the change when it is
  * committed.
- * 
+ *
  * LICENSE
- * 
+ *
  * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met: 
- * 
+ * modification, are permitted provided that the following conditions are met:
+ *
  * 1. Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer. 
+ * list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright notice,
  * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution. 
- * 
+ * and/or other materials provided with the distribution.
+ *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -39,9 +58,9 @@
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- * 
+ *
  * CONTRIBUTION AGREEMENT
- * 
+ *
  * By contributing to the BVLC/caffe repository through pull-request, comment,
  * or otherwise, the contributor releases their content to the
  * license and copyright terms herein.
@@ -304,7 +323,7 @@ inline void im2col(mshadow::Stream<gpu>* s,
         <<<cuda_get_num_blocks(num_kernels), mshadow::cuda::kBaseThreadNum,
            0, mshadow::Stream<gpu>::GetStream(s)>>>(
         num_kernels, data_im, im_shape[2], im_shape[3], kernel_shape[0], kernel_shape[1],
-        pad[0], pad[1], stride[0], stride[1], dilation[0], dilation[1], 
+        pad[0], pad[1], stride[0], stride[1], dilation[0], dilation[1],
         col_shape[1], col_shape[2], data_col);
     break;
   case 3:
diff --git a/src/operator/nn/im2col.h b/src/operator/nn/im2col.h
index ce4d9e3..621b245 100644
--- a/src/operator/nn/im2col.h
+++ b/src/operator/nn/im2col.h
@@ -1,3 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
  ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************
  *
@@ -48,7 +67,6 @@
  *
  ***************** END Caffe Copyright Notice and Disclaimer ********************
  *
- * Copyright (c) 2017 by Contributors
  * \file im2col.h
  * \brief Function definitions of converting an image to
  * column matrix based on kernel, padding, and dilation.
diff --git a/src/operator/nn/pool.cuh b/src/operator/nn/pool.cuh
index 54fd346..0e9cff0 100644
--- a/src/operator/nn/pool.cuh
+++ b/src/operator/nn/pool.cuh
@@ -1,3 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
  ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************
  *
diff --git a/src/operator/nn/pool.h b/src/operator/nn/pool.h
index 79accb5..3bac865 100644
--- a/src/operator/nn/pool.h
+++ b/src/operator/nn/pool.h
@@ -1,3 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
  ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************
  *
@@ -48,7 +67,6 @@
  *
  ***************** END Caffe Copyright Notice and Disclaimer ********************
  *
- * Copyright (c) 2017 by Contributors
  * \file pool.h
  * \brief Function definitions of pooling 1/2/3-D images.
  * We adopted looping 2-D image pixels from Caffe and extended it to 1-D and 3-D cases.
diff --git a/src/operator/nn/softmax-inl.h b/src/operator/nn/softmax-inl.h
index 749ad73..e1150b1 100644
--- a/src/operator/nn/softmax-inl.h
+++ b/src/operator/nn/softmax-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file softmax-inl.h
  * \brief
 */
diff --git a/src/operator/nn/softmax.cc b/src/operator/nn/softmax.cc
index f1f2078..58c1a05 100644
--- a/src/operator/nn/softmax.cc
+++ b/src/operator/nn/softmax.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file softmax.cc
  * \brief CPU Implementation of softmax
  */
diff --git a/src/operator/nn/softmax.cu b/src/operator/nn/softmax.cu
index 570f5bf..d5a843d 100644
--- a/src/operator/nn/softmax.cu
+++ b/src/operator/nn/softmax.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file softmax.cc
  * \brief CPU Implementation of softmax
  */
diff --git a/src/operator/nnpack/nnpack_convolution-inl.h b/src/operator/nnpack/nnpack_convolution-inl.h
index 03f4a0b..4a13426 100644
--- a/src/operator/nnpack/nnpack_convolution-inl.h
+++ b/src/operator/nnpack/nnpack_convolution-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file nnpack_convolution-inl.h
  * \brief
  * \author Carwin
diff --git a/src/operator/nnpack/nnpack_fully_connected-inl.h b/src/operator/nnpack/nnpack_fully_connected-inl.h
index 2d87db1..f85ddd8 100644
--- a/src/operator/nnpack/nnpack_fully_connected-inl.h
+++ b/src/operator/nnpack/nnpack_fully_connected-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file nnpack_fully_connected-inl.h
  * \brief
  * \author Wei Wu
diff --git a/src/operator/nnpack/nnpack_pooling-inl.h b/src/operator/nnpack/nnpack_pooling-inl.h
index 0df070d..968ead1 100644
--- a/src/operator/nnpack/nnpack_pooling-inl.h
+++ b/src/operator/nnpack/nnpack_pooling-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file nnpack_pooling-inl.h
  * \brief
  * \author Wei Wu
diff --git a/src/operator/nnpack/nnpack_util.cc b/src/operator/nnpack/nnpack_util.cc
index 8004bb1..b873b59 100644
--- a/src/operator/nnpack/nnpack_util.cc
+++ b/src/operator/nnpack/nnpack_util.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file nnpack_util.cc
  * \brief
  * \author Wei Wu
diff --git a/src/operator/nnpack/nnpack_util.h b/src/operator/nnpack/nnpack_util.h
index 280c6ff..cde1880 100644
--- a/src/operator/nnpack/nnpack_util.h
+++ b/src/operator/nnpack/nnpack_util.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file nnpack_util.h
  * \brief
  * \author Carwin
diff --git a/src/operator/operator.cc b/src/operator/operator.cc
index e476c58..9117c1c 100644
--- a/src/operator/operator.cc
+++ b/src/operator/operator.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file operator.cc
  * \brief operator module of mxnet
  */
diff --git a/src/operator/operator_common.h b/src/operator/operator_common.h
index a43d092..2d46bd3 100644
--- a/src/operator/operator_common.h
+++ b/src/operator/operator_common.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file  operator_common.h
  * \brief common internal header of most operators
  *   this header includes utility functions operator can use
diff --git a/src/operator/operator_util.cc b/src/operator/operator_util.cc
index 84a19d6..25fa209 100644
--- a/src/operator/operator_util.cc
+++ b/src/operator/operator_util.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file operator_util.cc
  *  Implementation of operator util.
  */
diff --git a/src/operator/optimizer_op-inl.h b/src/operator/optimizer_op-inl.h
index 9f49593..70759b1 100644
--- a/src/operator/optimizer_op-inl.h
+++ b/src/operator/optimizer_op-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file optimizer_op-inl.h
  * \brief Optimizer operators
  * \author Junyuan Xie
diff --git a/src/operator/optimizer_op.cc b/src/operator/optimizer_op.cc
index 3fdb9c2..b26c333 100644
--- a/src/operator/optimizer_op.cc
+++ b/src/operator/optimizer_op.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file optimizer_op.cc
  * \brief Optimizer operators
  * \author Junyuan Xie
diff --git a/src/operator/optimizer_op.cu b/src/operator/optimizer_op.cu
index a30584d..0e74e30 100644
--- a/src/operator/optimizer_op.cu
+++ b/src/operator/optimizer_op.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file optimizer_op.cu
  * \brief Optimizer operators
  * \author Junyuan Xie
diff --git a/src/operator/pad-inl.h b/src/operator/pad-inl.h
index e6e6b7b..80f9e0b 100644
--- a/src/operator/pad-inl.h
+++ b/src/operator/pad-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file pad-inl.h
  * \brief
  * \author Sebastian Bodenstien
diff --git a/src/operator/pad.cc b/src/operator/pad.cc
index 77177b5..468629a 100644
--- a/src/operator/pad.cc
+++ b/src/operator/pad.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file pad.cc
  * \brief
  * \author Sebastian Bodenstein
diff --git a/src/operator/pad.cu b/src/operator/pad.cu
index bf7265c..98220b6 100644
--- a/src/operator/pad.cu
+++ b/src/operator/pad.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file pad.cu
  * \brief
  * \author Sebastian Bodenstein
diff --git a/src/operator/pooling-inl.h b/src/operator/pooling-inl.h
index 8156c37..fbc6981 100644
--- a/src/operator/pooling-inl.h
+++ b/src/operator/pooling-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file pooling-inl.h
  * \brief
  * \author Bing Xu, Jun Wu
diff --git a/src/operator/pooling.cc b/src/operator/pooling.cc
index 72b1703..51dce87 100644
--- a/src/operator/pooling.cc
+++ b/src/operator/pooling.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file pooling.cc
  * \brief
  * \author Bing Xu, Jun Wu
diff --git a/src/operator/pooling.cu b/src/operator/pooling.cu
index bc7716b..950f099 100644
--- a/src/operator/pooling.cu
+++ b/src/operator/pooling.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file pooling.cu
  * \brief
  * \author Bing Xu, Jun Wu
diff --git a/src/operator/pooling_v1-inl.h b/src/operator/pooling_v1-inl.h
index 0b9f7ad..e541298 100644
--- a/src/operator/pooling_v1-inl.h
+++ b/src/operator/pooling_v1-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file pooling_v1-inl.h
  * \brief
  * \author Bing Xu
diff --git a/src/operator/pooling_v1.cc b/src/operator/pooling_v1.cc
index 20acff7..40de745 100644
--- a/src/operator/pooling_v1.cc
+++ b/src/operator/pooling_v1.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file pooling_v1.cc
  * \brief
  * \author Bing Xu
diff --git a/src/operator/pooling_v1.cu b/src/operator/pooling_v1.cu
index 99aebbc..4db22c1 100644
--- a/src/operator/pooling_v1.cu
+++ b/src/operator/pooling_v1.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file pooling_v1.cu
  * \brief
  * \author Bing Xu
diff --git a/src/operator/random/multisample_op.cc b/src/operator/random/multisample_op.cc
index 303d1d2..f1264e5 100644
--- a/src/operator/random/multisample_op.cc
+++ b/src/operator/random/multisample_op.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file multisample_op.cc
  * \brief CPU-implementation of multi-sampling operators
  */
diff --git a/src/operator/random/multisample_op.h b/src/operator/random/multisample_op.h
index 0b5b4ce..748b3ba 100644
--- a/src/operator/random/multisample_op.h
+++ b/src/operator/random/multisample_op.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file sampling_op.h
  * \brief Function definitions of operators for sampling from multiple distributions
  */
diff --git a/src/operator/random/sample_multinomial_op.cc b/src/operator/random/sample_multinomial_op.cc
index 9e6dbe9..b358b3b 100644
--- a/src/operator/random/sample_multinomial_op.cc
+++ b/src/operator/random/sample_multinomial_op.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file sample_multinomial_op.h
  * \brief Operator for sampling from multinomial distributions
  */
diff --git a/src/operator/random/sample_multinomial_op.cu b/src/operator/random/sample_multinomial_op.cu
index 434202d..c2bc99b 100644
--- a/src/operator/random/sample_multinomial_op.cu
+++ b/src/operator/random/sample_multinomial_op.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file sample_multinomial_op.h
  * \brief Operator for sampling from multinomial distributions
  */
diff --git a/src/operator/random/sample_multinomial_op.h b/src/operator/random/sample_multinomial_op.h
index ab73ebf..2b01632 100644
--- a/src/operator/random/sample_multinomial_op.h
+++ b/src/operator/random/sample_multinomial_op.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file sample_multinomial_op.h
  * \brief Operator for sampling from multinomial distributions
  */
diff --git a/src/operator/random/sample_op.cc b/src/operator/random/sample_op.cc
index 1b3c293..8d87d2b 100644
--- a/src/operator/random/sample_op.cc
+++ b/src/operator/random/sample_op.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file sample_op.cc
  * \brief CPU Implementation of sample op
  */
@@ -106,7 +124,7 @@ MXNET_OPERATOR_REGISTER_SAMPLE(random_negative_binomial, SampleNegBinomialParam)
 .add_alias("_sample_negbinomial")
 .describe(R"code(Draw random samples from a negative binomial distribution.
 
-Samples are distributed according to a negative binomial distribution parametrized by 
+Samples are distributed according to a negative binomial distribution parametrized by
 *k* (limit of unsuccessful experiments) and *p* (failure probability in each experiment).
 Samples will always be returned as a floating point data type.
 
@@ -121,8 +139,8 @@ MXNET_OPERATOR_REGISTER_SAMPLE(random_generalized_negative_binomial, SampleGenNe
 .add_alias("_sample_gennegbinomial")
 .describe(R"code(Draw random samples from a generalized negative binomial distribution.
 
-Samples are distributed according to a generalized negative binomial distribution parametrized by 
-*mu* (mean) and *alpha* (dispersion). *alpha* is defined as *1/k* where *k* is the failure limit of the 
+Samples are distributed according to a generalized negative binomial distribution parametrized by
+*mu* (mean) and *alpha* (dispersion). *alpha* is defined as *1/k* where *k* is the failure limit of the
 number of unsuccessful experiments (generalized to real numbers).
 Samples will always be returned as a floating point data type.
 
diff --git a/src/operator/random/sample_op.cu b/src/operator/random/sample_op.cu
index 62c8a73..0d4b2e5 100644
--- a/src/operator/random/sample_op.cu
+++ b/src/operator/random/sample_op.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file sample_op.cu
  * \brief GPU Implementation of sample op
  */
diff --git a/src/operator/random/sample_op.h b/src/operator/random/sample_op.h
index 79655b3..a1a6a23 100644
--- a/src/operator/random/sample_op.h
+++ b/src/operator/random/sample_op.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file sample_op.h
  * \brief Elementary sampling operators
  */
diff --git a/src/operator/regression_output-inl.h b/src/operator/regression_output-inl.h
index 7f8b294..0de312c 100644
--- a/src/operator/regression_output-inl.h
+++ b/src/operator/regression_output-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file regression_ouput-inl.h
  * \brief Regression output operator.
  */
diff --git a/src/operator/regression_output.cc b/src/operator/regression_output.cc
index fc71a99..d19f336 100644
--- a/src/operator/regression_output.cc
+++ b/src/operator/regression_output.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file regression_output.cc
  * \brief regression output operator
 */
diff --git a/src/operator/regression_output.cu b/src/operator/regression_output.cu
index 18e7a1f..64dcef3 100644
--- a/src/operator/regression_output.cu
+++ b/src/operator/regression_output.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file regression_output.cu
  * \brief regression output operator
 */
diff --git a/src/operator/rnn-inl.h b/src/operator/rnn-inl.h
index bc5f3d7..4f09ebe 100644
--- a/src/operator/rnn-inl.h
+++ b/src/operator/rnn-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file rnn-inl.h
  * \brief
  * \author Sebastian Bodenstein
diff --git a/src/operator/rnn.cc b/src/operator/rnn.cc
index f19c3bb..4c7954f 100644
--- a/src/operator/rnn.cc
+++ b/src/operator/rnn.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file rnn.cc
  * \brief
  * \author Sebastian Bodenstein
diff --git a/src/operator/rnn.cu b/src/operator/rnn.cu
index 4e3998e..0daee32 100644
--- a/src/operator/rnn.cu
+++ b/src/operator/rnn.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file rnn.cu
  * \brief
  * \author Sebastian Bodenstein
diff --git a/src/operator/roi_pooling-inl.h b/src/operator/roi_pooling-inl.h
index 7eb14b7..05a0ae4 100644
--- a/src/operator/roi_pooling-inl.h
+++ b/src/operator/roi_pooling-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file roi_pooling-inl.h
  * \brief roi pooling operator and symbol
  * \author Kye-Hyeon Kim, Jian Guo
diff --git a/src/operator/roi_pooling.cc b/src/operator/roi_pooling.cc
index 0faca1e..9af00bb 100644
--- a/src/operator/roi_pooling.cc
+++ b/src/operator/roi_pooling.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file roi_pooling.cc
  * \brief roi pooling operator
  * \author Ross Girshick, Kye-Hyeon Kim, Jian Guo
diff --git a/src/operator/roi_pooling.cu b/src/operator/roi_pooling.cu
index 3d74468..80d38e4 100644
--- a/src/operator/roi_pooling.cu
+++ b/src/operator/roi_pooling.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file roi_pooling.cu
  * \brief roi pooling operator
  * \author Ross Girshick, Kye-Hyeon Kim, Jian Guo
diff --git a/src/operator/sequence_last-inl.h b/src/operator/sequence_last-inl.h
index c2acbf1..0551ee9 100644
--- a/src/operator/sequence_last-inl.h
+++ b/src/operator/sequence_last-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file sequence_last-inl.h
  * \brief
  * \author Sebastian Bodenstien
diff --git a/src/operator/sequence_last.cc b/src/operator/sequence_last.cc
index 8a50ff7..6c04bdd 100644
--- a/src/operator/sequence_last.cc
+++ b/src/operator/sequence_last.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file sequence_last.cc
  * \brief
  * \author Sebastian Bodenstein
diff --git a/src/operator/sequence_last.cu b/src/operator/sequence_last.cu
index 329c2c7..9215b24 100644
--- a/src/operator/sequence_last.cu
+++ b/src/operator/sequence_last.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file sequence_last.cu
  * \brief
  * \author Sebastian Bodenstein
diff --git a/src/operator/sequence_mask-inl.h b/src/operator/sequence_mask-inl.h
index 69c9874..dec1f2a 100644
--- a/src/operator/sequence_mask-inl.h
+++ b/src/operator/sequence_mask-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file wl_sequence_mask-inl.h
  * \brief
  * \author Sebastian Bodenstien
diff --git a/src/operator/sequence_mask.cc b/src/operator/sequence_mask.cc
index 0ac782e..ed90f3e 100644
--- a/src/operator/sequence_mask.cc
+++ b/src/operator/sequence_mask.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file sequence_mask.cc
  * \brief
  * \author Sebastian Bodenstein
diff --git a/src/operator/sequence_mask.cu b/src/operator/sequence_mask.cu
index 41c0894..d370ff3 100644
--- a/src/operator/sequence_mask.cu
+++ b/src/operator/sequence_mask.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file sequence_mask.cu
  * \brief
  * \author Sebastian Bodenstein
diff --git a/src/operator/sequence_op_common.h b/src/operator/sequence_op_common.h
index a292492..9e58431 100644
--- a/src/operator/sequence_op_common.h
+++ b/src/operator/sequence_op_common.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file sequence_op_common.h
  * \brief common function used for sequence layers
  * \author Sebastian Bodenstein
diff --git a/src/operator/sequence_reverse-inl.h b/src/operator/sequence_reverse-inl.h
index 0ead25c..0a43138 100644
--- a/src/operator/sequence_reverse-inl.h
+++ b/src/operator/sequence_reverse-inl.h
@@ -1,5 +1,23 @@
 /*
- * Copyright (c) 2016 by Contributors
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
  * \file sequence_reverse-inl.h
  * \brief
  * \author Sebastian Bodenstien
diff --git a/src/operator/sequence_reverse.cc b/src/operator/sequence_reverse.cc
index 01dcb68..61821d3 100644
--- a/src/operator/sequence_reverse.cc
+++ b/src/operator/sequence_reverse.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file sequence_reverse.cc
  * \brief
  * \author Sebastian Bodenstein
diff --git a/src/operator/sequence_reverse.cu b/src/operator/sequence_reverse.cu
index cdd8f34..c6cc3f6 100644
--- a/src/operator/sequence_reverse.cu
+++ b/src/operator/sequence_reverse.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file sequence_reverse.cu
  * \brief
  * \author Sebastian Bodenstein
diff --git a/src/operator/slice_channel-inl.h b/src/operator/slice_channel-inl.h
index 69d1f2a..a48c52f 100644
--- a/src/operator/slice_channel-inl.h
+++ b/src/operator/slice_channel-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file slice_channel-inl.h
  * \brief
  * \author Bing Xu
diff --git a/src/operator/slice_channel.cc b/src/operator/slice_channel.cc
index 689f010..7293ba6 100644
--- a/src/operator/slice_channel.cc
+++ b/src/operator/slice_channel.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file slice_channel.cc
  * \brief
  * \author Bing Xu
@@ -31,7 +49,7 @@ MXNET_REGISTER_OP_PROPERTY(SliceChannel, SliceChannelProp)
 
 .. note:: ``SliceChannel`` is deprecated. Use ``split`` instead.
 
-**Note** that `num_outputs` should evenly divide the length of the axis 
+**Note** that `num_outputs` should evenly divide the length of the axis
 along which to split the array.
 
 Example::
diff --git a/src/operator/slice_channel.cu b/src/operator/slice_channel.cu
index 6afd450..eb1c9c8 100644
--- a/src/operator/slice_channel.cu
+++ b/src/operator/slice_channel.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file slice_channel.cc
  * \brief
  * \author Bing Xu
diff --git a/src/operator/softmax_activation-inl.h b/src/operator/softmax_activation-inl.h
index 0f4bf5f..b1b7693 100644
--- a/src/operator/softmax_activation-inl.h
+++ b/src/operator/softmax_activation-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file softmax_activation-inl.h
  * \brief SoftmaxActivation operator
  * \author Junyuan Xie
diff --git a/src/operator/softmax_activation.cc b/src/operator/softmax_activation.cc
index 827ea03..115b0a7 100644
--- a/src/operator/softmax_activation.cc
+++ b/src/operator/softmax_activation.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file activation.cc
  * \brief softmax_activation op
  * \author Junyuan Xie
diff --git a/src/operator/softmax_activation.cu b/src/operator/softmax_activation.cu
index b2d903a..5bebed2 100644
--- a/src/operator/softmax_activation.cu
+++ b/src/operator/softmax_activation.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file softmax_activation.cu
  * \brief
  * \author Junyuan Xie
diff --git a/src/operator/softmax_output-inl.h b/src/operator/softmax_output-inl.h
index 3bed938..d14193b 100644
--- a/src/operator/softmax_output-inl.h
+++ b/src/operator/softmax_output-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file softmax_output-inl.h
  * \brief
  * \author Bing Xu
diff --git a/src/operator/softmax_output.cc b/src/operator/softmax_output.cc
index 08580e9..52bb2a4 100644
--- a/src/operator/softmax_output.cc
+++ b/src/operator/softmax_output.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file softmax_output.cc
  * \brief
  * \author Bing Xu
diff --git a/src/operator/softmax_output.cu b/src/operator/softmax_output.cu
index 7d9324b..8de5df6 100644
--- a/src/operator/softmax_output.cu
+++ b/src/operator/softmax_output.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file softmax_output.cu
  * \brief
  * \author Bing Xu
diff --git a/src/operator/spatial_transformer-inl.h b/src/operator/spatial_transformer-inl.h
index 8c02d35..7796757 100644
--- a/src/operator/spatial_transformer-inl.h
+++ b/src/operator/spatial_transformer-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file spatial_transformer-inl.h
  * \brief
  *  Reproducing paper: aderberg M, Simonyan K, Zisserman A. "Spatial transformer networks"
diff --git a/src/operator/spatial_transformer.cc b/src/operator/spatial_transformer.cc
index 409339b..0d8ee29 100644
--- a/src/operator/spatial_transformer.cc
+++ b/src/operator/spatial_transformer.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file spatial_transformer.cc
  * \brief
  * \author Wei Wu
diff --git a/src/operator/spatial_transformer.cu b/src/operator/spatial_transformer.cu
index 4942f65..b3d635c 100644
--- a/src/operator/spatial_transformer.cu
+++ b/src/operator/spatial_transformer.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2016 by Contributors
  * \file spatial_transformer.cu
  * \brief
  * \author Wei Wu
diff --git a/src/operator/special_functions-inl.h b/src/operator/special_functions-inl.h
index 743391e..b9460a3 100644
--- a/src/operator/special_functions-inl.h
+++ b/src/operator/special_functions-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file special_functions-inl.h
  * \brief
  * \author Valentin Flunkert
diff --git a/src/operator/svm_output-inl.h b/src/operator/svm_output-inl.h
index 7f460ac..f025461 100644
--- a/src/operator/svm_output-inl.h
+++ b/src/operator/svm_output-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file svm_output-inl.h
  * \brief
  * \author Jonas Amaro
diff --git a/src/operator/svm_output.cc b/src/operator/svm_output.cc
index 5f1f77a..766968d 100644
--- a/src/operator/svm_output.cc
+++ b/src/operator/svm_output.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file svm_output.cc
  * \brief
  * \author Jonas Amaro
diff --git a/src/operator/svm_output.cu b/src/operator/svm_output.cu
index d4b9596..250df91 100644
--- a/src/operator/svm_output.cu
+++ b/src/operator/svm_output.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file svm_output.cu
  * \brief
  * \author Jonas Amaro
diff --git a/src/operator/swapaxis-inl.h b/src/operator/swapaxis-inl.h
index 9595f6e..89c7245 100644
--- a/src/operator/swapaxis-inl.h
+++ b/src/operator/swapaxis-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file swapaxis-inl.h
  * \brief
  * \author Ming Zhang
diff --git a/src/operator/swapaxis.cc b/src/operator/swapaxis.cc
index 097f983..a6c3e8b 100644
--- a/src/operator/swapaxis.cc
+++ b/src/operator/swapaxis.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file swapaxis.cc
  * \brief
  * \author Ming Zhang
diff --git a/src/operator/swapaxis.cu b/src/operator/swapaxis.cu
index 93f78c2..e9b105d 100644
--- a/src/operator/swapaxis.cu
+++ b/src/operator/swapaxis.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file swapaxis.cu
  * \brief
  * \author Ming Zhang
diff --git a/src/operator/tensor/broadcast_reduce-inl.cuh b/src/operator/tensor/broadcast_reduce-inl.cuh
index 2ba0eb5..b1a259f 100644
--- a/src/operator/tensor/broadcast_reduce-inl.cuh
+++ b/src/operator/tensor/broadcast_reduce-inl.cuh
@@ -1,605 +1,624 @@
-/*!
- * Copyright (c) 2015-2017 by Contributors
- * \file broadcast_reduce-inl.cuh
- * \brief CUDA implementations for binary broadcast and reduce
- * \author Antti-Pekka Hynninen
-*/
-#ifndef MXNET_OPERATOR_TENSOR_BROADCAST_REDUCE_INL_CUH_
-#define MXNET_OPERATOR_TENSOR_BROADCAST_REDUCE_INL_CUH_
-
-using namespace mshadow::cuda;
-
-template<int ndim, typename DType, typename OP, int unroll>
-__launch_bounds__(kMaxThreadsPerBlock)
-__global__ void binary_broadcast_kernel(const int N, const bool addto,
-                                        const DType* __restrict lhs,
-                                        const DType* __restrict rhs, DType *out,
-                                        const Shape<ndim> lstride, const Shape<ndim> rstride,
-                                        const Shape<ndim> oshape) {
-  for (int idx = blockIdx.x * blockDim.x * unroll + threadIdx.x; idx < N;
-    idx += blockDim.x * gridDim.x * unroll)
-  {
-    int j[unroll];
-    int k[unroll];
-    DType val[unroll];
-    #pragma unroll
-    for (int i=0;i < unroll;i++) {
-      unravel_dot(idx + i*blockDim.x, oshape, lstride, rstride, &j[i], &k[i]);
-      val[i] = OP::Map(lhs[j[i]], rhs[k[i]]);
-    }
-    #pragma unroll
-    for (int i=0;i < unroll;i++) {
-      if (idx + i*blockDim.x < N) assign(&out[idx + i*blockDim.x], addto, val[i]);
-    }
-
-  }
-}
-
-template<int ndim, typename DType, typename OP>
-void BinaryBroadcastComputeImpl(Stream<gpu> *s, const OpReqType req,
-                                const TBlob& lhs, const TBlob& rhs, const TBlob& out) {
-  if (req == kNullOp) return;
-  cudaStream_t stream = Stream<gpu>::GetStream(s);
-  int N = out.shape_.Size();
-  const int warpSize = 32;
-  const int unroll = 2;
-  int nthread = std::min(kMaxThreadsPerBlock, ((N + warpSize - 1)/warpSize)*warpSize );
-  int ngrid = std::min(kBaseGridNum, (N + nthread*unroll - 1) / (nthread*unroll));
-  Shape<ndim> lstride = calc_stride(lhs.shape_.get<ndim>());
-  Shape<ndim> rstride = calc_stride(rhs.shape_.get<ndim>());
-  binary_broadcast_kernel<ndim, DType, OP, unroll><<<ngrid, nthread, 0, stream>>>(
-    N, req == kAddTo, lhs.dptr<DType>(), rhs.dptr<DType>(), out.dptr<DType>(), lstride, rstride,
-    out.shape_.get<ndim>());
-}
-
-const int nthread_reduce = kMaxThreadsPerBlock;
-template<typename Reducer, int ndim, typename DType, typename OP, int unroll>
-__launch_bounds__(nthread_reduce)
-__global__ void reduce_kernel(const int N, const int M, const bool addto,
-                              const DType* __restrict big, DType *small,
-                              const Shape<ndim> big_shape0, const Shape<ndim> small_shape,
-                              const Shape<ndim> big_shape, const Shape<ndim> big_stride,
-                              const int Mnext, const bool do_transpose) {
-  extern __shared__ char shTileChar[];
-  DType* shTile = (DType*)(shTileChar);
-  const int tid = threadIdx.x + threadIdx.y*blockDim.x;
-  const int bx = (do_transpose) ? blockDim.y : blockDim.x;
-  const int by = (do_transpose) ? blockDim.x : blockDim.y;
-  const int tidx = (do_transpose) ? tid / by : threadIdx.x;
-  const int tidy = (do_transpose) ? tid % by : threadIdx.y;
-  for (int m0 = blockIdx.y; m0 < Mnext; m0 += gridDim.y) {
-    // This TB handles M range [Mstart, ...., Mend - 1]
-    const int Mstart = (int)((uint64_t)M*(uint64_t)m0/(uint64_t)Mnext);
-    const int Mend   = (int)((uint64_t)M*(uint64_t)(m0 + 1)/(uint64_t)Mnext);
-    for (int idx0 = blockIdx.x*bx; idx0 < N; idx0 += bx*gridDim.x) {
-      int idx = idx0 + tidx;
-      Shape<ndim> coord = unravel(idx, small_shape);
-      int idx_big0 = ravel(coord, big_shape0);
-
-      DType val;
-      Reducer::SetInitValue(val);
-      if (idx < N) {
-        for (int k = tidy + Mstart; k < Mend; k += by*unroll) {
-          int idx_big[unroll];
-          #pragma unroll
-          for (int u=0;u < unroll;u++) {
-            idx_big[u] = idx_big0 + unravel_dot(k + u*by, big_shape, big_stride);
-          }
-          DType tmp[unroll];
-          #pragma unroll
-          for (int u=0;u < unroll;u++) {
-            if (k + u*by < Mend) {
-              tmp[u] = OP::Map(big[idx_big[u]]);
-            }
-          }
-          #pragma unroll
-          for (int u=0;u < unroll;u++) {
-            if (k + u*by < Mend) Reducer::Reduce(val, tmp[u]);
-          }
-        }
-      }
-
-      // Shared memory block bx * by. Reduction is along by. Final result is in tidy=0
-      if (by > 1) {
-        // Fix bx to avoid bank conflicts. Assumes warpSize number of banks
-        const int fbx = (do_transpose && ((bx & (warpSize - 1)) == 0)) ? (bx + 1) : bx;
-        const int it0 = tidx + tidy*fbx;
-        shTile[it0] = val;
-        __syncthreads();
-        for (int t=1;t < by;t <<= 1) {
-          DType tmp;
-          Reducer::SetInitValue(tmp);
-          if (tidy + t < by) tmp = shTile[it0 + t*fbx];
-          __syncthreads();
-          Reducer::Reduce(shTile[it0], tmp);
-          __syncthreads();
-        }
-        if (idx < N && tidy == 0) {
-          assign(&small[idx + m0*N], addto, shTile[tidx]);
-        }
-      } else {
-        if (idx < N) {
-          assign(&small[idx + m0*N], addto, val);
-        }        
-      }
-    }
-  }
-
-}
-
-template<typename Reducer, int ndim, typename DType, typename OP1, typename OP2, int unroll>
-__launch_bounds__(nthread_reduce)
-__global__ void reduce_kernel(const int N, const int M, const bool addto,
-                              const DType* __restrict big, const DType* __restrict lhs,
-                              const DType* __restrict rhs, DType *small,
-                              const Shape<ndim> big_shape0, const Shape<ndim> lhs_shape0,
-                              const Shape<ndim> rhs_shape0, const Shape<ndim> small_shape,
-                              const Shape<ndim> big_shape, const Shape<ndim> lhs_shape,
-                              const Shape<ndim> rhs_shape, const Shape<ndim> big_stride,
-                              const Shape<ndim> lhs_stride, const Shape<ndim> rhs_stride,
-                              const int Mnext, const bool do_transpose) {
-  extern __shared__ char shTileChar[];
-  DType* shTile = (DType*)(shTileChar);
-  const int tid = threadIdx.x + threadIdx.y*blockDim.x;
-  const int bx = (do_transpose) ? blockDim.y : blockDim.x;
-  const int by = (do_transpose) ? blockDim.x : blockDim.y;
-  const int tidx = (do_transpose) ? tid / by : threadIdx.x;
-  const int tidy = (do_transpose) ? tid % by : threadIdx.y;
-  for (int m0 = blockIdx.y; m0 < Mnext; m0 += gridDim.y) {
-    // This TB handles M range [Mstart, ...., Mend - 1]
-    const int Mstart = (int)((uint64_t)M*(uint64_t)m0/(uint64_t)Mnext);
-    const int Mend   = (int)((uint64_t)M*(uint64_t)(m0 + 1)/(uint64_t)Mnext);
-    for (int idx0 = blockIdx.x*bx; idx0 < N; idx0 += bx*gridDim.x) {
-      int idx = idx0 + tidx;
-      Shape<ndim> coord = unravel(idx, small_shape);
-      int idx_big0 = ravel(coord, big_shape0);
-      int idx_lhs0 = ravel(coord, lhs_shape0);
-      int idx_rhs0 = ravel(coord, rhs_shape0);
-
-      DType val;
-      Reducer::SetInitValue(val);
-      if (idx < N) {
-        for (int k = tidy + Mstart; k < Mend; k += by*unroll) {
-          int idx_big[unroll];
-          int idx_lhs[unroll];
-          int idx_rhs[unroll];
-          #pragma unroll
-          for (int u=0;u < unroll;u++) {
-            idx_big[u] = idx_big0 + unravel_dot(k + u*by, big_shape, big_stride);
-            idx_lhs[u] = idx_lhs0 + unravel_dot(k + u*by, lhs_shape, lhs_stride);
-            idx_rhs[u] = idx_rhs0 + unravel_dot(k + u*by, rhs_shape, rhs_stride);
-          }
-          DType tmp[unroll];
-          #pragma unroll
-          for (int u=0;u < unroll;u++) {
-            if (k + u*by < Mend) {
-              tmp[u] = OP1::Map(big[idx_big[u]], OP2::Map(lhs[idx_lhs[u]], rhs[idx_rhs[u]]));
-            }
-          }
-          #pragma unroll
-          for (int u=0;u < unroll;u++) {
-            if (k + u*by < Mend) Reducer::Reduce(val, tmp[u]);
-          }
-        }
-      }
-
-      // Shared memory block bx * by. Reduction is along by. Final result is in tidy=0
-      if (by > 1) {
-        // Fix bx to avoid bank conflicts. Assumes warpSize number of banks
-        const int fbx = (do_transpose && ((bx & (warpSize - 1)) == 0)) ? (bx + 1) : bx;
-        const int it0 = tidx + tidy*fbx;
-        shTile[it0] = val;
-        __syncthreads();
-        for (int t=1;t < by;t <<= 1) {
-          DType tmp;
-          Reducer::SetInitValue(tmp);
-          if (tidy + t < by) tmp = shTile[it0 + t*fbx];
-          __syncthreads();
-          Reducer::Reduce(shTile[it0], tmp);
-          __syncthreads();
-        }
-        if (idx < N && tidy == 0) {
-          assign(&small[idx + m0*N], addto, shTile[tidx]);
-        }
-      } else {
-        if (idx < N) {
-          assign(&small[idx + m0*N], addto, val);
-        }        
-      }
-    }
-  }
-
-}
-
-// Simple reduction of lines when M is small
-template<typename Reducer, typename DType>
-__launch_bounds__(kMaxThreadsPerBlock)
-__global__ void reduce_lines_kernel(const int N, const int M, const bool addto,
-  const int small_in_stride, const DType* __restrict small_in, DType *small_out) {
-  for (int idx = threadIdx.x + blockIdx.x*blockDim.x; idx < N; idx += blockDim.x*gridDim.x) {
-    
-    DType val;
-    Reducer::SetInitValue(val);
-    for (int k = 0; k < M; k++) {
-      Reducer::Reduce(val, small_in[idx + k*small_in_stride]);
-    }
-
-    if (idx < N) {
-      assign(&small_out[idx], addto, val);
-    }
-
-  }
-}
-
-template<typename Reducer, int ndim, typename DType, typename OP>
-__global__ void reduce_kernel_M1(const int N, const bool addto,
-                                const DType* __restrict big, DType *small, const Shape<ndim> bshape,
-                                const Shape<ndim> sshape) {
-  for (int idx = threadIdx.x + blockIdx.x*blockDim.x; idx < N; idx += blockDim.x*gridDim.x) {
-    Shape<ndim> coord = unravel(idx, sshape);
-    int j = ravel(coord, bshape);
-    assign(&small[idx], addto, OP::Map(big[j]));
-  }
-}
-
-template<typename Reducer, int ndim, typename DType, typename OP1, typename OP2>
-__global__ void reduce_kernel_M1(const int N, const bool addto,
-                                 const DType* __restrict big,
-                                 const DType* __restrict lhs,
-                                 const DType* __restrict rhs,
-                                 DType *small,
-                                 const Shape<ndim> big_shape,
-                                 const Shape<ndim> lhs_shape,
-                                 const Shape<ndim> rhs_shape,
-                                 const Shape<ndim> small_shape) {
-  for (int idx = threadIdx.x + blockIdx.x*blockDim.x; idx < N; idx += blockDim.x*gridDim.x) {
-    Shape<ndim> coord = unravel(idx, small_shape);
-    int idx_big = ravel(coord, big_shape);
-    int idx_lhs = ravel(coord, lhs_shape);
-    int idx_rhs = ravel(coord, rhs_shape);
-    DType val = OP1::Map(big[idx_big], OP2::Map(lhs[idx_lhs], rhs[idx_rhs]));
-    assign(&small[idx], addto, val);
-  }
-}
-
-// Returns the stride with which the fastest dimension is moving.
-// Used to detect memory access scatter.
-template<int ndim>
-MSHADOW_XINLINE int fastest_stride(const Shape<ndim>& small, const Shape<ndim>& big, 
-  const Shape<ndim>& big_stride) {
-  for (int i = ndim-1; i >= 0; --i) {
-    if (big[i] != 1) {
-      return (small[i] == big[i]) ? 1 : big_stride[i];
-    }
-  }
-  return 1;
-}
-
-// Returns a/b integer division rounded up
-template<typename Type>
-Type ceil_idiv(const Type a, const Type b) {
-  return (a + b - 1)/b;
-}
-
-// Configuration for ReduceImpl()
-template<int ndim>
-struct ReduceImplConfig {
-  static const int warpSize = 32;
-  static const int unroll_reduce = 2;
-  static const int maxLoopPerTB = 64;
-  int N;
-  int M;
-  int Mnext;
-  struct {
-    dim3 blockDim;
-    dim3 gridDim;
-    int shMemSize;
-    bool do_transpose;
-  } kernel_1;
-  struct {
-    int blockSize;
-    int gridSize;
-  } kernel_2;
-  size_t workspace_size;
-
-  Shape<ndim> rshape, rstride;
-  Shape<ndim> lhs_shape, lhs_stride;
-  Shape<ndim> rhs_shape, rhs_stride;
-};
-
-static inline uint64_t calc_num_load(const int X, const int Y, const int* strides) {
-  const int warpSize = ReduceImplConfig<1>::warpSize;
-  // Number of full warps
-  uint64_t num_full_warp = X / warpSize;
-  // Length of the partial warp i.e. number of threads that are performing loads
-  uint64_t len_part_warp = X % warpSize;
-
-  uint64_t num_load_full = (std::min(warpSize, strides[0]) +
-    std::min(warpSize, strides[1]) +
-    std::min(warpSize, strides[2]))*num_full_warp;
-
-  uint64_t num_load_part =
-  (std::min(len_part_warp, ceil_idiv<uint64_t>(len_part_warp*strides[0], warpSize)) +
-    std::min(len_part_warp, ceil_idiv<uint64_t>(len_part_warp*strides[1], warpSize)) +
-    std::min(len_part_warp, ceil_idiv<uint64_t>(len_part_warp*strides[2], warpSize)))*
-  (len_part_warp != 0);
-
-  uint64_t num_load = (num_load_full + num_load_part)*(uint64_t)Y;
-  return num_load;
-}
-
-template<int ndim, typename DType>
-ReduceImplConfig<ndim> ConfigureReduceImpl(const TBlob& small, const TBlob& big, const TBlob* lhs,
-  const TBlob* rhs) {
-
-  ReduceImplConfig<ndim> config;
-
-  diff(small.shape_.get<ndim>(), big.shape_.get<ndim>(), &config.rshape, &config.rstride);
-  config.N = small.shape_.Size();
-  config.M = config.rshape.Size();
-
-  bool multiOp = false;
-  if (lhs != NULL) {
-    CHECK_NOTNULL(rhs);
-    diff(small.shape_.get<ndim>(), lhs->shape_.get<ndim>(), &config.lhs_shape,
-      &config.lhs_stride);
-    diff(small.shape_.get<ndim>(), rhs->shape_.get<ndim>(), &config.rhs_shape,
-      &config.rhs_stride);
-    multiOp = true;
-  }
-
-  config.workspace_size = 0;
-
-  if (config.M == 1) {
-    config.kernel_1.blockDim.x = kMaxThreadsPerBlock;
-    config.kernel_1.gridDim.x = std::min((unsigned int)kBaseGridNum,
-      (config.N + config.kernel_1.blockDim.x - 1)/config.kernel_1.blockDim.x);
-  } else {
-
-    int reduce_strides[3];
-    reduce_strides[0] = fastest_stride(small.shape_.get<ndim>(), big.shape_.get<ndim>(),
-      big.shape_.get<ndim>());
-    reduce_strides[1] = (multiOp) ? fastest_stride(small.shape_.get<ndim>(),
-      lhs->shape_.get<ndim>(), lhs->shape_.get<ndim>()) : 1;
-    reduce_strides[2] = (multiOp) ? fastest_stride(small.shape_.get<ndim>(),
-      rhs->shape_.get<ndim>(), rhs->shape_.get<ndim>()) : 1;
-
-    int reduce_strides_transp[3];
-    reduce_strides_transp[0] = fastest_stride(small.shape_.get<ndim>(), config.rshape,
-      config.rstride);
-    reduce_strides_transp[1] = (multiOp) ?
-      fastest_stride(small.shape_.get<ndim>(), config.lhs_shape, config.lhs_stride) : 1;
-    reduce_strides_transp[2] = (multiOp) ?
-      fastest_stride(small.shape_.get<ndim>(), config.rhs_shape, config.rhs_stride) : 1;
-
-    uint64_t num_load = calc_num_load(config.N, config.M, reduce_strides);
-    uint64_t num_load_transp = calc_num_load(config.M, config.N, reduce_strides_transp);
-
-    config.Mnext = 1;
-    config.kernel_1.do_transpose = (num_load > num_load_transp);
-
-    config.kernel_1.blockDim.x = 0;
-    config.kernel_1.blockDim.y = 0;
-
-    if (config.kernel_1.do_transpose) {
-      // Fastest thread ID goes through M
-      // Loop over N has step size config.kernel_1.blockDim.y
-      if (config.N < 8) {
-        config.kernel_1.blockDim.y = 1;
-      } else if (config.N < 256) {
-        config.kernel_1.blockDim.y = 4;
-      } else {
-        if (config.M < 8) {
-          config.kernel_1.blockDim.x = 1;
-        } else if (config.M < 256) {
-          config.kernel_1.blockDim.x = 4;
-        } else {
-          config.kernel_1.blockDim.x = config.warpSize;
-        }
-      }
-    } else {
-      // Fastest thread ID goes through N
-      // Loop over M has step size config.kernel_1.blockDim.y
-      if (config.M < 8) {
-        config.kernel_1.blockDim.y = 1;
-      } else if (config.M < 256) {
-        config.kernel_1.blockDim.y = 4;
-      } else {
-        if (config.N < 8) {
-          config.kernel_1.blockDim.x = 1;
-        } else if (config.N < 256) {
-          config.kernel_1.blockDim.x = 4;
-        } else {
-          config.kernel_1.blockDim.x = config.warpSize;
-        }
-      }
-    }
-
-    if (config.kernel_1.blockDim.x == 0 && config.kernel_1.blockDim.y == 0) {
-      LOG(FATAL) << "Unable to set blockDim";
-    } else if (config.kernel_1.blockDim.x == 0) {
-      config.kernel_1.blockDim.x = nthread_reduce / config.kernel_1.blockDim.y;
-    } else if (config.kernel_1.blockDim.y == 0) {
-      config.kernel_1.blockDim.y = nthread_reduce / config.kernel_1.blockDim.x;
-    }
-
-    if (config.kernel_1.do_transpose) {
-      // Fastest thread ID goes through M
-      config.kernel_1.gridDim.x = std::min((unsigned int)kBaseGridNum,
-        ceil_idiv<unsigned int>(config.N, config.kernel_1.blockDim.y));
-      config.kernel_1.gridDim.y = std::min(kBaseGridNum, config.Mnext);
-      int by = config.kernel_1.blockDim.y;
-      if (config.kernel_1.blockDim.y % config.warpSize == 0) {
-        // Fix shared memory bank conflict
-        by++;
-      }
-      config.kernel_1.shMemSize = (config.kernel_1.blockDim.x > 1) ?
-        config.kernel_1.blockDim.x*by*sizeof(DType) : 0;
-      // Maximum number of times we want TB to loop in M
-      // Max size of M-block each TB can handle
-      int maxMblock = config.kernel_1.blockDim.x*config.maxLoopPerTB;
-      config.Mnext = (config.M + maxMblock - 1) / maxMblock;
-    } else {
-      // Fastest thread ID goes through N
-      config.kernel_1.gridDim.x = std::min((unsigned int)kBaseGridNum,
-        ceil_idiv<unsigned int>(config.N, config.kernel_1.blockDim.x));
-      config.kernel_1.gridDim.y = std::min(kBaseGridNum, config.Mnext);
-      config.kernel_1.shMemSize = (config.kernel_1.blockDim.y > 1) ?
-        config.kernel_1.blockDim.x*config.kernel_1.blockDim.y*sizeof(DType) : 0;
-      // Maximum number of times we want TB to loop in M
-      // Max size of M-block each TB can handle
-      int maxMblock = config.kernel_1.blockDim.y*config.maxLoopPerTB;
-      config.Mnext = (config.M + maxMblock - 1) / maxMblock;
-    }
-
-    if (config.Mnext > 1) {
-      // small_dptr[] is N*Mnext*sizeof(DType) bytes
-      config.workspace_size += config.N*config.Mnext*sizeof(DType);
-      // Set gridDim.y to Mnext
-      config.kernel_1.gridDim.y = std::min(kBaseGridNum, config.Mnext);
-    }
-
-    if (config.Mnext > 1) {
-      config.kernel_2.blockSize = kMaxThreadsPerBlock;
-      config.kernel_2.gridSize = std::min((int)kBaseGridNum,
-        (config.N + config.kernel_2.blockSize - 1)/config.kernel_2.blockSize );
-    }
-
-  }
-
-  return config;
-}
-
-#define KERNEL_UNROLL_SWITCH(do_unroll, unrollAmount, unrollVar, ...) \
-  if (do_unroll) {                                                    \
-    const int unrollVar = unrollAmount;                               \
-    {__VA_ARGS__}                                                     \
-  } else {                                                            \
-    const int unrollVar = 1;                                          \
-    {__VA_ARGS__}                                                     \
-  }
-
-template<typename Reducer, int ndim, typename DType, typename OP>
-void ReduceImpl(cudaStream_t stream, const TBlob& small, const OpReqType req,
-                const TBlob& big, const Tensor<gpu, 1, char>& workspace,
-                const ReduceImplConfig<ndim>& config) {
-  if (config.M == 1) {
-    reduce_kernel_M1<Reducer, ndim, DType, OP>
-    <<< config.kernel_1.gridDim, config.kernel_1.blockDim, 0, stream >>>(
-      config.N, req == kAddTo, big.dptr<DType>(), small.dptr<DType>(), big.shape_.get<ndim>(),
-      small.shape_.get<ndim>());
-  } else {
-
-    DType* small_dptr = small.dptr<DType>();
-    bool addto = (req == kAddTo);
-    if (config.Mnext > 1) {
-      // small_dptr[] is N*Mnext*sizeof(DType) bytes
-      small_dptr = reinterpret_cast<DType*>(workspace.dptr_);
-      addto = false;
-      // Check that the workspace is contigiuous
-      CHECK_EQ(workspace.CheckContiguous(), true);
-      // Check that we have enough storage
-      CHECK_GE(workspace.size(0), config.workspace_size);
-    }
-
-    const int by = (config.kernel_1.do_transpose) ?
-      config.kernel_1.blockDim.x : config.kernel_1.blockDim.y;
-    const bool do_unroll = ( config.M / (by*config.Mnext) >= config.unroll_reduce );
-    KERNEL_UNROLL_SWITCH(do_unroll, ReduceImplConfig<ndim>::unroll_reduce, UNROLL, {
-      reduce_kernel<Reducer, ndim, DType, OP, UNROLL>
-      <<< config.kernel_1.gridDim, config.kernel_1.blockDim, config.kernel_1.shMemSize, stream>>>(
-        config.N, config.M, addto, big.dptr<DType>(), small_dptr, big.shape_.get<ndim>(),
-        small.shape_.get<ndim>(), config.rshape, config.rstride, config.Mnext,
-        config.kernel_1.do_transpose);
-    });
-
-    if (config.Mnext > 1) {
-      reduce_lines_kernel<Reducer, DType>
-      <<< config.kernel_2.gridSize, config.kernel_2.blockSize, 0, stream >>>
-        (config.N, config.Mnext, req == kAddTo, config.N, small_dptr, small.dptr<DType>());
-    }
-  }
-}
-
-template<typename Reducer, int ndim, typename DType, typename OP1, typename OP2>
-void ReduceImpl(cudaStream_t stream, const TBlob& small, const TBlob& lhs, const TBlob& rhs,
-                const OpReqType req, const TBlob& big, const Tensor<gpu, 1, char>& workspace,
-                const ReduceImplConfig<ndim>& config) {
-  if (config.M == 1) {
-    reduce_kernel_M1<Reducer, ndim, DType, OP1, OP2>
-    <<< config.kernel_1.gridDim, config.kernel_1.blockDim, 0, stream >>>(
-      config.N, req == kAddTo, big.dptr<DType>(), lhs.dptr<DType>(), rhs.dptr<DType>(),
-      small.dptr<DType>(), big.shape_.get<ndim>(), lhs.shape_.get<ndim>(),
-      rhs.shape_.get<ndim>(), small.shape_.get<ndim>());
-  } else {
-    DType* small_dptr = small.dptr<DType>();
-    bool addto = (req == kAddTo);
-    if (config.Mnext > 1) {
-      // small_dptr[] is N*Mnext*sizeof(DType) bytes
-      small_dptr = reinterpret_cast<DType*>(workspace.dptr_);
-      addto = false;
-      // Check that the workspace is contigiuous
-      CHECK_EQ(workspace.CheckContiguous(), true);
-      // Check that we have enough storage
-      CHECK_GE(workspace.size(0), config.workspace_size);
-    }
-
-    const int by = (config.kernel_1.do_transpose) ?
-      config.kernel_1.blockDim.x : config.kernel_1.blockDim.y;
-    const bool do_unroll = ( config.M / (by*config.Mnext) >= config.unroll_reduce );
-    KERNEL_UNROLL_SWITCH(do_unroll, ReduceImplConfig<ndim>::unroll_reduce, UNROLL, {
-      reduce_kernel<Reducer, ndim, DType, OP1, OP2, UNROLL>
-      <<< config.kernel_1.gridDim, config.kernel_1.blockDim, config.kernel_1.shMemSize, stream>>>(
-        config.N, config.M, addto, big.dptr<DType>(), lhs.dptr<DType>(), rhs.dptr<DType>(),
-        small_dptr, big.shape_.get<ndim>(), lhs.shape_.get<ndim>(),
-        rhs.shape_.get<ndim>(), small.shape_.get<ndim>(), config.rshape, config.lhs_shape,
-        config.rhs_shape, config.rstride, config.lhs_stride, config.rhs_stride, config.Mnext,
-        config.kernel_1.do_transpose);
-    });
-
-    if (config.Mnext > 1) {
-      reduce_lines_kernel<Reducer, DType>
-      <<< config.kernel_2.gridSize, config.kernel_2.blockSize, 0, stream >>>
-        (config.N, config.Mnext, req == kAddTo, config.N, small_dptr, small.dptr<DType>());
-    }
-  }
-}
-
-#undef KERNEL_UNROLL_SWITCH
-
-template<typename Reducer, int ndim, typename DType, typename OP>
-void Reduce(Stream<gpu> *s, const TBlob& small, const OpReqType req,
-            const Tensor<gpu, 1, char>& workspace, const TBlob& big) {
-  if (req == kNullOp) return;
-  cudaStream_t stream = Stream<gpu>::GetStream(s);
-  ReduceImplConfig<ndim> config = ConfigureReduceImpl<ndim, DType>(small, big, NULL, NULL);
-  ReduceImpl<Reducer, ndim, DType, OP>(stream, small, req, big, workspace, config);
-}
-
-template<typename Reducer, int ndim, typename DType, typename OP1, typename OP2>
-void Reduce(Stream<gpu> *s, const TBlob& small, const OpReqType req,
-            const Tensor<gpu, 1, char>& workspace, const TBlob& big,
-            const TBlob& lhs, const TBlob& rhs) {
-  if (req == kNullOp) return;
-  cudaStream_t stream = Stream<gpu>::GetStream(s);
-  ReduceImplConfig<ndim> config = ConfigureReduceImpl<ndim, DType>(small, big, &lhs, &rhs);
-  ReduceImpl<Reducer, ndim, DType, OP1, OP2>(stream, small, lhs, rhs, req, big, workspace, config);
-}
-
-template<int ndim, typename DType>
-size_t ReduceWorkspaceSize(Stream<gpu> *s, const TBlob& small, const OpReqType req,
-                           const TBlob& big) {
-  if (req == kNullOp) return 0;
-  ReduceImplConfig<ndim> config = ConfigureReduceImpl<ndim, DType>(small, big, NULL, NULL);
-  return config.workspace_size;
-}
-
-template<int ndim, typename DType>
-size_t ReduceWorkspaceSize(Stream<gpu> *s, const TBlob& small, const OpReqType req,
-                           const TBlob& big, const TBlob& lhs, const TBlob& rhs) {
-  if (req == kNullOp) return 0;
-  ReduceImplConfig<ndim> config = ConfigureReduceImpl<ndim, DType>(small, big, &lhs, &rhs);
-  return config.workspace_size;
-}
-
-#endif  //MXNET_OPERATOR_TENSOR_BROADCAST_REDUCE_INL_CUH_
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2015-2017 by Contributors
+ * \file broadcast_reduce-inl.cuh
+ * \brief CUDA implementations for binary broadcast and reduce
+ * \author Antti-Pekka Hynninen
+*/
+#ifndef MXNET_OPERATOR_TENSOR_BROADCAST_REDUCE_INL_CUH_
+#define MXNET_OPERATOR_TENSOR_BROADCAST_REDUCE_INL_CUH_
+
+using namespace mshadow::cuda;
+
+template<int ndim, typename DType, typename OP, int unroll>  // kernel: out[idx] <- OP::Map(lhs[j], rhs[k]) for idx in [0, N)
+__launch_bounds__(kMaxThreadsPerBlock)
+__global__ void binary_broadcast_kernel(const int N, const bool addto,  // N bounds the index into out; addto is forwarded to assign()
+                                        const DType* __restrict lhs,
+                                        const DType* __restrict rhs, DType *out,
+                                        const Shape<ndim> lstride, const Shape<ndim> rstride,
+                                        const Shape<ndim> oshape) {
+  for (int idx = blockIdx.x * blockDim.x * unroll + threadIdx.x; idx < N;  // a block covers blockDim.x*unroll consecutive indices per pass
+    idx += blockDim.x * gridDim.x * unroll)
+  {
+    int j[unroll];  // offsets into lhs, filled by unravel_dot
+    int k[unroll];  // offsets into rhs, filled by unravel_dot
+    DType val[unroll];
+    #pragma unroll
+    for (int i=0;i < unroll;i++) {
+      unravel_dot(idx + i*blockDim.x, oshape, lstride, rstride, &j[i], &k[i]);
+      val[i] = OP::Map(lhs[j[i]], rhs[k[i]]);  // NOTE(review): loads are not guarded by idx + i*blockDim.x < N; presumably unravel_dot keeps j/k in range — confirm
+    }
+    #pragma unroll
+    for (int i=0;i < unroll;i++) {
+      if (idx + i*blockDim.x < N) assign(&out[idx + i*blockDim.x], addto, val[i]);  // only the store is bounds-checked
+    }
+
+  }
+}
+
+template<int ndim, typename DType, typename OP>  // host-side launcher for binary_broadcast_kernel on the stream of s
+void BinaryBroadcastComputeImpl(Stream<gpu> *s, const OpReqType req,
+                                const TBlob& lhs, const TBlob& rhs, const TBlob& out) {
+  if (req == kNullOp) return;  // nothing is launched for kNullOp
+  cudaStream_t stream = Stream<gpu>::GetStream(s);
+  int N = out.shape_.Size();  // passed to the kernel as its loop bound
+  const int warpSize = 32;
+  const int unroll = 2;  // kernel template argument: indices handled per thread per pass
+  int nthread = std::min(kMaxThreadsPerBlock, ((N + warpSize - 1)/warpSize)*warpSize );  // N rounded up to a multiple of warpSize, capped at kMaxThreadsPerBlock
+  int ngrid = std::min(kBaseGridNum, (N + nthread*unroll - 1) / (nthread*unroll));  // NOTE(review): if N == 0 then nthread == 0 and this divides by zero — confirm callers never pass an empty out
+  Shape<ndim> lstride = calc_stride(lhs.shape_.get<ndim>());
+  Shape<ndim> rstride = calc_stride(rhs.shape_.get<ndim>());
+  binary_broadcast_kernel<ndim, DType, OP, unroll><<<ngrid, nthread, 0, stream>>>(  // no dynamic shared memory requested
+    N, req == kAddTo, lhs.dptr<DType>(), rhs.dptr<DType>(), out.dptr<DType>(), lstride, rstride,
+    out.shape_.get<ndim>());
+}
+
+const int nthread_reduce = kMaxThreadsPerBlock;  // launch bound for both reduce_kernel overloads
+template<typename Reducer, int ndim, typename DType, typename OP, int unroll>  // kernel: reduce OP::Map(big[...]) over M values for each of N outputs
+__launch_bounds__(nthread_reduce)
+__global__ void reduce_kernel(const int N, const int M, const bool addto,
+                              const DType* __restrict big, DType *small,
+                              const Shape<ndim> big_shape0, const Shape<ndim> small_shape,
+                              const Shape<ndim> big_shape, const Shape<ndim> big_stride,
+                              const int Mnext, const bool do_transpose) {
+  extern __shared__ char shTileChar[];  // dynamically sized shared memory, viewed as DType below
+  DType* shTile = (DType*)(shTileChar);
+  const int tid = threadIdx.x + threadIdx.y*blockDim.x;
+  const int bx = (do_transpose) ? blockDim.y : blockDim.x;  // threads along the N (output) direction
+  const int by = (do_transpose) ? blockDim.x : blockDim.y;  // threads along the M (reduced) direction
+  const int tidx = (do_transpose) ? tid / by : threadIdx.x;
+  const int tidy = (do_transpose) ? tid % by : threadIdx.y;
+  for (int m0 = blockIdx.y; m0 < Mnext; m0 += gridDim.y) {
+    // This thread block handles slice m0 of M, i.e. k in [Mstart, Mend - 1]; products are formed in 64 bits
+    const int Mstart = (int)((uint64_t)M*(uint64_t)m0/(uint64_t)Mnext);
+    const int Mend   = (int)((uint64_t)M*(uint64_t)(m0 + 1)/(uint64_t)Mnext);
+    for (int idx0 = blockIdx.x*bx; idx0 < N; idx0 += bx*gridDim.x) {
+      int idx = idx0 + tidx;  // output element owned by this thread column; may be >= N in the last pass
+      Shape<ndim> coord = unravel(idx, small_shape);
+      int idx_big0 = ravel(coord, big_shape0);  // base offset into big for this output element
+
+      DType val;
+      Reducer::SetInitValue(val);
+      if (idx < N) {
+        for (int k = tidy + Mstart; k < Mend; k += by*unroll) {
+          int idx_big[unroll];
+          #pragma unroll
+          for (int u=0;u < unroll;u++) {
+            idx_big[u] = idx_big0 + unravel_dot(k + u*by, big_shape, big_stride);  // offsets are computed for all u; loads below are guarded
+          }
+          DType tmp[unroll];
+          #pragma unroll
+          for (int u=0;u < unroll;u++) {
+            if (k + u*by < Mend) {
+              tmp[u] = OP::Map(big[idx_big[u]]);
+            }
+          }
+          #pragma unroll
+          for (int u=0;u < unroll;u++) {
+            if (k + u*by < Mend) Reducer::Reduce(val, tmp[u]);
+          }
+        }
+      }
+
+      // Shared memory tile is bx * by. Reduction is along by. Final result ends up in the tidy=0 row
+      if (by > 1) {
+        // Pad the row length by one when bx is a multiple of warpSize to avoid bank conflicts. Assumes warpSize banks
+        const int fbx = (do_transpose && ((bx & (warpSize - 1)) == 0)) ? (bx + 1) : bx;
+        const int it0 = tidx + tidy*fbx;
+        shTile[it0] = val;
+        __syncthreads();
+        for (int t=1;t < by;t <<= 1) {
+          DType tmp;
+          Reducer::SetInitValue(tmp);
+          if (tidy + t < by) tmp = shTile[it0 + t*fbx];  // read the partner row, or keep the init value past the tile edge
+          __syncthreads();  // all reads of this step complete before any write
+          Reducer::Reduce(shTile[it0], tmp);
+          __syncthreads();
+        }
+        if (idx < N && tidy == 0) {
+          assign(&small[idx + m0*N], addto, shTile[tidx]);  // slice m0 is written at offset m0*N, so small holds N*Mnext values when Mnext > 1
+        }
+      } else {
+        if (idx < N) {
+          assign(&small[idx + m0*N], addto, val);  // by == 1: no shared-memory step needed
+        }
+      }
+    }
+  }
+
+}
+
+template<typename Reducer, int ndim, typename DType, typename OP1, typename OP2, int unroll>  // kernel: reduce OP1::Map(big, OP2::Map(lhs, rhs)) over M values per output
+__launch_bounds__(nthread_reduce)
+__global__ void reduce_kernel(const int N, const int M, const bool addto,
+                              const DType* __restrict big, const DType* __restrict lhs,
+                              const DType* __restrict rhs, DType *small,
+                              const Shape<ndim> big_shape0, const Shape<ndim> lhs_shape0,
+                              const Shape<ndim> rhs_shape0, const Shape<ndim> small_shape,
+                              const Shape<ndim> big_shape, const Shape<ndim> lhs_shape,
+                              const Shape<ndim> rhs_shape, const Shape<ndim> big_stride,
+                              const Shape<ndim> lhs_stride, const Shape<ndim> rhs_stride,
+                              const int Mnext, const bool do_transpose) {
+  extern __shared__ char shTileChar[];  // dynamically sized shared memory, viewed as DType below
+  DType* shTile = (DType*)(shTileChar);
+  const int tid = threadIdx.x + threadIdx.y*blockDim.x;
+  const int bx = (do_transpose) ? blockDim.y : blockDim.x;  // threads along the N (output) direction
+  const int by = (do_transpose) ? blockDim.x : blockDim.y;  // threads along the M (reduced) direction
+  const int tidx = (do_transpose) ? tid / by : threadIdx.x;
+  const int tidy = (do_transpose) ? tid % by : threadIdx.y;
+  for (int m0 = blockIdx.y; m0 < Mnext; m0 += gridDim.y) {
+    // This thread block handles slice m0 of M, i.e. k in [Mstart, Mend - 1]; products are formed in 64 bits
+    const int Mstart = (int)((uint64_t)M*(uint64_t)m0/(uint64_t)Mnext);
+    const int Mend   = (int)((uint64_t)M*(uint64_t)(m0 + 1)/(uint64_t)Mnext);
+    for (int idx0 = blockIdx.x*bx; idx0 < N; idx0 += bx*gridDim.x) {
+      int idx = idx0 + tidx;  // output element owned by this thread column; may be >= N in the last pass
+      Shape<ndim> coord = unravel(idx, small_shape);
+      int idx_big0 = ravel(coord, big_shape0);  // base offsets of this output element in each operand
+      int idx_lhs0 = ravel(coord, lhs_shape0);
+      int idx_rhs0 = ravel(coord, rhs_shape0);
+
+      DType val;
+      Reducer::SetInitValue(val);
+      if (idx < N) {
+        for (int k = tidy + Mstart; k < Mend; k += by*unroll) {
+          int idx_big[unroll];
+          int idx_lhs[unroll];
+          int idx_rhs[unroll];
+          #pragma unroll
+          for (int u=0;u < unroll;u++) {
+            idx_big[u] = idx_big0 + unravel_dot(k + u*by, big_shape, big_stride);  // offsets are computed for all u; loads below are guarded
+            idx_lhs[u] = idx_lhs0 + unravel_dot(k + u*by, lhs_shape, lhs_stride);
+            idx_rhs[u] = idx_rhs0 + unravel_dot(k + u*by, rhs_shape, rhs_stride);
+          }
+          DType tmp[unroll];
+          #pragma unroll
+          for (int u=0;u < unroll;u++) {
+            if (k + u*by < Mend) {
+              tmp[u] = OP1::Map(big[idx_big[u]], OP2::Map(lhs[idx_lhs[u]], rhs[idx_rhs[u]]));
+            }
+          }
+          #pragma unroll
+          for (int u=0;u < unroll;u++) {
+            if (k + u*by < Mend) Reducer::Reduce(val, tmp[u]);
+          }
+        }
+      }
+
+      // Shared memory tile is bx * by. Reduction is along by. Final result ends up in the tidy=0 row
+      if (by > 1) {
+        // Pad the row length by one when bx is a multiple of warpSize to avoid bank conflicts. Assumes warpSize banks
+        const int fbx = (do_transpose && ((bx & (warpSize - 1)) == 0)) ? (bx + 1) : bx;
+        const int it0 = tidx + tidy*fbx;
+        shTile[it0] = val;
+        __syncthreads();
+        for (int t=1;t < by;t <<= 1) {
+          DType tmp;
+          Reducer::SetInitValue(tmp);
+          if (tidy + t < by) tmp = shTile[it0 + t*fbx];  // read the partner row, or keep the init value past the tile edge
+          __syncthreads();  // all reads of this step complete before any write
+          Reducer::Reduce(shTile[it0], tmp);
+          __syncthreads();
+        }
+        if (idx < N && tidy == 0) {
+          assign(&small[idx + m0*N], addto, shTile[tidx]);  // slice m0 is written at offset m0*N, so small holds N*Mnext values when Mnext > 1
+        }
+      } else {
+        if (idx < N) {
+          assign(&small[idx + m0*N], addto, val);  // by == 1: no shared-memory step needed
+        }
+      }
+    }
+  }
+
+}
+
+// Reduces M lines of small_in (line k starts at k*small_in_stride) element-wise into small_out[0..N-1]; one thread per output, serial loop over M
+template<typename Reducer, typename DType>
+__launch_bounds__(kMaxThreadsPerBlock)
+__global__ void reduce_lines_kernel(const int N, const int M, const bool addto,
+  const int small_in_stride, const DType* __restrict small_in, DType *small_out) {
+  for (int idx = threadIdx.x + blockIdx.x*blockDim.x; idx < N; idx += blockDim.x*gridDim.x) {
+
+    DType val;
+    Reducer::SetInitValue(val);
+    for (int k = 0; k < M; k++) {
+      Reducer::Reduce(val, small_in[idx + k*small_in_stride]);
+    }
+
+    if (idx < N) {  // always true here: the enclosing loop condition already guarantees idx < N
+      assign(&small_out[idx], addto, val);
+    }
+
+  }
+}
+
+template<typename Reducer, int ndim, typename DType, typename OP>  // kernel for the M == 1 case: one mapped input per output; Reducer is not used in the body
+__global__ void reduce_kernel_M1(const int N, const bool addto,
+                                const DType* __restrict big, DType *small, const Shape<ndim> bshape,
+                                const Shape<ndim> sshape) {
+  for (int idx = threadIdx.x + blockIdx.x*blockDim.x; idx < N; idx += blockDim.x*gridDim.x) {
+    Shape<ndim> coord = unravel(idx, sshape);  // coordinate of output idx in the small shape
+    int j = ravel(coord, bshape);  // offset of the same coordinate in big
+    assign(&small[idx], addto, OP::Map(big[j]));
+  }
+}
+
+template<typename Reducer, int ndim, typename DType, typename OP1, typename OP2>  // M == 1 kernel with three inputs; Reducer is not used in the body
+__global__ void reduce_kernel_M1(const int N, const bool addto,
+                                 const DType* __restrict big,
+                                 const DType* __restrict lhs,
+                                 const DType* __restrict rhs,
+                                 DType *small,
+                                 const Shape<ndim> big_shape,
+                                 const Shape<ndim> lhs_shape,
+                                 const Shape<ndim> rhs_shape,
+                                 const Shape<ndim> small_shape) {
+  for (int idx = threadIdx.x + blockIdx.x*blockDim.x; idx < N; idx += blockDim.x*gridDim.x) {
+    Shape<ndim> coord = unravel(idx, small_shape);  // coordinate of output idx in the small shape
+    int idx_big = ravel(coord, big_shape);  // offsets of that coordinate in each operand
+    int idx_lhs = ravel(coord, lhs_shape);
+    int idx_rhs = ravel(coord, rhs_shape);
+    DType val = OP1::Map(big[idx_big], OP2::Map(lhs[idx_lhs], rhs[idx_rhs]));
+    assign(&small[idx], addto, val);
+  }
+}
+
+// Scans dimensions from last to first and, at the first one where big[i] != 1, returns 1 if small matches big there, else big_stride[i].
+// Returns 1 if every big[i] is 1. Used to detect memory access scatter.
+template<int ndim>
+MSHADOW_XINLINE int fastest_stride(const Shape<ndim>& small, const Shape<ndim>& big,
+  const Shape<ndim>& big_stride) {
+  for (int i = ndim-1; i >= 0; --i) {
+    if (big[i] != 1) {
+      return (small[i] == big[i]) ? 1 : big_stride[i];  // equal extents: treated as unit stride
+    }
+  }
+  return 1;
+}
+
+// Returns a/b rounded up, computed as (a + b - 1)/b; the caller must supply b != 0 (no check is made here)
+template<typename Type>
+Type ceil_idiv(const Type a, const Type b) {
+  return (a + b - 1)/b;
+}
+
+// Launch configuration for ReduceImpl(); filled in by ConfigureReduceImpl(). The struct has no constructor, so int/size_t/bool members start uninitialized
+template<int ndim>
+struct ReduceImplConfig {
+  static const int warpSize = 32;
+  static const int unroll_reduce = 2;  // unroll factor requested when do_unroll holds in ReduceImpl()
+  static const int maxLoopPerTB = 64;  // used by ConfigureReduceImpl() to size the M-block of one thread block
+  int N;  // set from small.shape_.Size()
+  int M;  // set from rshape.Size()
+  int Mnext;  // number of M-slices; > 1 selects the two-kernel path in ReduceImpl()
+  struct {
+    dim3 blockDim;
+    dim3 gridDim;
+    int shMemSize;  // dynamic shared memory size passed to the reduce_kernel launch
+    bool do_transpose;
+  } kernel_1;
+  struct {
+    int blockSize;
+    int gridSize;
+  } kernel_2;  // launch shape of reduce_lines_kernel; only set when Mnext > 1
+  size_t workspace_size;  // bytes of workspace required; 0 unless Mnext > 1
+
+  Shape<ndim> rshape, rstride;  // outputs of diff(small, big)
+  Shape<ndim> lhs_shape, lhs_stride;  // outputs of diff(small, lhs); set only when lhs/rhs are supplied
+  Shape<ndim> rhs_shape, rhs_stride;  // outputs of diff(small, rhs); set only when lhs/rhs are supplied
+};
+
+static inline uint64_t calc_num_load(const int X, const int Y, const int* strides) {  // load-count estimate compared by ConfigureReduceImpl(); reads strides[0..2]
+  const int warpSize = ReduceImplConfig<1>::warpSize;
+  // Number of full warps along X
+  uint64_t num_full_warp = X / warpSize;
+  // Length of the partial warp i.e. number of threads that are performing loads
+  uint64_t len_part_warp = X % warpSize;
+
+  uint64_t num_load_full = (std::min(warpSize, strides[0]) +  // per full warp and operand: min(warpSize, stride)
+    std::min(warpSize, strides[1]) +
+    std::min(warpSize, strides[2]))*num_full_warp;
+
+  uint64_t num_load_part =
+  (std::min(len_part_warp, ceil_idiv<uint64_t>(len_part_warp*strides[0], warpSize)) +  // partial warp: ceil(len*stride/warpSize), capped at len
+    std::min(len_part_warp, ceil_idiv<uint64_t>(len_part_warp*strides[1], warpSize)) +
+    std::min(len_part_warp, ceil_idiv<uint64_t>(len_part_warp*strides[2], warpSize)))*
+  (len_part_warp != 0);  // multiplies by 0 when there is no partial warp
+
+  uint64_t num_load = (num_load_full + num_load_part)*(uint64_t)Y;  // same cost assumed for each of the Y rows
+  return num_load;
+}
+
+template<int ndim, typename DType>  // builds the launch configuration and workspace size consumed by ReduceImpl()
+ReduceImplConfig<ndim> ConfigureReduceImpl(const TBlob& small, const TBlob& big, const TBlob* lhs,
+  const TBlob* rhs) {
+
+  ReduceImplConfig<ndim> config;
+
+  diff(small.shape_.get<ndim>(), big.shape_.get<ndim>(), &config.rshape, &config.rstride);
+  config.N = small.shape_.Size();
+  config.M = config.rshape.Size();
+
+  bool multiOp = false;  // true when lhs/rhs operands are supplied
+  if (lhs != NULL) {
+    CHECK_NOTNULL(rhs);
+    diff(small.shape_.get<ndim>(), lhs->shape_.get<ndim>(), &config.lhs_shape,
+      &config.lhs_stride);
+    diff(small.shape_.get<ndim>(), rhs->shape_.get<ndim>(), &config.rhs_shape,
+      &config.rhs_stride);
+    multiOp = true;
+  }
+
+  config.workspace_size = 0;
+
+  if (config.M == 1) {  // this path sets only blockDim.x / gridDim.x; Mnext, do_transpose, shMemSize and kernel_2 stay unset
+    config.kernel_1.blockDim.x = kMaxThreadsPerBlock;
+    config.kernel_1.gridDim.x = std::min((unsigned int)kBaseGridNum,
+      (config.N + config.kernel_1.blockDim.x - 1)/config.kernel_1.blockDim.x);
+  } else {
+
+    int reduce_strides[3];  // one entry per operand (big, lhs, rhs); 1 for absent operands
+    reduce_strides[0] = fastest_stride(small.shape_.get<ndim>(), big.shape_.get<ndim>(),
+      big.shape_.get<ndim>());  // NOTE(review): the third parameter of fastest_stride is named big_stride but a shape is passed here — confirm intended
+    reduce_strides[1] = (multiOp) ? fastest_stride(small.shape_.get<ndim>(),
+      lhs->shape_.get<ndim>(), lhs->shape_.get<ndim>()) : 1;
+    reduce_strides[2] = (multiOp) ? fastest_stride(small.shape_.get<ndim>(),
+      rhs->shape_.get<ndim>(), rhs->shape_.get<ndim>()) : 1;
+
+    int reduce_strides_transp[3];  // same estimate for the transposed layout, using the diff() outputs
+    reduce_strides_transp[0] = fastest_stride(small.shape_.get<ndim>(), config.rshape,
+      config.rstride);
+    reduce_strides_transp[1] = (multiOp) ?
+      fastest_stride(small.shape_.get<ndim>(), config.lhs_shape, config.lhs_stride) : 1;
+    reduce_strides_transp[2] = (multiOp) ?
+      fastest_stride(small.shape_.get<ndim>(), config.rhs_shape, config.rhs_stride) : 1;
+
+    uint64_t num_load = calc_num_load(config.N, config.M, reduce_strides);
+    uint64_t num_load_transp = calc_num_load(config.M, config.N, reduce_strides_transp);
+
+    config.Mnext = 1;
+    config.kernel_1.do_transpose = (num_load > num_load_transp);  // transpose only when its load estimate is strictly smaller
+
+    config.kernel_1.blockDim.x = 0;  // 0 marks "not chosen yet"; exactly one of x / y is set below
+    config.kernel_1.blockDim.y = 0;
+
+    if (config.kernel_1.do_transpose) {
+      // Fastest thread ID goes through M
+      // Loop over N has step size config.kernel_1.blockDim.y
+      if (config.N < 8) {
+        config.kernel_1.blockDim.y = 1;
+      } else if (config.N < 256) {
+        config.kernel_1.blockDim.y = 4;
+      } else {
+        if (config.M < 8) {
+          config.kernel_1.blockDim.x = 1;
+        } else if (config.M < 256) {
+          config.kernel_1.blockDim.x = 4;
+        } else {
+          config.kernel_1.blockDim.x = config.warpSize;
+        }
+      }
+    } else {
+      // Fastest thread ID goes through N
+      // Loop over M has step size config.kernel_1.blockDim.y
+      if (config.M < 8) {
+        config.kernel_1.blockDim.y = 1;
+      } else if (config.M < 256) {
+        config.kernel_1.blockDim.y = 4;
+      } else {
+        if (config.N < 8) {
+          config.kernel_1.blockDim.x = 1;
+        } else if (config.N < 256) {
+          config.kernel_1.blockDim.x = 4;
+        } else {
+          config.kernel_1.blockDim.x = config.warpSize;
+        }
+      }
+    }
+
+    if (config.kernel_1.blockDim.x == 0 && config.kernel_1.blockDim.y == 0) {  // defensive: every branch above sets one of the two
+      LOG(FATAL) << "Unable to set blockDim";
+    } else if (config.kernel_1.blockDim.x == 0) {
+      config.kernel_1.blockDim.x = nthread_reduce / config.kernel_1.blockDim.y;  // remaining dimension fills the block up to nthread_reduce threads
+    } else if (config.kernel_1.blockDim.y == 0) {
+      config.kernel_1.blockDim.y = nthread_reduce / config.kernel_1.blockDim.x;
+    }
+
+    if (config.kernel_1.do_transpose) {
+      // Fastest thread ID goes through M
+      config.kernel_1.gridDim.x = std::min((unsigned int)kBaseGridNum,
+        ceil_idiv<unsigned int>(config.N, config.kernel_1.blockDim.y));
+      config.kernel_1.gridDim.y = std::min(kBaseGridNum, config.Mnext);  // Mnext is still 1 here; gridDim.y is set again below once Mnext is known
+      int by = config.kernel_1.blockDim.y;
+      if (config.kernel_1.blockDim.y % config.warpSize == 0) {
+        // Pad by one element to avoid shared memory bank conflicts (the kernel applies the same padding to its row length)
+        by++;
+      }
+      config.kernel_1.shMemSize = (config.kernel_1.blockDim.x > 1) ?
+        config.kernel_1.blockDim.x*by*sizeof(DType) : 0;
+      // Maximum number of times we want TB to loop in M
+      // Max size of M-block each TB can handle
+      int maxMblock = config.kernel_1.blockDim.x*config.maxLoopPerTB;
+      config.Mnext = (config.M + maxMblock - 1) / maxMblock;  // ceil(M / maxMblock)
+    } else {
+      // Fastest thread ID goes through N
+      config.kernel_1.gridDim.x = std::min((unsigned int)kBaseGridNum,
+        ceil_idiv<unsigned int>(config.N, config.kernel_1.blockDim.x));
+      config.kernel_1.gridDim.y = std::min(kBaseGridNum, config.Mnext);  // Mnext is still 1 here; gridDim.y is set again below once Mnext is known
+      config.kernel_1.shMemSize = (config.kernel_1.blockDim.y > 1) ?
+        config.kernel_1.blockDim.x*config.kernel_1.blockDim.y*sizeof(DType) : 0;
+      // Maximum number of times we want TB to loop in M
+      // Max size of M-block each TB can handle
+      int maxMblock = config.kernel_1.blockDim.y*config.maxLoopPerTB;
+      config.Mnext = (config.M + maxMblock - 1) / maxMblock;  // ceil(M / maxMblock)
+    }
+
+    if (config.Mnext > 1) {
+      // small_dptr[] is N*Mnext*sizeof(DType) bytes
+      config.workspace_size += config.N*config.Mnext*sizeof(DType);  // NOTE(review): N*Mnext is multiplied as int before widening to size_t — may overflow for very large shapes
+      // Set gridDim.y to Mnext, capped at kBaseGridNum
+      config.kernel_1.gridDim.y = std::min(kBaseGridNum, config.Mnext);
+    }
+
+    if (config.Mnext > 1) {  // second kernel (reduce_lines_kernel) is only needed when M was split
+      config.kernel_2.blockSize = kMaxThreadsPerBlock;
+      config.kernel_2.gridSize = std::min((int)kBaseGridNum,
+        (config.N + config.kernel_2.blockSize - 1)/config.kernel_2.blockSize );
+    }
+
+  }
+
+  return config;
+}
+
+#define KERNEL_UNROLL_SWITCH(do_unroll, unrollAmount, unrollVar, ...) \
+  if (do_unroll) {                                                    \
+    const int unrollVar = unrollAmount;                               \
+    {__VA_ARGS__}                                                     \
+  } else {                                                            \
+    const int unrollVar = 1;                                          \
+    {__VA_ARGS__}                                                     \
+  }  // pastes the body twice so unrollVar is a compile-time constant (unrollAmount or 1) usable as a template argument
+
+template<typename Reducer, int ndim, typename DType, typename OP>  // launches the reduction kernels described by config on the given stream
+void ReduceImpl(cudaStream_t stream, const TBlob& small, const OpReqType req,
+                const TBlob& big, const Tensor<gpu, 1, char>& workspace,
+                const ReduceImplConfig<ndim>& config) {
+  if (config.M == 1) {  // nothing to reduce: element-wise map only
+    reduce_kernel_M1<Reducer, ndim, DType, OP>
+    <<< config.kernel_1.gridDim, config.kernel_1.blockDim, 0, stream >>>(
+      config.N, req == kAddTo, big.dptr<DType>(), small.dptr<DType>(), big.shape_.get<ndim>(),
+      small.shape_.get<ndim>());
+  } else {
+
+    DType* small_dptr = small.dptr<DType>();  // first-kernel output; redirected to the workspace when M is split
+    bool addto = (req == kAddTo);
+    if (config.Mnext > 1) {
+      // small_dptr[] is N*Mnext*sizeof(DType) bytes
+      small_dptr = reinterpret_cast<DType*>(workspace.dptr_);
+      addto = false;  // partial results overwrite the workspace; kAddTo is applied by the second kernel
+      // Check that the workspace is contiguous
+      CHECK_EQ(workspace.CheckContiguous(), true);
+      // Check that we have enough storage
+      CHECK_GE(workspace.size(0), config.workspace_size);
+    }
+
+    const int by = (config.kernel_1.do_transpose) ?
+      config.kernel_1.blockDim.x : config.kernel_1.blockDim.y;  // threads along M, matching the kernel's own definition of by
+    const bool do_unroll = ( config.M / (by*config.Mnext) >= config.unroll_reduce );  // unroll only if each thread gets at least unroll_reduce values
+    KERNEL_UNROLL_SWITCH(do_unroll, ReduceImplConfig<ndim>::unroll_reduce, UNROLL, {
+      reduce_kernel<Reducer, ndim, DType, OP, UNROLL>
+      <<< config.kernel_1.gridDim, config.kernel_1.blockDim, config.kernel_1.shMemSize, stream>>>(
+        config.N, config.M, addto, big.dptr<DType>(), small_dptr, big.shape_.get<ndim>(),
+        small.shape_.get<ndim>(), config.rshape, config.rstride, config.Mnext,
+        config.kernel_1.do_transpose);
+    });
+
+    if (config.Mnext > 1) {  // second stage: combine the Mnext partial lines (stride N) into small
+      reduce_lines_kernel<Reducer, DType>
+      <<< config.kernel_2.gridSize, config.kernel_2.blockSize, 0, stream >>>
+        (config.N, config.Mnext, req == kAddTo, config.N, small_dptr, small.dptr<DType>());
+    }
+  }
+}
+
+template<typename Reducer, int ndim, typename DType, typename OP1, typename OP2>  // three-input variant: launches the OP1/OP2 reduction kernels described by config
+void ReduceImpl(cudaStream_t stream, const TBlob& small, const TBlob& lhs, const TBlob& rhs,
+                const OpReqType req, const TBlob& big, const Tensor<gpu, 1, char>& workspace,
+                const ReduceImplConfig<ndim>& config) {
+  if (config.M == 1) {  // nothing to reduce: element-wise map only
+    reduce_kernel_M1<Reducer, ndim, DType, OP1, OP2>
+    <<< config.kernel_1.gridDim, config.kernel_1.blockDim, 0, stream >>>(
+      config.N, req == kAddTo, big.dptr<DType>(), lhs.dptr<DType>(), rhs.dptr<DType>(),
+      small.dptr<DType>(), big.shape_.get<ndim>(), lhs.shape_.get<ndim>(),
+      rhs.shape_.get<ndim>(), small.shape_.get<ndim>());
+  } else {
+    DType* small_dptr = small.dptr<DType>();  // first-kernel output; redirected to the workspace when M is split
+    bool addto = (req == kAddTo);
+    if (config.Mnext > 1) {
+      // small_dptr[] is N*Mnext*sizeof(DType) bytes
+      small_dptr = reinterpret_cast<DType*>(workspace.dptr_);
+      addto = false;  // partial results overwrite the workspace; kAddTo is applied by the second kernel
+      // Check that the workspace is contiguous
+      CHECK_EQ(workspace.CheckContiguous(), true);
+      // Check that we have enough storage
+      CHECK_GE(workspace.size(0), config.workspace_size);
+    }
+
+    const int by = (config.kernel_1.do_transpose) ?
+      config.kernel_1.blockDim.x : config.kernel_1.blockDim.y;  // threads along M, matching the kernel's own definition of by
+    const bool do_unroll = ( config.M / (by*config.Mnext) >= config.unroll_reduce );  // unroll only if each thread gets at least unroll_reduce values
+    KERNEL_UNROLL_SWITCH(do_unroll, ReduceImplConfig<ndim>::unroll_reduce, UNROLL, {
+      reduce_kernel<Reducer, ndim, DType, OP1, OP2, UNROLL>
+      <<< config.kernel_1.gridDim, config.kernel_1.blockDim, config.kernel_1.shMemSize, stream>>>(
+        config.N, config.M, addto, big.dptr<DType>(), lhs.dptr<DType>(), rhs.dptr<DType>(),
+        small_dptr, big.shape_.get<ndim>(), lhs.shape_.get<ndim>(),
+        rhs.shape_.get<ndim>(), small.shape_.get<ndim>(), config.rshape, config.lhs_shape,
+        config.rhs_shape, config.rstride, config.lhs_stride, config.rhs_stride, config.Mnext,
+        config.kernel_1.do_transpose);
+    });
+
+    if (config.Mnext > 1) {  // second stage: combine the Mnext partial lines (stride N) into small
+      reduce_lines_kernel<Reducer, DType>
+      <<< config.kernel_2.gridSize, config.kernel_2.blockSize, 0, stream >>>
+        (config.N, config.Mnext, req == kAddTo, config.N, small_dptr, small.dptr<DType>());
+    }
+  }
+}
+
+#undef KERNEL_UNROLL_SWITCH
+
+template<typename Reducer, int ndim, typename DType, typename OP>  // entry point: reduce big into small with Reducer after applying OP
+void Reduce(Stream<gpu> *s, const TBlob& small, const OpReqType req,
+            const Tensor<gpu, 1, char>& workspace, const TBlob& big) {
+  if (req == kNullOp) return;  // nothing is launched for kNullOp
+  cudaStream_t stream = Stream<gpu>::GetStream(s);
+  ReduceImplConfig<ndim> config = ConfigureReduceImpl<ndim, DType>(small, big, NULL, NULL);  // NULL, NULL: no lhs/rhs operands
+  ReduceImpl<Reducer, ndim, DType, OP>(stream, small, req, big, workspace, config);
+}
+
+template<typename Reducer, int ndim, typename DType, typename OP1, typename OP2>  // entry point: reduce OP1(big, OP2(lhs, rhs)) into small with Reducer
+void Reduce(Stream<gpu> *s, const TBlob& small, const OpReqType req,
+            const Tensor<gpu, 1, char>& workspace, const TBlob& big,
+            const TBlob& lhs, const TBlob& rhs) {
+  if (req == kNullOp) return;  // nothing is launched for kNullOp
+  cudaStream_t stream = Stream<gpu>::GetStream(s);
+  ReduceImplConfig<ndim> config = ConfigureReduceImpl<ndim, DType>(small, big, &lhs, &rhs);
+  ReduceImpl<Reducer, ndim, DType, OP1, OP2>(stream, small, lhs, rhs, req, big, workspace, config);
+}
+
+template<int ndim, typename DType>  // returns the workspace bytes the matching Reduce() call needs; s is not used in the body
+size_t ReduceWorkspaceSize(Stream<gpu> *s, const TBlob& small, const OpReqType req,
+                           const TBlob& big) {
+  if (req == kNullOp) return 0;  // Reduce() launches nothing for kNullOp, so no workspace is needed
+  ReduceImplConfig<ndim> config = ConfigureReduceImpl<ndim, DType>(small, big, NULL, NULL);  // same configuration call as Reduce()
+  return config.workspace_size;
+}
+
+template<int ndim, typename DType>  // workspace bytes needed by the lhs/rhs Reduce() overload; s is not used in the body
+size_t ReduceWorkspaceSize(Stream<gpu> *s, const TBlob& small, const OpReqType req,
+                           const TBlob& big, const TBlob& lhs, const TBlob& rhs) {
+  if (req == kNullOp) return 0;  // Reduce() launches nothing for kNullOp, so no workspace is needed
+  ReduceImplConfig<ndim> config = ConfigureReduceImpl<ndim, DType>(small, big, &lhs, &rhs);  // same configuration call as Reduce()
+  return config.workspace_size;
+}
+
+#endif  //MXNET_OPERATOR_TENSOR_BROADCAST_REDUCE_INL_CUH_
diff --git a/src/operator/tensor/broadcast_reduce-inl.h b/src/operator/tensor/broadcast_reduce-inl.h
index 118b505..744308d 100644
--- a/src/operator/tensor/broadcast_reduce-inl.h
+++ b/src/operator/tensor/broadcast_reduce-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015-2017 by Contributors
  * \file broadcast_reduce_kernel.h
  * \brief Function definition of elementwise unary operators
  */
diff --git a/src/operator/tensor/broadcast_reduce_op.h b/src/operator/tensor/broadcast_reduce_op.h
index 1de33bc..aa678fd 100644
--- a/src/operator/tensor/broadcast_reduce_op.h
+++ b/src/operator/tensor/broadcast_reduce_op.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file elementwise_unary_op-inl.h
  * \brief Function definition of elementwise unary operators
  */
diff --git a/src/operator/tensor/broadcast_reduce_op_index.cc b/src/operator/tensor/broadcast_reduce_op_index.cc
index 5944e23..6887955 100644
--- a/src/operator/tensor/broadcast_reduce_op_index.cc
+++ b/src/operator/tensor/broadcast_reduce_op_index.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file broadcast_reduce_op.cc
  * \brief CPU Implementation of broadcast and reduce functions.
  */
diff --git a/src/operator/tensor/broadcast_reduce_op_index.cu b/src/operator/tensor/broadcast_reduce_op_index.cu
index e07b3a2..defa35e 100644
--- a/src/operator/tensor/broadcast_reduce_op_index.cu
+++ b/src/operator/tensor/broadcast_reduce_op_index.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file broadcast_reduce_op.cu
  * \brief GPU Implementation of broadcast and reduce functions.
  */
diff --git a/src/operator/tensor/broadcast_reduce_op_value.cc b/src/operator/tensor/broadcast_reduce_op_value.cc
index fdbaf76..551ee8b 100644
--- a/src/operator/tensor/broadcast_reduce_op_value.cc
+++ b/src/operator/tensor/broadcast_reduce_op_value.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file broadcast_reduce_op.cc
  * \brief CPU Implementation of broadcast and reduce functions.
  */
diff --git a/src/operator/tensor/broadcast_reduce_op_value.cu b/src/operator/tensor/broadcast_reduce_op_value.cu
index e4b90d5..2c216e7 100644
--- a/src/operator/tensor/broadcast_reduce_op_value.cu
+++ b/src/operator/tensor/broadcast_reduce_op_value.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file broadcast_reduce_op.cu
  * \brief GPU Implementation of broadcast and reduce functions.
  */
diff --git a/src/operator/tensor/control_flow_op.cc b/src/operator/tensor/control_flow_op.cc
index 5dd525e..bf08fe7 100644
--- a/src/operator/tensor/control_flow_op.cc
+++ b/src/operator/tensor/control_flow_op.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file control_flow_op.cc
  * \brief CPU Implementation of flow control
  */
diff --git a/src/operator/tensor/control_flow_op.cu b/src/operator/tensor/control_flow_op.cu
index dbea01c..da2c472 100644
--- a/src/operator/tensor/control_flow_op.cu
+++ b/src/operator/tensor/control_flow_op.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file control_flow_op.cu
  * \brief
  */
diff --git a/src/operator/tensor/control_flow_op.h b/src/operator/tensor/control_flow_op.h
index c7fcda0..c240247 100644
--- a/src/operator/tensor/control_flow_op.h
+++ b/src/operator/tensor/control_flow_op.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file control_flow.h
  * \brief Function definitions of operators for controlling flow
  */
diff --git a/src/operator/tensor/elemwise_binary_broadcast_op.h b/src/operator/tensor/elemwise_binary_broadcast_op.h
index be0d27d..851a1c5 100644
--- a/src/operator/tensor/elemwise_binary_broadcast_op.h
+++ b/src/operator/tensor/elemwise_binary_broadcast_op.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file elementwise_binary_broadcast_op.h
  * \brief Function definition of elementwise unary operators
  */
diff --git a/src/operator/tensor/elemwise_binary_broadcast_op_basic.cc b/src/operator/tensor/elemwise_binary_broadcast_op_basic.cc
index 27a4b5f..c80d46a 100644
--- a/src/operator/tensor/elemwise_binary_broadcast_op_basic.cc
+++ b/src/operator/tensor/elemwise_binary_broadcast_op_basic.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file elemwise_binary_scalar_op.cc
  * \brief CPU Implementation of unary function.
  */
diff --git a/src/operator/tensor/elemwise_binary_broadcast_op_basic.cu b/src/operator/tensor/elemwise_binary_broadcast_op_basic.cu
index ef0e679..bf69132 100644
--- a/src/operator/tensor/elemwise_binary_broadcast_op_basic.cu
+++ b/src/operator/tensor/elemwise_binary_broadcast_op_basic.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file elemwise_binary_scalar_op.cu
  * \brief GPU Implementation of unary function.
  */
diff --git a/src/operator/tensor/elemwise_binary_broadcast_op_extended.cc b/src/operator/tensor/elemwise_binary_broadcast_op_extended.cc
index 127d8c0..42da191 100644
--- a/src/operator/tensor/elemwise_binary_broadcast_op_extended.cc
+++ b/src/operator/tensor/elemwise_binary_broadcast_op_extended.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file elemwise_binary_scalar_op.cc
  * \brief CPU Implementation of unary function.
  */
diff --git a/src/operator/tensor/elemwise_binary_broadcast_op_extended.cu b/src/operator/tensor/elemwise_binary_broadcast_op_extended.cu
index 649e19b..2b7cc70 100644
--- a/src/operator/tensor/elemwise_binary_broadcast_op_extended.cu
+++ b/src/operator/tensor/elemwise_binary_broadcast_op_extended.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file elemwise_binary_scalar_op.cu
  * \brief GPU Implementation of unary function.
  */
diff --git a/src/operator/tensor/elemwise_binary_broadcast_op_logic.cc b/src/operator/tensor/elemwise_binary_broadcast_op_logic.cc
index 900f376..957b00b 100644
--- a/src/operator/tensor/elemwise_binary_broadcast_op_logic.cc
+++ b/src/operator/tensor/elemwise_binary_broadcast_op_logic.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file elemwise_binary_scalar_op.cc
  * \brief CPU Implementation of unary function.
  */
diff --git a/src/operator/tensor/elemwise_binary_broadcast_op_logic.cu b/src/operator/tensor/elemwise_binary_broadcast_op_logic.cu
index 167b5d3..8673b4f 100644
--- a/src/operator/tensor/elemwise_binary_broadcast_op_logic.cu
+++ b/src/operator/tensor/elemwise_binary_broadcast_op_logic.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file elemwise_binary_scalar_op.cu
  * \brief GPU Implementation of unary function.
  */
diff --git a/src/operator/tensor/elemwise_binary_op.h b/src/operator/tensor/elemwise_binary_op.h
index 6062feb..87b0d46 100644
--- a/src/operator/tensor/elemwise_binary_op.h
+++ b/src/operator/tensor/elemwise_binary_op.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file elemwise_binary_op.h
  * \brief Function definition of elementwise binary operators
  */
diff --git a/src/operator/tensor/elemwise_binary_op_basic.cc b/src/operator/tensor/elemwise_binary_op_basic.cc
index 635f2a8..65d4ca9 100644
--- a/src/operator/tensor/elemwise_binary_op_basic.cc
+++ b/src/operator/tensor/elemwise_binary_op_basic.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file elemwise_binary_scalar_op.cc
  * \brief CPU Implementation of unary function.
  */
diff --git a/src/operator/tensor/elemwise_binary_op_basic.cu b/src/operator/tensor/elemwise_binary_op_basic.cu
index 6355c4e..429140a 100644
--- a/src/operator/tensor/elemwise_binary_op_basic.cu
+++ b/src/operator/tensor/elemwise_binary_op_basic.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file elemwise_binary_scalar_op.cu
  * \brief GPU Implementation of unary function.
  */
diff --git a/src/operator/tensor/elemwise_binary_op_extended.cc b/src/operator/tensor/elemwise_binary_op_extended.cc
index c1669c6..31d977c 100644
--- a/src/operator/tensor/elemwise_binary_op_extended.cc
+++ b/src/operator/tensor/elemwise_binary_op_extended.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file elemwise_binary_scalar_op.cc
  * \brief CPU Implementation of unary function.
  */
diff --git a/src/operator/tensor/elemwise_binary_op_extended.cu b/src/operator/tensor/elemwise_binary_op_extended.cu
index 7325ebf..9a10b05 100644
--- a/src/operator/tensor/elemwise_binary_op_extended.cu
+++ b/src/operator/tensor/elemwise_binary_op_extended.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file elemwise_binary_scalar_op.cu
  * \brief GPU Implementation of unary function.
  */
diff --git a/src/operator/tensor/elemwise_binary_op_logic.cc b/src/operator/tensor/elemwise_binary_op_logic.cc
index 0903b50..85f2bf1 100644
--- a/src/operator/tensor/elemwise_binary_op_logic.cc
+++ b/src/operator/tensor/elemwise_binary_op_logic.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file elemwise_binary_scalar_op.cc
  * \brief CPU Implementation of unary function.
  */
diff --git a/src/operator/tensor/elemwise_binary_op_logic.cu b/src/operator/tensor/elemwise_binary_op_logic.cu
index 9fab912..1a703ed 100644
--- a/src/operator/tensor/elemwise_binary_op_logic.cu
+++ b/src/operator/tensor/elemwise_binary_op_logic.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file elemwise_binary_scalar_op.cu
  * \brief GPU Implementation of unary function.
  */
diff --git a/src/operator/tensor/elemwise_binary_scalar_op.h b/src/operator/tensor/elemwise_binary_scalar_op.h
index 5e577c6..f27df27 100644
--- a/src/operator/tensor/elemwise_binary_scalar_op.h
+++ b/src/operator/tensor/elemwise_binary_scalar_op.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file elemwise_binary_scalar_op.h
  * \brief Function definition of elementwise binary scalar operators
  */
diff --git a/src/operator/tensor/elemwise_binary_scalar_op_basic.cc b/src/operator/tensor/elemwise_binary_scalar_op_basic.cc
index bd0b533..3249bcb 100644
--- a/src/operator/tensor/elemwise_binary_scalar_op_basic.cc
+++ b/src/operator/tensor/elemwise_binary_scalar_op_basic.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file elemwise_binary_scalar_op.cc
  * \brief CPU Implementation of unary function.
  */
diff --git a/src/operator/tensor/elemwise_binary_scalar_op_basic.cu b/src/operator/tensor/elemwise_binary_scalar_op_basic.cu
index ae19aa8..a843f67 100644
--- a/src/operator/tensor/elemwise_binary_scalar_op_basic.cu
+++ b/src/operator/tensor/elemwise_binary_scalar_op_basic.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file elemwise_binary_scalar_op.cu
  * \brief GPU Implementation of unary function.
  */
diff --git a/src/operator/tensor/elemwise_binary_scalar_op_extended.cc b/src/operator/tensor/elemwise_binary_scalar_op_extended.cc
index 6b712fc..785fce2 100644
--- a/src/operator/tensor/elemwise_binary_scalar_op_extended.cc
+++ b/src/operator/tensor/elemwise_binary_scalar_op_extended.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file elemwise_binary_scalar_op.cc
  * \brief CPU Implementation of unary function.
  */
diff --git a/src/operator/tensor/elemwise_binary_scalar_op_extended.cu b/src/operator/tensor/elemwise_binary_scalar_op_extended.cu
index 4623b05..74e6b7d 100644
--- a/src/operator/tensor/elemwise_binary_scalar_op_extended.cu
+++ b/src/operator/tensor/elemwise_binary_scalar_op_extended.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file elemwise_binary_scalar_op.cu
  * \brief GPU Implementation of unary function.
  */
diff --git a/src/operator/tensor/elemwise_binary_scalar_op_logic.cc b/src/operator/tensor/elemwise_binary_scalar_op_logic.cc
index a0f5c23..6771fff 100644
--- a/src/operator/tensor/elemwise_binary_scalar_op_logic.cc
+++ b/src/operator/tensor/elemwise_binary_scalar_op_logic.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file elemwise_binary_scalar_op.cc
  * \brief CPU Implementation of unary function.
  */
diff --git a/src/operator/tensor/elemwise_binary_scalar_op_logic.cu b/src/operator/tensor/elemwise_binary_scalar_op_logic.cu
index e400b3b..9fee4e9 100644
--- a/src/operator/tensor/elemwise_binary_scalar_op_logic.cu
+++ b/src/operator/tensor/elemwise_binary_scalar_op_logic.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file elemwise_binary_scalar_op.cu
  * \brief GPU Implementation of unary function.
  */
diff --git a/src/operator/tensor/elemwise_sum.cc b/src/operator/tensor/elemwise_sum.cc
index 7ae7ae9..652be72 100644
--- a/src/operator/tensor/elemwise_sum.cc
+++ b/src/operator/tensor/elemwise_sum.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file elemwise_sum.cc
  * \brief elementwise sum operator
 */
diff --git a/src/operator/tensor/elemwise_sum.cu b/src/operator/tensor/elemwise_sum.cu
index 63c3a96..ce734ad 100644
--- a/src/operator/tensor/elemwise_sum.cu
+++ b/src/operator/tensor/elemwise_sum.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file elemwise_sum.cu
  * \brief elementwise sum operator
 */
diff --git a/src/operator/tensor/elemwise_sum.h b/src/operator/tensor/elemwise_sum.h
index 2dd6a13..3d6d725 100644
--- a/src/operator/tensor/elemwise_sum.h
+++ b/src/operator/tensor/elemwise_sum.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file elemwise_sum.h
  * \brief elementwise sum
  * \author Bing Xu
diff --git a/src/operator/tensor/elemwise_unary_op.cc b/src/operator/tensor/elemwise_unary_op.cc
index ff03846..defe72d 100644
--- a/src/operator/tensor/elemwise_unary_op.cc
+++ b/src/operator/tensor/elemwise_unary_op.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file elemwise_unary_op.cc
  * \brief CPU Implementation of unary function.
  */
diff --git a/src/operator/tensor/elemwise_unary_op.cu b/src/operator/tensor/elemwise_unary_op.cu
index 67ceb1c..4211ea3 100644
--- a/src/operator/tensor/elemwise_unary_op.cu
+++ b/src/operator/tensor/elemwise_unary_op.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file elemwise_unary_op.cu
  * \brief GPU Implementation of unary function.
  */
diff --git a/src/operator/tensor/elemwise_unary_op.h b/src/operator/tensor/elemwise_unary_op.h
index 97a7e36..b699484 100644
--- a/src/operator/tensor/elemwise_unary_op.h
+++ b/src/operator/tensor/elemwise_unary_op.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file elementwise_unary_op-inl.h
  * \brief Function definition of elementwise unary operators
  */
diff --git a/src/operator/tensor/indexing_op-inl.cuh b/src/operator/tensor/indexing_op-inl.cuh
index 2f366c2..4458151 100644
--- a/src/operator/tensor/indexing_op-inl.cuh
+++ b/src/operator/tensor/indexing_op-inl.cuh
@@ -1,293 +1,312 @@
-/*!
- * Copyright (c) 2017 by Contributors
- * \file indexing_op-inl.cuh
- * \brief CUDA implementations for indexing_op.h
- * \author Antti-Pekka Hynninen
-*/
-#ifndef MXNET_OPERATOR_TENSOR_INDEXING_OP_CUH_
-#define MXNET_OPERATOR_TENSOR_INDEXING_OP_CUH_
-#include <cub/device/device_run_length_encode.cuh>
-#include <cub/device/device_scan.cuh>
-
-#if CUDA_VERSION >= 9000
-#define FULLMASK 0xFFFFFFFF
-#define __ballot(x) __ballot_sync(FULLMASK, (x))
-#define __all(x) __all_sync(FULLMASK, (x))
-#endif
-
-namespace mxnet {
-namespace op {
-const int kWarpSize = 32;
-
-template<int SZ, typename DType, typename IdxType>
-__global__ void AddTakeGradLargeBatchKernel(DType* dst,
-                                           // If idx_start == NULL, then in-kernel edge
-                                           // detection is used
-                                           const IdxType *idx_start,
-                                           // idx_start_size_ptr ignored if idx_start == NULL
-                                           const int* idx_start_size_ptr,
-                                           const IdxType *sorted, const IdxType *index,
-                                           const DType *src,
-                                           int ymax, int xmax) {
-  // Size of the shared memory is [blockDim.x*SZ*blockDim.y]*sizeof(DType)
-  extern __shared__ char sh_grad_weight_char[];
-  DType* sh_grad_weight = (DType*)sh_grad_weight_char;
-
-  int iidx_end = (idx_start == NULL) ? ymax : *idx_start_size_ptr;
-
-  for (int iidx = blockIdx.y;iidx < iidx_end;iidx += gridDim.y) {
-
-    // Thread block sums up elements in the range [idx_begin, idx_end-1]
-    int idx_begin, idx_end;
-    int sorted_value;
-    if (idx_start == NULL) {
-      idx_begin = iidx;
-      sorted_value = static_cast<int>(sorted[idx_begin]);
-      if (idx_begin > 0 && sorted_value == static_cast<int>(sorted[idx_begin - 1])) continue;
-      // Algorithm is explained using an example:
-      //   blockDim.x = 32
-      //   blockDim.y = 4
-      //   sorted[idx_begin:] = [4 4 4 9]
-      //   (3,4) denotes threadIdx.x=3, threadIdx.y=4, ":" is used for ranges
-      //   (0:31,0:3) sorted_value = 4
-      idx_end = idx_begin + 1;
-      unsigned int* sh_ballot = (unsigned int*)sh_grad_weight_char;
-      int no_edge = 0;
-      do {
-        int idx = idx_end + threadIdx.x + threadIdx.y*blockDim.x;
-        // Example:
-        //   (0:1,0) sorted_idx = 4
-        //   (rest)  sorted_idx = -1
-        int sorted_idx = (idx < ymax) ? static_cast<int>(sorted[idx]) : -1;
-        // Example:
-        //   (0:31,0) sh_ballot[0]     = 0b100
-        //   (rest)   sh_ballot[1...3] = 0
-        // sh_ballot[] tells us which thread within the warp found the edge
-        sh_ballot[threadIdx.y] = __ballot(sorted_value != sorted_idx);
-        __syncthreads();
-        // No edge if sh_ballot[threadIdx.x] == 0
-        // NOTE: All warps have the same value for no_edge
-        // Example:
-        //   (0,:)  no_edge = 0
-        //   (rest) no_edge = 1
-        no_edge = (threadIdx.x < blockDim.y) ? (sh_ballot[threadIdx.x] == 0) : 1;
-        idx_end += blockDim.x*blockDim.y;
-        // Example:
-        //   __all(no_edge) = 0 since no_edge = 0 for threadIdx.x = 0, hence we leave the loop
-      } while (__all(no_edge));
-      idx_end -= blockDim.x*blockDim.y;
-      // Find the first edge
-      // Example:
-      //   (0,:)  val = 1
-      //   (rest) val = 0
-      unsigned int val = (threadIdx.x < blockDim.y && sh_ballot[threadIdx.x] != 0) ?
-        1 : 0;
-      // NOTE: Set nth bit if thread n in the warp has val = 1
-      // Example:
-      //   (all) val = 1
-      val = __ballot( val );
-      // __ffs() returns the position of first set bit, 1...32. __ffs(1) = 1
-      // j will be the warp index where edge was found
-      // Example:
-      //   (all) j = 1 - 1 = 0
-      int j = __ffs(val) - 1;
-      // j = warp index where the edge was found
-      // __ffs(sh_ballot[j]) - 1 = warp lane where the edge was found
-      // idx_end points to the one over the last value.
-      // Example:
-      //  idx_end += 0*blockDim.x + _ffs(0b100) - 1 = 0 + 3 - 1 = 2
-      //  sorted[idx_end] = 9
-      idx_end += j*blockDim.x + __ffs(sh_ballot[j]) - 1;
-      __syncthreads();
-    } else {
-      idx_begin = idx_start[iidx];
-      idx_end   = ((iidx + 1) < iidx_end) ? idx_start[iidx + 1] : ymax;
-      sorted_value = static_cast<int>(sorted[idx_begin]);
-    }
-
-    const int start_feature = threadIdx.x + blockIdx.x * blockDim.x * SZ;
-    const int dst_row = sorted_value * xmax;
-
-    int num_idx = idx_end - idx_begin;
-    int idx0 = idx_begin + threadIdx.y*num_idx/blockDim.y;
-    int idx1 = idx_begin + (threadIdx.y + 1)*num_idx/blockDim.y;
-
-    // Read and sum data into grad_weight[]
-    DType grad_weight[SZ];
-    #pragma unroll
-    for (int ii = 0; ii < SZ; ii++) {
-      grad_weight[ii] = (DType)0;
-    }
-    for (int idx=idx0; idx < idx1;idx++) {
-      const int src_row = static_cast<int>(index[idx]) * xmax;
-      #pragma unroll
-      for (int ii = 0; ii < SZ; ii++)
-      {
-        int feature_dim = start_feature + ii * blockDim.x;
-        if (feature_dim < xmax)
-        {
-          grad_weight[ii] += src[src_row + feature_dim];
-        }
-      }
-    }
-    #pragma unroll
-    for (int ii = 0; ii < SZ; ii++) {
-      sh_grad_weight[threadIdx.x + ii*blockDim.x + threadIdx.y*blockDim.x*SZ] = grad_weight[ii];
-    }
-    __syncthreads();
-    // We now have grad_weight[] values, reduce within thread block
-    for (int t=1;t < blockDim.y;t <<= 1) {
-      DType tmp[SZ];
-      #pragma unroll
-      for (int ii = 0; ii < SZ; ii++) {
-        tmp[ii] = (threadIdx.y + t < blockDim.y) ?
-          sh_grad_weight[threadIdx.x + ii*blockDim.x + (threadIdx.y + t)*blockDim.x*SZ] : (DType)0;
-      }
-      __syncthreads();
-      #pragma unroll
-      for (int ii = 0; ii < SZ; ii++) {
-        sh_grad_weight[threadIdx.x + ii*blockDim.x + threadIdx.y*blockDim.x*SZ] += tmp[ii];
-      }
-      __syncthreads();
-    }
-    // Result is in sh_grad_weight[threadIdx.x + ii*blockDim.x]
-    if (threadIdx.y == 0) {
-      #pragma unroll
-      for (int ii = 0; ii < SZ; ii++) {
-        int feature_dim = start_feature + ii * blockDim.x;
-        if (feature_dim < xmax) {
-          dst[dst_row + feature_dim] += sh_grad_weight[threadIdx.x + ii*blockDim.x];
-        }
-      }
-    }
-
-  }
-}
-
-template <typename IndexType, typename xpu>
-inline typename std::enable_if<std::is_same<xpu, gpu>::value, size_t>::type
-AddTakeGradLargeBatchWorkspaceSize(size_t num_keys) {
-  size_t encode_bytes = 0;
-  cub::DeviceRunLengthEncode::Encode<IndexType*, IndexType*, IndexType*, int*>
-    (NULL, encode_bytes, NULL, NULL, NULL, NULL, num_keys);
-  size_t exclusivesum_bytes = 0;
-  cub::DeviceScan::ExclusiveSum<IndexType*, IndexType*>(NULL, exclusivesum_bytes,
-    NULL, NULL, num_keys);
-  size_t temporary_bytes = std::max(encode_bytes, exclusivesum_bytes);
-  size_t unique_bytes = num_keys*sizeof(IndexType);
-  size_t counts_bytes = num_keys*sizeof(IndexType);
-  size_t num_runs_bytes = 1*sizeof(int);
-  return (unique_bytes + counts_bytes + num_runs_bytes + temporary_bytes);
-}
-
-template<typename IndexType, typename DType>
-inline void AddTakeGradLargeBatch(mshadow::Tensor<gpu, 2, DType> dst,
-                                  const mshadow::Tensor<gpu, 1, IndexType>& sorted,
-                                  const mshadow::Tensor<gpu, 1, IndexType>& index,
-                                  const mshadow::Tensor<gpu, 2, DType> &src,
-                                  mshadow::Tensor<gpu, 1, char>* workspace) {
-  CHECK_EQ(dst.CheckContiguous(), true);
-  CHECK_EQ(sorted.CheckContiguous(), true);
-  CHECK_EQ(index.CheckContiguous(), true);
-  CHECK_EQ(src.CheckContiguous(), true);
-  // const int kWarpBits = kMemUnitBits;
-  cudaStream_t stream = mshadow::Stream<gpu>::GetStream(dst.stream_);
-  IndexType* sum_counts_ptr = NULL;
-  int* num_runs_ptr = NULL;
-  if (dst.size(0)*4 < src.size(0) && workspace != NULL) {
-    // Workspace given and potentially loops at least 4 times, use CUB to create sum_counts
-    CHECK_EQ(workspace->CheckContiguous(), true);
-    // workspace = [unique_out, counts_out, temporary_storage]
-    size_t unique_bytes = sorted.size(0)*sizeof(IndexType);
-    size_t counts_bytes = sorted.size(0)*sizeof(IndexType);
-    size_t num_runs_bytes = 1*sizeof(int);
-
-    size_t encode_bytes = 0;
-    cub::DeviceRunLengthEncode::Encode<IndexType*, IndexType*, IndexType*, int*>
-      (NULL, encode_bytes, NULL, NULL, NULL, NULL, sorted.size(0), stream);
-    size_t exclusivesum_bytes = 0;
-    cub::DeviceScan::ExclusiveSum<IndexType*, IndexType*>
-      (NULL, exclusivesum_bytes, NULL, NULL, sorted.size(0), stream);
-    size_t temporary_bytes = std::max(encode_bytes, exclusivesum_bytes);
-
-    // Check that we have enough storage
-    CHECK_GE(workspace->size(0), unique_bytes + counts_bytes +
-      num_runs_bytes + temporary_bytes);
-
-    IndexType* unique_out_ptr = reinterpret_cast<IndexType*>(workspace->dptr_);
-    IndexType* counts_out_ptr = reinterpret_cast<IndexType*>(workspace->dptr_ + unique_bytes);
-    num_runs_ptr = reinterpret_cast<int*>(workspace->dptr_ + unique_bytes +
-      counts_bytes);
-    void* temporary_storage = reinterpret_cast<void *>(workspace->dptr_ + unique_bytes +
-      counts_bytes + num_runs_bytes);
-
-    cub::DeviceRunLengthEncode::Encode<IndexType*, IndexType*, IndexType*, int*>
-    (temporary_storage, temporary_bytes, sorted.dptr_, unique_out_ptr, counts_out_ptr,
-      num_runs_ptr, sorted.size(0), stream);
-
-    sum_counts_ptr = unique_out_ptr;
-    cub::DeviceScan::ExclusiveSum<IndexType*, IndexType*>
-    (temporary_storage, temporary_bytes, counts_out_ptr, sum_counts_ptr,
-      sorted.size(0), stream);
-  }
-
-  const int num_unique_est = min(dst.size(0), src.size(0));
-  const int max_nthread = 128;
-  const int num_y = max(src.size(0)/num_unique_est, 1);
-  const int block_dim_x = kWarpSize;
-  const int block_dim_y = min(num_y, max_nthread/block_dim_x);
-  const int SZ = min((src.size(1) + block_dim_x - 1) / block_dim_x, 4);
-  const int grid_dim_x = (src.size(1) + block_dim_x * SZ - 1) / (block_dim_x * SZ);
-  const int grid_dim_y = min(num_unique_est, mshadow::cuda::kBaseGridNum);
-  dim3 dimBlock(block_dim_x, block_dim_y);
-  dim3 dimGrid(grid_dim_x, grid_dim_y);
-  // Maximum shared memory usage: 128*4*sizeof(DType), which is 4K for 64bit DType elements
-  int shmem_size = dimBlock.x*SZ*dimBlock.y*sizeof(DType);
-
-  CHECK_EQ(dst.size(1), src.size(1)) << "AddTakeGradLargeBatch: shape mismatch";
-  CHECK_EQ(index.size(0), src.size(0)) << "AddTakeGradLargeBatch: shape mismatch";
-  mshadow::cuda::CheckLaunchParam(dimGrid, dimBlock, "AddTakeGradLargeBatch");
-
-  switch (SZ) {
-    case 1:
-    AddTakeGradLargeBatchKernel<1, DType>
-        <<<dimGrid, dimBlock, shmem_size, stream>>>
-        (dst.dptr_, sum_counts_ptr, num_runs_ptr,
-         sorted.dptr_, index.dptr_, src.dptr_,
-         static_cast<int>(src.size(0)),
-         static_cast<int>(src.size(1)));
-    break;
-    case 2:
-    AddTakeGradLargeBatchKernel<2, DType>
-        <<<dimGrid, dimBlock, shmem_size, stream>>>
-        (dst.dptr_, sum_counts_ptr, num_runs_ptr,
-         sorted.dptr_, index.dptr_, src.dptr_,
-         static_cast<int>(src.size(0)),
-         static_cast<int>(src.size(1)));
-    break;
-    case 3:
-    AddTakeGradLargeBatchKernel<3, DType>
-        <<<dimGrid, dimBlock, shmem_size, stream>>>
-        (dst.dptr_, sum_counts_ptr, num_runs_ptr,
-         sorted.dptr_, index.dptr_, src.dptr_,
-         static_cast<int>(src.size(0)),
-         static_cast<int>(src.size(1)));
-    break;
-    case 4:
-    AddTakeGradLargeBatchKernel<4, DType>
-        <<<dimGrid, dimBlock, shmem_size, stream>>>
-        (dst.dptr_, sum_counts_ptr, num_runs_ptr,
-         sorted.dptr_, index.dptr_, src.dptr_,
-         static_cast<int>(src.size(0)),
-         static_cast<int>(src.size(1)));
-    break;
-    default:
-    LOG(FATAL) << "AddTakeGradLargeBatch, incorrect value SZ " << SZ;
-    break;
-  }
-  MSHADOW_CUDA_POST_KERNEL_CHECK(AddTakeGradLargeBatchKernel);
-}
-
-}  // namespace op
-}  // namespace mxnet
-#endif  // MXNET_OPERATOR_TENSOR_INDEXING_OP_CUH_
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2017 by Contributors
+ * \file indexing_op-inl.cuh
+ * \brief CUDA implementations for indexing_op.h
+ * \author Antti-Pekka Hynninen
+*/
+#ifndef MXNET_OPERATOR_TENSOR_INDEXING_OP_CUH_
+#define MXNET_OPERATOR_TENSOR_INDEXING_OP_CUH_
+#include <cub/device/device_run_length_encode.cuh>
+#include <cub/device/device_scan.cuh>
+
+#if CUDA_VERSION >= 9000
+#define FULLMASK 0xFFFFFFFF
+#define __ballot(x) __ballot_sync(FULLMASK, (x))
+#define __all(x) __all_sync(FULLMASK, (x))
+#endif
+
+namespace mxnet {
+namespace op {
+const int kWarpSize = 32;
+
+template<int SZ, typename DType, typename IdxType>
+__global__ void AddTakeGradLargeBatchKernel(DType* dst,
+                                           // If idx_start == NULL, then in-kernel edge
+                                           // detection is used
+                                           const IdxType *idx_start,
+                                           // idx_start_size_ptr ignored if idx_start == NULL
+                                           const int* idx_start_size_ptr,
+                                           const IdxType *sorted, const IdxType *index,
+                                           const DType *src,
+                                           int ymax, int xmax) {
+  // Size of the shared memory is [blockDim.x*SZ*blockDim.y]*sizeof(DType)
+  extern __shared__ char sh_grad_weight_char[];
+  DType* sh_grad_weight = (DType*)sh_grad_weight_char;
+
+  int iidx_end = (idx_start == NULL) ? ymax : *idx_start_size_ptr;
+
+  for (int iidx = blockIdx.y;iidx < iidx_end;iidx += gridDim.y) {
+
+    // Thread block sums up elements in the range [idx_begin, idx_end-1]
+    int idx_begin, idx_end;
+    int sorted_value;
+    if (idx_start == NULL) {
+      idx_begin = iidx;
+      sorted_value = static_cast<int>(sorted[idx_begin]);
+      if (idx_begin > 0 && sorted_value == static_cast<int>(sorted[idx_begin - 1])) continue;
+      // Algorithm is explained using an example:
+      //   blockDim.x = 32
+      //   blockDim.y = 4
+      //   sorted[idx_begin:] = [4 4 4 9]
+      //   (3,4) denotes threadIdx.x=3, threadIdx.y=4, ":" is used for ranges
+      //   (0:31,0:3) sorted_value = 4
+      idx_end = idx_begin + 1;
+      unsigned int* sh_ballot = (unsigned int*)sh_grad_weight_char;
+      int no_edge = 0;
+      do {
+        int idx = idx_end + threadIdx.x + threadIdx.y*blockDim.x;
+        // Example:
+        //   (0:1,0) sorted_idx = 4
+        //   (rest)  sorted_idx = -1
+        int sorted_idx = (idx < ymax) ? static_cast<int>(sorted[idx]) : -1;
+        // Example:
+        //   (0:31,0) sh_ballot[0]     = 0b100
+        //   (rest)   sh_ballot[1...3] = 0
+        // sh_ballot[] tells us which thread within the warp found the edge
+        sh_ballot[threadIdx.y] = __ballot(sorted_value != sorted_idx);
+        __syncthreads();
+        // No edge if sh_ballot[threadIdx.x] == 0
+        // NOTE: All warps have the same value for no_edge
+        // Example:
+        //   (0,:)  no_edge = 0
+        //   (rest) no_edge = 1
+        no_edge = (threadIdx.x < blockDim.y) ? (sh_ballot[threadIdx.x] == 0) : 1;
+        idx_end += blockDim.x*blockDim.y;
+        // Example:
+        //   __all(no_edge) = 0 since no_edge = 0 for threadIdx.x = 0, hence we leave the loop
+      } while (__all(no_edge));
+      idx_end -= blockDim.x*blockDim.y;
+      // Find the first edge
+      // Example:
+      //   (0,:)  val = 1
+      //   (rest) val = 0
+      unsigned int val = (threadIdx.x < blockDim.y && sh_ballot[threadIdx.x] != 0) ?
+        1 : 0;
+      // NOTE: Set nth bit if thread n in the warp has val = 1
+      // Example:
+      //   (all) val = 1
+      val = __ballot( val );
+      // __ffs() returns the position of first set bit, 1...32. __ffs(1) = 1
+      // j will be the warp index where edge was found
+      // Example:
+      //   (all) j = 1 - 1 = 0
+      int j = __ffs(val) - 1;
+      // j = warp index where the edge was found
+      // __ffs(sh_ballot[j]) - 1 = warp lane where the edge was found
+      // idx_end points to the one over the last value.
+      // Example:
+      //  idx_end += 0*blockDim.x + _ffs(0b100) - 1 = 0 + 3 - 1 = 2
+      //  sorted[idx_end] = 9
+      idx_end += j*blockDim.x + __ffs(sh_ballot[j]) - 1;
+      __syncthreads();
+    } else {
+      idx_begin = idx_start[iidx];
+      idx_end   = ((iidx + 1) < iidx_end) ? idx_start[iidx + 1] : ymax;
+      sorted_value = static_cast<int>(sorted[idx_begin]);
+    }
+
+    const int start_feature = threadIdx.x + blockIdx.x * blockDim.x * SZ;
+    const int dst_row = sorted_value * xmax;
+
+    int num_idx = idx_end - idx_begin;
+    int idx0 = idx_begin + threadIdx.y*num_idx/blockDim.y;
+    int idx1 = idx_begin + (threadIdx.y + 1)*num_idx/blockDim.y;
+
+    // Read and sum data into grad_weight[]
+    DType grad_weight[SZ];
+    #pragma unroll
+    for (int ii = 0; ii < SZ; ii++) {
+      grad_weight[ii] = (DType)0;
+    }
+    for (int idx=idx0; idx < idx1;idx++) {
+      const int src_row = static_cast<int>(index[idx]) * xmax;
+      #pragma unroll
+      for (int ii = 0; ii < SZ; ii++)
+      {
+        int feature_dim = start_feature + ii * blockDim.x;
+        if (feature_dim < xmax)
+        {
+          grad_weight[ii] += src[src_row + feature_dim];
+        }
+      }
+    }
+    #pragma unroll
+    for (int ii = 0; ii < SZ; ii++) {
+      sh_grad_weight[threadIdx.x + ii*blockDim.x + threadIdx.y*blockDim.x*SZ] = grad_weight[ii];
+    }
+    __syncthreads();
+    // We now have grad_weight[] values, reduce within thread block
+    for (int t=1;t < blockDim.y;t <<= 1) {
+      DType tmp[SZ];
+      #pragma unroll
+      for (int ii = 0; ii < SZ; ii++) {
+        tmp[ii] = (threadIdx.y + t < blockDim.y) ?
+          sh_grad_weight[threadIdx.x + ii*blockDim.x + (threadIdx.y + t)*blockDim.x*SZ] : (DType)0;
+      }
+      __syncthreads();
+      #pragma unroll
+      for (int ii = 0; ii < SZ; ii++) {
+        sh_grad_weight[threadIdx.x + ii*blockDim.x + threadIdx.y*blockDim.x*SZ] += tmp[ii];
+      }
+      __syncthreads();
+    }
+    // Result is in sh_grad_weight[threadIdx.x + ii*blockDim.x]
+    if (threadIdx.y == 0) {
+      #pragma unroll
+      for (int ii = 0; ii < SZ; ii++) {
+        int feature_dim = start_feature + ii * blockDim.x;
+        if (feature_dim < xmax) {
+          dst[dst_row + feature_dim] += sh_grad_weight[threadIdx.x + ii*blockDim.x];
+        }
+      }
+    }
+
+  }
+}
+
+template <typename IndexType, typename xpu>  // GPU-only overload, selected via enable_if
+inline typename std::enable_if<std::is_same<xpu, gpu>::value, size_t>::type
+AddTakeGradLargeBatchWorkspaceSize(size_t num_keys) {  // returns a size in bytes
+  size_t encode_bytes = 0;
+  cub::DeviceRunLengthEncode::Encode<IndexType*, IndexType*, IndexType*, int*>
+    (NULL, encode_bytes, NULL, NULL, NULL, NULL, num_keys);  // NULL storage: size query only
+  size_t exclusivesum_bytes = 0;
+  cub::DeviceScan::ExclusiveSum<IndexType*, IndexType*>(NULL, exclusivesum_bytes,
+    NULL, NULL, num_keys);  // NULL storage: size query only
+  size_t temporary_bytes = std::max(encode_bytes, exclusivesum_bytes);  // shared scratch area
+  size_t unique_bytes = num_keys*sizeof(IndexType);  // one IndexType per key
+  size_t counts_bytes = num_keys*sizeof(IndexType);  // one IndexType per key
+  size_t num_runs_bytes = 1*sizeof(int);  // a single int
+  return (unique_bytes + counts_bytes + num_runs_bytes + temporary_bytes);
+}
+
+template<typename IndexType, typename DType>  // Host launcher for AddTakeGradLargeBatchKernel
+inline void AddTakeGradLargeBatch(mshadow::Tensor<gpu, 2, DType> dst,
+                                  const mshadow::Tensor<gpu, 1, IndexType>& sorted,
+                                  const mshadow::Tensor<gpu, 1, IndexType>& index,
+                                  const mshadow::Tensor<gpu, 2, DType> &src,
+                                  mshadow::Tensor<gpu, 1, char>* workspace) {  // may be NULL
+  CHECK_EQ(dst.CheckContiguous(), true);
+  CHECK_EQ(sorted.CheckContiguous(), true);
+  CHECK_EQ(index.CheckContiguous(), true);
+  CHECK_EQ(src.CheckContiguous(), true);
+  // All CUB calls and the kernel launch below are issued on the stream attached to dst.
+  cudaStream_t stream = mshadow::Stream<gpu>::GetStream(dst.stream_);
+  IndexType* sum_counts_ptr = NULL;  // stays NULL unless the CUB branch below runs
+  int* num_runs_ptr = NULL;  // stays NULL unless the CUB branch below runs
+  if (dst.size(0)*4 < src.size(0) && workspace != NULL) {
+    // Workspace given and potentially loops at least 4 times, use CUB to create sum_counts
+    CHECK_EQ(workspace->CheckContiguous(), true);
+    // workspace = [unique_out, counts_out, num_runs, temporary_storage]
+    size_t unique_bytes = sorted.size(0)*sizeof(IndexType);
+    size_t counts_bytes = sorted.size(0)*sizeof(IndexType);
+    size_t num_runs_bytes = 1*sizeof(int);
+
+    size_t encode_bytes = 0;
+    cub::DeviceRunLengthEncode::Encode<IndexType*, IndexType*, IndexType*, int*>
+      (NULL, encode_bytes, NULL, NULL, NULL, NULL, sorted.size(0), stream);  // size query only
+    size_t exclusivesum_bytes = 0;
+    cub::DeviceScan::ExclusiveSum<IndexType*, IndexType*>
+      (NULL, exclusivesum_bytes, NULL, NULL, sorted.size(0), stream);  // size query only
+    size_t temporary_bytes = std::max(encode_bytes, exclusivesum_bytes);  // shared scratch area
+
+    // Check that we have enough storage
+    CHECK_GE(workspace->size(0), unique_bytes + counts_bytes +
+      num_runs_bytes + temporary_bytes);
+
+    IndexType* unique_out_ptr = reinterpret_cast<IndexType*>(workspace->dptr_);
+    IndexType* counts_out_ptr = reinterpret_cast<IndexType*>(workspace->dptr_ + unique_bytes);
+    num_runs_ptr = reinterpret_cast<int*>(workspace->dptr_ + unique_bytes +
+      counts_bytes);
+    void* temporary_storage = reinterpret_cast<void *>(workspace->dptr_ + unique_bytes +
+      counts_bytes + num_runs_bytes);
+
+    cub::DeviceRunLengthEncode::Encode<IndexType*, IndexType*, IndexType*, int*>
+    (temporary_storage, temporary_bytes, sorted.dptr_, unique_out_ptr, counts_out_ptr,
+      num_runs_ptr, sorted.size(0), stream);
+
+    sum_counts_ptr = unique_out_ptr;  // the scan output overwrites the unique-key buffer
+    cub::DeviceScan::ExclusiveSum<IndexType*, IndexType*>
+    (temporary_storage, temporary_bytes, counts_out_ptr, sum_counts_ptr,
+      sorted.size(0), stream);  // scans all sorted.size(0) slots of counts_out
+  }
+
+  const int num_unique_est = min(dst.size(0), src.size(0));  // 0 if either has no rows
+  const int max_nthread = 128;
+  const int num_y = max(src.size(0)/num_unique_est, 1);  // NOTE(review): div-by-0 when est is 0
+  const int block_dim_x = kWarpSize;
+  const int block_dim_y = min(num_y, max_nthread/block_dim_x);
+  const int SZ = min((src.size(1) + block_dim_x - 1) / block_dim_x, 4);  // capped at 4
+  const int grid_dim_x = (src.size(1) + block_dim_x * SZ - 1) / (block_dim_x * SZ);
+  const int grid_dim_y = min(num_unique_est, mshadow::cuda::kBaseGridNum);
+  dim3 dimBlock(block_dim_x, block_dim_y);
+  dim3 dimGrid(grid_dim_x, grid_dim_y);
+  // Maximum shared memory usage: 128*4*sizeof(DType), which is 4K for 64bit DType elements
+  int shmem_size = dimBlock.x*SZ*dimBlock.y*sizeof(DType);  // SZ elements per thread
+
+  CHECK_EQ(dst.size(1), src.size(1)) << "AddTakeGradLargeBatch: shape mismatch";
+  CHECK_EQ(index.size(0), src.size(0)) << "AddTakeGradLargeBatch: shape mismatch";
+  mshadow::cuda::CheckLaunchParam(dimGrid, dimBlock, "AddTakeGradLargeBatch");
+
+  switch (SZ) {  // SZ is a template argument of the kernel, so dispatch on its runtime value
+    case 1:
+    AddTakeGradLargeBatchKernel<1, DType>
+        <<<dimGrid, dimBlock, shmem_size, stream>>>
+        (dst.dptr_, sum_counts_ptr, num_runs_ptr,
+         sorted.dptr_, index.dptr_, src.dptr_,
+         static_cast<int>(src.size(0)),
+         static_cast<int>(src.size(1)));
+    break;
+    case 2:
+    AddTakeGradLargeBatchKernel<2, DType>
+        <<<dimGrid, dimBlock, shmem_size, stream>>>
+        (dst.dptr_, sum_counts_ptr, num_runs_ptr,
+         sorted.dptr_, index.dptr_, src.dptr_,
+         static_cast<int>(src.size(0)),
+         static_cast<int>(src.size(1)));
+    break;
+    case 3:
+    AddTakeGradLargeBatchKernel<3, DType>
+        <<<dimGrid, dimBlock, shmem_size, stream>>>
+        (dst.dptr_, sum_counts_ptr, num_runs_ptr,
+         sorted.dptr_, index.dptr_, src.dptr_,
+         static_cast<int>(src.size(0)),
+         static_cast<int>(src.size(1)));
+    break;
+    case 4:
+    AddTakeGradLargeBatchKernel<4, DType>
+        <<<dimGrid, dimBlock, shmem_size, stream>>>
+        (dst.dptr_, sum_counts_ptr, num_runs_ptr,
+         sorted.dptr_, index.dptr_, src.dptr_,
+         static_cast<int>(src.size(0)),
+         static_cast<int>(src.size(1)));
+    break;
+    default:  // unreachable unless src.size(1) is 0, since SZ is capped at 4 above
+    LOG(FATAL) << "AddTakeGradLargeBatch, incorrect value SZ " << SZ;
+    break;
+  }
+  MSHADOW_CUDA_POST_KERNEL_CHECK(AddTakeGradLargeBatchKernel);
+}
+
+}  // namespace op
+}  // namespace mxnet
+#endif  // MXNET_OPERATOR_TENSOR_INDEXING_OP_CUH_
diff --git a/src/operator/tensor/indexing_op.cc b/src/operator/tensor/indexing_op.cc
index 5f010fd..e5cb410 100644
--- a/src/operator/tensor/indexing_op.cc
+++ b/src/operator/tensor/indexing_op.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file indexing_op.cc
  * \brief
  * \author Siyi Li, Chi Zhang
diff --git a/src/operator/tensor/indexing_op.cu b/src/operator/tensor/indexing_op.cu
index 287ec25..d57628a 100644
--- a/src/operator/tensor/indexing_op.cu
+++ b/src/operator/tensor/indexing_op.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file indexing_op.cu
  * \brief
  * \author Siyi Li, Chi Zhang
diff --git a/src/operator/tensor/indexing_op.h b/src/operator/tensor/indexing_op.h
index 5fd6e81..ef42b01 100644
--- a/src/operator/tensor/indexing_op.h
+++ b/src/operator/tensor/indexing_op.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file indexing_op.h
  * \brief
  * \author Bing Xu, Siyi Li, Chi Zhang
diff --git a/src/operator/tensor/init_op.cc b/src/operator/tensor/init_op.cc
index 16f71fc..8dac22a 100644
--- a/src/operator/tensor/init_op.cc
+++ b/src/operator/tensor/init_op.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file init_op.cc
  * \brief CPU Implementation of init op
  */
diff --git a/src/operator/tensor/init_op.cu b/src/operator/tensor/init_op.cu
index a798f26..6e2b65c 100644
--- a/src/operator/tensor/init_op.cu
+++ b/src/operator/tensor/init_op.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file init_op.cu
  * \brief GPU Implementation of init op
  */
diff --git a/src/operator/tensor/init_op.h b/src/operator/tensor/init_op.h
index 5ce132d..bdc74d3 100644
--- a/src/operator/tensor/init_op.h
+++ b/src/operator/tensor/init_op.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file init_op.h
  * \brief Function definition of initialization op
  */
diff --git a/src/operator/tensor/la_op.cc b/src/operator/tensor/la_op.cc
index 859e761..1b726ce 100644
--- a/src/operator/tensor/la_op.cc
+++ b/src/operator/tensor/la_op.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file la_op.cc
  * \brief CPU-Operators for advanced linear algebra.
  */
@@ -293,7 +311,7 @@ NNVM_REGISTER_OP(_backward_linalg_trmm)
 .set_attr<FCompute>("FCompute<cpu>", LaOpBackward<cpu, 2, 2, 4, 2, trmm_backward>);
 
 NNVM_REGISTER_OP(linalg_trsm)
-.describe(R"code(Solves matrix equations involving a triangular matrix. 
+.describe(R"code(Solves matrix equations involving a triangular matrix.
 Input are two tensors *A*, *B* each of dimension *n >= 2* and each
 having the same shape on the leading *n-2* dimensions. For every *n-2* dimensional index *i* let
 *A*\ :sub:`i`\ , *B*\ :sub:`i`\  be the matrices given by the last *2* dimensions.
diff --git a/src/operator/tensor/la_op.h b/src/operator/tensor/la_op.h
index 488a3ed..9779988 100644
--- a/src/operator/tensor/la_op.h
+++ b/src/operator/tensor/la_op.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file la_op.h
  * \brief Operators for advanced linear algebra.
  */
diff --git a/src/operator/tensor/la_op_inline.h b/src/operator/tensor/la_op_inline.h
index b83bf81..a032988 100644
--- a/src/operator/tensor/la_op_inline.h
+++ b/src/operator/tensor/la_op_inline.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file la_op_inline.h
  * \brief Operators for advanced linear algebra.
  */
diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h
index 26f409a..af0de59 100644
--- a/src/operator/tensor/matrix_op-inl.h
+++ b/src/operator/tensor/matrix_op-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file matrix_op-inl.h
  * \brief Function definition of matrix related operators
  */
diff --git a/src/operator/tensor/matrix_op.cc b/src/operator/tensor/matrix_op.cc
index 4832b13..e7e8f55 100644
--- a/src/operator/tensor/matrix_op.cc
+++ b/src/operator/tensor/matrix_op.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file matrix_op.cc
  * \brief CPU Implementation of matrix operations
  */
diff --git a/src/operator/tensor/matrix_op.cu b/src/operator/tensor/matrix_op.cu
index 8cf656e..ca40419 100644
--- a/src/operator/tensor/matrix_op.cu
+++ b/src/operator/tensor/matrix_op.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file matrix_op.cu
  * \brief GPU Implementation of matrix operations
  */
diff --git a/src/operator/tensor/ordering_op-inl.h b/src/operator/tensor/ordering_op-inl.h
index a8cee59..eb28b01 100644
--- a/src/operator/tensor/ordering_op-inl.h
+++ b/src/operator/tensor/ordering_op-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file ordering_op-inl.h
  * \brief Function definition of matrix related operators
  */
diff --git a/src/operator/tensor/ordering_op.cc b/src/operator/tensor/ordering_op.cc
index 3308836..22712a8 100644
--- a/src/operator/tensor/ordering_op.cc
+++ b/src/operator/tensor/ordering_op.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2016 by Contributors
  * \file ordering.cc
  * \brief CPU Implementation of the ordering operations
  */
diff --git a/src/operator/tensor/ordering_op.cu b/src/operator/tensor/ordering_op.cu
index 29df70f..8e40b4a 100644
--- a/src/operator/tensor/ordering_op.cu
+++ b/src/operator/tensor/ordering_op.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file matrix_op.cu
  * \brief GPU Implementation of matrix operations
  */
diff --git a/src/operator/tensor/sort_op-inl.cuh b/src/operator/tensor/sort_op-inl.cuh
index 725f9b8..5ad3105 100644
--- a/src/operator/tensor/sort_op-inl.cuh
+++ b/src/operator/tensor/sort_op-inl.cuh
@@ -1,129 +1,148 @@
-/*!
- *  Copyright (c) 2017 by Contributors
- * \file sort_op-inl.cuh
- * \brief CUDA implementations for sort_op.h
- */
-#ifndef MXNET_OPERATOR_TENSOR_SORT_OP_INL_CUH_
-#define MXNET_OPERATOR_TENSOR_SORT_OP_INL_CUH_
-#include <thrust/device_ptr.h>
-#include <thrust/sort.h>
-#if defined(_MSC_VER) && __CUDACC_VER_MAJOR__ == 8 && __CUDACC_VER_BUILD__ != 44
-// Many CUDA 8 compilers other than V8.0.44 crash on Windows
-#pragma warning("Potential crash on CUDA compiler detected. Switching sorting from CUB to Thrust")
-#define SORT_WITH_THRUST
-#else
-#include <cub/device/device_radix_sort.cuh>
-#undef SORT_WITH_THRUST
-#endif
-#if CUDA_VERSION >= 7000
-#include <thrust/system/cuda/execution_policy.h>
-#endif
-
-namespace mxnet {
-namespace op {
-
-template <typename KDType, typename VDType, typename xpu>
-inline typename std::enable_if<std::is_same<xpu, gpu>::value, size_t>::type
-SortByKeyWorkspaceSize(const size_t num_keys) {
-#ifdef SORT_WITH_THRUST
-  return 0;
-#else
-  size_t sortpairs_bytes = 0;
-  cub::DeviceRadixSort::SortPairs<KDType, VDType>(NULL, sortpairs_bytes,
-      NULL, NULL, NULL, NULL, num_keys);
-  size_t keys_bytes = num_keys*sizeof(KDType);
-  size_t values_bytes = num_keys*sizeof(VDType);
-  return (keys_bytes + values_bytes + sortpairs_bytes);
-#endif
-}
-
-template<typename KDType, typename VDType>
-inline void SortByKey(mshadow::Tensor<gpu, 1, KDType> keys, mshadow::Tensor<gpu, 1, VDType> values,
-                      bool is_ascend, mshadow::Tensor<gpu, 1, char>* workspace,
-                      const int begin_bit, const int end_bit) {
-  CHECK_EQ(keys.CheckContiguous(), true);
-  CHECK_EQ(values.CheckContiguous(), true);
-#if CUDA_VERSION >= 7000
-  cudaStream_t stream = mshadow::Stream<gpu>::GetStream(keys.stream_);
-#ifndef SORT_WITH_THRUST
-  if (workspace != NULL) {
-    // Workspace given, sort using CUB
-    CHECK_EQ(workspace->CheckContiguous(), true);
-    // workspace = [keys_out, values_out, temporary_storage]
-    size_t keys_bytes = keys.size(0)*sizeof(KDType);
-    size_t values_bytes = keys.size(0)*sizeof(VDType);
-    // Get the size of internal storage (for checking purposes only)
-    size_t sortpairs_bytes = 0;
-    if (is_ascend) {
-      cub::DeviceRadixSort::SortPairs<KDType, VDType>(NULL, sortpairs_bytes,
-          NULL, NULL, NULL, NULL,
-          keys.size(0), begin_bit, end_bit, stream);
-    } else {
-      cub::DeviceRadixSort::SortPairsDescending<KDType, VDType>(NULL, sortpairs_bytes,
-          NULL, NULL, NULL, NULL,
-          keys.size(0), begin_bit, end_bit, stream);
-    }
-    // Check that we have enough storage
-    CHECK_GE(workspace->size(0), keys_bytes + values_bytes + sortpairs_bytes);
-    //
-    KDType* keys_out_ptr = reinterpret_cast<KDType *>(workspace->dptr_);
-    VDType* values_out_ptr = reinterpret_cast<VDType *>(workspace->dptr_ + keys_bytes);
-    void* temp_storage = reinterpret_cast<void *>(workspace->dptr_ + keys_bytes + values_bytes);
-    // Sort
-    if (is_ascend) {
-      cub::DeviceRadixSort::SortPairs(temp_storage, sortpairs_bytes,
-        keys.dptr_, keys_out_ptr, values.dptr_, values_out_ptr,
-        keys.size(0), begin_bit, end_bit, stream);
-    } else {
-      cub::DeviceRadixSort::SortPairsDescending(temp_storage, sortpairs_bytes,
-        keys.dptr_, keys_out_ptr, values.dptr_, values_out_ptr,
-        keys.size(0), begin_bit, end_bit, stream);
-    }
-    // Copy result back to [keys, values]
-    mshadow::Tensor<gpu, 1, KDType> keys_out(keys_out_ptr, mshadow::Shape1(keys.size(0)),
-      keys.stream_);
-    mshadow::Tensor<gpu, 1, VDType> values_out(values_out_ptr, mshadow::Shape1(keys.size(0)),
-      keys.stream_);
-    mshadow::Copy(keys, keys_out, keys.stream_);
-    mshadow::Copy(values, values_out, values.stream_);
-  } else {
-#endif // SORT_WITH_THRUST
-    // No workspace, sort using thrust
-    thrust::device_ptr<KDType> key_iter = thrust::device_pointer_cast(keys.dptr_);
-    thrust::device_ptr<VDType> value_iter = thrust::device_pointer_cast(values.dptr_);
-    if (is_ascend) {
-      thrust::stable_sort_by_key(
-        thrust::cuda::par.on(stream),
-        key_iter, key_iter + keys.size(0), value_iter, thrust::less<KDType>());
-    } else {
-      thrust::stable_sort_by_key(
-        thrust::cuda::par.on(stream),
-        key_iter, key_iter + keys.size(0), value_iter, thrust::greater<KDType>());
-    }
-#ifndef SORT_WITH_THRUST
-  }
-#endif // SORT_WITH_THRUST
-  MSHADOW_CUDA_POST_KERNEL_CHECK(SortByKey);
-#else
-  LOG(FATAL) << "SortByKey is only supported for CUDA version >=7.0!";
-#endif
-}
-
-template<typename DType>
-inline void SortByKey(mshadow::Tensor<gpu, 1, mshadow::half::half_t> keys,
-  mshadow::Tensor<gpu, 1, DType> values, bool is_ascend,
-  mshadow::Tensor<gpu, 1, char>* workspace, const int begin_bit, const int end_bit) {
-  LOG(FATAL) << "SortByKey for half_t is not implemented!";
-}
-
-template<typename DType>
-inline void SortByKey(mshadow::Tensor<gpu, 1, DType> keys,
-  mshadow::Tensor<gpu, 1, mshadow::half::half_t> values, bool is_ascend,
-  mshadow::Tensor<gpu, 1, char>* workspace, const int begin_bit, const int end_bit) {
-  LOG(FATAL) << "SortByKey for half_t is not implemented!";
-}
-
-}  // namespace op
-}  // namespace mxnet
-
-#endif  // MXNET_OPERATOR_TENSOR_SORT_OP_INL_CUH_
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2017 by Contributors
+ * \file sort_op-inl.cuh
+ * \brief CUDA implementations for sort_op.h
+ */
+#ifndef MXNET_OPERATOR_TENSOR_SORT_OP_INL_CUH_
+#define MXNET_OPERATOR_TENSOR_SORT_OP_INL_CUH_
+#include <thrust/device_ptr.h>
+#include <thrust/sort.h>
+#if defined(_MSC_VER) && __CUDACC_VER_MAJOR__ == 8 && __CUDACC_VER_BUILD__ != 44
+// Many CUDA 8 compilers other than V8.0.44 crash on Windows
+#pragma warning("Potential crash on CUDA compiler detected. Switching sorting from CUB to Thrust")
+#define SORT_WITH_THRUST
+#else
+#include <cub/device/device_radix_sort.cuh>
+#undef SORT_WITH_THRUST
+#endif
+#if CUDA_VERSION >= 7000
+#include <thrust/system/cuda/execution_policy.h>
+#endif
+
+namespace mxnet {
+namespace op {
+
+template <typename KDType, typename VDType, typename xpu>  // GPU-only overload (enable_if)
+inline typename std::enable_if<std::is_same<xpu, gpu>::value, size_t>::type
+SortByKeyWorkspaceSize(const size_t num_keys) {  // returns a size in bytes
+#ifdef SORT_WITH_THRUST
+  return 0;  // CUB is compiled out, so SortByKey never touches a workspace
+#else
+  size_t sortpairs_bytes = 0;
+  cub::DeviceRadixSort::SortPairs<KDType, VDType>(NULL, sortpairs_bytes,
+      NULL, NULL, NULL, NULL, num_keys);  // NULL storage: size query only
+  size_t keys_bytes = num_keys*sizeof(KDType);  // output buffer for sorted keys
+  size_t values_bytes = num_keys*sizeof(VDType);  // output buffer for reordered values
+  return (keys_bytes + values_bytes + sortpairs_bytes);
+#endif
+}
+
+template<typename KDType, typename VDType>  // Sorts keys and reorders values in place (GPU)
+inline void SortByKey(mshadow::Tensor<gpu, 1, KDType> keys, mshadow::Tensor<gpu, 1, VDType> values,
+                      bool is_ascend, mshadow::Tensor<gpu, 1, char>* workspace,
+                      const int begin_bit, const int end_bit) {  // bit range: CUB path only
+  CHECK_EQ(keys.CheckContiguous(), true);
+  CHECK_EQ(values.CheckContiguous(), true);  // NOTE(review): key/value sizes are not compared
+#if CUDA_VERSION >= 7000
+  cudaStream_t stream = mshadow::Stream<gpu>::GetStream(keys.stream_);
+#ifndef SORT_WITH_THRUST
+  if (workspace != NULL) {
+    // Workspace given, sort using CUB
+    CHECK_EQ(workspace->CheckContiguous(), true);
+    // workspace = [keys_out, values_out, temporary_storage]
+    size_t keys_bytes = keys.size(0)*sizeof(KDType);
+    size_t values_bytes = keys.size(0)*sizeof(VDType);  // sized by keys.size(0), not values
+    // Get the size of internal storage (for checking purposes only)
+    size_t sortpairs_bytes = 0;
+    if (is_ascend) {
+      cub::DeviceRadixSort::SortPairs<KDType, VDType>(NULL, sortpairs_bytes,
+          NULL, NULL, NULL, NULL,
+          keys.size(0), begin_bit, end_bit, stream);
+    } else {
+      cub::DeviceRadixSort::SortPairsDescending<KDType, VDType>(NULL, sortpairs_bytes,
+          NULL, NULL, NULL, NULL,
+          keys.size(0), begin_bit, end_bit, stream);
+    }
+    // Check that we have enough storage
+    CHECK_GE(workspace->size(0), keys_bytes + values_bytes + sortpairs_bytes);
+    // Carve the workspace into the three regions listed above
+    KDType* keys_out_ptr = reinterpret_cast<KDType *>(workspace->dptr_);
+    VDType* values_out_ptr = reinterpret_cast<VDType *>(workspace->dptr_ + keys_bytes);
+    void* temp_storage = reinterpret_cast<void *>(workspace->dptr_ + keys_bytes + values_bytes);
+    // Sort
+    if (is_ascend) {
+      cub::DeviceRadixSort::SortPairs(temp_storage, sortpairs_bytes,
+        keys.dptr_, keys_out_ptr, values.dptr_, values_out_ptr,
+        keys.size(0), begin_bit, end_bit, stream);
+    } else {
+      cub::DeviceRadixSort::SortPairsDescending(temp_storage, sortpairs_bytes,
+        keys.dptr_, keys_out_ptr, values.dptr_, values_out_ptr,
+        keys.size(0), begin_bit, end_bit, stream);
+    }
+    // Copy result back to [keys, values]
+    mshadow::Tensor<gpu, 1, KDType> keys_out(keys_out_ptr, mshadow::Shape1(keys.size(0)),
+      keys.stream_);
+    mshadow::Tensor<gpu, 1, VDType> values_out(values_out_ptr, mshadow::Shape1(keys.size(0)),
+      keys.stream_);
+    mshadow::Copy(keys, keys_out, keys.stream_);
+    mshadow::Copy(values, values_out, values.stream_);  // uses values.stream_, not keys.stream_
+  } else {
+#endif // SORT_WITH_THRUST
+    // No workspace, sort using thrust
+    thrust::device_ptr<KDType> key_iter = thrust::device_pointer_cast(keys.dptr_);
+    thrust::device_ptr<VDType> value_iter = thrust::device_pointer_cast(values.dptr_);
+    if (is_ascend) {
+      thrust::stable_sort_by_key(
+        thrust::cuda::par.on(stream),
+        key_iter, key_iter + keys.size(0), value_iter, thrust::less<KDType>());
+    } else {
+      thrust::stable_sort_by_key(
+        thrust::cuda::par.on(stream),
+        key_iter, key_iter + keys.size(0), value_iter, thrust::greater<KDType>());
+    }
+#ifndef SORT_WITH_THRUST
+  }
+#endif // SORT_WITH_THRUST
+  MSHADOW_CUDA_POST_KERNEL_CHECK(SortByKey);
+#else
+  LOG(FATAL) << "SortByKey is only supported for CUDA version >=7.0!";
+#endif
+}
+
+template<typename DType>  // Overload for half_t keys: not implemented, always logs FATAL
+inline void SortByKey(mshadow::Tensor<gpu, 1, mshadow::half::half_t> keys,
+  mshadow::Tensor<gpu, 1, DType> values, bool is_ascend,
+  mshadow::Tensor<gpu, 1, char>* workspace, const int begin_bit, const int end_bit) {
+  LOG(FATAL) << "SortByKey for half_t is not implemented!";  // no argument is read
+}
+
+template<typename DType>  // Overload for half_t values: not implemented, always logs FATAL
+inline void SortByKey(mshadow::Tensor<gpu, 1, DType> keys,
+  mshadow::Tensor<gpu, 1, mshadow::half::half_t> values, bool is_ascend,
+  mshadow::Tensor<gpu, 1, char>* workspace, const int begin_bit, const int end_bit) {
+  LOG(FATAL) << "SortByKey for half_t is not implemented!";  // no argument is read
+}
+
+}  // namespace op
+}  // namespace mxnet
+
+#endif  // MXNET_OPERATOR_TENSOR_SORT_OP_INL_CUH_
diff --git a/src/operator/tensor/sort_op.h b/src/operator/tensor/sort_op.h
index ac8a698..a0425a5 100644
--- a/src/operator/tensor/sort_op.h
+++ b/src/operator/tensor/sort_op.h
@@ -1,87 +1,105 @@
-/*!
- *  Copyright (c) 2017 by Contributors
- * \file sort_op.h
- * \brief SortByKey function
- */
-#ifndef MXNET_OPERATOR_TENSOR_SORT_OP_H_
-#define MXNET_OPERATOR_TENSOR_SORT_OP_H_
-
-#include <dmlc/logging.h>
-#include <mshadow/tensor.h>
-#include <vector>
-#include <type_traits>
-
-namespace mxnet {
-namespace op {
-/*!
- * \brief CPU/GPU: Sort key-value pairs stored in separate places. (Stable sort is performed!)
- * \param keys the keys to sort
- * \param values the values that sorts w.r.t the key
- * \param is_ascend whether to sort key in ascending order
- */
-template<typename KDType, typename VDType>
-inline void SortByKey(mshadow::Tensor<cpu, 1, KDType> keys, mshadow::Tensor<cpu, 1, VDType> values,
-                      bool is_ascend = true, mshadow::Tensor<cpu, 1, char>* workspace = NULL,
-                      const int begin_bit = 0, const int end_bit = sizeof(KDType)*8) {
-  CHECK_EQ(keys.CheckContiguous(), true);
-  CHECK_EQ(values.CheckContiguous(), true);
-  CHECK_EQ(keys.size(0), values.size(0))
-    << "The sizes of key/value are not equal! keys_size: " << keys.size(0)
-    << "values_size: " << values.size(0);
-  std::vector<size_t> idx(keys.size(0));
-  std::vector<KDType> keys_vec(keys.size(0));
-  std::vector<VDType> values_vec(values.size(0));
-  for (index_t i = 0; i < keys.size(0); i++) {
-    idx[i] = i;
-    keys_vec[i] = keys[i];
-    values_vec[i] = values[i];
-  }
-  if (is_ascend) {
-    std::stable_sort(idx.begin(), idx.end(),
-                     [&keys_vec](size_t i1, size_t i2)
-                       {return keys_vec[i1] < keys_vec[i2]; });
-  } else {
-    std::stable_sort(idx.begin(), idx.end(),
-                     [&keys_vec](size_t i1, size_t i2)
-                       {return keys_vec[i1] > keys_vec[i2]; });
-  }
-  for (index_t i = 0; i < values.size(0); i++) {
-    keys[i] = keys_vec[idx[i]];
-    values[i] = values_vec[idx[i]];
-  }
-}
-
-/*!
- * \brief CPU/GPU: Return the amount of temporary storage in bytes required for SortByKey
- * \param num_keys number of keys to sort
- */
-template <typename KDType, typename VDType, typename xpu>
-inline typename std::enable_if<std::is_same<xpu, cpu>::value, size_t>::type
-SortByKeyWorkspaceSize(const size_t num_keys) {
-  return 0;
-}
-
-/*!
- * \brief CPU/GPU: Sort key-value pairs stored in separate places. (Stable sort is performed!)
- * \param keys the keys to sort
- * \param values the values that sorts w.r.t the key
- * \param is_ascend whether to sort key in ascending order
- */
-template<typename KDType, typename VDType>
-inline void SortByKey(mshadow::Tensor<gpu, 1, KDType> keys, mshadow::Tensor<gpu, 1, VDType> values,
-                      bool is_ascend = true, mshadow::Tensor<gpu, 1, char>* workspace = NULL,
-                      const int begin_bit = 0, const int end_bit = sizeof(KDType)*8);
-/*!
- * \brief CPU/GPU: Return the amount of temporary storage in bytes required for SortByKey
- * \param num_keys number of keys to sort
- */
-template <typename KDType, typename VDType, typename xpu>
-inline typename std::enable_if<std::is_same<xpu, gpu>::value, size_t>::type
-SortByKeyWorkspaceSize(const size_t num_keys);
-
-}  // namespace op
-}  // namespace mxnet
-#ifdef __CUDACC__
-#include "./sort_op-inl.cuh"
-#endif
-#endif  // MXNET_OPERATOR_TENSOR_SORT_OP_H_
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file sort_op.h
+ * \brief SortByKey function
+ */
+#ifndef MXNET_OPERATOR_TENSOR_SORT_OP_H_
+#define MXNET_OPERATOR_TENSOR_SORT_OP_H_
+
+#include <dmlc/logging.h>
+#include <mshadow/tensor.h>
+#include <vector>
+#include <type_traits>
+
+namespace mxnet {
+namespace op {
+/*!
+ * \brief CPU/GPU: Sort key-value pairs stored in separate places. (Stable sort is performed!)
+ * \param keys the keys to sort
+ * \param values the values that sorts w.r.t the key
+ * \param is_ascend whether to sort key in ascending order
+ */
+template<typename KDType, typename VDType>
+inline void SortByKey(mshadow::Tensor<cpu, 1, KDType> keys, mshadow::Tensor<cpu, 1, VDType> values,
+                      bool is_ascend = true, mshadow::Tensor<cpu, 1, char>* workspace = NULL,
+                      const int begin_bit = 0, const int end_bit = sizeof(KDType)*8) {
+  CHECK_EQ(keys.CheckContiguous(), true);
+  CHECK_EQ(values.CheckContiguous(), true);
+  CHECK_EQ(keys.size(0), values.size(0))
+    << "The sizes of key/value are not equal! keys_size: " << keys.size(0)
+    << "values_size: " << values.size(0);
+  std::vector<size_t> idx(keys.size(0));
+  std::vector<KDType> keys_vec(keys.size(0));
+  std::vector<VDType> values_vec(values.size(0));
+  for (index_t i = 0; i < keys.size(0); i++) {
+    idx[i] = i;
+    keys_vec[i] = keys[i];
+    values_vec[i] = values[i];
+  }
+  if (is_ascend) {
+    std::stable_sort(idx.begin(), idx.end(),
+                     [&keys_vec](size_t i1, size_t i2)
+                       {return keys_vec[i1] < keys_vec[i2]; });
+  } else {
+    std::stable_sort(idx.begin(), idx.end(),
+                     [&keys_vec](size_t i1, size_t i2)
+                       {return keys_vec[i1] > keys_vec[i2]; });
+  }
+  for (index_t i = 0; i < values.size(0); i++) {
+    keys[i] = keys_vec[idx[i]];
+    values[i] = values_vec[idx[i]];
+  }
+}
+
+/*!
+ * \brief CPU/GPU: Return the amount of temporary storage in bytes required for SortByKey
+ * \param num_keys number of keys to sort
+ */
+template <typename KDType, typename VDType, typename xpu>
+inline typename std::enable_if<std::is_same<xpu, cpu>::value, size_t>::type
+SortByKeyWorkspaceSize(const size_t num_keys) {
+  return 0;
+}
+
+/*!
+ * \brief CPU/GPU: Sort key-value pairs stored in separate places. (Stable sort is performed!)
+ * \param keys the keys to sort
+ * \param values the values that sorts w.r.t the key
+ * \param is_ascend whether to sort key in ascending order
+ */
+template<typename KDType, typename VDType>
+inline void SortByKey(mshadow::Tensor<gpu, 1, KDType> keys, mshadow::Tensor<gpu, 1, VDType> values,
+                      bool is_ascend = true, mshadow::Tensor<gpu, 1, char>* workspace = NULL,
+                      const int begin_bit = 0, const int end_bit = sizeof(KDType)*8);
+/*!
+ * \brief CPU/GPU: Return the amount of temporary storage in bytes required for SortByKey
+ * \param num_keys number of keys to sort
+ */
+template <typename KDType, typename VDType, typename xpu>
+inline typename std::enable_if<std::is_same<xpu, gpu>::value, size_t>::type
+SortByKeyWorkspaceSize(const size_t num_keys);
+
+}  // namespace op
+}  // namespace mxnet
+#ifdef __CUDACC__
+#include "./sort_op-inl.cuh"
+#endif
+#endif  // MXNET_OPERATOR_TENSOR_SORT_OP_H_
diff --git a/src/operator/upsampling-inl.h b/src/operator/upsampling-inl.h
index a10ccb1..fec0f74 100644
--- a/src/operator/upsampling-inl.h
+++ b/src/operator/upsampling-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file upsampling-inl.h
  * \brief
  * \author Bing Xu
diff --git a/src/operator/upsampling.cc b/src/operator/upsampling.cc
index ad89d4a..653b570 100644
--- a/src/operator/upsampling.cc
+++ b/src/operator/upsampling.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file upsampling_nearest.cc
  * \brief
  * \author Bing Xu
diff --git a/src/operator/upsampling.cu b/src/operator/upsampling.cu
index 70466d4..8152535 100644
--- a/src/operator/upsampling.cu
+++ b/src/operator/upsampling.cu
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file upsampling_nearest.cc
  * \brief
  * \author Bing Xu
diff --git a/src/optimizer/sgd-inl.h b/src/optimizer/sgd-inl.h
index 36b45c3..01a330b 100644
--- a/src/optimizer/sgd-inl.h
+++ b/src/optimizer/sgd-inl.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file sgd-inl.h
  * \brief Operator interface of mxnet.
  * \author Junyuan Xie
diff --git a/src/resource.cc b/src/resource.cc
index 60e40d1..4c2dbee 100644
--- a/src/resource.cc
+++ b/src/resource.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file resource.cc
  * \brief Implementation of resource manager.
  */
diff --git a/src/storage/cpu_device_storage.h b/src/storage/cpu_device_storage.h
index 2afb658..ead00da 100644
--- a/src/storage/cpu_device_storage.h
+++ b/src/storage/cpu_device_storage.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file cpu_device_storage.h
  * \brief CPU storage implementation.
  */
diff --git a/src/storage/gpu_device_storage.h b/src/storage/gpu_device_storage.h
index 1068490..3c4f732 100644
--- a/src/storage/gpu_device_storage.h
+++ b/src/storage/gpu_device_storage.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file gpu_device_storage.h
  * \brief GPU storage implementation.
  */
diff --git a/src/storage/naive_storage_manager.h b/src/storage/naive_storage_manager.h
index 05a8b10..731f374 100644
--- a/src/storage/naive_storage_manager.h
+++ b/src/storage/naive_storage_manager.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file naive_storage_manager.h
  * \brief Naive storage manager.
  */
diff --git a/src/storage/pinned_memory_storage.h b/src/storage/pinned_memory_storage.h
index 5b0df10..69e05f7 100644
--- a/src/storage/pinned_memory_storage.h
+++ b/src/storage/pinned_memory_storage.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file cpu_device_storage.h
  * \brief CPU storage with pinned memory
  */
diff --git a/src/storage/pooled_storage_manager.h b/src/storage/pooled_storage_manager.h
index 5e0050c..b2c6633 100644
--- a/src/storage/pooled_storage_manager.h
+++ b/src/storage/pooled_storage_manager.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file pooled_storage_manager.h
  * \brief Storage manager with a memory pool.
  */
diff --git a/src/storage/storage.cc b/src/storage/storage.cc
index 353d72d..fa15a44 100644
--- a/src/storage/storage.cc
+++ b/src/storage/storage.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  */
 #include <mxnet/storage.h>
 #include <mshadow/tensor.h>
diff --git a/src/storage/storage_manager.h b/src/storage/storage_manager.h
index de08688..924d2ed 100644
--- a/src/storage/storage_manager.h
+++ b/src/storage/storage_manager.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2015 by Contributors
  * \file storage_manager.h
  * \brief Storage manager.
  */
diff --git a/tests/ci_build/ci_build.sh b/tests/ci_build/ci_build.sh
index fa2d37e..79fcd86 100755
--- a/tests/ci_build/ci_build.sh
+++ b/tests/ci_build/ci_build.sh
@@ -1,4 +1,22 @@
 #!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 #
 # Execute command within a docker container
 #
diff --git a/tests/ci_build/install/install_julia.sh b/tests/ci_build/install/install_julia.sh
index 80232a1..5007c94 100644
--- a/tests/ci_build/install/install_julia.sh
+++ b/tests/ci_build/install/install_julia.sh
@@ -1,5 +1,23 @@
 #!/usr/bin/env bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 set -e
 
 wget https://julialang.s3.amazonaws.com/bin/linux/x64/0.5/julia-0.5.0-linux-x86_64.tar.gz
diff --git a/tests/ci_build/install/install_library.sh b/tests/ci_build/install/install_library.sh
index d65ab21..399f6a0 100644
--- a/tests/ci_build/install/install_library.sh
+++ b/tests/ci_build/install/install_library.sh
@@ -1,5 +1,23 @@
 #!/usr/bin/env bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 yum install graphviz
 pip install graphviz
 pip install opencv-python
diff --git a/tests/ci_build/install/install_maven.sh b/tests/ci_build/install/install_maven.sh
index 66459be..666ebde 100644
--- a/tests/ci_build/install/install_maven.sh
+++ b/tests/ci_build/install/install_maven.sh
@@ -1,5 +1,23 @@
 #!/usr/bin/env bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 set -e
 
 wget http://mirrors.ocf.berkeley.edu/apache/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz
diff --git a/tests/ci_build/install/install_openblas.sh b/tests/ci_build/install/install_openblas.sh
index 3ac6421..2ec5eeb 100644
--- a/tests/ci_build/install/install_openblas.sh
+++ b/tests/ci_build/install/install_openblas.sh
@@ -1,5 +1,23 @@
 #!/usr/bin/env bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 set -e
 
 git clone https://github.com/xianyi/OpenBLAS
diff --git a/tests/ci_build/install/install_opencv.sh b/tests/ci_build/install/install_opencv.sh
index 70c7ddb..08a4d9b 100644
--- a/tests/ci_build/install/install_opencv.sh
+++ b/tests/ci_build/install/install_opencv.sh
@@ -1,5 +1,23 @@
 #!/usr/bin/env bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 set -e
 
 yum groupinstall -y "Development Tools"
diff --git a/tests/ci_build/install/install_python2.sh b/tests/ci_build/install/install_python2.sh
index c818c5d..ec4bbb9 100644
--- a/tests/ci_build/install/install_python2.sh
+++ b/tests/ci_build/install/install_python2.sh
@@ -1,5 +1,23 @@
 #!/usr/bin/env bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 set -e
 
 yum groupinstall -y "Development Tools"
diff --git a/tests/ci_build/install/install_python3.sh b/tests/ci_build/install/install_python3.sh
index 5aa1d80..ee89161 100644
--- a/tests/ci_build/install/install_python3.sh
+++ b/tests/ci_build/install/install_python3.sh
@@ -1,5 +1,23 @@
 #!/usr/bin/env bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 set -e
 
 wget https://bootstrap.pypa.io/get-pip.py || exit 1
diff --git a/tests/ci_build/install/install_testdeps.sh b/tests/ci_build/install/install_testdeps.sh
index 975eec4..c777348 100644
--- a/tests/ci_build/install/install_testdeps.sh
+++ b/tests/ci_build/install/install_testdeps.sh
@@ -1,5 +1,23 @@
 #!/usr/bin/env bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 set -e
 
 pip install cpplint 'pylint==1.4.4' 'astroid==1.3.6'
diff --git a/tests/ci_build/install/ubuntu_install_core.sh b/tests/ci_build/install/ubuntu_install_core.sh
index 9ee7697..4947574 100755
--- a/tests/ci_build/install/ubuntu_install_core.sh
+++ b/tests/ci_build/install/ubuntu_install_core.sh
@@ -1,4 +1,22 @@
 #!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # install libraries for building mxnet c++ core on ubuntu
 
 apt-get update && apt-get install -y \
diff --git a/tests/ci_build/install/ubuntu_install_perl.sh b/tests/ci_build/install/ubuntu_install_perl.sh
index da4df67..a981746 100755
--- a/tests/ci_build/install/ubuntu_install_perl.sh
+++ b/tests/ci_build/install/ubuntu_install_perl.sh
@@ -1,4 +1,22 @@
 #!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # install libraries for mxnet's perl package on ubuntu
 apt-get update && apt-get install -y libmouse-perl pdl cpanminus swig libgraphviz-perl
 cpanm -q Function::Parameters
diff --git a/tests/ci_build/install/ubuntu_install_python.sh b/tests/ci_build/install/ubuntu_install_python.sh
index 973523d..bb67e34 100755
--- a/tests/ci_build/install/ubuntu_install_python.sh
+++ b/tests/ci_build/install/ubuntu_install_python.sh
@@ -1,4 +1,22 @@
 #!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # install libraries for mxnet's python package on ubuntu
 
 apt-get update && apt-get install -y python-dev python3-dev
diff --git a/tests/ci_build/install/ubuntu_install_r.sh b/tests/ci_build/install/ubuntu_install_r.sh
index 10851a6..38d89a3 100755
--- a/tests/ci_build/install/ubuntu_install_r.sh
+++ b/tests/ci_build/install/ubuntu_install_r.sh
@@ -1,4 +1,22 @@
 #!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # install libraries for mxnet's r package on ubuntu
 
 echo "deb http://cran.rstudio.com/bin/linux/ubuntu trusty/" >> /etc/apt/sources.list
diff --git a/tests/ci_build/install/ubuntu_install_scala.sh b/tests/ci_build/install/ubuntu_install_scala.sh
index dcdd4bc..712eff9 100755
--- a/tests/ci_build/install/ubuntu_install_scala.sh
+++ b/tests/ci_build/install/ubuntu_install_scala.sh
@@ -1,4 +1,22 @@
 #!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # install libraries for mxnet's scala package on ubuntu
 
 apt-get update && apt-get install -y \
diff --git a/tests/cpp/engine/threaded_engine_test.cc b/tests/cpp/engine/threaded_engine_test.cc
index 73dc530..58b7e57 100644
--- a/tests/cpp/engine/threaded_engine_test.cc
+++ b/tests/cpp/engine/threaded_engine_test.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file threaded_engine_test.cc
  * \brief threaded engine tests
 */
diff --git a/tests/cpp/include/test_op.h b/tests/cpp/include/test_op.h
index 57fda19..d8f90df 100644
--- a/tests/cpp/include/test_op.h
+++ b/tests/cpp/include/test_op.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file test_op.h
  * \brief operator unit test utility functions
  * \author Chris Olivier
diff --git a/tests/cpp/include/test_perf.h b/tests/cpp/include/test_perf.h
index 93b7863..d74d4d5 100644
--- a/tests/cpp/include/test_perf.h
+++ b/tests/cpp/include/test_perf.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file test_perf.h
  * \brief operator unit test utility functions
  * \author Chris Olivier
diff --git a/tests/cpp/include/test_util.h b/tests/cpp/include/test_util.h
index 3fa8268..3f5f4ec 100644
--- a/tests/cpp/include/test_util.h
+++ b/tests/cpp/include/test_util.h
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file test_util.h
  * \brief unit test performance analysis functions
  * \author Chris Olivier
diff --git a/tests/cpp/operator/batchnorm_test.cc b/tests/cpp/operator/batchnorm_test.cc
index 719980b..3fef28f 100644
--- a/tests/cpp/operator/batchnorm_test.cc
+++ b/tests/cpp/operator/batchnorm_test.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file batchnorm_test.cc
  * \brief operator unit test utility functions
  * \author Chris Olivier
diff --git a/tests/cpp/operator/krprod_test.cc b/tests/cpp/operator/krprod_test.cc
index 6e10221..31b8ab9 100644
--- a/tests/cpp/operator/krprod_test.cc
+++ b/tests/cpp/operator/krprod_test.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2017 by Contributors
  *  \file krprod_test.cc
  *  \brief Test Khatri-Rao product
  *  \author Jencir Lee
diff --git a/tests/cpp/storage/storage_test.cc b/tests/cpp/storage/storage_test.cc
index b2bbc49..8af3984 100644
--- a/tests/cpp/storage/storage_test.cc
+++ b/tests/cpp/storage/storage_test.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file storage_test.cc
  * \brief cpu/gpu storage tests
 */
diff --git a/tests/cpp/test_main.cc b/tests/cpp/test_main.cc
index 28cdf7b..b8ffbbd 100644
--- a/tests/cpp/test_main.cc
+++ b/tests/cpp/test_main.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- * Copyright (c) 2017 by Contributors
  * \file test_main.cc
  * \brief operator unit test utility functions
  * \author Chris Olivier
diff --git a/tests/jenkins/run_as_user.sh b/tests/jenkins/run_as_user.sh
index db90f0b..7ed3cdf 100755
--- a/tests/jenkins/run_as_user.sh
+++ b/tests/jenkins/run_as_user.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 # Exit script with error if any errors occur
 set -e
 
diff --git a/tests/jenkins/run_test.sh b/tests/jenkins/run_test.sh
index a856432..bc69ca1 100755
--- a/tests/jenkins/run_test.sh
+++ b/tests/jenkins/run_test.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 # Exit script with error if any errors occur
 
 echo "BUILD make"
diff --git a/tests/jenkins/run_test_amzn_linux_gpu.sh b/tests/jenkins/run_test_amzn_linux_gpu.sh
index 42c037e..ecfb521 100755
--- a/tests/jenkins/run_test_amzn_linux_gpu.sh
+++ b/tests/jenkins/run_test_amzn_linux_gpu.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 # Exit script with error if any errors occur
 
 echo "BUILD make"
diff --git a/tests/jenkins/run_test_installation_docs.sh b/tests/jenkins/run_test_installation_docs.sh
index 921c89a..90eecb9 100755
--- a/tests/jenkins/run_test_installation_docs.sh
+++ b/tests/jenkins/run_test_installation_docs.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 set -e
 
 # Given an array of numbers, removes any numbers of it that fall outside a given range.
@@ -30,9 +48,9 @@ function remove_out_of_range() {
         echo "Error: Min must be less than or equal to Max"
         exit 1
     fi
-   
+
     return_arr=()
- 
+
     for number in "${lineno_array[@]}"
     do
         if (( ${number} > ${min} && ${number} < ${max} ))
diff --git a/tests/jenkins/run_test_pip_installations.sh b/tests/jenkins/run_test_pip_installations.sh
index de235a0..44788bf 100755
--- a/tests/jenkins/run_test_pip_installations.sh
+++ b/tests/jenkins/run_test_pip_installations.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 set -e
 
 if (( $# < 1 )); then
diff --git a/tests/jenkins/run_test_ubuntu.sh b/tests/jenkins/run_test_ubuntu.sh
index 2e458b5..cdddd28 100755
--- a/tests/jenkins/run_test_ubuntu.sh
+++ b/tests/jenkins/run_test_ubuntu.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 set -e
 
 echo "BUILD make"
diff --git a/tests/jenkins/set_user_permissions.sh b/tests/jenkins/set_user_permissions.sh
index d03a97b..51034c4 100644
--- a/tests/jenkins/set_user_permissions.sh
+++ b/tests/jenkins/set_user_permissions.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 # Exit script with error if any errors occur
 set -e
 
diff --git a/tests/nightly/TestDoc/doc_spell_checker.py b/tests/nightly/TestDoc/doc_spell_checker.py
index 20a5c07..a7b8b25 100644
--- a/tests/nightly/TestDoc/doc_spell_checker.py
+++ b/tests/nightly/TestDoc/doc_spell_checker.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 #pylint: disable=no-member, too-many-instance-attributes
 """This script uses pyenchant to check spelling for MXNet
     documentation website.
diff --git a/tests/nightly/TestDoc/doc_spell_grammar.sh b/tests/nightly/TestDoc/doc_spell_grammar.sh
index 05b0be1..77c7b86 100755
--- a/tests/nightly/TestDoc/doc_spell_grammar.sh
+++ b/tests/nightly/TestDoc/doc_spell_grammar.sh
@@ -1,4 +1,22 @@
 #!/bin/sh
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 echo "BUILD make"
 cp ./make/config.mk .
 echo "USE_CUDA=0" >> ./config.mk
diff --git a/tests/nightly/compilation_warnings/compilation_warnings.sh b/tests/nightly/compilation_warnings/compilation_warnings.sh
index 871d849..a6c4863 100644
--- a/tests/nightly/compilation_warnings/compilation_warnings.sh
+++ b/tests/nightly/compilation_warnings/compilation_warnings.sh
@@ -1,4 +1,22 @@
 #!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 set -e
 runme() {
 	cmd=$*
diff --git a/tests/nightly/compilation_warnings/process_output.py b/tests/nightly/compilation_warnings/process_output.py
index 622598d..5f85af5 100644
--- a/tests/nightly/compilation_warnings/process_output.py
+++ b/tests/nightly/compilation_warnings/process_output.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import re
 import sys
 import operator
diff --git a/tests/nightly/dist_lenet.py b/tests/nightly/dist_lenet.py
index a7ae84c..35f55c8 100644
--- a/tests/nightly/dist_lenet.py
+++ b/tests/nightly/dist_lenet.py
@@ -1,4 +1,22 @@
 #!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # distributed lenet
 import os, sys
 curr_path = os.path.abspath(os.path.dirname(__file__))
diff --git a/tests/nightly/dist_sync_kvstore.py b/tests/nightly/dist_sync_kvstore.py
index ebed6c5..3fbf9f9 100644
--- a/tests/nightly/dist_sync_kvstore.py
+++ b/tests/nightly/dist_sync_kvstore.py
@@ -1,4 +1,22 @@
 #!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 import sys
 sys.path.insert(0, "../../python/")
diff --git a/tests/nightly/download.sh b/tests/nightly/download.sh
index 56f822e..d07fc6f 100644
--- a/tests/nightly/download.sh
+++ b/tests/nightly/download.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 dmlc_download() {
     url=http://data.mxnet.io/mxnet/datasets/
     dir=$1
diff --git a/tests/nightly/multi_lenet.py b/tests/nightly/multi_lenet.py
index 1fb2dfa..687588b 100644
--- a/tests/nightly/multi_lenet.py
+++ b/tests/nightly/multi_lenet.py
@@ -1,4 +1,22 @@
 #!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # lenet with multiple gpus
 #
 # using different kvstore will get almost identical results
diff --git a/tests/nightly/mxnet_keras_integration_tests/assertion_util.py b/tests/nightly/mxnet_keras_integration_tests/assertion_util.py
index 1fad6a1..eb3d3bd 100644
--- a/tests/nightly/mxnet_keras_integration_tests/assertion_util.py
+++ b/tests/nightly/mxnet_keras_integration_tests/assertion_util.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 
 from nose.tools import assert_true
 
diff --git a/tests/nightly/mxnet_keras_integration_tests/model_util.py b/tests/nightly/mxnet_keras_integration_tests/model_util.py
index 9f73ab6..bb9d637 100644
--- a/tests/nightly/mxnet_keras_integration_tests/model_util.py
+++ b/tests/nightly/mxnet_keras_integration_tests/model_util.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os
 from keras import backend as K
 from keras.models import Model
diff --git a/tests/nightly/mxnet_keras_integration_tests/profiler.py b/tests/nightly/mxnet_keras_integration_tests/profiler.py
index 4b6446a..b0d39e1 100644
--- a/tests/nightly/mxnet_keras_integration_tests/profiler.py
+++ b/tests/nightly/mxnet_keras_integration_tests/profiler.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os
 import signal
 import time
diff --git a/tests/nightly/mxnet_keras_integration_tests/test_mnist_mlp.py b/tests/nightly/mxnet_keras_integration_tests/test_mnist_mlp.py
index 7a0c629..89bd280 100644
--- a/tests/nightly/mxnet_keras_integration_tests/test_mnist_mlp.py
+++ b/tests/nightly/mxnet_keras_integration_tests/test_mnist_mlp.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 '''
 This code is forked from https://github.com/fchollet/keras/blob/master/examples/mnist_mlp.py
 and modified to use as MXNet-Keras integration testing for functionality and sanity performance
diff --git a/tests/nightly/sh2ju.sh b/tests/nightly/sh2ju.sh
index 84c1427..4465cd8 100644
--- a/tests/nightly/sh2ju.sh
+++ b/tests/nightly/sh2ju.sh
@@ -1,4 +1,22 @@
 #!/bin/sh
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 ### Copyright 2010 Manuel Carrasco Moñino. (manolo at apache.org)
 ###
 ### Licensed under the Apache License, Version 2.0.
diff --git a/tests/nightly/test_all.sh b/tests/nightly/test_all.sh
index 33c39f5..32913c9 100755
--- a/tests/nightly/test_all.sh
+++ b/tests/nightly/test_all.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 # setup
 export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
 cd `pwd`/`dirname $0`
diff --git a/tests/nightly/test_kvstore.py b/tests/nightly/test_kvstore.py
index c954c18..b39ec89 100644
--- a/tests/nightly/test_kvstore.py
+++ b/tests/nightly/test_kvstore.py
@@ -1,4 +1,22 @@
 #!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import sys
 sys.path.insert(0, "../../python/")
 import mxnet as mx
diff --git a/tests/nightly/test_mxnet_keras_integration_cpu.sh b/tests/nightly/test_mxnet_keras_integration_cpu.sh
index 25a1da4..95cc0d0 100755
--- a/tests/nightly/test_mxnet_keras_integration_cpu.sh
+++ b/tests/nightly/test_mxnet_keras_integration_cpu.sh
@@ -1,4 +1,22 @@
 #!/bin/sh
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 set -e
 ### Build MXNet with CPU support
 echo "BUILD make"
diff --git a/tests/nightly/test_mxnet_keras_integration_gpu.sh b/tests/nightly/test_mxnet_keras_integration_gpu.sh
index 86fb37a..5d541fa 100755
--- a/tests/nightly/test_mxnet_keras_integration_gpu.sh
+++ b/tests/nightly/test_mxnet_keras_integration_gpu.sh
@@ -1,4 +1,22 @@
 #!/bin/sh
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 set -e
 
 ### Install git
diff --git a/tests/nightly/test_tutorial.py b/tests/nightly/test_tutorial.py
index 304642c..56b530a 100644
--- a/tests/nightly/test_tutorial.py
+++ b/tests/nightly/test_tutorial.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 #pylint: disable=no-member, too-many-locals, too-many-branches, no-self-use, broad-except, lost-exception, too-many-nested-blocks, too-few-public-methods, invalid-name
 """
     This script converts all python tutorials into python script
diff --git a/tests/python/common/get_data.py b/tests/python/common/get_data.py
index e385a71..35482f8 100644
--- a/tests/python/common/get_data.py
+++ b/tests/python/common/get_data.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 import os, gzip
 import pickle as pickle
diff --git a/tests/python/common/models.py b/tests/python/common/models.py
index 2c998af..b563adc 100644
--- a/tests/python/common/models.py
+++ b/tests/python/common/models.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """This file defines various models used in the test"""
 import mxnet as mx
 
diff --git a/tests/python/doctest/test_docstring.py b/tests/python/doctest/test_docstring.py
index e457e7b..23a2958 100644
--- a/tests/python/doctest/test_docstring.py
+++ b/tests/python/doctest/test_docstring.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import doctest
 import logging
 import mxnet
diff --git a/tests/python/gpu/test_forward.py b/tests/python/gpu/test_forward.py
index dc2c129..cddf9af 100644
--- a/tests/python/gpu/test_forward.py
+++ b/tests/python/gpu/test_forward.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os
 import numpy as np
 import mxnet as mx
diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py
index 6fef4e2..c80b9e3 100644
--- a/tests/python/gpu/test_operator_gpu.py
+++ b/tests/python/gpu/test_operator_gpu.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import sys
 import os
 import time
diff --git a/tests/python/gpu/test_rtc.py b/tests/python/gpu/test_rtc.py
index d38f038..756c3d7 100644
--- a/tests/python/gpu/test_rtc.py
+++ b/tests/python/gpu/test_rtc.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 import mxnet as mx
 import numpy as np
@@ -13,4 +30,4 @@ if __name__ == '__main__':
         s_rec[threadIdx.x] = x[threadIdx.x];
         y[threadIdx.x] = expf(s_rec[threadIdx.x]*5.0);""")
     rtc.push([x], [y], (1, 1, 1), (10,1,1))
-    assert_allclose(y.asnumpy(), np.exp(x.asnumpy()*5.0))
\ No newline at end of file
+    assert_allclose(y.asnumpy(), np.exp(x.asnumpy()*5.0))
diff --git a/tests/python/predict/mxnet_predict_example.py b/tests/python/predict/mxnet_predict_example.py
index 745a1f8..1db3f5c 100644
--- a/tests/python/predict/mxnet_predict_example.py
+++ b/tests/python/predict/mxnet_predict_example.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import sys, os
 curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
 sys.path.append("../../../amalgamation/python/")
diff --git a/tests/python/train/common.py b/tests/python/train/common.py
index 1622e02..38718fa 100644
--- a/tests/python/train/common.py
+++ b/tests/python/train/common.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import sys, os
 curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
 sys.path.append(os.path.join(curr_path, '../common/'))
diff --git a/tests/python/train/test_autograd.py b/tests/python/train/test_autograd.py
index 8d67bfb..c9921ec 100644
--- a/tests/python/train/test_autograd.py
+++ b/tests/python/train/test_autograd.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 from __future__ import print_function
 
diff --git a/tests/python/train/test_bucketing.py b/tests/python/train/test_bucketing.py
index 85ea107..1303db0 100644
--- a/tests/python/train/test_bucketing.py
+++ b/tests/python/train/test_bucketing.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 import numpy as np
 import mxnet as mx
diff --git a/tests/python/train/test_conv.py b/tests/python/train/test_conv.py
index 039790e..46e0684 100644
--- a/tests/python/train/test_conv.py
+++ b/tests/python/train/test_conv.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 import sys
 sys.path.insert(0, '../../python')
diff --git a/tests/python/train/test_dtype.py b/tests/python/train/test_dtype.py
index 3371f4b..b0a5248 100644
--- a/tests/python/train/test_dtype.py
+++ b/tests/python/train/test_dtype.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 import sys
 sys.path.insert(0, '../../python')
@@ -173,6 +190,6 @@ def test_cifar10():
     (train, val) = get_iterator_uint8(kv)
     run_cifar10(train, val, use_module=False)
     run_cifar10(train, val, use_module=True)
-    
+
 if __name__ == "__main__":
     test_cifar10()
diff --git a/tests/python/train/test_mlp.py b/tests/python/train/test_mlp.py
index c983b6e..a0a45b4 100644
--- a/tests/python/train/test_mlp.py
+++ b/tests/python/train/test_mlp.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 import mxnet as mx
 import numpy as np
diff --git a/tests/python/unittest/common.py b/tests/python/unittest/common.py
index 38b4bd6..12ed60d 100644
--- a/tests/python/unittest/common.py
+++ b/tests/python/unittest/common.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import sys, os
 curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
 sys.path.append(os.path.join(curr_path, '../common/'))
diff --git a/tests/python/unittest/test_attr.py b/tests/python/unittest/test_attr.py
index 4cdecaf..0d7e67d 100644
--- a/tests/python/unittest/test_attr.py
+++ b/tests/python/unittest/test_attr.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os
 import mxnet as mx
 from common import models
diff --git a/tests/python/unittest/test_autograd.py b/tests/python/unittest/test_autograd.py
index 7ee3500..5be3d35 100644
--- a/tests/python/unittest/test_autograd.py
+++ b/tests/python/unittest/test_autograd.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import functools
 import mxnet.ndarray as nd
 from mxnet.ndarray import zeros_like
diff --git a/tests/python/unittest/test_contrib_autograd.py b/tests/python/unittest/test_contrib_autograd.py
index e7b0ce3..a144c34 100644
--- a/tests/python/unittest/test_contrib_autograd.py
+++ b/tests/python/unittest/test_contrib_autograd.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet.ndarray as nd
 from mxnet.contrib.autograd import *
 from mxnet.test_utils import *
diff --git a/tests/python/unittest/test_executor.py b/tests/python/unittest/test_executor.py
index c1cc013..e3d977d 100644
--- a/tests/python/unittest/test_executor.py
+++ b/tests/python/unittest/test_executor.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import numpy as np
 import mxnet as mx
 
diff --git a/tests/python/unittest/test_gluon.py b/tests/python/unittest/test_gluon.py
index 8256c71..4fff23f 100644
--- a/tests/python/unittest/test_gluon.py
+++ b/tests/python/unittest/test_gluon.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 from mxnet import gluon
 from mxnet.gluon import nn
diff --git a/tests/python/unittest/test_gluon_data.py b/tests/python/unittest/test_gluon_data.py
index 2407f8e..da1de6b 100644
--- a/tests/python/unittest/test_gluon_data.py
+++ b/tests/python/unittest/test_gluon_data.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os
 import mxnet as mx
 import numpy as np
diff --git a/tests/python/unittest/test_gluon_model_zoo.py b/tests/python/unittest/test_gluon_model_zoo.py
index db26fd4..6fbcf8b 100644
--- a/tests/python/unittest/test_gluon_model_zoo.py
+++ b/tests/python/unittest/test_gluon_model_zoo.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 import mxnet as mx
 from mxnet.gluon import nn
diff --git a/tests/python/unittest/test_gluon_rnn.py b/tests/python/unittest/test_gluon_rnn.py
index c5c9414..ac671e5 100644
--- a/tests/python/unittest/test_gluon_rnn.py
+++ b/tests/python/unittest/test_gluon_rnn.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 from mxnet import gluon
 import numpy as np
diff --git a/tests/python/unittest/test_image.py b/tests/python/unittest/test_image.py
index 638dbf0..04b878d 100644
--- a/tests/python/unittest/test_image.py
+++ b/tests/python/unittest/test_image.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 import numpy as np
 from mxnet.test_utils import *
diff --git a/tests/python/unittest/test_infer_shape.py b/tests/python/unittest/test_infer_shape.py
index 35598bc..d7f52e2 100644
--- a/tests/python/unittest/test_infer_shape.py
+++ b/tests/python/unittest/test_infer_shape.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 import mxnet as mx
 from common import models
diff --git a/tests/python/unittest/test_init.py b/tests/python/unittest/test_init.py
index 7986226..e642e65 100644
--- a/tests/python/unittest/test_init.py
+++ b/tests/python/unittest/test_init.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 import numpy as np
 
diff --git a/tests/python/unittest/test_io.py b/tests/python/unittest/test_io.py
index 1832675..c0f2acd 100644
--- a/tests/python/unittest/test_io.py
+++ b/tests/python/unittest/test_io.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 import mxnet as mx
 import numpy as np
diff --git a/tests/python/unittest/test_kvstore.py b/tests/python/unittest/test_kvstore.py
index 87e5e00..f1e10c7 100644
--- a/tests/python/unittest/test_kvstore.py
+++ b/tests/python/unittest/test_kvstore.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 import mxnet as mx
 import numpy as np
diff --git a/tests/python/unittest/test_loss.py b/tests/python/unittest/test_loss.py
index 7d4c586..8eced7b 100644
--- a/tests/python/unittest/test_loss.py
+++ b/tests/python/unittest/test_loss.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 import numpy as np
 from mxnet import gluon
diff --git a/tests/python/unittest/test_metric.py b/tests/python/unittest/test_metric.py
index 54b58b2..7ae93bf 100644
--- a/tests/python/unittest/test_metric.py
+++ b/tests/python/unittest/test_metric.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 import json
 
diff --git a/tests/python/unittest/test_model_parallel.py b/tests/python/unittest/test_model_parallel.py
index 96990e7..8ff09d5 100644
--- a/tests/python/unittest/test_model_parallel.py
+++ b/tests/python/unittest/test_model_parallel.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import numpy as np
 import mxnet as mx
 
diff --git a/tests/python/unittest/test_module.py b/tests/python/unittest/test_module.py
index 766995d..f522f29 100644
--- a/tests/python/unittest/test_module.py
+++ b/tests/python/unittest/test_module.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 import mxnet.ndarray as nd
 import numpy as np
diff --git a/tests/python/unittest/test_multi_device_exec.py b/tests/python/unittest/test_multi_device_exec.py
index 8956c4e..6f8eb17 100644
--- a/tests/python/unittest/test_multi_device_exec.py
+++ b/tests/python/unittest/test_multi_device_exec.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os
 import mxnet as mx
 
diff --git a/tests/python/unittest/test_ndarray.py b/tests/python/unittest/test_ndarray.py
index 79a022b..eae364e 100644
--- a/tests/python/unittest/test_ndarray.py
+++ b/tests/python/unittest/test_ndarray.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os
 import mxnet as mx
 import numpy as np
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index 62a064a..718e3df 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 from __future__ import print_function
 import numpy as np
diff --git a/tests/python/unittest/test_optimizer.py b/tests/python/unittest/test_optimizer.py
index cf7b82e..3b3b92b 100644
--- a/tests/python/unittest/test_optimizer.py
+++ b/tests/python/unittest/test_optimizer.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import numpy as np
 import mxnet as mx
 import math
diff --git a/tests/python/unittest/test_profiler.py b/tests/python/unittest/test_profiler.py
index 9a0deab..724ed3a 100644
--- a/tests/python/unittest/test_profiler.py
+++ b/tests/python/unittest/test_profiler.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from __future__ import print_function
 import mxnet as mx
 from mxnet import profiler
diff --git a/tests/python/unittest/test_random.py b/tests/python/unittest/test_random.py
index 1f88b6b..6b8311c 100644
--- a/tests/python/unittest/test_random.py
+++ b/tests/python/unittest/test_random.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os
 import mxnet as mx
 import numpy as np
diff --git a/tests/python/unittest/test_recordio.py b/tests/python/unittest/test_recordio.py
index f4489bd..7de582e 100644
--- a/tests/python/unittest/test_recordio.py
+++ b/tests/python/unittest/test_recordio.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: skip-file
 import sys
 import mxnet as mx
@@ -68,4 +85,4 @@ def test_recordio_pack_label():
 if __name__ == '__main__':
     test_recordio_pack_label()
     test_recordio()
-    test_indexed_recordio()
\ No newline at end of file
+    test_indexed_recordio()
diff --git a/tests/python/unittest/test_rnn.py b/tests/python/unittest/test_rnn.py
index e8176bb..9fe22ae 100644
--- a/tests/python/unittest/test_rnn.py
+++ b/tests/python/unittest/test_rnn.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 import numpy as np
 from numpy.testing import assert_allclose
diff --git a/tests/python/unittest/test_symbol.py b/tests/python/unittest/test_symbol.py
index ee9e9dc..c570325 100644
--- a/tests/python/unittest/test_symbol.py
+++ b/tests/python/unittest/test_symbol.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import copy
 import os
 import re
diff --git a/tests/python/unittest/test_viz.py b/tests/python/unittest/test_viz.py
index 79c8668..73cfa94 100644
--- a/tests/python/unittest/test_viz.py
+++ b/tests/python/unittest/test_viz.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import mxnet as mx
 
 def test_print_summary():
diff --git a/tests/travis/is_core_changed.sh b/tests/travis/is_core_changed.sh
index 1e32d60..7b9eb61 100755
--- a/tests/travis/is_core_changed.sh
+++ b/tests/travis/is_core_changed.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 # this is a util script to test whether the "core" of
 # mxnet has changed. Please modify the regex patterns here
 # to ensure the components are covered if you add new "core"
diff --git a/tests/travis/run_test.sh b/tests/travis/run_test.sh
index cff4196..fb1869f 100755
--- a/tests/travis/run_test.sh
+++ b/tests/travis/run_test.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 if ! tests/travis/is_core_changed.sh
 then
   exit 0
diff --git a/tests/travis/setup.sh b/tests/travis/setup.sh
index ec07100..94d674f 100755
--- a/tests/travis/setup.sh
+++ b/tests/travis/setup.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 if ! tests/travis/is_core_changed.sh
 then
   exit 0
diff --git a/tests/travis/travis_after_failure.sh b/tests/travis/travis_after_failure.sh
index 5a3940a..50754c9 100755
--- a/tests/travis/travis_after_failure.sh
+++ b/tests/travis/travis_after_failure.sh
@@ -1,5 +1,23 @@
 #!/bin/bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 if [ ${TASK} == "r_test" ]; then
     echo "Print the install log..."
     cat mxnet.Rcheck/*.out
diff --git a/tools/accnn/acc_conv.py b/tools/accnn/acc_conv.py
index 095e386..07717c7 100644
--- a/tools/accnn/acc_conv.py
+++ b/tools/accnn/acc_conv.py
@@ -1,77 +1,94 @@
-import numpy as np
-from scipy import linalg as LA
-import mxnet as mx
-import argparse
-import utils
-
-def conv_vh_decomposition(model, args):    
-  W = model.arg_params[args.layer+'_weight'].asnumpy()  
-  N, C, y, x = W.shape
-  b = model.arg_params[args.layer+'_bias'].asnumpy()  
-  W = W.transpose((1,2,0,3)).reshape((C*y, -1))
-
-  U, D, Q = np.linalg.svd(W, full_matrices=False)
-  sqrt_D = LA.sqrtm(np.diag(D))
-  K = args.K  
-  V = U[:,:K].dot(sqrt_D[:K, :K])
-  H = Q.T[:,:K].dot(sqrt_D[:K, :K])  
-  V = V.T.reshape(K, C, y, 1)
-  b_1 = np.zeros((K, ))
-  H = H.reshape(N, x, 1, K).transpose((0,3,2,1))
-  b_2 = b
-
-  W1, b1, W2, b2 = V, b_1, H, b_2  
-  def sym_handle(data, node):
-    kernel = eval(node['param']['kernel'])      
-    pad = eval(node['param']['pad'])            
-    name = node['name']
-
-    name1 = name + '_v'
-    kernel1 = tuple((kernel[0], 1))
-    pad1 = tuple((pad[0], 0))
-    num_filter = W1.shape[0]
-    sym1 = mx.symbol.Convolution(data=data, kernel=kernel1, pad=pad1, num_filter=num_filter, name=name1)
-
-    name2 = name + '_h'
-    kernel2 = tuple((1, kernel[1]))
-    pad2 = tuple((0, pad[1]))
-    num_filter = W2.shape[0]
-    sym2 = mx.symbol.Convolution(data=sym1, kernel=kernel2, pad=pad2, num_filter=num_filter, name=name2)  
-    return sym2
-
-  def arg_handle(arg_shape_dic, arg_params):
-    name1 = args.layer + '_v'
-    name2 = args.layer + '_h'    
-    weight1 = mx.ndarray.array(W1)
-    bias1 = mx.ndarray.array(b1)    
-    weight2 = mx.ndarray.array(W2)
-    bias2 = mx.ndarray.array(b2)    
-    assert weight1.shape == arg_shape_dic[name1+'_weight'], 'weight1'
-    assert weight2.shape == arg_shape_dic[name2+'_weight'], 'weight2'
-    assert bias1.shape == arg_shape_dic[name1+'_bias'], 'bias1'
-    assert bias2.shape == arg_shape_dic[name2+'_bias'], 'bias2'
-
-    arg_params[name1 + '_weight'] = weight1
-    arg_params[name1 + '_bias'] = bias1
-    arg_params[name2 + '_weight'] = weight2
-    arg_params[name2 + '_bias'] = bias2
-    
-  new_model = utils.replace_conv_layer(args.layer, model, sym_handle, arg_handle)
-  return new_model
-
-def main():
-  model = utils.load_model(args)  
-  new_model = conv_vh_decomposition(model, args)
-  new_model.save(args.save_model)
-
-if __name__ == '__main__':
-  parser=argparse.ArgumentParser()
-  parser.add_argument('-m', '--model', help='the model to speed up')
-  parser.add_argument('-g', '--gpus', default='0', help='the gpus to be used in ctx')
-  parser.add_argument('--load-epoch',type=int,default=1)
-  parser.add_argument('--layer')
-  parser.add_argument('--K', type=int)
-  parser.add_argument('--save-model')
-  args = parser.parse_args()
-  main()
-  
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import numpy as np
+from scipy import linalg as LA
+import mxnet as mx
+import argparse
+import utils
+
+def conv_vh_decomposition(model, args):
+  W = model.arg_params[args.layer+'_weight'].asnumpy()
+  N, C, y, x = W.shape
+  b = model.arg_params[args.layer+'_bias'].asnumpy()
+  W = W.transpose((1,2,0,3)).reshape((C*y, -1))
+
+  U, D, Q = np.linalg.svd(W, full_matrices=False)
+  sqrt_D = LA.sqrtm(np.diag(D))
+  K = args.K
+  V = U[:,:K].dot(sqrt_D[:K, :K])
+  H = Q.T[:,:K].dot(sqrt_D[:K, :K])
+  V = V.T.reshape(K, C, y, 1)
+  b_1 = np.zeros((K, ))
+  H = H.reshape(N, x, 1, K).transpose((0,3,2,1))
+  b_2 = b
+
+  W1, b1, W2, b2 = V, b_1, H, b_2
+  def sym_handle(data, node):
+    kernel = eval(node['param']['kernel'])
+    pad = eval(node['param']['pad'])
+    name = node['name']
+
+    name1 = name + '_v'
+    kernel1 = tuple((kernel[0], 1))
+    pad1 = tuple((pad[0], 0))
+    num_filter = W1.shape[0]
+    sym1 = mx.symbol.Convolution(data=data, kernel=kernel1, pad=pad1, num_filter=num_filter, name=name1)
+
+    name2 = name + '_h'
+    kernel2 = tuple((1, kernel[1]))
+    pad2 = tuple((0, pad[1]))
+    num_filter = W2.shape[0]
+    sym2 = mx.symbol.Convolution(data=sym1, kernel=kernel2, pad=pad2, num_filter=num_filter, name=name2)
+    return sym2
+
+  def arg_handle(arg_shape_dic, arg_params):
+    name1 = args.layer + '_v'
+    name2 = args.layer + '_h'
+    weight1 = mx.ndarray.array(W1)
+    bias1 = mx.ndarray.array(b1)
+    weight2 = mx.ndarray.array(W2)
+    bias2 = mx.ndarray.array(b2)
+    assert weight1.shape == arg_shape_dic[name1+'_weight'], 'weight1'
+    assert weight2.shape == arg_shape_dic[name2+'_weight'], 'weight2'
+    assert bias1.shape == arg_shape_dic[name1+'_bias'], 'bias1'
+    assert bias2.shape == arg_shape_dic[name2+'_bias'], 'bias2'
+
+    arg_params[name1 + '_weight'] = weight1
+    arg_params[name1 + '_bias'] = bias1
+    arg_params[name2 + '_weight'] = weight2
+    arg_params[name2 + '_bias'] = bias2
+
+  new_model = utils.replace_conv_layer(args.layer, model, sym_handle, arg_handle)
+  return new_model
+
+def main():
+  model = utils.load_model(args)
+  new_model = conv_vh_decomposition(model, args)
+  new_model.save(args.save_model)
+
+if __name__ == '__main__':
+  parser=argparse.ArgumentParser()
+  parser.add_argument('-m', '--model', help='the model to speed up')
+  parser.add_argument('-g', '--gpus', default='0', help='the gpus to be used in ctx')
+  parser.add_argument('--load-epoch',type=int,default=1)
+  parser.add_argument('--layer')
+  parser.add_argument('--K', type=int)
+  parser.add_argument('--save-model')
+  args = parser.parse_args()
+  main()
+
diff --git a/tools/accnn/acc_fc.py b/tools/accnn/acc_fc.py
index dcc2554..b66b328 100644
--- a/tools/accnn/acc_fc.py
+++ b/tools/accnn/acc_fc.py
@@ -1,57 +1,74 @@
-import numpy as np
-from scipy import linalg as LA
-import mxnet as mx
-import argparse
-import utils
-import pdb
-
-def fc_decomposition(model, args):
-  W = model.arg_params[args.layer+'_weight'].asnumpy()
-  b = model.arg_params[args.layer+'_bias'].asnumpy()
-  W = W.reshape((W.shape[0],-1))
-  b = b.reshape((b.shape[0],-1))  
-  u, s, v = LA.svd(W, full_matrices=False)
-  s = np.diag(s)
-  t = u.dot(s.dot(v))    
-  rk = args.K
-  P = u[:,:rk]
-  Q = s[:rk,:rk].dot(v[:rk,:])
-
-  name1 = args.layer + '_red'
-  name2 = args.layer + '_rec'
-  def sym_handle(data, node):
-    W1, W2 = Q, P
-    sym1 = mx.symbol.FullyConnected(data=data, num_hidden=W1.shape[0], no_bias=True,  name=name1)
-    sym2 = mx.symbol.FullyConnected(data=sym1, num_hidden=W2.shape[0], no_bias=False, name=name2)
-    return sym2
-
-  def arg_handle(arg_shape_dic, arg_params):    
-    W1, W2 = Q, P
-    W1 = W1.reshape(arg_shape_dic[name1+'_weight'])
-    weight1 = mx.ndarray.array(W1)      
-    W2 = W2.reshape(arg_shape_dic[name2+'_weight'])
-    b2 = b.reshape(arg_shape_dic[name2+'_bias'])
-    weight2 = mx.ndarray.array(W2)
-    bias2 = mx.ndarray.array(b2)
-    arg_params[name1 + '_weight'] = weight1
-    arg_params[name2 + '_weight'] = weight2
-    arg_params[name2 + '_bias'] = bias2
-
-  new_model = utils.replace_conv_layer(args.layer, model, sym_handle, arg_handle)
-  return new_model
-
-def main():
-  model = utils.load_model(args)  
-  new_model = fc_decomposition(model, args)
-  new_model.save(args.save_model)
-
-if __name__ == '__main__':
-  parser=argparse.ArgumentParser()
-  parser.add_argument('-m', '--model', help='the model to speed up')
-  parser.add_argument('-g', '--gpus', default='0', help='the gpus to be used in ctx')
-  parser.add_argument('--load-epoch',type=int,default=1)
-  parser.add_argument('--layer')
-  parser.add_argument('--K', type=int)
-  parser.add_argument('--save-model')
-  args = parser.parse_args()
-  main()
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import numpy as np
+from scipy import linalg as LA
+import mxnet as mx
+import argparse
+import utils
+import pdb
+
+def fc_decomposition(model, args):
+  W = model.arg_params[args.layer+'_weight'].asnumpy()
+  b = model.arg_params[args.layer+'_bias'].asnumpy()
+  W = W.reshape((W.shape[0],-1))
+  b = b.reshape((b.shape[0],-1))
+  u, s, v = LA.svd(W, full_matrices=False)
+  s = np.diag(s)
+  t = u.dot(s.dot(v))
+  rk = args.K
+  P = u[:,:rk]
+  Q = s[:rk,:rk].dot(v[:rk,:])
+
+  name1 = args.layer + '_red'
+  name2 = args.layer + '_rec'
+  def sym_handle(data, node):
+    W1, W2 = Q, P
+    sym1 = mx.symbol.FullyConnected(data=data, num_hidden=W1.shape[0], no_bias=True,  name=name1)
+    sym2 = mx.symbol.FullyConnected(data=sym1, num_hidden=W2.shape[0], no_bias=False, name=name2)
+    return sym2
+
+  def arg_handle(arg_shape_dic, arg_params):
+    W1, W2 = Q, P
+    W1 = W1.reshape(arg_shape_dic[name1+'_weight'])
+    weight1 = mx.ndarray.array(W1)
+    W2 = W2.reshape(arg_shape_dic[name2+'_weight'])
+    b2 = b.reshape(arg_shape_dic[name2+'_bias'])
+    weight2 = mx.ndarray.array(W2)
+    bias2 = mx.ndarray.array(b2)
+    arg_params[name1 + '_weight'] = weight1
+    arg_params[name2 + '_weight'] = weight2
+    arg_params[name2 + '_bias'] = bias2
+
+  new_model = utils.replace_conv_layer(args.layer, model, sym_handle, arg_handle)
+  return new_model
+
+def main():
+  model = utils.load_model(args)
+  new_model = fc_decomposition(model, args)
+  new_model.save(args.save_model)
+
+if __name__ == '__main__':
+  parser=argparse.ArgumentParser()
+  parser.add_argument('-m', '--model', help='the model to speed up')
+  parser.add_argument('-g', '--gpus', default='0', help='the gpus to be used in ctx')
+  parser.add_argument('--load-epoch',type=int,default=1)
+  parser.add_argument('--layer')
+  parser.add_argument('--K', type=int)
+  parser.add_argument('--save-model')
+  args = parser.parse_args()
+  main()
diff --git a/tools/accnn/accnn.py b/tools/accnn/accnn.py
index 1af78ef..ec5b101 100644
--- a/tools/accnn/accnn.py
+++ b/tools/accnn/accnn.py
@@ -1,38 +1,55 @@
-import mxnet as mx
-import argparse
-import utils
-import acc_conv
-import acc_fc
-import rank_selection
-import collections
-import json
-import sys
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-m', '--model',  help='the model to speed up')
-parser.add_argument('-g', '--gpus', default='0', help='the gpus will be used, e.g "0,1,2,3"')
-parser.add_argument('--load-epoch',type=int, default=1, help="load the model on an epoch using the model-prefix")
-parser.add_argument('--save-model', type=str, default='new-model', help='output model prefix')
-parser.add_argument('--config', default=None, help='specify the config file')
-parser.add_argument('--ratio', type=float, default=2, help='speed up ratio')
-args = parser.parse_args()
-
-model = utils.load_model(args)
-if args.config:
-  args.config = json.load(open(args.config, 'r'))
-else:
-  config = {}
-  config['conv_params'] = rank_selection.get_ranksel(model, args.ratio)
-  config['fc_params'] = {}
-  json.dump(config, open('config-rksel-%.1f.json'%(args.ratio), 'w'), indent=2)
-  args.config = config
-
-new_model = model
-Args = collections.namedtuple('ConvArgs', 'layer K')
-for layer, K in args.config['conv_params'].items():
-  arg = Args(layer=layer, K=K)  
-  new_model = acc_conv.conv_vh_decomposition(new_model, arg)
-for layer, K in args.config['fc_params'].items():
-  arg = Args(layer=layer, K=K)  
-  new_model = acc_fc.fc_decomposition(new_model, arg)
-new_model.save(args.save_model, 1)
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import mxnet as mx
+import argparse
+import utils
+import acc_conv
+import acc_fc
+import rank_selection
+import collections
+import json
+import sys
+
+parser = argparse.ArgumentParser()
+parser.add_argument('-m', '--model',  help='the model to speed up')
+parser.add_argument('-g', '--gpus', default='0', help='the gpus will be used, e.g "0,1,2,3"')
+parser.add_argument('--load-epoch',type=int, default=1, help="load the model on an epoch using the model-prefix")
+parser.add_argument('--save-model', type=str, default='new-model', help='output model prefix')
+parser.add_argument('--config', default=None, help='specify the config file')
+parser.add_argument('--ratio', type=float, default=2, help='speed up ratio')
+args = parser.parse_args()
+
+model = utils.load_model(args)
+if args.config:
+  args.config = json.load(open(args.config, 'r'))
+else:
+  config = {}
+  config['conv_params'] = rank_selection.get_ranksel(model, args.ratio)
+  config['fc_params'] = {}
+  json.dump(config, open('config-rksel-%.1f.json'%(args.ratio), 'w'), indent=2)
+  args.config = config
+
+new_model = model
+Args = collections.namedtuple('ConvArgs', 'layer K')
+for layer, K in args.config['conv_params'].items():
+  arg = Args(layer=layer, K=K)
+  new_model = acc_conv.conv_vh_decomposition(new_model, arg)
+for layer, K in args.config['fc_params'].items():
+  arg = Args(layer=layer, K=K)
+  new_model = acc_fc.fc_decomposition(new_model, arg)
+new_model.save(args.save_model, 1)
diff --git a/tools/accnn/rank_selection.py b/tools/accnn/rank_selection.py
index ee3eca9..66937b2 100644
--- a/tools/accnn/rank_selection.py
+++ b/tools/accnn/rank_selection.py
@@ -1,87 +1,104 @@
-import numpy as np
-import mxnet as mx
-import json
-import utils
-import math
-import sys
-
-def calc_complexity(ishape, node):
-  y, x = map(int, eval(node['param']['kernel']))
-  N = int(node['param']['num_filter'])
-  C, Y, X = ishape  
-  return x*(N+C)*X*Y, x*y*N*C*X*Y
-
-def calc_eigenvalue(model, node):
-  W = model.arg_params[node['name'] + '_weight'].asnumpy()
-  N, C, y, x = W.shape  
-  W = W.transpose((1,2,0,3)).reshape((C*y, -1))
-  U, D, Q = np.linalg.svd(W, full_matrices=False)
-  return D
-
-def get_ranksel(model, ratio):  
-  conf = json.loads(model.symbol.tojson())
-  _, output_shapes, _ = model.symbol.get_internals().infer_shape(data=(1,3,224,224))
-  out_names = model.symbol.get_internals().list_outputs()    
-  out_shape_dic = dict(zip(out_names, output_shapes)) 
-  nodes = conf['nodes']
-  nodes = utils.topsort(nodes)
-  C = []
-  D = []
-  S = []
-  conv_names = []
-  EC = 0
-  for node in nodes:
-    if node['op'] == 'Convolution':        
-      input_nodes = [nodes[int(j[0])] for j in node['inputs']]
-      data = [input_node for input_node in input_nodes\
-                                  if not input_node['name'].startswith(node['name'])][0]      
-
-      if utils.is_input(data):
-        ishape = (3, 224, 224)
-      else:
-        ishape = out_shape_dic[data['name'] + '_output'][1:]
-      C.append(calc_complexity(ishape, node))
-      D.append(int(node['param']['num_filter']))
-      S.append(calc_eigenvalue(model, node))
-      conv_names.append(node['name'])
-      EC += C[-1][1]  
-  for s in S:
-    ss = sum(s)
-    for i in xrange(1, len(s)):
-      s[i] += s[i-1]      
-  n = len(C)
-  EC /= ratio
-  dp = [{}, {}]
-  dpc = [{} for _ in xrange(n)]
-  now, nxt = 0, 1
-  dp[now][0] = 0
-  for i in xrange(n):
-    dp[nxt] = {}    
-    sys.stdout.flush()
-    for now_c, now_v in dp[now].items():
-      for d in xrange(min(len(S[i]), D[i])):
-        nxt_c = now_c + (d+1)*C[i][0]
-        if nxt_c > EC:
-          continue
-        nxt_v = dp[now][now_c] + math.log(S[i][d])                
-        if dp[nxt].has_key(nxt_c):
-          if nxt_v > dp[nxt][nxt_c]:
-            dp[nxt][nxt_c] = nxt_v
-            dpc[i][nxt_c] = (d,now_c)
-        else:
-          dp[nxt][nxt_c] = nxt_v
-          dpc[i][nxt_c] = (d,now_c)
-    now, nxt = nxt, now    
-  maxv = -1e9
-  target_c = 0
-  for c,v in dp[now].items():
-    assert c <= EC, 'False'    
-    if v > maxv:
-      maxv = v
-      target_c = c  
-  res = [0]*n
-  nowc = target_c
-  for i in xrange(n-1,-1,-1):    
-    res[i] = dpc[i][nowc][0] + 1
-    nowc = dpc[i][nowc][1]
-  return dict(zip(conv_names, res))
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import numpy as np
+import mxnet as mx
+import json
+import utils
+import math
+import sys
+
+def calc_complexity(ishape, node):
+  y, x = map(int, eval(node['param']['kernel']))
+  N = int(node['param']['num_filter'])
+  C, Y, X = ishape
+  return x*(N+C)*X*Y, x*y*N*C*X*Y
+
+def calc_eigenvalue(model, node):
+  W = model.arg_params[node['name'] + '_weight'].asnumpy()
+  N, C, y, x = W.shape
+  W = W.transpose((1,2,0,3)).reshape((C*y, -1))
+  U, D, Q = np.linalg.svd(W, full_matrices=False)
+  return D
+
+def get_ranksel(model, ratio):
+  conf = json.loads(model.symbol.tojson())
+  _, output_shapes, _ = model.symbol.get_internals().infer_shape(data=(1,3,224,224))
+  out_names = model.symbol.get_internals().list_outputs()
+  out_shape_dic = dict(zip(out_names, output_shapes))
+  nodes = conf['nodes']
+  nodes = utils.topsort(nodes)
+  C = []
+  D = []
+  S = []
+  conv_names = []
+  EC = 0
+  for node in nodes:
+    if node['op'] == 'Convolution':
+      input_nodes = [nodes[int(j[0])] for j in node['inputs']]
+      data = [input_node for input_node in input_nodes\
+                                  if not input_node['name'].startswith(node['name'])][0]
+
+      if utils.is_input(data):
+        ishape = (3, 224, 224)
+      else:
+        ishape = out_shape_dic[data['name'] + '_output'][1:]
+      C.append(calc_complexity(ishape, node))
+      D.append(int(node['param']['num_filter']))
+      S.append(calc_eigenvalue(model, node))
+      conv_names.append(node['name'])
+      EC += C[-1][1]
+  for s in S:
+    ss = sum(s)
+    for i in xrange(1, len(s)):
+      s[i] += s[i-1]
+  n = len(C)
+  EC /= ratio
+  dp = [{}, {}]
+  dpc = [{} for _ in xrange(n)]
+  now, nxt = 0, 1
+  dp[now][0] = 0
+  for i in xrange(n):
+    dp[nxt] = {}
+    sys.stdout.flush()
+    for now_c, now_v in dp[now].items():
+      for d in xrange(min(len(S[i]), D[i])):
+        nxt_c = now_c + (d+1)*C[i][0]
+        if nxt_c > EC:
+          continue
+        nxt_v = dp[now][now_c] + math.log(S[i][d])
+        if dp[nxt].has_key(nxt_c):
+          if nxt_v > dp[nxt][nxt_c]:
+            dp[nxt][nxt_c] = nxt_v
+            dpc[i][nxt_c] = (d,now_c)
+        else:
+          dp[nxt][nxt_c] = nxt_v
+          dpc[i][nxt_c] = (d,now_c)
+    now, nxt = nxt, now
+  maxv = -1e9
+  target_c = 0
+  for c,v in dp[now].items():
+    assert c <= EC, 'False'
+    if v > maxv:
+      maxv = v
+      target_c = c
+  res = [0]*n
+  nowc = target_c
+  for i in xrange(n-1,-1,-1):
+    res[i] = dpc[i][nowc][0] + 1
+    nowc = dpc[i][nowc][1]
+  return dict(zip(conv_names, res))
diff --git a/tools/accnn/utils.py b/tools/accnn/utils.py
index 4c0290a..25fb188 100644
--- a/tools/accnn/utils.py
+++ b/tools/accnn/utils.py
@@ -1,101 +1,118 @@
-import mxnet as mx
-import copy
-import json
-import ast
-
-def load_model(args):
-  devs = mx.cpu() if args.gpus == None else [mx.gpu(int(i)) for i in args.gpus.split(',')]  
-  return mx.model.FeedForward.load(args.model, args.load_epoch, ctx=devs)
-
-def topsort(nodes):
-  n = len(nodes)
-  deg = [0]*n
-  g = [[] for _ in xrange(n)]  
-  for i,node in enumerate(nodes):
-    if node.has_key('inputs'):
-      for j in node['inputs']:
-        deg[i] += 1
-        g[j[0]].append(i)        
-  from collections import deque
-  q = deque([i for i in xrange(n) if deg[i]==0])
-  res = []  
-  for its in xrange(n):
-    i = q.popleft()        
-    res.append(nodes[i])
-    for j in g[i]:
-      deg[j] -= 1
-      if deg[j] == 0:
-        q.append(j)  
-  new_ids=dict([(node['name'],i) for i,node in enumerate(res)])
-  for node in res:
-    if node.has_key('inputs'):
-      for j in node['inputs']:
-        j[0]=new_ids[nodes[j[0]]['name']]
-  return res
-
-def is_input(node):
-  name = node['name']
-  return len(node['inputs']) == 0 and ('weight' not in name) and ('bias' not in name) and ('label' not in name)
-
-def sym_factory(node, data):
-  name = node['name']
-  params = {}
-  if 'param' in node:    
-    for k, v in node['param'].items():
-      try:
-        params[k] = ast.literal_eval(v)
-      except ValueError, e:
-        params[k] = v
-  return getattr(mx.symbol, node['op'])(data=data, name=name, **params)
-
-def replace_conv_layer(layer_name, old_model, sym_handle, arg_handle):
-  conf = json.loads(old_model.symbol.tojson())
-  sym_dict = {}
-  nodes = conf['nodes']
-  nodes = topsort(nodes)
-  res_sym = None
-  new_model = old_model  
-  for i,node in enumerate(nodes):
-    sym = None    
-    if is_input(node):
-      sym = mx.symbol.Variable(name='data')
-    elif node['op'] != 'null':
-      input_nodes = [nodes[int(j[0])] for j in node['inputs']]
-      datas = [input_node['name'] for input_node in input_nodes\
-                                  if not input_node['name'].startswith(node['name'])]
-      try:
-        data=sym_dict[datas[0]]
-      except Exception, e:
-        print 'can not find symbol %s'%(datas[0])
-        raise e    
-      if node['name'] == layer_name:
-        sym = sym_handle(data, node)          
-      else:
-        sym = sym_factory(node, data)        
-    if sym:
-      sym_dict[node['name']] = sym
-      res_sym = sym
-
-  arg_params = copy.deepcopy(old_model.arg_params)
-  if layer_name:  
-    arg_shapes, _, _ = res_sym.infer_shape(data=(1,3,224,224))
-    arg_names = res_sym.list_arguments()
-    arg_shape_dic = dict(zip(arg_names, arg_shapes))
-    try:
-      arg_handle(arg_shape_dic, arg_params)
-    except Exception, e:
-      raise Exception('Exception in arg_handle')
-
-  new_model = mx.model.FeedForward(
-                symbol=res_sym,
-                ctx=old_model.ctx,
-                num_epoch=1,                                
-                epoch_size=old_model.epoch_size,
-                optimizer='sgd',
-                initializer=old_model.initializer,
-                numpy_batch_size=old_model.numpy_batch_size,
-                arg_params=arg_params,
-                aux_params=old_model.aux_params,
-                allow_extra_params=True,
-                begin_epoch=old_model.begin_epoch)  
-  return new_model
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import mxnet as mx
+import copy
+import json
+import ast
+
+def load_model(args):
+  devs = mx.cpu() if args.gpus == None else [mx.gpu(int(i)) for i in args.gpus.split(',')]
+  return mx.model.FeedForward.load(args.model, args.load_epoch, ctx=devs)
+
+def topsort(nodes):
+  n = len(nodes)
+  deg = [0]*n
+  g = [[] for _ in xrange(n)]
+  for i,node in enumerate(nodes):
+    if node.has_key('inputs'):
+      for j in node['inputs']:
+        deg[i] += 1
+        g[j[0]].append(i)
+  from collections import deque
+  q = deque([i for i in xrange(n) if deg[i]==0])
+  res = []
+  for its in xrange(n):
+    i = q.popleft()
+    res.append(nodes[i])
+    for j in g[i]:
+      deg[j] -= 1
+      if deg[j] == 0:
+        q.append(j)
+  new_ids=dict([(node['name'],i) for i,node in enumerate(res)])
+  for node in res:
+    if node.has_key('inputs'):
+      for j in node['inputs']:
+        j[0]=new_ids[nodes[j[0]]['name']]
+  return res
+
+def is_input(node):
+  name = node['name']
+  return len(node['inputs']) == 0 and ('weight' not in name) and ('bias' not in name) and ('label' not in name)
+
+def sym_factory(node, data):
+  name = node['name']
+  params = {}
+  if 'param' in node:
+    for k, v in node['param'].items():
+      try:
+        params[k] = ast.literal_eval(v)
+      except ValueError, e:
+        params[k] = v
+  return getattr(mx.symbol, node['op'])(data=data, name=name, **params)
+
+def replace_conv_layer(layer_name, old_model, sym_handle, arg_handle):
+  conf = json.loads(old_model.symbol.tojson())
+  sym_dict = {}
+  nodes = conf['nodes']
+  nodes = topsort(nodes)
+  res_sym = None
+  new_model = old_model
+  for i,node in enumerate(nodes):
+    sym = None
+    if is_input(node):
+      sym = mx.symbol.Variable(name='data')
+    elif node['op'] != 'null':
+      input_nodes = [nodes[int(j[0])] for j in node['inputs']]
+      datas = [input_node['name'] for input_node in input_nodes\
+                                  if not input_node['name'].startswith(node['name'])]
+      try:
+        data=sym_dict[datas[0]]
+      except Exception, e:
+        print 'can not find symbol %s'%(datas[0])
+        raise e
+      if node['name'] == layer_name:
+        sym = sym_handle(data, node)
+      else:
+        sym = sym_factory(node, data)
+    if sym:
+      sym_dict[node['name']] = sym
+      res_sym = sym
+
+  arg_params = copy.deepcopy(old_model.arg_params)
+  if layer_name:
+    arg_shapes, _, _ = res_sym.infer_shape(data=(1,3,224,224))
+    arg_names = res_sym.list_arguments()
+    arg_shape_dic = dict(zip(arg_names, arg_shapes))
+    try:
+      arg_handle(arg_shape_dic, arg_params)
+    except Exception, e:
+      raise Exception('Exception in arg_handle')
+
+  new_model = mx.model.FeedForward(
+                symbol=res_sym,
+                ctx=old_model.ctx,
+                num_epoch=1,
+                epoch_size=old_model.epoch_size,
+                optimizer='sgd',
+                initializer=old_model.initializer,
+                numpy_batch_size=old_model.numpy_batch_size,
+                arg_params=arg_params,
+                aux_params=old_model.aux_params,
+                allow_extra_params=True,
+                begin_epoch=old_model.begin_epoch)
+  return new_model
diff --git a/tools/bandwidth/measure.py b/tools/bandwidth/measure.py
index 0cac3de..66ef737 100644
--- a/tools/bandwidth/measure.py
+++ b/tools/bandwidth/measure.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os, sys
 curr_path = os.path.abspath(os.path.dirname(__file__))
 sys.path.insert(0, os.path.join(curr_path, "../../python"))
diff --git a/tools/bandwidth/test_measure.py b/tools/bandwidth/test_measure.py
index b490af1..375290f 100644
--- a/tools/bandwidth/test_measure.py
+++ b/tools/bandwidth/test_measure.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """
 test measure.py
 """
diff --git a/tools/caffe_converter/caffe_parser.py b/tools/caffe_converter/caffe_parser.py
index d4abc8e..2ff490c 100644
--- a/tools/caffe_converter/caffe_parser.py
+++ b/tools/caffe_converter/caffe_parser.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """Parse caffe's protobuf
 """
 import re
diff --git a/tools/caffe_converter/caffe_proto_utils.py b/tools/caffe_converter/caffe_proto_utils.py
index 940e8a4..8d61834 100644
--- a/tools/caffe_converter/caffe_proto_utils.py
+++ b/tools/caffe_converter/caffe_proto_utils.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """Helper functions for parsing caffe prototxt into a workable DAG
 """
 
diff --git a/tools/caffe_converter/compare_layers.py b/tools/caffe_converter/compare_layers.py
index bb4451d..12568ed 100644
--- a/tools/caffe_converter/compare_layers.py
+++ b/tools/caffe_converter/compare_layers.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """Test converted models layer by layer
 """
 import os
diff --git a/tools/caffe_converter/convert_caffe_modelzoo.py b/tools/caffe_converter/convert_caffe_modelzoo.py
index cb6d3ba..ab9042f 100644
--- a/tools/caffe_converter/convert_caffe_modelzoo.py
+++ b/tools/caffe_converter/convert_caffe_modelzoo.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """Convert Caffe's modelzoo
 """
 import os
diff --git a/tools/caffe_converter/convert_mean.py b/tools/caffe_converter/convert_mean.py
index 69cf50c..3b6dc42 100644
--- a/tools/caffe_converter/convert_mean.py
+++ b/tools/caffe_converter/convert_mean.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """Convert caffe mean
 """
 import argparse
diff --git a/tools/caffe_converter/convert_model.py b/tools/caffe_converter/convert_model.py
index d1e4cd0..c04a2aa 100644
--- a/tools/caffe_converter/convert_model.py
+++ b/tools/caffe_converter/convert_model.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """Convert caffe model
 """
 from __future__ import print_function
diff --git a/tools/caffe_converter/convert_symbol.py b/tools/caffe_converter/convert_symbol.py
index 100a64f..dde3c26 100644
--- a/tools/caffe_converter/convert_symbol.py
+++ b/tools/caffe_converter/convert_symbol.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """Convert caffe prototxt to symbol
 """
 from __future__ import print_function
diff --git a/tools/caffe_converter/run.sh b/tools/caffe_converter/run.sh
index 65876cc..bdf5481 100755
--- a/tools/caffe_converter/run.sh
+++ b/tools/caffe_converter/run.sh
@@ -1,4 +1,22 @@
 #!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 if [[ $# -ne 1 ]]; then
     echo "usage: $0 model_name"
     echo "   model_name: [vgg16|vgg19], ..."
diff --git a/tools/caffe_converter/test_converter.py b/tools/caffe_converter/test_converter.py
index 7572d29..db17c64 100644
--- a/tools/caffe_converter/test_converter.py
+++ b/tools/caffe_converter/test_converter.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """Test converted models
 """
 import os
diff --git a/tools/im2rec.cc b/tools/im2rec.cc
index a7ccfb6..8568140 100644
--- a/tools/im2rec.cc
+++ b/tools/im2rec.cc
@@ -1,5 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 /*!
- *  Copyright (c) 2015 by Contributors
  * \file im2rec.cc
  * \brief convert images into image recordio format
  *  Image Record Format: zeropad[64bit] imid[64bit] img-binary-content
diff --git a/tools/im2rec.py b/tools/im2rec.py
index 30ee3ec..ec6de19 100644
--- a/tools/im2rec.py
+++ b/tools/im2rec.py
@@ -1,307 +1,324 @@
-# -*- coding: utf-8 -*-
-from __future__ import print_function
-import os
-import sys
-
-curr_path = os.path.abspath(os.path.dirname(__file__))
-sys.path.append(os.path.join(curr_path, "../python"))
-import mxnet as mx
-import random
-import argparse
-import cv2
-import time
-import traceback
-from builtins import range
-
-try:
-    import multiprocessing
-except ImportError:
-    multiprocessing = None
-
-def list_image(root, recursive, exts):
-    i = 0
-    if recursive:
-        cat = {}
-        for path, dirs, files in os.walk(root, followlinks=True):
-            dirs.sort()
-            files.sort()
-            for fname in files:
-                fpath = os.path.join(path, fname)
-                suffix = os.path.splitext(fname)[1].lower()
-                if os.path.isfile(fpath) and (suffix in exts):
-                    if path not in cat:
-                        cat[path] = len(cat)
-                    yield (i, os.path.relpath(fpath, root), cat[path])
-                    i += 1
-        for k, v in sorted(cat.items(), key=lambda x: x[1]):
-            print(os.path.relpath(k, root), v)
-    else:
-        for fname in sorted(os.listdir(root)):
-            fpath = os.path.join(root, fname)
-            suffix = os.path.splitext(fname)[1].lower()
-            if os.path.isfile(fpath) and (suffix in exts):
-                yield (i, os.path.relpath(fpath, root), 0)
-                i += 1
-
-def write_list(path_out, image_list):
-    with open(path_out, 'w') as fout:
-        for i, item in enumerate(image_list):
-            line = '%d\t' % item[0]
-            for j in item[2:]:
-                line += '%f\t' % j
-            line += '%s\n' % item[1]
-            fout.write(line)
-
-def make_list(args):
-    image_list = list_image(args.root, args.recursive, args.exts)
-    image_list = list(image_list)
-    if args.shuffle is True:
-        random.seed(100)
-        random.shuffle(image_list)
-    N = len(image_list)
-    chunk_size = (N + args.chunks - 1) // args.chunks
-    for i in range(args.chunks):
-        chunk = image_list[i * chunk_size:(i + 1) * chunk_size]
-        if args.chunks > 1:
-            str_chunk = '_%d' % i
-        else:
-            str_chunk = ''
-        sep = int(chunk_size * args.train_ratio)
-        sep_test = int(chunk_size * args.test_ratio)
-        if args.train_ratio == 1.0:
-            write_list(args.prefix + str_chunk + '.lst', chunk)
-        else:
-            if args.test_ratio:
-                write_list(args.prefix + str_chunk + '_test.lst', chunk[:sep_test])
-            if args.train_ratio + args.test_ratio < 1.0:
-                write_list(args.prefix + str_chunk + '_val.lst', chunk[sep_test + sep:])
-            write_list(args.prefix + str_chunk + '_train.lst', chunk[sep_test:sep_test + sep])
-
-def read_list(path_in):
-    with open(path_in) as fin:
-        while True:
-            line = fin.readline()
-            if not line:
-                break
-            line = [i.strip() for i in line.strip().split('\t')]
-            line_len = len(line)
-            if line_len < 3:
-                print('lst should at least has three parts, but only has %s parts for %s' %(line_len, line))
-                continue
-            try:
-                item = [int(line[0])] + [line[-1]] + [float(i) for i in line[1:-1]]
-            except Exception as e:
-                print('Parsing lst met error for %s, detail: %s' %(line, e))
-                continue
-            yield item
-
-def image_encode(args, i, item, q_out):
-    fullpath = os.path.join(args.root, item[1])
-
-    if len(item) > 3 and args.pack_label:
-        header = mx.recordio.IRHeader(0, item[2:], item[0], 0)
-    else:
-        header = mx.recordio.IRHeader(0, item[2], item[0], 0)
-
-    if args.pass_through:
-        try:
-            with open(fullpath, 'rb') as fin:
-                img = fin.read()
-            s = mx.recordio.pack(header, img)
-            q_out.put((i, s, item))
-        except Exception as e:
-            traceback.print_exc()
-            print('pack_img error:', item[1], e)
-            q_out.put((i, None, item))
-        return
-
-    try:
-        img = cv2.imread(fullpath, args.color)
-    except:
-        traceback.print_exc()
-        print('imread error trying to load file: %s ' % fullpath)
-        q_out.put((i, None, item))
-        return
-    if img is None:
-        print('imread read blank (None) image for file: %s' % fullpath)
-        q_out.put((i, None, item))
-        return
-    if args.center_crop:
-        if img.shape[0] > img.shape[1]:
-            margin = (img.shape[0] - img.shape[1]) // 2;
-            img = img[margin:margin + img.shape[1], :]
-        else:
-            margin = (img.shape[1] - img.shape[0]) // 2;
-            img = img[:, margin:margin + img.shape[0]]
-    if args.resize:
-        if img.shape[0] > img.shape[1]:
-            newsize = (args.resize, img.shape[0] * args.resize // img.shape[1])
-        else:
-            newsize = (img.shape[1] * args.resize // img.shape[0], args.resize)
-        img = cv2.resize(img, newsize)
-
-    try:
-        s = mx.recordio.pack_img(header, img, quality=args.quality, img_fmt=args.encoding)
-        q_out.put((i, s, item))
-    except Exception as e:
-        traceback.print_exc()
-        print('pack_img error on file: %s' % fullpath, e)
-        q_out.put((i, None, item))
-        return
-
-def read_worker(args, q_in, q_out):
-    while True:
-        deq = q_in.get()
-        if deq is None:
-            break
-        i, item = deq
-        image_encode(args, i, item, q_out)
-
-def write_worker(q_out, fname, working_dir):
-    pre_time = time.time()
-    count = 0
-    fname = os.path.basename(fname)
-    fname_rec = os.path.splitext(fname)[0] + '.rec'
-    fname_idx = os.path.splitext(fname)[0] + '.idx'
-    record = mx.recordio.MXIndexedRecordIO(os.path.join(working_dir, fname_idx),
-                                           os.path.join(working_dir, fname_rec), 'w')
-    buf = {}
-    more = True
-    while more:
-        deq = q_out.get()
-        if deq is not None:
-            i, s, item = deq
-            buf[i] = (s, item)
-        else:
-            more = False
-        while count in buf:
-            s, item = buf[count]
-            del buf[count]
-            if s is not None:
-                record.write_idx(item[0], s)
-
-            if count % 1000 == 0:
-                cur_time = time.time()
-                print('time:', cur_time - pre_time, ' count:', count)
-                pre_time = cur_time
-            count += 1
-
-def parse_args():
-    parser = argparse.ArgumentParser(
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-        description='Create an image list or \
-        make a record database by reading from an image list')
-    parser.add_argument('prefix', help='prefix of input/output lst and rec files.')
-    parser.add_argument('root', help='path to folder containing images.')
-
-    cgroup = parser.add_argument_group('Options for creating image lists')
-    cgroup.add_argument('--list', type=bool, default=False,
-                        help='If this is set im2rec will create image list(s) by traversing root folder\
-        and output to <prefix>.lst.\
-        Otherwise im2rec will read <prefix>.lst and create a database at <prefix>.rec')
-    cgroup.add_argument('--exts', nargs='+', default=['.jpeg', '.jpg'],
-                        help='list of acceptable image extensions.')
-    cgroup.add_argument('--chunks', type=int, default=1, help='number of chunks.')
-    cgroup.add_argument('--train-ratio', type=float, default=1.0,
-                        help='Ratio of images to use for training.')
-    cgroup.add_argument('--test-ratio', type=float, default=0,
-                        help='Ratio of images to use for testing.')
-    cgroup.add_argument('--recursive', type=bool, default=False,
-                        help='If true recursively walk through subdirs and assign an unique label\
-        to images in each folder. Otherwise only include images in the root folder\
-        and give them label 0.')
-    cgroup.add_argument('--shuffle', type=bool, default=True, help='If this is set as True, \
-        im2rec will randomize the image order in <prefix>.lst')
-
-    rgroup = parser.add_argument_group('Options for creating database')
-    rgroup.add_argument('--pass-through', type=bool, default=False,
-                        help='whether to skip transformation and save image as is')
-    rgroup.add_argument('--resize', type=int, default=0,
-                        help='resize the shorter edge of image to the newsize, original images will\
-        be packed by default.')
-    rgroup.add_argument('--center-crop', type=bool, default=False,
-                        help='specify whether to crop the center image to make it rectangular.')
-    rgroup.add_argument('--quality', type=int, default=95,
-                        help='JPEG quality for encoding, 1-100; or PNG compression for encoding, 1-9')
-    rgroup.add_argument('--num-thread', type=int, default=1,
-                        help='number of thread to use for encoding. order of images will be different\
-        from the input list if >1. the input list will be modified to match the\
-        resulting order.')
-    rgroup.add_argument('--color', type=int, default=1, choices=[-1, 0, 1],
-                        help='specify the color mode of the loaded image.\
-        1: Loads a color image. Any transparency of image will be neglected. It is the default flag.\
-        0: Loads image in grayscale mode.\
-        -1:Loads image as such including alpha channel.')
-    rgroup.add_argument('--encoding', type=str, default='.jpg', choices=['.jpg', '.png'],
-                        help='specify the encoding of the images.')
-    rgroup.add_argument('--pack-label', type=bool, default=False,
-        help='Whether to also pack multi dimensional label in the record file')
-    args = parser.parse_args()
-    args.prefix = os.path.abspath(args.prefix)
-    args.root = os.path.abspath(args.root)
-    return args
-
-if __name__ == '__main__':
-    args = parse_args()
-    if args.list:
-        make_list(args)
-    else:
-        if os.path.isdir(args.prefix):
-            working_dir = args.prefix
-        else:
-            working_dir = os.path.dirname(args.prefix)
-        files = [os.path.join(working_dir, fname) for fname in os.listdir(working_dir)
-                    if os.path.isfile(os.path.join(working_dir, fname))]
-        count = 0
-        for fname in files:
-            if fname.startswith(args.prefix) and fname.endswith('.lst'):
-                print('Creating .rec file from', fname, 'in', working_dir)
-                count += 1
-                image_list = read_list(fname)
-                # -- write_record -- #
-                if args.num_thread > 1 and multiprocessing is not None:
-                    q_in = [multiprocessing.Queue(1024) for i in range(args.num_thread)]
-                    q_out = multiprocessing.Queue(1024)
-                    read_process = [multiprocessing.Process(target=read_worker, args=(args, q_in[i], q_out)) \
-                                    for i in range(args.num_thread)]
-                    for p in read_process:
-                        p.start()
-                    write_process = multiprocessing.Process(target=write_worker, args=(q_out, fname, working_dir))
-                    write_process.start()
-
-                    for i, item in enumerate(image_list):
-                        q_in[i % len(q_in)].put((i, item))
-                    for q in q_in:
-                        q.put(None)
-                    for p in read_process:
-                        p.join()
-
-                    q_out.put(None)
-                    write_process.join()
-                else:
-                    print('multiprocessing not available, fall back to single threaded encoding')
-                    try:
-                        import Queue as queue
-                    except ImportError:
-                        import queue
-                    q_out = queue.Queue()
-                    fname = os.path.basename(fname)
-                    fname_rec = os.path.splitext(fname)[0] + '.rec'
-                    fname_idx = os.path.splitext(fname)[0] + '.idx'
-                    record = mx.recordio.MXIndexedRecordIO(os.path.join(working_dir, fname_idx),
-                                                           os.path.join(working_dir, fname_rec), 'w')
-                    cnt = 0
-                    pre_time = time.time()
-                    for i, item in enumerate(image_list):
-                        image_encode(args, i, item, q_out)
-                        if q_out.empty():
-                            continue
-                        _, s, _ = q_out.get()
-                        record.write_idx(item[0], s)
-                        if cnt % 1000 == 0:
-                            cur_time = time.time()
-                            print('time:', cur_time - pre_time, ' count:', cnt)
-                            pre_time = cur_time
-                        cnt += 1
-        if not count:
-            print('Did not find and list file with prefix %s'%args.prefix)
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# -*- coding: utf-8 -*-
+from __future__ import print_function
+import os
+import sys
+
+curr_path = os.path.abspath(os.path.dirname(__file__))
+sys.path.append(os.path.join(curr_path, "../python"))
+import mxnet as mx
+import random
+import argparse
+import cv2
+import time
+import traceback
+from builtins import range
+
+try:
+    import multiprocessing
+except ImportError:
+    multiprocessing = None
+
+def list_image(root, recursive, exts):
+    i = 0
+    if recursive:
+        cat = {}
+        for path, dirs, files in os.walk(root, followlinks=True):
+            dirs.sort()
+            files.sort()
+            for fname in files:
+                fpath = os.path.join(path, fname)
+                suffix = os.path.splitext(fname)[1].lower()
+                if os.path.isfile(fpath) and (suffix in exts):
+                    if path not in cat:
+                        cat[path] = len(cat)
+                    yield (i, os.path.relpath(fpath, root), cat[path])
+                    i += 1
+        for k, v in sorted(cat.items(), key=lambda x: x[1]):
+            print(os.path.relpath(k, root), v)
+    else:
+        for fname in sorted(os.listdir(root)):
+            fpath = os.path.join(root, fname)
+            suffix = os.path.splitext(fname)[1].lower()
+            if os.path.isfile(fpath) and (suffix in exts):
+                yield (i, os.path.relpath(fpath, root), 0)
+                i += 1
+
+def write_list(path_out, image_list):
+    with open(path_out, 'w') as fout:
+        for i, item in enumerate(image_list):
+            line = '%d\t' % item[0]
+            for j in item[2:]:
+                line += '%f\t' % j
+            line += '%s\n' % item[1]
+            fout.write(line)
+
+def make_list(args):
+    image_list = list_image(args.root, args.recursive, args.exts)
+    image_list = list(image_list)
+    if args.shuffle is True:
+        random.seed(100)
+        random.shuffle(image_list)
+    N = len(image_list)
+    chunk_size = (N + args.chunks - 1) // args.chunks
+    for i in range(args.chunks):
+        chunk = image_list[i * chunk_size:(i + 1) * chunk_size]
+        if args.chunks > 1:
+            str_chunk = '_%d' % i
+        else:
+            str_chunk = ''
+        sep = int(chunk_size * args.train_ratio)
+        sep_test = int(chunk_size * args.test_ratio)
+        if args.train_ratio == 1.0:
+            write_list(args.prefix + str_chunk + '.lst', chunk)
+        else:
+            if args.test_ratio:
+                write_list(args.prefix + str_chunk + '_test.lst', chunk[:sep_test])
+            if args.train_ratio + args.test_ratio < 1.0:
+                write_list(args.prefix + str_chunk + '_val.lst', chunk[sep_test + sep:])
+            write_list(args.prefix + str_chunk + '_train.lst', chunk[sep_test:sep_test + sep])
+
+def read_list(path_in):
+    with open(path_in) as fin:
+        while True:
+            line = fin.readline()
+            if not line:
+                break
+            line = [i.strip() for i in line.strip().split('\t')]
+            line_len = len(line)
+            if line_len < 3:
+                print('lst should at least has three parts, but only has %s parts for %s' %(line_len, line))
+                continue
+            try:
+                item = [int(line[0])] + [line[-1]] + [float(i) for i in line[1:-1]]
+            except Exception as e:
+                print('Parsing lst met error for %s, detail: %s' %(line, e))
+                continue
+            yield item
+
+def image_encode(args, i, item, q_out):
+    fullpath = os.path.join(args.root, item[1])
+
+    if len(item) > 3 and args.pack_label:
+        header = mx.recordio.IRHeader(0, item[2:], item[0], 0)
+    else:
+        header = mx.recordio.IRHeader(0, item[2], item[0], 0)
+
+    if args.pass_through:
+        try:
+            with open(fullpath, 'rb') as fin:
+                img = fin.read()
+            s = mx.recordio.pack(header, img)
+            q_out.put((i, s, item))
+        except Exception as e:
+            traceback.print_exc()
+            print('pack_img error:', item[1], e)
+            q_out.put((i, None, item))
+        return
+
+    try:
+        img = cv2.imread(fullpath, args.color)
+    except:
+        traceback.print_exc()
+        print('imread error trying to load file: %s ' % fullpath)
+        q_out.put((i, None, item))
+        return
+    if img is None:
+        print('imread read blank (None) image for file: %s' % fullpath)
+        q_out.put((i, None, item))
+        return
+    if args.center_crop:
+        if img.shape[0] > img.shape[1]:
+            margin = (img.shape[0] - img.shape[1]) // 2;
+            img = img[margin:margin + img.shape[1], :]
+        else:
+            margin = (img.shape[1] - img.shape[0]) // 2;
+            img = img[:, margin:margin + img.shape[0]]
+    if args.resize:
+        if img.shape[0] > img.shape[1]:
+            newsize = (args.resize, img.shape[0] * args.resize // img.shape[1])
+        else:
+            newsize = (img.shape[1] * args.resize // img.shape[0], args.resize)
+        img = cv2.resize(img, newsize)
+
+    try:
+        s = mx.recordio.pack_img(header, img, quality=args.quality, img_fmt=args.encoding)
+        q_out.put((i, s, item))
+    except Exception as e:
+        traceback.print_exc()
+        print('pack_img error on file: %s' % fullpath, e)
+        q_out.put((i, None, item))
+        return
+
+def read_worker(args, q_in, q_out):
+    while True:
+        deq = q_in.get()
+        if deq is None:
+            break
+        i, item = deq
+        image_encode(args, i, item, q_out)
+
+def write_worker(q_out, fname, working_dir):
+    pre_time = time.time()
+    count = 0
+    fname = os.path.basename(fname)
+    fname_rec = os.path.splitext(fname)[0] + '.rec'
+    fname_idx = os.path.splitext(fname)[0] + '.idx'
+    record = mx.recordio.MXIndexedRecordIO(os.path.join(working_dir, fname_idx),
+                                           os.path.join(working_dir, fname_rec), 'w')
+    buf = {}
+    more = True
+    while more:
+        deq = q_out.get()
+        if deq is not None:
+            i, s, item = deq
+            buf[i] = (s, item)
+        else:
+            more = False
+        while count in buf:
+            s, item = buf[count]
+            del buf[count]
+            if s is not None:
+                record.write_idx(item[0], s)
+
+            if count % 1000 == 0:
+                cur_time = time.time()
+                print('time:', cur_time - pre_time, ' count:', count)
+                pre_time = cur_time
+            count += 1
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        description='Create an image list or \
+        make a record database by reading from an image list')
+    parser.add_argument('prefix', help='prefix of input/output lst and rec files.')
+    parser.add_argument('root', help='path to folder containing images.')
+
+    cgroup = parser.add_argument_group('Options for creating image lists')
+    cgroup.add_argument('--list', type=bool, default=False,
+                        help='If this is set im2rec will create image list(s) by traversing root folder\
+        and output to <prefix>.lst.\
+        Otherwise im2rec will read <prefix>.lst and create a database at <prefix>.rec')
+    cgroup.add_argument('--exts', nargs='+', default=['.jpeg', '.jpg'],
+                        help='list of acceptable image extensions.')
+    cgroup.add_argument('--chunks', type=int, default=1, help='number of chunks.')
+    cgroup.add_argument('--train-ratio', type=float, default=1.0,
+                        help='Ratio of images to use for training.')
+    cgroup.add_argument('--test-ratio', type=float, default=0,
+                        help='Ratio of images to use for testing.')
+    cgroup.add_argument('--recursive', type=bool, default=False,
+                        help='If true recursively walk through subdirs and assign an unique label\
+        to images in each folder. Otherwise only include images in the root folder\
+        and give them label 0.')
+    cgroup.add_argument('--shuffle', type=bool, default=True, help='If this is set as True, \
+        im2rec will randomize the image order in <prefix>.lst')
+
+    rgroup = parser.add_argument_group('Options for creating database')
+    rgroup.add_argument('--pass-through', type=bool, default=False,
+                        help='whether to skip transformation and save image as is')
+    rgroup.add_argument('--resize', type=int, default=0,
+                        help='resize the shorter edge of image to the newsize, original images will\
+        be packed by default.')
+    rgroup.add_argument('--center-crop', type=bool, default=False,
+                        help='specify whether to crop the center image to make it rectangular.')
+    rgroup.add_argument('--quality', type=int, default=95,
+                        help='JPEG quality for encoding, 1-100; or PNG compression for encoding, 1-9')
+    rgroup.add_argument('--num-thread', type=int, default=1,
+                        help='number of thread to use for encoding. order of images will be different\
+        from the input list if >1. the input list will be modified to match the\
+        resulting order.')
+    rgroup.add_argument('--color', type=int, default=1, choices=[-1, 0, 1],
+                        help='specify the color mode of the loaded image.\
+        1: Loads a color image. Any transparency of image will be neglected. It is the default flag.\
+        0: Loads image in grayscale mode.\
+        -1:Loads image as such including alpha channel.')
+    rgroup.add_argument('--encoding', type=str, default='.jpg', choices=['.jpg', '.png'],
+                        help='specify the encoding of the images.')
+    rgroup.add_argument('--pack-label', type=bool, default=False,
+        help='Whether to also pack multi dimensional label in the record file')
+    args = parser.parse_args()
+    args.prefix = os.path.abspath(args.prefix)
+    args.root = os.path.abspath(args.root)
+    return args
+
+if __name__ == '__main__':
+    args = parse_args()
+    if args.list:
+        make_list(args)
+    else:
+        if os.path.isdir(args.prefix):
+            working_dir = args.prefix
+        else:
+            working_dir = os.path.dirname(args.prefix)
+        files = [os.path.join(working_dir, fname) for fname in os.listdir(working_dir)
+                    if os.path.isfile(os.path.join(working_dir, fname))]
+        count = 0
+        for fname in files:
+            if fname.startswith(args.prefix) and fname.endswith('.lst'):
+                print('Creating .rec file from', fname, 'in', working_dir)
+                count += 1
+                image_list = read_list(fname)
+                # -- write_record -- #
+                if args.num_thread > 1 and multiprocessing is not None:
+                    q_in = [multiprocessing.Queue(1024) for i in range(args.num_thread)]
+                    q_out = multiprocessing.Queue(1024)
+                    read_process = [multiprocessing.Process(target=read_worker, args=(args, q_in[i], q_out)) \
+                                    for i in range(args.num_thread)]
+                    for p in read_process:
+                        p.start()
+                    write_process = multiprocessing.Process(target=write_worker, args=(q_out, fname, working_dir))
+                    write_process.start()
+
+                    for i, item in enumerate(image_list):
+                        q_in[i % len(q_in)].put((i, item))
+                    for q in q_in:
+                        q.put(None)
+                    for p in read_process:
+                        p.join()
+
+                    q_out.put(None)
+                    write_process.join()
+                else:
+                    print('multiprocessing not available, fall back to single threaded encoding')
+                    try:
+                        import Queue as queue
+                    except ImportError:
+                        import queue
+                    q_out = queue.Queue()
+                    fname = os.path.basename(fname)
+                    fname_rec = os.path.splitext(fname)[0] + '.rec'
+                    fname_idx = os.path.splitext(fname)[0] + '.idx'
+                    record = mx.recordio.MXIndexedRecordIO(os.path.join(working_dir, fname_idx),
+                                                           os.path.join(working_dir, fname_rec), 'w')
+                    cnt = 0
+                    pre_time = time.time()
+                    for i, item in enumerate(image_list):
+                        image_encode(args, i, item, q_out)
+                        if q_out.empty():
+                            continue
+                        _, s, _ = q_out.get()
+                        record.write_idx(item[0], s)
+                        if cnt % 1000 == 0:
+                            cur_time = time.time()
+                            print('time:', cur_time - pre_time, ' count:', cnt)
+                            pre_time = cur_time
+                        cnt += 1
+        if not count:
+            print('Did not find and list file with prefix %s'%args.prefix)
diff --git a/tools/ipynb2md.py b/tools/ipynb2md.py
index 426fa72..227174c 100755
--- a/tools/ipynb2md.py
+++ b/tools/ipynb2md.py
@@ -1,5 +1,23 @@
 #!/usr/bin/env python
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 """
 Convert jupyter notebook into the markdown format. The notebook outputs will be
 removed.
diff --git a/tools/kill-mxnet.py b/tools/kill-mxnet.py
index 2bdf949..2a4a430 100644
--- a/tools/kill-mxnet.py
+++ b/tools/kill-mxnet.py
@@ -1,5 +1,23 @@
 #!/usr/bin/env python
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 import os, sys
 import subprocess
 
diff --git a/tools/launch.py b/tools/launch.py
index f536665..de42ea2 100755
--- a/tools/launch.py
+++ b/tools/launch.py
@@ -1,4 +1,22 @@
 #!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """
 Launch a distributed job
 """
@@ -54,7 +72,7 @@ def main():
         args.num_servers = args.num_workers
 
     args = dmlc_opts(args)
-    
+
     if args.host_file is None or args.host_file == 'None':
       if args.cluster == 'yarn':
           from dmlc_tracker import yarn
diff --git a/tools/license_header.py b/tools/license_header.py
new file mode 100644
index 0000000..d0782b2
--- /dev/null
+++ b/tools/license_header.py
@@ -0,0 +1,157 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Add or check license header
+
+Usage:
+
+- add the default license header to source files that do not contain a valid
+  license:
+
+  python license_header.py add
+
+- check if every file has a license header
+
+  python license_header.py check
+"""
+
+import re
+import os
+import argparse
+
+# the default apache license
+_LICENSE = """Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License."""
+
+# if a file contains any str in the list, then consider it has been licensed
+_LICENSE_PATTERNS = ['Licensed to the Apache Software Foundation']
+
+# the folders or files that will be ignored
+_WHITE_LIST = ['R-package/',
+               'cub/',
+               'dlpack/',
+               'dmlc-core/',
+               'mshadow/',
+               'nnvm',
+               'ps-lite',
+               'src/operator/mkl/',
+               'src/operator/contrib/ctc_include/']
+
+# language extensions and the corresponding comment mark
+_LANGS = {'.cc':'*', '.h':'*', '.cu':'*', '.cuh':'*', '.py':'#',
+          '.pm':'#', '.scala':'*', '.cc':'*', '.sh':'#', '.cmake':'#'}
+
+# Previous license header, which will be removed
+_OLD_LICENSE = re.compile('.*Copyright.*by Contributors')
+
+def _has_license(lines):
+    return any([any([p in l.decode('utf-8') for p in _LICENSE_PATTERNS]) for l in lines])
+
+def _get_license(comment_mark):
+    if comment_mark == '*':
+        body = '/*\n'
+    else:
+        body = ''
+    for l in _LICENSE.split('\n'):
+        if comment_mark == '*':
+            body += ' '
+        body += comment_mark
+        if len(l):
+            body += ' ' + l
+        body += '\n'
+
+    if comment_mark == '*':
+        body += ' */\n'
+    body += '\n'
+    return body
+
+def _valid_file(fname, verbose=False):
+    if any([l in fname for l in _WHITE_LIST]):
+        if verbose:
+            print('skip ' + fname + ', it matches the white list')
+        return False
+    _, ext = os.path.splitext(fname)
+    if ext not in _LANGS:
+        if verbose:
+            print('skip ' + fname + ', unknown file extension')
+        return False
+    return True
+
+def process_file(fname, action, verbose=False):
+    if not _valid_file(fname, verbose):
+        return True
+    with open(fname, 'rb') as f:
+        lines = f.readlines()
+    if not lines:
+        return True
+    if _has_license(lines):
+        return True
+    elif action == 'check':
+        return False
+    _, ext = os.path.splitext(fname)
+    # remove old license
+    if ext == '.h' or ext == '.cc' or ext == '.cu':
+        for i, l in enumerate(lines):
+            if _OLD_LICENSE.match(l.decode('utf-8')):
+                del lines[i]
+                break
+    with open(fname, 'wb') as f:
+        # shebang line
+        if lines[0].startswith(b'#!'):
+            f.write(lines[0].rstrip()+b'\n\n')
+            del lines[0]
+        f.write(str.encode(_get_license(_LANGS[ext])))
+        for l in lines:
+            f.write(l.rstrip()+b'\n')
+    print('added license header to ' + fname)
+    return False
+
+def process_folder(root, action):
+    excepts = []
+    for root, _, files in os.walk(root):
+        for f in files:
+            fname = os.path.normpath(os.path.join(root, f))
+            if not process_file(fname, action):
+                excepts.append(fname)
+    if action == 'check' and excepts:
+        raise Exception('The following files do not contain a valid license, '+
+                        'you can use `python tools/license_header.py add` to add'+
+                        'them automatically', excepts)
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description='Add or check source license header')
+    parser.add_argument(
+        'action', nargs=1, type=str,
+        choices=['add', 'check'], default='add',
+        help = 'add or check')
+    args = parser.parse_args()
+    process_folder(os.path.join(os.path.dirname(__file__), '..'), args.action[0])
diff --git a/tools/parse_log.py b/tools/parse_log.py
index 070f770..f0ce53d 100755
--- a/tools/parse_log.py
+++ b/tools/parse_log.py
@@ -1,4 +1,22 @@
 #!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 """
 parse mxnet output log into a markdown table
 """
diff --git a/tools/pip_package/make_pip_package.sh b/tools/pip_package/make_pip_package.sh
index a1af18b..46b4938 100755
--- a/tools/pip_package/make_pip_package.sh
+++ b/tools/pip_package/make_pip_package.sh
@@ -1,5 +1,23 @@
 #!/usr/bin/env bash
 
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
 # Assuming the script is run at mxnet/tools/pip_package
 # This script builds from scratch the dependencies of mxnet into static
 # librareis and statically links them to produce a (mostly) standalone
diff --git a/tools/pip_package/setup.py b/tools/pip_package/setup.py
index 45d761e..e4bf482 100644
--- a/tools/pip_package/setup.py
+++ b/tools/pip_package/setup.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 # pylint: disable=invalid-name, exec-used
 """Setup mxnet package."""
 from __future__ import absolute_import

-- 
To stop receiving notification emails like this one, please contact
['"commits@mxnet.apache.org" <co...@mxnet.apache.org>'].