Posted to commits@mxnet.apache.org by bg...@apache.org on 2022/09/16 11:01:47 UTC

[incubator-mxnet] branch master updated: Python string formatting (#21136)

This is an automated email from the ASF dual-hosted git repository.

bgawrych pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new c8922fedff Python string formatting (#21136)
c8922fedff is described below

commit c8922fedff48cf0c4f15bf0e5fe805be8be34512
Author: hankaj <ha...@intel.com>
AuthorDate: Fri Sep 16 13:01:27 2022 +0200

    Python string formatting (#21136)
    
    * Improve string formatting
    
    * Another formatting fix
    
    * Minor fixes
    
    * Review suggestion
    
    * Review suggestions
    
    * Review suggestions once again
    
    * Review additional suggestions
    
    * A few more fixes
    
    * Remove f-strings from logging functions
    
    * Fix lack of 'f'
    
    * Minor fixes
    
    * Fix lint errors
---
 benchmark/opperf/opperf.py                         |   6 +-
 benchmark/opperf/utils/benchmark_utils.py          |   6 +-
 benchmark/opperf/utils/common_utils.py             |   9 +-
 benchmark/python/control_flow/rnn.py               |   8 +-
 benchmark/python/dnnl/fc_add.py                    |   4 +-
 benchmark/python/einsum/benchmark_einsum.py        |  20 ++--
 benchmark/python/ffi/benchmark_ffi.py              |  10 +-
 benchmark/python/metric/benchmark_metric.py        |  16 ++-
 benchmark/python/quantization/benchmark_op.py      |   9 +-
 benchmark/python/sparse/cast_storage.py            |   6 +-
 benchmark/python/sparse/dot.py                     |  35 +++---
 benchmark/python/sparse/sparse_op.py               |  18 ++--
 conftest.py                                        |   8 +-
 contrib/tvmop/opdef.py                             |   2 +-
 contrib/tvmop/space.py                             |  10 +-
 .../multi_threaded_inference/get_model.py          |   2 +-
 cpp-package/scripts/OpWrapperGenerator.py          |  40 +++----
 cpp-package/scripts/lint.py                        |   7 +-
 docs/python_docs/python/scripts/conf.py            |   6 +-
 docs/python_docs/python/scripts/md2ipynb.py        |   4 +-
 docs/python_docs/python/scripts/process_rst.py     |   2 +-
 .../distributed_training-horovod/gluon_mnist.py    |  17 ++-
 .../resnet50_imagenet.py                           |   2 +-
 example/distributed_training/cifar10_dist.py       |   2 +-
 .../distributed_training/cifar10_kvstore_hvd.py    |   2 +-
 example/extensions/lib_pass/test_pass.py           |   2 +-
 example/extensions/lib_subgraph/test_subgraph.py   |   6 +-
 .../house_prices/kaggle_k_fold_cross_validation.py |   7 +-
 example/gluon/image_classification.py              |  12 +--
 example/gluon/mnist/mnist.py                       |   6 +-
 example/gluon/super_resolution/super_resolution.py |   4 +-
 example/profiler/profiler_ndarray.py               |   6 +-
 example/quantization/imagenet_gen_qsym_onednn.py   |  31 +++---
 example/quantization/imagenet_inference.py         |  22 ++--
 example/recommenders/movielens_data.py             |  12 +--
 python/mxnet/_ffi/_ctypes/function.py              |   4 +-
 python/mxnet/_ffi/function.py                      |   2 +-
 python/mxnet/_ffi/node_generic.py                  |   2 +-
 python/mxnet/amp/amp.py                            |   8 +-
 python/mxnet/autograd.py                           |  12 +--
 python/mxnet/base.py                               |  41 ++++---
 python/mxnet/contrib/quantization.py               |  76 +++++++------
 python/mxnet/contrib/tensorboard.py                |   2 +-
 python/mxnet/contrib/text/embedding.py             |  42 ++++----
 python/mxnet/contrib/text/vocab.py                 |   2 +-
 python/mxnet/device.py                             |   2 +-
 python/mxnet/executor.py                           |   4 +-
 python/mxnet/gluon/block.py                        |  50 +++++----
 python/mxnet/gluon/contrib/estimator/estimator.py  |   6 +-
 .../mxnet/gluon/contrib/estimator/event_handler.py |  61 +++++------
 python/mxnet/gluon/data/_internal.py               |  14 ++-
 python/mxnet/gluon/data/batchify.py                |   4 +-
 python/mxnet/gluon/data/dataset.py                 |   9 +-
 python/mxnet/gluon/data/sampler.py                 |   4 +-
 python/mxnet/gluon/data/vision/datasets.py         |   5 +-
 python/mxnet/gluon/loss.py                         |   7 +-
 python/mxnet/gluon/metric.py                       |   8 +-
 python/mxnet/gluon/model_zoo/vision/__init__.py    |   3 +-
 python/mxnet/gluon/model_zoo/vision/densenet.py    |   2 +-
 python/mxnet/gluon/model_zoo/vision/mobilenet.py   |   4 +-
 python/mxnet/gluon/model_zoo/vision/resnet.py      |   7 +-
 python/mxnet/gluon/model_zoo/vision/squeezenet.py  |   2 +-
 python/mxnet/gluon/model_zoo/vision/vgg.py         |   2 +-
 python/mxnet/gluon/nn/basic_layers.py              |   8 +-
 python/mxnet/gluon/parameter.py                    | 120 ++++++++++-----------
 python/mxnet/gluon/rnn/conv_rnn_cell.py            |   2 +-
 python/mxnet/gluon/rnn/rnn_cell.py                 |   2 +-
 python/mxnet/gluon/rnn/rnn_layer.py                |   5 +-
 python/mxnet/gluon/trainer.py                      |  13 ++-
 python/mxnet/gluon/utils.py                        |   9 +-
 python/mxnet/image/detection.py                    |  11 +-
 python/mxnet/image/image.py                        |   2 +-
 python/mxnet/initializer.py                        |  11 +-
 python/mxnet/io/io.py                              |  10 +-
 python/mxnet/io/utils.py                           |   4 +-
 python/mxnet/kvstore/base.py                       |   8 +-
 python/mxnet/kvstore/kvstore_server.py             |   3 +-
 python/mxnet/library.py                            |   6 +-
 python/mxnet/lr_scheduler.py                       |   2 +-
 python/mxnet/model.py                              |  20 ++--
 python/mxnet/name.py                               |   2 +-
 python/mxnet/ndarray/contrib.py                    |  14 +--
 python/mxnet/ndarray/ndarray.py                    |  36 +++----
 python/mxnet/ndarray/numpy/_op.py                  |   2 +-
 .../mxnet/ndarray/numpy_extension/control_flow.py  |   4 +-
 python/mxnet/ndarray/random.py                     |   6 +-
 python/mxnet/ndarray/register.py                   |   8 +-
 python/mxnet/ndarray/sparse.py                     |  11 +-
 python/mxnet/ndarray_doc.py                        |   7 +-
 python/mxnet/numpy/function_base.py                |   3 +-
 python/mxnet/numpy/multiarray.py                   |   2 +-
 python/mxnet/onnx/mx2onnx/_export_onnx.py          |  10 +-
 .../_op_translations/_op_translations_opset12.py   |   4 +-
 .../_op_translations/_op_translations_opset13.py   |   2 +-
 python/mxnet/operator.py                           | 115 ++++++++++----------
 python/mxnet/optimizer/optimizer.py                |   9 +-
 python/mxnet/recordio.py                           |   4 +-
 python/mxnet/registry.py                           |  23 ++--
 python/mxnet/rtc.py                                |  15 ++-
 python/mxnet/symbol/contrib.py                     |  15 ++-
 python/mxnet/symbol/numpy/_symbol.py               |   8 +-
 python/mxnet/symbol/random.py                      |   6 +-
 python/mxnet/symbol/register.py                    |   8 +-
 python/mxnet/symbol/symbol.py                      |  72 ++++++-------
 python/mxnet/symbol_doc.py                         |   7 +-
 python/mxnet/test_utils.py                         |  75 ++++++-------
 python/mxnet/util.py                               |   2 +-
 python/setup.py                                    |   8 +-
 tests/nightly/TestDoc/doc_spell_checker.py         |   4 +-
 .../model_backwards_compatibility_check/common.py  |  16 +--
 .../model_backwards_compat_inference.py            |  28 ++---
 .../model_backwards_compat_train.py                |  10 +-
 tests/python/doctest/test_docstring.py             |   5 +-
 tests/python/gpu/test_extensions_gpu.py            |   6 +-
 tests/python/gpu/test_gluon_model_zoo_gpu.py       |   4 +-
 tests/python/gpu/test_numpy_fallback.py            |   4 +-
 tests/python/gpu/test_operator_gpu.py              |   6 +-
 tests/python/gpu/test_profiler_gpu.py              |   6 +-
 tests/python/onnx/test_models.py                   |   8 +-
 tests/python/train/test_autograd.py                |   2 +-
 tests/python/unittest/common.py                    |   8 +-
 tests/python/unittest/test_extensions.py           |  10 +-
 tests/python/unittest/test_gluon_model_zoo.py      |   2 +-
 tests/python/unittest/test_gluon_rnn.py            |  16 +--
 tests/python/unittest/test_memory_opt.py           |   8 +-
 tests/python/unittest/test_ndarray.py              |   8 +-
 tests/python/unittest/test_numpy_op.py             |   2 +-
 tests/python/unittest/test_operator.py             |  79 +++++++-------
 tests/python/unittest/test_profiler.py             |   8 +-
 tests/python/unittest/test_random.py               |  18 ++--
 tests/python/unittest/test_sparse_ndarray.py       |   2 +-
 tests/python/unittest/test_sparse_operator.py      |   2 +-
 tools/bandwidth/measure.py                         |   5 +-
 tools/im2rec.py                                    |  20 ++--
 tools/kill-mxnet.py                                |   4 +-
 tools/launch.py                                    |   4 +-
 tools/parse_log.py                                 |   8 +-
 tools/rec2idx.py                                   |   2 +-
 138 files changed, 835 insertions(+), 928 deletions(-)
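
For readers skimming the patch: it migrates %-interpolation and str.format()
calls to f-strings throughout the Python sources. A minimal sketch of the
three equivalent spellings, modeled on the einsum benchmark lines below
(illustrative only, values made up):

    cost = 0.001234
    print("Basic einsum: %s ms" % (cost * 1000))       # old %-interpolation
    print("Basic einsum: {} ms".format(cost * 1000))   # old str.format()
    print(f"Basic einsum: {cost * 1000} ms")           # new f-string
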

diff --git a/benchmark/opperf/opperf.py b/benchmark/opperf/opperf.py
index 47bd970f93..8cc48694d5 100644
--- a/benchmark/opperf/opperf.py
+++ b/benchmark/opperf/opperf.py
@@ -193,9 +193,9 @@ def main():
                              'Valid Inputs - positive integers') 
 
     args = parser.parse_args()
-    logging.info("Running MXNet operator benchmarks with the following options: {args}".format(args=args))
+    logging.info(f"Running MXNet operator benchmarks with the following options: {args}")
     assert not os.path.isfile(args.output_file),\
-        "Output file {output_file} already exists.".format(output_file=args.output_file)
+        f"Output file {args.output_file} already exists."
 
     # 2. RUN BENCHMARKS
     ctx = _parse_mxnet_context(args.ctx)
@@ -218,7 +218,7 @@ def main():
     # 4. Generate list of MXNet operators not covered in benchmarks
     ops_not_covered = get_operators_with_no_benchmark(final_benchmark_results.keys())
     for idx, op in enumerate(ops_not_covered):
-        print("{idx}. {op}".format(idx=idx, op=op))
+        print(f"{idx}. {op}")
 
     return 0
 
diff --git a/benchmark/opperf/utils/benchmark_utils.py b/benchmark/opperf/utils/benchmark_utils.py
index fc756f42b4..99d6bc0cc9 100644
--- a/benchmark/opperf/utils/benchmark_utils.py
+++ b/benchmark/opperf/utils/benchmark_utils.py
@@ -189,7 +189,7 @@ def _run_operator_performance_test(op, inputs, run_backward, warmup, runs, kwarg
 
     # Run Benchmarks
     op_benchmark_result = {op.__name__: []}
-    logging.info("Begin Benchmark - {name}".format(name=op.__name__))
+    logging.info(f"Begin Benchmark - {op.__name__}")
 
     for idx, kwargs in enumerate(kwargs_list):
         _, profiler_output = benchmark_helper_func(op, runs, **kwargs)
@@ -199,7 +199,7 @@ def _run_operator_performance_test(op, inputs, run_backward, warmup, runs, kwarg
         new_inp = parse_input_ndarray(inputs[idx])
         profiler_output = merge_map_list([{"inputs": new_inp}] + [profiler_output])
         op_benchmark_result[op.__name__].append(profiler_output)
-    logging.info("Complete Benchmark - {name}".format(name=op.__name__))
+    logging.info(f"Complete Benchmark - {op.__name__}")
     return op_benchmark_result
 
 
@@ -250,7 +250,7 @@ def run_performance_test(ops, inputs, run_backward=True,
             kwargs_list = _prepare_op_inputs(inputs, run_backward, dtype, ctx, op.__module__)
             benchmark_result = _run_operator_performance_test(op, inputs, run_backward, warmup, runs, kwargs_list, profiler)
         else:
-            raise ValueError("Unknown {0} operator provided to benchmark. - {1}".format(op.__module__,  op.__name__))
+            raise ValueError(f"Unknown {op.__module__} operator provided to benchmark. - {op.__name__}")
         op_benchmark_result.append(benchmark_result)
     return op_benchmark_result
 
diff --git a/benchmark/opperf/utils/common_utils.py b/benchmark/opperf/utils/common_utils.py
index fcf52d4377..5f189f9e97 100644
--- a/benchmark/opperf/utils/common_utils.py
+++ b/benchmark/opperf/utils/common_utils.py
@@ -76,7 +76,7 @@ def save_to_file(inp_dict, out_filepath, out_format='json', runtime_features=Non
         with open(out_filepath, "w") as result_file:
             result_file.write(_prepare_markdown(inp_dict, runtime_features, profiler))
     else:
-        raise ValueError("Invalid output file format provided - '{}'. Supported - json, md".format(format))
+        raise ValueError(f"Invalid output file format provided - '{out_format}'. Supported - json, md")
 
 
 def get_json(inp_dict):
@@ -126,10 +126,9 @@ def _prepare_op_benchmark_result(op, op_bench_result, profiler):
 
     result = ""
     if profiler == "native":
-        result = "| {} | {} | {} | {} | {} |".format(operator_name,
-                 inputs, max_mem_usage, avg_forward_time, avg_backward_time)
+        result = f"| {operator_name} | {inputs} | {max_mem_usage} | {avg_forward_time} | {avg_backward_time} |"
     elif profiler == "python":
-        result = "| {} | {} | {} | {} | {} | {} |".format(operator_name, avg_time, p50_time, p90_time, p99_time, inputs)
+        result = f"| {operator_name} | {avg_time} | {p50_time} | {p90_time} | {p99_time} | {inputs} |"
     return result
 
 
@@ -139,7 +138,7 @@ def _prepare_markdown(results, runtime_features=None, profiler='native'):
         results_markdown.append("# Runtime Features")
         idx = 0
         for key, value in runtime_features['runtime_features'].items():
-            results_markdown.append('{}. {} : {}'.format(idx, key, value))
+            results_markdown.append(f'{idx}. {key} : {value}')
 
     results_markdown.append("# Benchmark Results")
     if profiler == 'native':
diff --git a/benchmark/python/control_flow/rnn.py b/benchmark/python/control_flow/rnn.py
index 8f69e6be15..80134ad82c 100644
--- a/benchmark/python/control_flow/rnn.py
+++ b/benchmark/python/control_flow/rnn.py
@@ -97,7 +97,8 @@ def run_benchmark(cell_type, ctx, seq_len, batch_size, hidden_dim):
         layer.initialize(ctx=ctx)
         if is_hyb_layer:
             layer.hybridize(static_alloc=True)
-        print("is_train = %r, hybridize_cell = %r, hybridize_layer = %r" % (is_train, is_hyb_cell, is_hyb_layer))
+        print(
+            f"is_train = {repr(is_train)}, hybridize_cell = {repr(is_hyb_cell)}, hybridize_layer = {repr(is_hyb_layer)}")
         times = []
         for _ in range(args.warmup_rounds + args.test_rounds):
             tick = time()
@@ -112,7 +113,7 @@ def run_benchmark(cell_type, ctx, seq_len, batch_size, hidden_dim):
             tock = time()
             times.append((tock - tick) * 1000.0)
         times = times[args.warmup_rounds: ]
-        print("Time used: mean = %.3f ms, std = %.3f ms" % (onp.mean(times), onp.std(times)))
+        print(f"Time used: mean = {onp.mean(times):.3f} ms, std = {onp.std(times):.3f} ms")
 
 
 def main():
@@ -131,8 +132,7 @@ def main():
     for cell_type, ctx, seq_len, batch_size, hidden_dim in product(  \
         cell_types, ctxs, seq_lens, batch_sizes, hidden_dims):
         print("--------------------------------------")
-        print("cell: %s  ctx: %s  length: %d  batch size: %d dim: %d" % \
-              (cell_type.__name__, str(ctx), seq_len, batch_size, hidden_dim))
+        print(f"cell: {cell_type.__name__}  ctx: {str(ctx)}  length: {seq_len}  batch size: {batch_size} dim: {hidden_dim}")
         run_benchmark(cell_type, ctx, seq_len, batch_size, hidden_dim)
 
 
diff --git a/benchmark/python/dnnl/fc_add.py b/benchmark/python/dnnl/fc_add.py
index 6cf2f929ec..4318dff042 100644
--- a/benchmark/python/dnnl/fc_add.py
+++ b/benchmark/python/dnnl/fc_add.py
@@ -59,9 +59,9 @@ def print_header(header):
 
 def print_value(shape, hidden, mean):
     if table_left_colums:
-        print("| ({:4},{:4}) | {:6} | {:9.3f} |".format(shape[0], shape[1], hidden, mean))
+        print(f"| ({shape[0]:4},{shape[1]:4}) | {hidden:6} | {mean:9.3f} |")
     else:
-        print(" {:9.3f} |".format(mean))
+        print(f" {mean:9.3f} |")
 
 
 def measure(net, data0, data1, data2, shape, nhid):
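
The fc_add.py hunk above shows that format specifiers carry over unchanged
into f-strings: everything after the colon is the same format-spec
mini-language that str.format() uses. A short illustration (assumed values):

    shape, hidden, mean = (1024, 512), 64, 3.14159
    print(f"| ({shape[0]:4},{shape[1]:4}) | {hidden:6} | {mean:9.3f} |")
    # :4 and :6 pad to a minimum field width; :9.3f is width 9 with 3 decimals
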
diff --git a/benchmark/python/einsum/benchmark_einsum.py b/benchmark/python/einsum/benchmark_einsum.py
index 3d1a708d86..0de1efa4b9 100644
--- a/benchmark/python/einsum/benchmark_einsum.py
+++ b/benchmark/python/einsum/benchmark_einsum.py
@@ -38,7 +38,7 @@ def test_np_einsum():
     a = np.ones(64).reshape(2,4,8)
     args = ['ijk,ilm,njm,nlk,abc->', a, a, a, a, a]
     cost = measure_cost(500, np.einsum, *args)
-    print("Basic einsum: {} ms".format(cost * 1000))
+    print(f"Basic einsum: {cost * 1000} ms")
 
     # Sub-optimal einsum
     # cost = measure_cost(500, np.einsum, *args, optimize='optimal')
@@ -46,40 +46,40 @@ def test_np_einsum():
 
     # Greedy einsum
     cost = measure_cost(500, np.einsum, *args, optimize=True)
-    print("Greedy einsum: {} ms".format(cost * 1000))
+    print(f"Greedy einsum: {cost * 1000} ms")
 
     print("RNN Use Case:")
     a = np.random.uniform(0, 1, size=(64, 128, 512))
     b = np.random.uniform(0, 1, size=(128, 512, 2, 2))
     args = ['bij, ijkl->bkl', a, b]
     cost = measure_cost(2, np.einsum, *args, optimize=True)
-    print('Greedy einsum: {} ms'.format(cost * 1000))
+    print(f'Greedy einsum: {cost * 1000} ms')
     cost = measure_cost(2, np.einsum, *args)
-    print('Basic einsum: {} ms'.format(cost * 1000))
+    print(f'Basic einsum: {cost * 1000} ms')
 
     print('Inner Product:')
     a = np.ones(6000000)
     b = np.ones(6000000)
     args = [a, b]
     cost = measure_cost(50, np.tensordot, *args, axes=([0],[0]))
-    print('Tensordot: {} ms'.format(cost * 1000))
+    print(f'Tensordot: {cost * 1000} ms')
     args = ['i, i', a, b]
     cost = measure_cost(50, np.einsum, *args, optimize=True)
-    print('Greedy einsum: {} ms'.format(cost * 1000))
+    print(f'Greedy einsum: {cost * 1000} ms')
     cost = measure_cost(50, np.einsum, *args)
-    print('Basic einsum: {} ms'.format(cost * 1000))
+    print(f'Basic einsum: {cost * 1000} ms')
 
     print('Matrix Product:')
     a = np.ones(600000).reshape(200, 3000)
     b = np.ones(600000).reshape(3000, 200)
     args = [a, b]
     cost = measure_cost(50, np.tensordot, *args, axes=([1],[0]))
-    print('Tensordot: {} ms'.format(cost * 1000))
+    print(f'Tensordot: {cost * 1000} ms')
     args = ['ij, jk', a, b]
     cost = measure_cost(50, np.einsum, *args, optimize=True)
-    print('Greedy einsum: {} ms'.format(cost * 1000))
+    print(f'Greedy einsum: {cost * 1000} ms')
     cost = measure_cost(50, np.einsum, *args)
-    print('Basic einsum: {} ms'.format(cost * 1000))
+    print(f'Basic einsum: {cost * 1000} ms')
 
 
 if __name__ == "__main__":
diff --git a/benchmark/python/ffi/benchmark_ffi.py b/benchmark/python/ffi/benchmark_ffi.py
index 919c72f34a..7ede83c582 100644
--- a/benchmark/python/ffi/benchmark_ffi.py
+++ b/benchmark/python/ffi/benchmark_ffi.py
@@ -31,7 +31,7 @@ class OpArgMngr(object):
             _specifier = kwargs["_specififer"]
             del kwargs["_specififer"]
         if _specifier in OpArgMngr.args:
-            raise ValueError("duplicate {}".format(_specifier))
+            raise ValueError(f"duplicate {_specifier}")
         OpArgMngr.args[_specifier] = {'args': args, 'kwargs': kwargs, 'funcname': funcname}
 
 
@@ -43,7 +43,7 @@ def generate_workloads():
     for shape in shapes:
         name = 'x'.join(str(i) for i in shape)
         if name in array_pool:
-            raise ValueError("duplicate array {}".format(name))
+            raise ValueError(f"duplicate array {name}")
         array_pool[name] = dnp.ones(shape)
     return array_pool
 
@@ -229,7 +229,7 @@ def run_benchmark(packages):
     for (k, v) in OpArgMngr.args.items():
         result = {}
         for (name, package) in packages.items():
-            print('{}.{} running...'.format(name, k))
+            print(f'{name}.{k} running...')
             op = get_op(package["module"], v["funcname"])
             args = [package["data"](arg) for arg in v["args"]]
             kwargs = {k: package["data"](v) for (k, v) in v["kwargs"].items()}
@@ -240,10 +240,10 @@ def run_benchmark(packages):
 
 
 def show_results(results):
-    print("{:>24}{:>24}{:>24}".format("name", "package", "time(us)"))
+    print(f'{"name":>24}{"package":>24}{"time(us)":>24}')
     for (specifier, d) in results.items():
         for (k, v) in d.items():
-            print("{:>24}{:>24}{:>24}".format(specifier, k, v * 10 ** 6))
+            print(f"{specifier:>24}{k:>24}{v * 10 ** 6:>24}")
 
 
 if __name__ == "__main__":
diff --git a/benchmark/python/metric/benchmark_metric.py b/benchmark/python/metric/benchmark_metric.py
index 3c9abf6e3c..8503c178a3 100644
--- a/benchmark/python/metric/benchmark_metric.py
+++ b/benchmark/python/metric/benchmark_metric.py
@@ -75,12 +75,10 @@ def run_metric(name, data_gen_cls, i, n, c, pred_ctx, label_ctx, **kwargs):
         metric.update([label] * i, [pred] * i)
         mx.nd.waitall()
         elapsed = time.time() - before
-        elapsed_str = "{:<.5}".format(elapsed)
+        elapsed_str = f"{elapsed:<.5}"
     except mx.MXNetError:
         elapsed_str = "FAILED"
-    print("{metric:<15}{pctx:<10}{lctx:<12}{niter:<12}{bs:<15}{out_dim:<15}{elapsed:<}".format(
-        metric=name, pctx=str(pred_ctx), lctx=str(label_ctx), niter=i * n, bs=data_gen.batch_size,
-        out_dim=data_gen.output_dim, elapsed=elapsed_str), file=sys.stderr)
+    print(f"{name:<15}{pred_ctx:<10}{label_ctx:<12}{i * n:<12}{data_gen.batch_size:<15}{data_gen.output_dim:<15}{elapsed_str:<}", file=sys.stderr)
 
 
 def test_metric_performance():
@@ -107,14 +105,12 @@ def test_metric_performance():
 
     print("\nmx.gluon.metric benchmarks", file=sys.stderr)
     print(
-        "{:15}{:10}{:12}{:12}{:15}{:15}{}".format(
-            'Metric', 'Data-Ctx', 'Label-Ctx', 'Data Size', 'Batch Size', 'Output Dim', 'Elapsed Time'),
+        f"{'Metric':15}{'Data-Ctx':10}{'Label-Ctx':12}{'Data Size':12}{'Batch Size':15}{'Output Dim':15}{'Elapsed Time'}",
         file=sys.stderr)
-    print("{:-^90}".format(''), file=sys.stderr)
+    print(f"{'':-^90}", file=sys.stderr)
     for k, v in metrics:
         for c in output_dims:
             for n in batch_sizes:
                 for pred_ctx, label_ctx in itertools.product(ctxs, ctxs):
-                    run_metric(k, v[1], (data_size * 128)//(n * c), n, c, pred_ctx, label_ctx, **v[0])
-                print("{:-^90}".format(''), file=sys.stderr)
-
+                    run_metric(k, v[1], (data_size * 128)//(n * c), n, c, pred_ctx, label_ctx, **v[0])
+                print(f"{'':-^90}", file=sys.stderr)
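
The header lines above nest string literals inside the replacement fields,
e.g. f"{'Metric':15}". Before Python 3.12 (PEP 701) the inner quotes must
differ from the f-string's own delimiter, hence the single quotes nested
inside double quotes here. A small sketch:

    import sys
    print(f"{'Metric':15}{'Data-Ctx':10}")   # single quotes nested in double
    print(f"{'':-^90}", file=sys.stderr)     # pads '' with '-' to width 90
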
diff --git a/benchmark/python/quantization/benchmark_op.py b/benchmark/python/quantization/benchmark_op.py
index 5ba7740cc9..de955eec55 100644
--- a/benchmark/python/quantization/benchmark_op.py
+++ b/benchmark/python/quantization/benchmark_op.py
@@ -61,11 +61,10 @@ def benchmark_convolution(data_shape, kernel, num_filter, pad, stride, no_bias=T
                              grad_req='null', typ='forward') * 1000
 
     print('==================================================================================================')
-    print('data=%s, kernel=%s, num_filter=%s, pad=%s, stride=%s, no_bias=%s, layout=%s, repeats=%s'
-          % (data_shape, kernel, num_filter, pad, stride, no_bias, layout, repeats))
-    print('%s , ctx=%s, time=%.2f ms' % (conv_cudnn.name + '-FP32', ctx_gpu, conv_cudnn_time))
-    print('%s, ctx=%s, time=%.2f ms' % (quantized_conv2d.name, ctx_gpu, qconv_time))
-    print('quantization speedup:               %.1fX' % (conv_cudnn_time / qconv_time))
+    print(f'data={data_shape}, kernel={kernel}, num_filter={num_filter}, pad={pad}, stride={stride}, no_bias={no_bias}, layout={layout}, repeats={repeats}')
+    print(f'{conv_cudnn.name}-FP32 , ctx={ctx_gpu}, time={conv_cudnn_time:.2f} ms')
+    print(f'{quantized_conv2d.name}, ctx={ctx_gpu}, time={qconv_time:.2f} ms')
+    print(f'quantization speedup:               {conv_cudnn_time / qconv_time:.1f}X')
     print('\n')
 
 
diff --git a/benchmark/python/sparse/cast_storage.py b/benchmark/python/sparse/cast_storage.py
index 6f4fc413ed..5b3d92bc35 100644
--- a/benchmark/python/sparse/cast_storage.py
+++ b/benchmark/python/sparse/cast_storage.py
@@ -51,7 +51,7 @@ def run_cast_storage_synthetic():
 
         # start benchmarking
         cost = measure_cost(repeat, mx.nd.cast_storage, dns_data, stype)
-        results = '{:10.1f} {:>10} {:8d} {:8d} {:10.2f}'.format(density*100, str(ctx), m, n, cost*1000)
+        results = f'{density*100:10.1f} {str(ctx):>10} {m:8d} {n:8d} {cost * 1000:10.2f}'
         print(results)
 
     check_call(_LIB.MXSetNumOMPThreads(ctypes.c_int(args.num_omp_threads)))
@@ -82,10 +82,10 @@ def run_cast_storage_synthetic():
             stype = 'row_sparse'
             print(" cast_storage benchmark: dense to rsp, size m x n ")
         else:
-            print("invalid benchmark: %s" %b)
+            print(f"invalid benchmark: {b}")
             continue
         print("==================================================")
-        headline = '{:>10} {:>10} {:>8} {:>8} {:>10}'.format('density(%)', 'context', 'm', 'n', 'time(ms)')
+        headline = f"{'density(%)':>10} {'context':>10} {'m':>8} {'n':>8} {'time(ms)':>10}"
         print(headline)
         for i in range(len(n)):
             for ctx in contexts:
diff --git a/benchmark/python/sparse/dot.py b/benchmark/python/sparse/dot.py
index a2dfd03a6b..926dc2b186 100644
--- a/benchmark/python/sparse/dot.py
+++ b/benchmark/python/sparse/dot.py
@@ -72,7 +72,7 @@ CRITEO = {
     'data_mini': 'criteo.t.mini',
     'data_name': 'criteo.t',
     'data_origin_name': 'criteo.t.bz2',
-    'url' : "https://s3-us-west-2.amazonaws.com/sparse-dataset/criteo.t.bz2",
+    'url': "https://s3-us-west-2.amazonaws.com/sparse-dataset/criteo.t.bz2",
     'feature_dim': 8388621,
     'm': [1, 8, 16, 32, 64],
     'batch_size': [64, 128],
@@ -148,11 +148,10 @@ def _compare_sparse_dense(data_dir, file_name, mini_file_name, feature_dim,
             last = _line_count(path) - num_batches * batch_size
             last = last if last >= 1 else 1
             start = int(rnd.uniform(1, last))
-            os.system("sed -n '%d,%dp' %r > %r"
-                      %(start, start + num_batches * batch_size, path, mini_path))
+            os.system("sed -n '{},{}p' {} > {}".format(
+                start, start + num_batches * batch_size, repr(path), repr(mini_path)))
             assert os.path.exists(mini_path)
 
-
     def run_benchmark(mini_path):
         """Run benchmarks
         """
@@ -181,7 +180,6 @@ def _compare_sparse_dense(data_dir, file_name, mini_file_name, feature_dim,
         average_cost["dense"] = total_cost["dense"] / count
         return (average_cost["sparse"], average_cost["dense"])
 
-
     def print_result(average_cost_sparse, average_cost_dense):
         """Print result of comparison between sparse and dense
         """
@@ -224,17 +222,16 @@ def test_dot_real(data_dict):
     assert default_batch_size_index < len(batch_size_list)
     assert default_output_index < len(m)
     if ARGS.verbose:
-        print("Running Benchmarking on %r data") % data_dict['data_mini']
+        print(f"Running Benchmarking on {repr(data_dict['data_mini'])} data")
     print('{:>15} {:>10} {:>10} {:>10} {:>20} {:>15} {:>15} {:>10} {:>10}'.format('density(%)',
-                                                                                 'n',
-                                                                                 'm',
-                                                                                 'k',
-                                                                                 't_dense/t_sparse',
-                                                                                 't_dense(ms)',
-                                                                                 't_sparse(ms)',
-                                                                                 'is_transpose',
-                                                                                 'rhs_rsp'))
-
+                                                                                  'n',
+                                                                                  'm',
+                                                                                  'k',
+                                                                                  't_dense/t_sparse',
+                                                                                  't_dense(ms)',
+                                                                                  't_sparse(ms)',
+                                                                                  'is_transpose',
+                                                                                  'rhs_rsp'))
 
     for output_dim in m:
         _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
@@ -324,8 +321,9 @@ def test_dot_synthetic(data_dict):
     def print_benchmark_info(lhs, rhs, lhs_trans, fw):
         trans_str = "^T" if lhs_trans else ""
         print("========================================================")
-        print("  %s sparse dot benchmark: dot(%s, %s) = %s  ") % (fw, lhs, rhs, rhs)
-        print("  (matrix multiplication: (m x k)%s * (k x n) = m x n)  ") % (trans_str)
+        print(f"  {fw} sparse dot benchmark: dot({lhs}, {rhs}) = {rhs}  ")
+        print(
+            f"  (matrix multiplication: (m x k){trans_str} * (k x n) = m x n)  ")
         print("========================================================")
         headline_pattern = '{:>15} {:>15} {:>10} {:>8} {:>8} {:>8} {:>13} {:>13} {:>8}'
         headline = headline_pattern.format('lhs_density(%)',
@@ -337,7 +335,6 @@ def test_dot_synthetic(data_dict):
                                            'speedup')
         print(headline)
 
-
     def run_benchmark(ctx=None, lhs="csr", lhs_trans=False, rhs="dns", fw="mxnet", rhs_density=1,
                       distribution="uniform"):
 
@@ -463,4 +460,4 @@ if __name__ == "__main__":
     test_dot_synthetic(SYNTHETIC1)
     test_dot_synthetic(SYNTHETIC2)
     total_time = time.time() - begin_time
-    print("total time is %f") % total_time
+    print(f"total time is {total_time}")
diff --git a/benchmark/python/sparse/sparse_op.py b/benchmark/python/sparse/sparse_op.py
index 6c4fe8188c..b01120db0d 100644
--- a/benchmark/python/sparse/sparse_op.py
+++ b/benchmark/python/sparse/sparse_op.py
@@ -94,12 +94,12 @@ def test_dot_real(data_dict):
 
     mini_path = os.path.join(data_dir, data_dict['data_mini'])
     if not os.path.exists(mini_path):
-        os.system("head -n 2000 %r > %r" % (path, mini_path))
+        os.system(f"head -n 2000 {repr(path)} > {repr(mini_path)}")
         assert os.path.exists(mini_path)
 
-    print("Running Benchmarking on %r data" % data_dict['data_mini'])
+    print(f"Running Benchmarking on {repr(data_dict['data_mini'])} data")
     for batch_size in data_dict['batch_size']:  # iterator through different batch size of choice
-        print("batch_size is %d" % batch_size)
+        print(f"batch_size is {batch_size}")
         # model
         data_shape = (k, )
         train_iter = get_iter(mini_path, data_shape, batch_size)
@@ -129,8 +129,8 @@ def test_dot_real(data_dict):
         t_dense = costs[1]
         ratio = t_dense / t_sparse
         print('density(%)\tn\tm\tk\tt_dense/t_sparse\tt_dense\tt_sparse')
-        fmt = "%0.4f\t\t%d\t%d\t%d\t%0.2f\t\t\t%0.4f\t%0.6f"
-        print(fmt % (density * 100, batch_size, m, k, ratio, t_dense, t_sparse))
+        fmt = "{:0.4f}\t\t{}\t{}\t{}\t{:0.2f}\t\t\t{:0.4f}\t{:0.6f}"
+        print(fmt.format(density * 100, batch_size, m, k, ratio, t_dense, t_sparse))
 
 
 def test_dot_synthetic():
@@ -179,8 +179,8 @@ def test_dot_synthetic():
         cost = measure_cost_forward_baseline(repeat, sp.spmatrix.dot, lhs_csr_sp, rhs_dns_np)
         costs_baseline.append(cost)
         ratio_baseline = costs_baseline[0] / costs_baseline[1]
-        fmt = "%0.1f\t\t%s\t%d\t%d\t%d\t%0.2f\t\t\t%0.2f\t%0.5f\t\t%0.2f\t\t\t\t%0.6f\t%0.5f"
-        print(fmt % (density * 100, str(ctx), n, m, k, ratio, costs[0], costs[1],
+        fmt = "{:0.1f}\t\t{}\t{}\t{}\t{}\t{:0.2f}\t\t\t{:0.2f}\t{:0.5f}\t\t{:0.2f}\t\t\t\t{:0.6f}\t{:0.5f}"
+        print(fmt.format(density * 100, str(ctx), n, m, k, ratio, costs[0], costs[1],
                      ratio_baseline, costs_baseline[0], costs_baseline[1]))
 
     def bench_dot_backward(m, k, n, density, ctx, repeat):
@@ -208,8 +208,8 @@ def test_dot_synthetic():
         cost = measure_cost_backward_baseline(repeat, sp.spmatrix.dot, sp.spmatrix.transpose, lhs_csr_sp, rhs_dns_np)
         costs_baseline.append(cost)
         ratio_baseline = costs_baseline[0] / costs_baseline[1]
-        fmt = "%0.1f\t\t%s\t%d\t%d\t%d\t%0.2f\t\t\t%0.2f\t%0.5f\t\t%0.2f\t\t\t\t%0.6f\t%0.5f"
-        print(fmt % (density * 100, str(ctx), n, m, k, ratio, costs[0], costs[1],
+        fmt = "{:0.1f}\t\t{}\t{}\t{}\t{}\t{:0.2f}\t\t\t{:0.2f}\t{:0.5f}\t\t{:0.2f}\t\t\t\t{:0.6f}\t{:0.5f}"
+        print(fmt.format(density * 100, str(ctx), n, m, k, ratio, costs[0], costs[1],
                      ratio_baseline, costs_baseline[0], costs_baseline[1]))
 
     print("A = sparse NDArray of shape(m, k)")
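
Where the old %-style used %r, the replacements above spell out repr()
inside the braces; the !r conversion flag is the equivalent shorthand.
A small illustration (hypothetical value):

    path = "data/criteo.t"
    print(f"head -n 2000 {repr(path)}")   # as written in the patch
    print(f"head -n 2000 {path!r}")       # equivalent !r conversion
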
diff --git a/conftest.py b/conftest.py
index c8814c4d8b..bfe2e2387f 100644
--- a/conftest.py
+++ b/conftest.py
@@ -128,7 +128,7 @@ def module_scope_seed(request):
         # shown to users. https://github.com/pytest-dev/pytest/issues/7819
         print('*** module-level seed is set: all tests running deterministically ***')
     print('Setting module np/mx/python random seeds, '
-                    'use MXNET_MODULE_SEED={} to reproduce.'.format(seed))
+          f'use MXNET_MODULE_SEED={seed} to reproduce.')
     old_state = random.getstate()
     random.seed(seed)
     try:
@@ -204,8 +204,7 @@ def function_scope_seed(request):
     except:
         logging.warning('Unable to import numpy/mxnet. Skip setting function-level seed.')
 
-    seed_message = 'Setting np/mx/python random seeds to {}. Use MXNET_TEST_SEED={} to reproduce.'
-    seed_message = seed_message.format(seed, seed)
+    seed_message = f'Setting np/mx/python random seeds to {seed}. Use MXNET_TEST_SEED={seed} to reproduce.'
 
     # Always log seed on DEBUG log level. This makes sure we can find out the
     # value of the seed even if the test case causes a segfault and subsequent
@@ -220,8 +219,7 @@ def function_scope_seed(request):
         # Either request.node.rep_setup.failed or request.node.rep_setup.passed should be True
         assert request.node.rep_setup.passed
         # On failure also log seed on WARNING log level
-        error_message = 'Error seen with seeded test, use MXNET_TEST_SEED={} to reproduce'
-        error_message = error_message.format(seed)
+        error_message = f'Error seen with seeded test, use MXNET_TEST_SEED={seed} to reproduce'
         logging.warning(error_message)
 
     random.setstate(old_state)
diff --git a/contrib/tvmop/opdef.py b/contrib/tvmop/opdef.py
index 7a7c27fe95..f725ba6f15 100644
--- a/contrib/tvmop/opdef.py
+++ b/contrib/tvmop/opdef.py
@@ -102,7 +102,7 @@ class OpDef:
                     yield sch, args, subname
 
     def get_op_name(self, name, args):
-        return name + ''.join(["%s_%d" % (arg.dtype, len(arg.shape)) for arg in args if hasattr(arg, 'shape')])
+        return name + ''.join([f"{arg.dtype}_{len(arg.shape)}" for arg in args if hasattr(arg, 'shape')])
 
     def get_config_spaces(self):
         for each_kwargs in self.arg_combination:
diff --git a/contrib/tvmop/space.py b/contrib/tvmop/space.py
index 589b931bc3..a1c05d4c50 100644
--- a/contrib/tvmop/space.py
+++ b/contrib/tvmop/space.py
@@ -33,7 +33,7 @@ class OtherOptionSpace(object):
         return len(self.entities)
 
     def __repr__(self):
-        return "OtherOption(%s) len=%d" % (self.entities, len(self))
+        return f"OtherOption({self.entities}) len={len(self)}"
 
 
 class OtherOptionEntity(object):
@@ -97,9 +97,9 @@ class ConfigSpace(object):
         return self._length
 
     def __repr__(self):
-        res = "ConfigSpace (len=%d, space_map=\n" % len(self)
+        res = f"ConfigSpace (len={len(self)}, space_map=\n"
         for i, (name, space) in enumerate(self.space_map.items()):
-            res += "  %2d %s: %s\n" % (i, name, space)
+            res += f"  {i:2} {name}: {space}\n"
         return res + ")"
 
     def to_json_dict(self):
@@ -172,9 +172,9 @@ class ConfigSpaces(object):
         return len(self.spaces)
 
     def __repr__(self):
-        res = "ConfigSpaces (len=%d, config_space=\n" % len(self)
+        res = f"ConfigSpaces (len={len(self)}, config_space=\n"
         for i, (key, val) in enumerate(self.spaces.items()):
-            res += "  %2d %s:\n %s\n" % (i, key, val)
+            res += f"  {i:2} {key}:\n {val}\n"
         return res + ")"
 
     def to_json_dict(self):
diff --git a/cpp-package/example/inference/multi_threaded_inference/get_model.py b/cpp-package/example/inference/multi_threaded_inference/get_model.py
index 75a5d039c6..e6c2949cbd 100644
--- a/cpp-package/example/inference/multi_threaded_inference/get_model.py
+++ b/cpp-package/example/inference/multi_threaded_inference/get_model.py
@@ -78,7 +78,7 @@ def download(url, fname=None, dirname=None, overwrite=False, retries=5):
         # pylint: disable=W0703
         try:
             r = requests.get(url, stream=True)
-            assert r.status_code == 200, "failed to open %s" % url
+            assert r.status_code == 200, f"failed to open {url}"
             with open(fname, 'wb') as f:
                 for chunk in r.iter_content(chunk_size=1024):
                     if chunk: # filter out keep-alive new chunks
diff --git a/cpp-package/scripts/OpWrapperGenerator.py b/cpp-package/scripts/OpWrapperGenerator.py
index 9b08e6b487..e50af2242c 100644
--- a/cpp-package/scripts/OpWrapperGenerator.py
+++ b/cpp-package/scripts/OpWrapperGenerator.py
@@ -48,12 +48,12 @@ class EnumType:
             for i in range(0, len(self.enumValues)):
                 self.enumValues[i] = self.enumValues[i].strip().strip("'")
         else:
-            logging.warn("trying to parse none-enum type as enum: %s" % typeString)
+            logging.warn(f"trying to parse none-enum type as enum: {typeString}")
     def GetDefinitionString(self, indent = 0):
         indentStr = ' ' * indent
-        ret = indentStr + 'enum class %s {\n' % self.name
+        ret = indentStr + 'enum class {} {{\n'.format(self.name)
         for i in range(0, len(self.enumValues)):
-            ret = ret + indentStr + '  %s = %d' % (gen_enum_value(self.enumValues[i]), i)
+            ret = ret + indentStr + f'  {gen_enum_value(self.enumValues[i])} = {i}'
             if (i != len(self.enumValues) -1):
                 ret = ret + ","
             ret = ret + "\n"
@@ -63,16 +63,16 @@ class EnumType:
         return self.name + "::" + gen_enum_value(value)
     def GetEnumStringArray(self, indent = 0):
         indentStr = ' ' * indent
-        ret = indentStr + 'static const char *%sValues[] = {\n' % self.name
+        ret = indentStr + 'static const char *{}Values[] = {{\n'.format(self.name)
         for i in range(0, len(self.enumValues)):
-            ret = ret + indentStr + '  "%s"' % self.enumValues[i]
+            ret = ret + indentStr + f'  "{self.enumValues[i]}"'
             if (i != len(self.enumValues) -1):
                 ret = ret + ","
             ret = ret + "\n"
         ret = ret + indentStr + "};\n"
         return ret
     def GetConvertEnumVariableToString(self, variable=''):
-        return "%sValues[int(%s)]" % (self.name, variable)
+        return f"{self.name}Values[int({variable})]"
 
 
 class Arg:
@@ -120,7 +120,7 @@ class Arg:
             try:
                 self.type = self.typeDict[typeString.split(',')[0]]
             except:
-                print('argument "%s" of operator "%s" has unknown type "%s"' % (argName, opName, typeString))
+                print(f'argument "{argName}" of operator "{opName}" has unknown type "{typeString}"')
                 pass
         if typeString.find('default=') != -1:
             self.hasDefault = True
@@ -244,7 +244,7 @@ class Op:
         ret = ret + " * \\return new symbol\n"
         ret = ret + " */\n"
         # create function header
-        declFirstLine = indentStr + 'inline Symbol %s(' % self.name
+        declFirstLine = indentStr + f'inline Symbol {self.name}('
         ret = ret + declFirstLine
         argIndentStr = ' ' * len(declFirstLine)
         arg_start = 0 if use_name else 1
@@ -260,7 +260,7 @@ class Op:
             if arg.isEnum:
                 ret = ret + arg.enum.GetEnumStringArray(indent + 2)
         # now generate code
-        ret = ret + indentStr + '  return Operator(\"%s\")\n' % self.name
+        ret = ret + indentStr + f'  return Operator(\"{self.name}\")\n'
         for arg in self.args:   # set params
             if arg.type == 'Symbol' or \
                 arg.type == 'const std::string&' or \
@@ -270,7 +270,7 @@ class Op:
             if arg.isEnum:
                 v = arg.enum.GetConvertEnumVariableToString(v)
             ret = ret + indentStr + ' ' * 11 + \
-                '.SetParam(\"%s\", %s)\n' % (arg.name, v)
+                f'.SetParam(\"{arg.name}\", {v})\n'
         #ret = ret[:-1]  # get rid of the last \n
         symbols = ''
         inputAlreadySet = False
@@ -282,15 +282,15 @@ class Op:
             #    symbols = symbols + ', '
             #symbols = symbols + arg.name
             ret = ret + indentStr + ' ' * 11 + \
-                '.SetInput(\"%s\", %s)\n' % (arg.name, arg.name)
+                f'.SetInput(\"{arg.name}\", {arg.name})\n'
         for arg in self.args:   # set input arrays vector<Symbol>
             if arg.type != 'const std::vector<Symbol>&':
                 continue
             if (inputAlreadySet):
-                logging.error("op %s has both Symbol[] and Symbol inputs!" % self.name)
+                logging.error(f"op {self.name} has both Symbol[] and Symbol inputs!")
             inputAlreadySet = True
             symbols = arg.name
-            ret = ret + '(%s)\n' % symbols
+            ret = ret + f'({symbols})\n'
         ret = ret + indentStr + ' ' * 11
         if use_name:
             ret = ret + '.CreateSymbol(symbol_name);\n'
@@ -300,7 +300,7 @@ class Op:
         return ret
 
     def GetArgString(self, arg):
-        ret = '%s %s' % (arg.type, arg.name)
+        ret = f'{arg.type} {arg.name}'
         if arg.hasDefault:
             ret = ret + ' = ' + arg.defaultString
         return ret
@@ -412,12 +412,12 @@ if __name__ == "__main__":
                       "#include \"dmlc/optional.h\"\n"
                       "#include \"nnvm/tuple.h\"\n"
                       "\n"
-                      "namespace mxnet {\n"
-                      "namespace cpp {\n"
+                      "namespace mxnet {{\n"
+                      "namespace cpp {{\n"
                       "\n"
-                      "%s"
-                      "} //namespace cpp\n"
-                      "} //namespace mxnet\n"
+                      "{}"
+                      "}} //namespace cpp\n"
+                      "}} //namespace mxnet\n"
                       "#endif  // MXNET_CPP_OP_H_\n")
 
         # Generate a temporary file name
@@ -425,7 +425,7 @@ if __name__ == "__main__":
         temp_file_name = tf.name
         tf.close()
         with codecs.open(temp_file_name, 'w', 'utf-8') as f:
-            f.write(patternStr % ParseAllOps())
+            f.write(patternStr.format(ParseAllOps()))
     except Exception as e:
       if (os.path.exists(output_file)):
         os.remove(output_file)
diff --git a/cpp-package/scripts/lint.py b/cpp-package/scripts/lint.py
index f6e549878a..e0170cbf62 100644
--- a/cpp-package/scripts/lint.py
+++ b/cpp-package/scripts/lint.py
@@ -42,12 +42,11 @@ class LintHelper(object):
         if len(result_map) == 0:
             return 0
         npass = len([x for k, x in result_map.iteritems() if len(x) == 0])
-        strm.write('=====%d/%d %s files passed check=====\n' % (npass, len(result_map), ftype))
+        strm.write(f'====={npass}/{len(result_map)} {ftype} files passed check=====\n')
         for fname, emap in result_map.iteritems():
             if len(emap) == 0:
                 continue
-            strm.write('%s: %d Errors of %d Categories map=%s\n' % (
-                fname, sum(emap.values()), len(emap), str(emap)))
+            strm.write(f'{fname}: {sum(emap.values())} Errors of {len(emap)} Categories map={str(emap)}\n')
         return len(result_map) - npass
 
     def __init__(self):
@@ -114,7 +113,7 @@ class LintHelper(object):
         if nerr == 0:
             strm.write('All passed!\n')
         else:
-            strm.write('%d files failed lint\n' % nerr)
+            strm.write(f'{nerr} files failed lint\n')
         return nerr
 
 # singleton helper for lint check
diff --git a/docs/python_docs/python/scripts/conf.py b/docs/python_docs/python/scripts/conf.py
index 74fbcdd4df..190065d6ee 100644
--- a/docs/python_docs/python/scripts/conf.py
+++ b/docs/python_docs/python/scripts/conf.py
@@ -29,9 +29,9 @@ MOCK_MODULES = ['scipy', 'scipy.sparse', 'sklearn']
 needs_sphinx = '1.5.6'
 
 # General information about the project.
-project = u'Apache MXNet'
-author = u'%s developers' % project
-copyright = u'2015-2020, %s' % author
+project = 'Apache MXNet'
+author = f'{project} developers'
+copyright = f'2015-2020, {author}'
 github_doc_root = 'https://github.com/apache/incubator-mxnet/tree/master/docs/'
 doc_root = 'https://mxnet.apache.org/'
 
diff --git a/docs/python_docs/python/scripts/md2ipynb.py b/docs/python_docs/python/scripts/md2ipynb.py
index 8cc341b74e..3b2b57476b 100644
--- a/docs/python_docs/python/scripts/md2ipynb.py
+++ b/docs/python_docs/python/scripts/md2ipynb.py
@@ -47,12 +47,12 @@ def md2ipynb():
         if not any([i in input_fn for i in ignore_execution]):
             tic = time.time()
             notedown.run(notebook, timeout)
-            print('%s: Evaluated %s in %f sec'%(src_fn, input_fn, time.time()-tic))
+            print(f'{src_fn}: Evaluated {input_fn} in {time.time()-tic} sec')
     # need to add language info to for syntax highlight
     notebook['metadata'].update({'language_info':{'name':'python'}})
     with open(output_fn, 'w', encoding='utf-8') as f:
         f.write(nbformat.writes(notebook))
-    print('%s: Write results into %s'%(src_fn, output_fn))
+    print(f'{src_fn}: Write results into {output_fn}')
 
 if __name__ == '__main__':
     md2ipynb()
diff --git a/docs/python_docs/python/scripts/process_rst.py b/docs/python_docs/python/scripts/process_rst.py
index 5daea3abf2..464737059e 100644
--- a/docs/python_docs/python/scripts/process_rst.py
+++ b/docs/python_docs/python/scripts/process_rst.py
@@ -81,6 +81,6 @@ if __name__ == '__main__':
         inputs = f.read()
     outputs, num = add_hidden_title(inputs)
     if num is not None:
-        print('%s: add %d hidden sections for %s' % (src_fn, num, input_fn))
+        print(f'{src_fn}: add {num} hidden sections for {input_fn}')
     with open(output_fn, 'w') as f:
         f.write(outputs)
diff --git a/example/distributed_training-horovod/gluon_mnist.py b/example/distributed_training-horovod/gluon_mnist.py
index 324b985206..3f3823ae73 100644
--- a/example/distributed_training-horovod/gluon_mnist.py
+++ b/example/distributed_training-horovod/gluon_mnist.py
@@ -54,7 +54,7 @@ logging.info(args)
 
 # Function to get mnist iterator given a rank
 def get_mnist_iterator(rank):
-    data_dir = "data-%d" % rank
+    data_dir = f"data-{rank}"
     if not os.path.isdir(data_dir):
         os.makedirs(data_dir)
     zip_file_path = download('http://data.mxnet.io/mxnet/data/mnist.zip',
@@ -66,8 +66,8 @@ def get_mnist_iterator(rank):
     batch_size = args.batch_size
 
     train_iter = mx.io.MNISTIter(
-        image="%s/train-images-idx3-ubyte" % data_dir,
-        label="%s/train-labels-idx1-ubyte" % data_dir,
+        image=f"{data_dir}/train-images-idx3-ubyte",
+        label=f"{data_dir}/train-labels-idx1-ubyte",
         input_shape=input_shape,
         batch_size=batch_size,
         shuffle=True,
@@ -77,8 +77,8 @@ def get_mnist_iterator(rank):
     )
 
     val_iter = mx.io.MNISTIter(
-        image="%s/t10k-images-idx3-ubyte" % data_dir,
-        label="%s/t10k-labels-idx1-ubyte" % data_dir,
+        image=f"{data_dir}/t10k-images-idx3-ubyte",
+        label=f"{data_dir}/t10k-labels-idx1-ubyte",
         input_shape=input_shape,
         batch_size=batch_size,
         flat=False,
@@ -168,8 +168,7 @@ for epoch in range(args.epochs):
 
         if nbatch % 100 == 0:
             name, acc = metric.get()
-            logging.info('[Epoch %d Batch %d] Training: %s=%f' %
-                         (epoch, nbatch, name, acc))
+            logging.info(f'[Epoch {epoch} Batch {nbatch}] Training: {name}={acc}')
 
     if hvd.rank() == 0:
         elapsed = time.time() - tic
@@ -185,5 +184,5 @@ for epoch in range(args.epochs):
                      train_acc, name, val_acc)
 
     if hvd.rank() == 0 and epoch == args.epochs - 1:
-        assert val_acc > 0.96, "Achieved accuracy (%f) is lower than expected\
-                                (0.96)" % val_acc
+        assert val_acc > 0.96, f"Achieved accuracy ({val_acc}) is lower than expected\
+                                (0.96)"
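
One caveat behind the "Remove f-strings from logging functions" history
entry, relevant to the logging.info conversions in this file: an f-string
handed to logging is interpolated eagerly at the call site, while %-style
arguments are stored on the record and interpolated only if the record is
actually emitted. A sketch of the difference (assumed names):

    import logging
    value = 42
    logging.debug(f"value={value!r}")   # string built even if DEBUG is off
    logging.debug("value=%r", value)    # interpolation deferred until emission
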
diff --git a/example/distributed_training-horovod/resnet50_imagenet.py b/example/distributed_training-horovod/resnet50_imagenet.py
index cdf17a8912..427568132c 100644
--- a/example/distributed_training-horovod/resnet50_imagenet.py
+++ b/example/distributed_training-horovod/resnet50_imagenet.py
@@ -362,7 +362,7 @@ def train_gluon():
 
         # Save model
         if args.save_frequency and (epoch + 1) % args.save_frequency == 0:
-            net.export('%s-%d' % (args.model, rank), epoch=epoch)
+            net.export(f'{args.model}-{rank}', epoch=epoch)
 
     # Evaluate performance at the end of training
     evaluate(epoch)
diff --git a/example/distributed_training/cifar10_dist.py b/example/distributed_training/cifar10_dist.py
index c89619d595..8e561b9559 100644
--- a/example/distributed_training/cifar10_dist.py
+++ b/example/distributed_training/cifar10_dist.py
@@ -204,5 +204,5 @@ for epoch in range(epochs):
 
     # Print test accuracy after every epoch
     test_accuracy = evaluate_accuracy(test_data, net)
-    print("Epoch %d: Test_acc %f" % (epoch, test_accuracy))
+    print(f"Epoch {epoch}: Test_acc {test_accuracy}")
     sys.stdout.flush()
diff --git a/example/distributed_training/cifar10_kvstore_hvd.py b/example/distributed_training/cifar10_kvstore_hvd.py
index d605078427..cc75d32b60 100644
--- a/example/distributed_training/cifar10_kvstore_hvd.py
+++ b/example/distributed_training/cifar10_kvstore_hvd.py
@@ -234,4 +234,4 @@ for epoch in range(args.epochs):
     # Print test accuracy after every epoch
     test_accuracy = evaluate(test_data, net, ctx)
     if store.rank == 0:
-        logging.info("Epoch %d: Test_acc %f" % (epoch, test_accuracy))
\ No newline at end of file
+        logging.info(f"Epoch {epoch}: Test_acc {test_accuracy}")
\ No newline at end of file
diff --git a/example/extensions/lib_pass/test_pass.py b/example/extensions/lib_pass/test_pass.py
index ab89f9566e..67572806ca 100644
--- a/example/extensions/lib_pass/test_pass.py
+++ b/example/extensions/lib_pass/test_pass.py
@@ -60,7 +60,7 @@ def test_model(pass_name):
 
     # Gluon optimize_for
     print('-------------------------------')
-    print('Testing pass "%s" Gluon Hybridize with shapes/types without inference' % pass_name)
+    print(f'Testing pass "{pass_name}" Gluon Hybridize with shapes/types without inference')
     inputs = [a,b]
     sym_block2 = nn.SymbolBlock(sym, inputs)
     sym_block2.initialize()
diff --git a/example/extensions/lib_subgraph/test_subgraph.py b/example/extensions/lib_subgraph/test_subgraph.py
index bfd9005115..1fc5fcbf59 100644
--- a/example/extensions/lib_subgraph/test_subgraph.py
+++ b/example/extensions/lib_subgraph/test_subgraph.py
@@ -64,7 +64,7 @@ def test(backend):
 
     # Gluon Hybridize partitioning with shapes/types without inference
     print('-------------------------------')
-    print('Testing %s Gluon Hybridize partitioning with shapes/types without inference' % backend)
+    print(f'Testing {backend} Gluon Hybridize partitioning with shapes/types without inference')
     inputs = [a,b]
     sym_block2 = nn.SymbolBlock(sym, inputs)
     sym_block2.initialize()
@@ -73,7 +73,7 @@ def test(backend):
 
     # Test with additional input to subgraph op
     print('-------------------------------')
-    print('Testing %s Gluon Hybridize partitioning with extra input' % backend)
+    print(f'Testing {backend} Gluon Hybridize partitioning with extra input')
     sym_block2.optimize_for(mx.nd.ones((3,2)), mx.nd.ones((3,2)), backend="addInputPass")
     out3 = sym_block2(mx.nd.ones((3,2)),mx.nd.ones((3,2)))
     print(out3)
@@ -94,7 +94,7 @@ def test(backend):
 
     # Gluon optimize_for partitioning with shapes/types
     print('-------------------------------')
-    print('Testing %s Gluon optimize_for partitioning with shapes/types' % backend)
+    print(f'Testing {backend} Gluon optimize_for partitioning with shapes/types')
     inputs = [a]
     sym2_block = nn.SymbolBlock(sym2, inputs)
     sym2_block.initialize()
diff --git a/example/gluon/house_prices/kaggle_k_fold_cross_validation.py b/example/gluon/house_prices/kaggle_k_fold_cross_validation.py
index 52ddf0e280..ead32afd11 100644
--- a/example/gluon/house_prices/kaggle_k_fold_cross_validation.py
+++ b/example/gluon/house_prices/kaggle_k_fold_cross_validation.py
@@ -97,7 +97,7 @@ def train(net, X_train, y_train, epochs, verbose_epoch, learning_rate,
             trainer.step(batch_size)
             avg_loss = get_rmse_log(net, X_train, y_train)
         if epoch > verbose_epoch:
-            print("Epoch %d, train loss: %f" % (epoch, avg_loss))
+            print(f"Epoch {epoch}, train loss: {avg_loss}")
     return avg_loss
 
 def k_fold_cross_valid(k, epochs, verbose_epoch, X_train, y_train,
@@ -129,7 +129,7 @@ def k_fold_cross_valid(k, epochs, verbose_epoch, X_train, y_train,
                            learning_rate, weight_decay, batch_size)
         train_loss_sum += train_loss
         test_loss = get_rmse_log(net, X_val_test, y_val_test)
-        print("Test loss: %f" % test_loss)
+        print(f"Test loss: {test_loss}")
         test_loss_sum += test_loss
     return train_loss_sum / k, test_loss_sum / k
 
@@ -145,8 +145,7 @@ batch_size = 100
 train_loss, test_loss = \
     k_fold_cross_valid(k, epochs, verbose_epoch, X_train, y_train,
                        learning_rate, weight_decay, batch_size)
-print("%d-fold validation: Avg train loss: %f, Avg test loss: %f" %
-      (k, train_loss, test_loss))
+print(f"{k}-fold validation: Avg train loss: {train_loss}, Avg test loss: {test_loss}")
 
 def learn(epochs, verbose_epoch, X_train, y_train, test, learning_rate,
           weight_decay, batch_size):
diff --git a/example/gluon/image_classification.py b/example/gluon/image_classification.py
index c7bfbf06e5..d45678b570 100644
--- a/example/gluon/image_classification.py
+++ b/example/gluon/image_classification.py
@@ -179,14 +179,14 @@ def update_learning_rate(lr, trainer, epoch, ratio, steps):
 
 def save_checkpoint(epoch, top1, best_acc):
     if opt.save_frequency and (epoch + 1) % opt.save_frequency == 0:
-        fname = os.path.join(opt.prefix, '%s_%d_acc_%.4f.params' % (opt.model, epoch, top1))
+        fname = os.path.join(opt.prefix, f'{opt.model}_{epoch}_acc_{top1:.4f}.params')
         net.save_parameters(fname)
-        logger.info('[Epoch %d] Saving checkpoint to %s with Accuracy: %.4f', epoch, fname, top1)
+        logger.info(f'[Epoch {epoch}] Saving checkpoint to {fname} with Accuracy: {top1:.4f}')
     if top1 > best_acc[0]:
         best_acc[0] = top1
-        fname = os.path.join(opt.prefix, '%s_best.params' % (opt.model))
+        fname = os.path.join(opt.prefix, f'{opt.model}_best.params')
         net.save_parameters(fname)
-        logger.info('[Epoch %d] Saving checkpoint to %s with Accuracy: %.4f', epoch, fname, top1)
+        logger.info(f'[Epoch {epoch}] Saving checkpoint to {fname} with Accuracy: {top1:.4f}')
 
 def train(opt, device):
     if isinstance(device, mx.Device):
@@ -267,10 +267,10 @@ def main():
 if __name__ == '__main__':
     if opt.profile:
         import hotshot, hotshot.stats
-        prof = hotshot.Profile('image-classifier-%s-%s.prof'%(opt.model, opt.mode))
+        prof = hotshot.Profile(f'image-classifier-{opt.model}-{opt.mode}.prof')
         prof.runcall(main)
         prof.close()
-        stats = hotshot.stats.load('image-classifier-%s-%s.prof'%(opt.model, opt.mode))
+        stats = hotshot.stats.load(f'image-classifier-{opt.model}-{opt.mode}.prof')
         stats.strip_dirs()
         stats.sort_stats('cumtime', 'calls')
         stats.print_stats()
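
The logger.info('...%s...', arg) form that some of these call sites used defers interpolation until the record is actually emitted, so a filtered-out message costs almost nothing; an f-string is built eagerly, before logger.info is even called. A minimal sketch of both styles, with illustrative values:

    import logging

    logger = logging.getLogger(__name__)
    epoch, fname, top1 = 3, 'model_best.params', 0.7612  # illustrative values

    # lazy: interpolated only if INFO is enabled for this logger
    logger.info('[Epoch %d] Saving checkpoint to %s with Accuracy: %.4f', epoch, fname, top1)
    # eager: the string is fully built regardless of the log level
    logger.info(f'[Epoch {epoch}] Saving checkpoint to {fname} with Accuracy: {top1:.4f}')
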
diff --git a/example/gluon/mnist/mnist.py b/example/gluon/mnist/mnist.py
index a660cd57f2..86a5d88cf7 100644
--- a/example/gluon/mnist/mnist.py
+++ b/example/gluon/mnist/mnist.py
@@ -108,13 +108,13 @@ def train(epochs, ctx):
 
             if i % opt.log_interval == 0 and i > 0:
                 name, acc = metric.get()
-                print('[Epoch %d Batch %d] Training: %s=%f'%(epoch, i, name, acc))
+                print(f'[Epoch {epoch} Batch {i}] Training: {name}={acc}')
 
         name, acc = metric.get()
-        print('[Epoch %d] Training: %s=%f'%(epoch, name, acc))
+        print(f'[Epoch {epoch}] Training: {name}={acc}')
 
         name, val_acc = test(ctx)
-        print('[Epoch %d] Validation: %s=%f'%(epoch, name, val_acc))
+        print(f'[Epoch {epoch}] Validation: {name}={val_acc}')
 
     net.save_parameters('mnist.params')
 
diff --git a/example/gluon/super_resolution/super_resolution.py b/example/gluon/super_resolution/super_resolution.py
index f9789c1c9d..3986036073 100644
--- a/example/gluon/super_resolution/super_resolution.py
+++ b/example/gluon/super_resolution/super_resolution.py
@@ -171,7 +171,7 @@ def test(device):
         avg_psnr += 10 * math.log10(1/metric.get()[1])
         metric.reset()
     avg_psnr /= batches
-    print('validation avg psnr: %f' % avg_psnr)
+    print(f'validation avg psnr: {avg_psnr}')
 
 
 def train(epoch, device):
@@ -200,7 +200,7 @@ def train(epoch, device):
 
         name, acc = metric.get()
         metric.reset()
-        print('training mse at epoch %d: %s=%f'%(i, name, acc))
+        print(f'training mse at epoch {i}: {name}={acc}')
         test(device)
 
     net.save_parameters(path.join(this_dir, 'superres.params'))
diff --git a/example/profiler/profiler_ndarray.py b/example/profiler/profiler_ndarray.py
index e34b536d5d..51625f4aa0 100644
--- a/example/profiler/profiler_ndarray.py
+++ b/example/profiler/profiler_ndarray.py
@@ -203,7 +203,7 @@ def test_ndarray_saveload():
         assert len(data) == len(data2)
         for x, y in zip(data, data2):
             assert np.sum(x.asnumpy() != y.asnumpy()) == 0
-        dmap = {'ndarray xx %s' % i : x for i, x in enumerate(data)}
+        dmap = {f'ndarray xx {i}': x for i, x in enumerate(data)}
         mx.nd.save(fname, dmap)
         dmap2 = mx.nd.load(fname)
         assert len(dmap2) == len(dmap)
@@ -279,8 +279,8 @@ def test_reduce():
             if type(ndarray_ret) is mx.ndarray.NDArray:
                 ndarray_ret = ndarray_ret.asnumpy()
             assert (ndarray_ret.shape == numpy_ret.shape) or \
-                   (ndarray_ret.shape == (1,) and numpy_ret.shape == ()), "nd:%s, numpy:%s" \
-                                                         %(ndarray_ret.shape, numpy_ret.shape)
+                   (ndarray_ret.shape == (1,) and numpy_ret.shape == ()), \
+                   f"nd:{ndarray_ret.shape}, numpy:{numpy_ret.shape}"
             err = np.square(ndarray_ret - numpy_ret).mean()
             assert err < 1E-4
     test_reduce_inner(lambda data, axis, keepdims:_np_reduce(data, axis, keepdims, np.sum),
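
As in the assertion rewritten above, an f-string used as an assert message is still evaluated lazily: `assert cond, msg` only touches msg when cond is false. A minimal sketch:

    def describe():
        print('building message')   # never runs in the passing case
        return 'details'

    x = 1
    assert x == 1, f"failed: {describe()}"   # passes; describe() is not called
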
diff --git a/example/quantization/imagenet_gen_qsym_onednn.py b/example/quantization/imagenet_gen_qsym_onednn.py
index 65454a3111..d045718a38 100644
--- a/example/quantization/imagenet_gen_qsym_onednn.py
+++ b/example/quantization/imagenet_gen_qsym_onednn.py
@@ -36,7 +36,7 @@ from tools.rec2idx import IndexCreator
 
 def download_calib_dataset(dataset_url, calib_dataset, logger=None):
     if logger is not None:
-        logger.info('Downloading calibration dataset from %s to %s' % (dataset_url, calib_dataset))
+        logger.info(f'Downloading calibration dataset from {dataset_url} to {calib_dataset}')
     mx.test_utils.download(dataset_url, calib_dataset)
 
 
@@ -44,7 +44,7 @@ def get_from_gluon(model_name, classes=1000, logger=None):
     dir_path = os.path.dirname(os.path.realpath(__file__))
     model_path = os.path.join(dir_path, 'model')
     if logger is not None:
-        logger.info('Converting model from Gluon-CV ModelZoo %s... into path %s' % (model_name, model_path))
+        logger.info(f'Converting model from Gluon-CV ModelZoo {model_name}... into path {model_path}')
     net = get_model(name=model_name, classes=classes, pretrained=True)
     prefix = os.path.join(model_path, model_name)
     return net, prefix
@@ -94,7 +94,7 @@ def get_exclude_symbols(model_name, exclude_first_conv):
         }
         excluded_first_conv_sym_names = regex_find_excluded_symbols(first_conv_regex, model_name)
         if excluded_first_conv_sym_names is None:
-            raise ValueError('Currently, model %s is not supported in this script' % model_name)
+            raise ValueError(f'Currently, model {model_name} is not supported in this script')
         excluded_sym_names += excluded_first_conv_sym_names
     return excluded_sym_names
 
@@ -152,8 +152,8 @@ if __name__ == '__main__':
 
     if logger:
         logger.info(args)
-        logger.info('shuffle_dataset=%s' % args.shuffle_dataset)
-        logger.info('calibration mode set to %s' % args.calib_mode)
+        logger.info(f'shuffle_dataset={args.shuffle_dataset}')
+        logger.info(f'calibration mode set to {args.calib_mode}')
 
     calib_mode = args.calib_mode
 
@@ -197,7 +197,7 @@ if __name__ == '__main__':
     # get batch size
     batch_size = args.batch_size
     if logger:
-        logger.info('batch size = %d for calibration' % batch_size)
+        logger.info(f'batch size = {batch_size} for calibration')
 
     # get number of batches for calibration
     num_calib_batches = args.num_calib_batches
@@ -205,7 +205,7 @@ if __name__ == '__main__':
         if calib_mode == 'none':
             logger.info('skip calibration step as calib_mode is none')
         else:
-            logger.info('number of batches = %d for calibration' % num_calib_batches)
+            logger.info(f'number of batches = {num_calib_batches} for calibration')
 
     # get number of threads for decoding the dataset
     data_nthreads = args.data_nthreads
@@ -234,10 +234,10 @@ if __name__ == '__main__':
             excluded_sym_names += []
 
     if logger:
-        logger.info('These layers have been excluded %s' % excluded_sym_names)
-        logger.info('Input data shape = %s' % str(data_shape))
-        logger.info('rgb_mean = %s' % rgb_mean)
-        logger.info('rgb_std = %s' % rgb_std)
+        logger.info(f'These layers have been excluded {excluded_sym_names}')
+        logger.info(f'Input data shape = {str(data_shape)}')
+        logger.info(f'rgb_mean = {rgb_mean}')
+        logger.info(f'rgb_std = {rgb_std}')
 
     rgb_mean = [float(i) for i in rgb_mean.split(',')]
     mean_args = {'mean_r': rgb_mean[0], 'mean_g': rgb_mean[1], 'mean_b': rgb_mean[2]}
@@ -245,7 +245,7 @@ if __name__ == '__main__':
     std_args = {'std_r': rgb_std[0], 'std_g': rgb_std[1], 'std_b': rgb_std[2]}
     if calib_mode == 'none':
         if logger:
-            logger.info('Quantizing FP32 model %s' % args.model)
+            logger.info(f'Quantizing FP32 model {args.model}')
         qsym = quantize_net(net, ctx=ctx, exclude_layers_match=excluded_sym_names, data_shapes=data_shape,
                             calib_mode=calib_mode, quantized_dtype=args.quantized_dtype,
                             logger=logger)
@@ -263,12 +263,11 @@ if __name__ == '__main__':
                             calib_mode=calib_mode, calib_data=data_loader, num_calib_batches=num_calib_batches,
                             quantized_dtype=args.quantized_dtype, logger=logger)
         if calib_mode == 'entropy':
-            suffix = '-quantized-%dbatches-entropy' % num_calib_batches
+            suffix = f'-quantized-{num_calib_batches}batches-entropy'
         elif calib_mode == 'naive':
-            suffix = '-quantized-%dbatches-naive' % num_calib_batches
+            suffix = f'-quantized-{num_calib_batches}batches-naive'
         else:
-            raise ValueError('unknow calibration mode %s received, only supports `none`, `naive`, and `entropy`'
-                             % calib_mode)
+            raise ValueError(f'unknown calibration mode {calib_mode} received, only supports `none`, `naive`, and `entropy`')
     save_path = prefix + suffix
     model_path, params_path = qsym.export(save_path, epoch)
     if logger is not None:
diff --git a/example/quantization/imagenet_inference.py b/example/quantization/imagenet_inference.py
index 69f42a0b57..09372ed26e 100644
--- a/example/quantization/imagenet_inference.py
+++ b/example/quantization/imagenet_inference.py
@@ -28,7 +28,7 @@ from mxnet.gluon.data.vision import transforms
 
 def download_dataset(dataset_url, dataset_dir, logger=None):
     if logger is not None:
-        logger.info('Downloading dataset for inference from %s to %s' % (dataset_url, dataset_dir))
+        logger.info(f'Downloading dataset for inference from {dataset_url} to {dataset_dir}')
     mx.test_utils.download(dataset_url, dataset_dir)
 
 
@@ -37,7 +37,7 @@ def score(symblock, data, ctx, max_num_examples, skip_num_batches, logger=None):
                gluon.metric.create('top_k_accuracy', top_k=5)]
 
     # make sure that fp32 inference works on the same images as calibrated quantized model
-    logger.info('Skipping the first %d batches' % skip_num_batches)
+    logger.info(f'Skipping the first {skip_num_batches} batches')
 
     tic = time.time()
     num = 0
@@ -56,8 +56,8 @@ def score(symblock, data, ctx, max_num_examples, skip_num_batches, logger=None):
     speed = num / (time.time() - tic)
 
     if logger is not None:
-        logger.info('Finished inference with %d images' % num)
-        logger.info('Finished with %f images per second', speed)
+        logger.info(f'Finished inference with {num} images')
+        logger.info(f'Finished with {speed:f} images per second')
         for m in metrics:
             logger.info(m.get())
 
@@ -132,32 +132,32 @@ if __name__ == '__main__':
         ctx = mx.gpu(0)
         logger.warning('Notice that oneDNN optimized and quantized model may not work with GPU context')
     else:
-        raise ValueError('ctx %s is not supported in this script' % args.device)
+        raise ValueError(f'ctx {args.device} is not supported in this script')
 
     symbol_file = args.symbol_file
     param_file = args.param_file
     data_nthreads = args.data_nthreads
 
     batch_size = args.batch_size
-    logger.info('batch size = %d for inference' % batch_size)
+    logger.info(f'batch size = {batch_size} for inference')
 
     rgb_mean = args.rgb_mean
-    logger.info('rgb_mean = %s' % rgb_mean)
+    logger.info(f'rgb_mean = {rgb_mean}')
     rgb_mean = [float(i) for i in rgb_mean.split(',')]
     rgb_std = args.rgb_std
-    logger.info('rgb_std = %s' % rgb_std)
+    logger.info(f'rgb_std = {rgb_std}')
     rgb_std = [float(i) for i in rgb_std.split(',')]
 
     image_shape = args.image_shape
     data_shape = tuple([int(i) for i in image_shape.split(',')])
-    logger.info('Input data shape = %s' % str(data_shape))
+    logger.info(f'Input data shape = {str(data_shape)}')
 
     data_layer_type = args.data_layer_type
 
     if not args.benchmark:
         dataset = args.dataset
         download_dataset('http://data.mxnet.io/data/val_256_q90.rec', dataset)
-        logger.info('Dataset for inference: %s' % dataset)
+        logger.info(f'Dataset for inference: {dataset}')
 
         dataset = mx.gluon.data.vision.ImageRecordDataset(dataset)
         transformer = transforms.Compose([transforms.Resize(256),
@@ -171,7 +171,7 @@ if __name__ == '__main__':
         symblock = gluon.SymbolBlock.imports(symbol_file, ['data'], param_file)
 
         num_inference_images = args.num_inference_batches * batch_size
-        logger.info('Running model %s for inference' % symbol_file)
+        logger.info(f'Running model {symbol_file} for inference')
         score(symblock, data_loader, ctx, max_num_examples=num_inference_images,
               skip_num_batches=args.num_skipped_batches, logger=logger)
     else:
diff --git a/example/recommenders/movielens_data.py b/example/recommenders/movielens_data.py
index c6fe8912d9..8a725a718c 100644
--- a/example/recommenders/movielens_data.py
+++ b/example/recommenders/movielens_data.py
@@ -43,22 +43,22 @@ def load_mldataset(filename):
     return gluon.data.ArrayDataset(user, item, score)
 
 def ensure_local_data(prefix):
-    if not os.path.exists("%s.zip" % prefix):
-        print("Downloading MovieLens data: %s" % prefix)
+    if not os.path.exists(f"{prefix}.zip"):
+        print(f"Downloading MovieLens data: {prefix}")
         # MovieLens 100k dataset from https://grouplens.org/datasets/movielens/
         # This dataset is copy right to GroupLens Research Group at the University of Minnesota,
         # and licensed under their usage license.
         # For full text of the usage license, see http://files.grouplens.org/datasets/movielens/ml-100k-README.txt
-        os.system("wget http://files.grouplens.org/datasets/movielens/%s.zip" % prefix)
-        os.system("unzip %s.zip" % prefix)
+        os.system(f"wget http://files.grouplens.org/datasets/movielens/{prefix}.zip")
+        os.system(f"unzip {prefix}.zip")
 
 
 def get_dataset(prefix='ml-100k'):
     """Returns a pair of NDArrayDataIter, one for train, one for test.
     """
     ensure_local_data(prefix)
-    return (load_mldataset('./%s/u1.base' % prefix),
-            load_mldataset('./%s/u1.test' % prefix))
+    return (load_mldataset(f'./{prefix}/u1.base'),
+            load_mldataset(f'./{prefix}/u1.test'))
 
 def max_id(fname):
     mu = 0
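
An aside on the download helper above: interpolating a value into an os.system command string leaves it subject to shell parsing. A safer equivalent, sketched with the standard-library subprocess module and the same URL layout as the code above:

    import subprocess

    prefix = 'ml-100k'
    # list form: arguments are passed to the program without shell interpretation
    subprocess.run(['wget', f'http://files.grouplens.org/datasets/movielens/{prefix}.zip'], check=True)
    subprocess.run(['unzip', f'{prefix}.zip'], check=True)
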
diff --git a/python/mxnet/_ffi/_ctypes/function.py b/python/mxnet/_ffi/_ctypes/function.py
index 229b98727a..b154d1b411 100644
--- a/python/mxnet/_ffi/_ctypes/function.py
+++ b/python/mxnet/_ffi/_ctypes/function.py
@@ -51,7 +51,7 @@ def _get_global_func(name, allow_missing=False):
     if allow_missing:
         return None
 
-    raise ValueError("Cannot find global function %s" % name)
+    raise ValueError(f"Cannot find global function {name}")
 
 def _make_mxnet_args(args, temp_args):
     """Pack arguments into c args mxnet call accept"""
@@ -98,7 +98,7 @@ def _make_mxnet_args(args, temp_args):
             values[i].v_str = c_str(onp.dtype(arg).name)
             type_codes[i] = TypeCode.STR
         else:
-            raise TypeError("Don't know how to handle type %s" % type(arg))
+            raise TypeError(f"Don't know how to handle type {type(arg)}")
     return values, type_codes, num_args
 
 
diff --git a/python/mxnet/_ffi/function.py b/python/mxnet/_ffi/function.py
index cd42d0be56..b1268b7ee9 100644
--- a/python/mxnet/_ffi/function.py
+++ b/python/mxnet/_ffi/function.py
@@ -151,7 +151,7 @@ def _init_api_prefix(module_name, prefix):
         f = get_global_func(name)
         ff = _get_api(f)
         ff.__name__ = fname
-        ff.__doc__ = ("MXNet PackedFunc %s. " % fname)
+        ff.__doc__ = f"MXNet PackedFunc {fname}. "
         setattr(target_module, ff.__name__, ff)
 
 _set_class_packed_func(Function)
diff --git a/python/mxnet/_ffi/node_generic.py b/python/mxnet/_ffi/node_generic.py
index 8f1dcdc6b9..967b0b35a7 100644
--- a/python/mxnet/_ffi/node_generic.py
+++ b/python/mxnet/_ffi/node_generic.py
@@ -73,7 +73,7 @@ def convert_to_node(value):
             vlist.append(item[0])
             vlist.append(convert_to_node(item[1]))
         return _api_internal._Map(*vlist)
-    raise ValueError("don't know how to convert type %s to node" % type(value))
+    raise ValueError(f"don't know how to convert type {type(value)} to node")
 
 
 def const(value, dtype=None):
diff --git a/python/mxnet/amp/amp.py b/python/mxnet/amp/amp.py
index b73e48e846..1709e4ca0f 100644
--- a/python/mxnet/amp/amp.py
+++ b/python/mxnet/amp/amp.py
@@ -402,7 +402,7 @@ def init_trainer(optimizer_or_trainer):
         raise TypeError("AMP is currently only compatible with Gluon Trainer")
     else:
         raise TypeError("optimizer_or_trainer should be a Gluon Trainer or "
-                        "an optimizer, instead is %s" % type(optimizer_or_trainer))
+                        f"an optimizer, instead is {type(optimizer_or_trainer)}")
 
 def unscale(optimizer_or_trainer):
     """Check and unscale the gradients manually. This function should only be used
@@ -425,7 +425,7 @@ def unscale(optimizer_or_trainer):
         raise TypeError("AMP is currently only compatible with Gluon Trainer")
     else:
         raise TypeError("optimizer_or_trainer should be a Gluon Trainer or "
-                        "an optimizer, instead is %s" % type(optimizer_or_trainer))
+                        f"an optimizer, instead is {type(optimizer_or_trainer)}")
 
 
 def convert_symbol(sym, input_dtypes, param_dtypes, target_dtype, target_dtype_ops=None,
@@ -535,13 +535,13 @@ def convert_symbol(sym, input_dtypes, param_dtypes, target_dtype, target_dtype_o
                             list_lp16_fp32_ops(target_dtype) + original_cond_ops)
 
     illegal_ops = combined_ops - all_lp16_fp32_ops
-    assert len(illegal_ops) == 0, '''Can only choose ops from one of the four lists
+    assert len(illegal_ops) == 0, f'''Can only choose ops from one of the four lists
                             for lp16_ops and fp32_ops
                             1. amp.list_lp16_ops(target_dtype)
                             2. amp.list_fp32_ops(target_dtype)
                             3. amp.list_lp16_fp32_ops(target_dtype)
                             4. amp.list_conditional_fp32_ops(target_dtype)
-                            Op %s not in any of them''' % (illegal_ops)
+                            Op {illegal_ops} not in any of them'''
 
     widest_dtype_ops = list_widest_type_cast(target_dtype)
 
diff --git a/python/mxnet/autograd.py b/python/mxnet/autograd.py
index aac7cbc21a..f8f0fc4c05 100644
--- a/python/mxnet/autograd.py
+++ b/python/mxnet/autograd.py
@@ -360,7 +360,7 @@ def get_symbol(x):
         The retrieved Symbol.
     """
     assert isinstance(x, NDArray), \
-       "get_symbol: Invalid argument type, expecting %s, got %s"%(NDArray, type(x))
+       f"get_symbol: Invalid argument type, expecting {NDArray}, got {type(x)}"
     hdl = SymbolHandle()
     check_call(_LIB.MXAutogradGetSymbol(x.handle, ctypes.byref(hdl)))
     return Symbol(hdl)
@@ -468,12 +468,12 @@ class Function(object):
                 if isinstance(rets, array_cls):
                     rets = (rets,)
                 assert len(rets) == len(input_grads), \
-                    "%s.backward must return exactly the same number " \
+                    f"{self.__class__.__name__}.backward must return exactly the same number " \
                     "of NDArrays as the number of NDArrays arguments to forward." \
-                    "Expecting %d got %d"%(self.__class__.name, len(input_grads), len(rets))
+                    f" Expecting {len(input_grads)} got {len(rets)}"
                 for igrad, ret, req in zip(input_grads, rets, reqs):
                     assert isinstance(ret, array_cls), \
-                        "autograd.Function.backward must return NDArrays, not %s"%type(ret)
+                        f"autograd.Function.backward must return NDArrays, not {type(ret)}"
                     if req == 0:  # null
                         return True
                     elif req in (1, 2):  # write or inplace
@@ -481,7 +481,7 @@ class Function(object):
                     elif req == 'add':
                         igrad[:] += ret
             except Exception:  # pylint: disable=broad-except
-                print('Error in Function.backward: %s' % traceback.format_exc())
+                print(f'Error in Function.backward: {traceback.format_exc()}')
                 return False
             return True
 
@@ -490,7 +490,7 @@ class Function(object):
             try:
                 del Function._registry.ref_holder[key]
             except Exception:  # pylint: disable=broad-except
-                print('Error in autograd.Function.delete: %s' % traceback.format_exc())
+                print(f'Error in autograd.Function.delete: {traceback.format_exc()}')
                 return False
             return True
 
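For reference on the Function.backward message above: a class exposes its name as __name__, not name, so messages that need the class name must use self.__class__.__name__. A minimal sketch:

    class Function:
        pass

    f = Function()
    print(f.__class__.__name__)   # Function
    # f.__class__.name            # AttributeError: no such attribute
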
diff --git a/python/mxnet/base.py b/python/mxnet/base.py
index d1dbc0d64c..992b521833 100644
--- a/python/mxnet/base.py
+++ b/python/mxnet/base.py
@@ -541,14 +541,14 @@ def build_param_doc(arg_names, arg_types, arg_descs, remove_dup=True):
         if key == 'num_args':
             continue
         param_keys.add(key)
-        ret = '%s : %s' % (key, type_info)
+        ret = f'{key} : {type_info}'
         if len(desc) != 0:
             ret += '\n    ' + desc
         param_str.append(ret)
     doc_str = ('Parameters\n' +
                '----------\n' +
-               '%s\n')
-    doc_str = doc_str % ('\n'.join(param_str))
+               '{}\n')
+    doc_str = doc_str.format('\n'.join(param_str))
     return doc_str
 
 
@@ -581,7 +581,7 @@ def add_fileline_to_docstring(module, incursive=True):
             line = inspect.getsourcelines(obj)[-1]
         except IOError:
             return
-        obj.__doc__ += '\n\nFrom:%s:%d' % (fname, line)
+        obj.__doc__ += f'\n\nFrom:{fname}:{line}'
 
     if isinstance(module, str):
         module = sys.modules[module]
@@ -656,17 +656,17 @@ def _init_op_module(root_namespace, module_name, make_op_func):
         if not _is_np_op(op_name):
             op_names.append(op_name)
 
-    module_op = sys.modules["%s.%s.op" % (root_namespace, module_name)]
-    module_internal = sys.modules["%s.%s._internal" % (root_namespace, module_name)]
+    module_op = sys.modules[f"{root_namespace}.{module_name}.op"]
+    module_internal = sys.modules[f"{root_namespace}.{module_name}._internal"]
     # contrib module in the old format (deprecated)
     # kept here for backward compatibility
     # use mx.nd.contrib or mx.sym.contrib from now on
-    contrib_module_name_old = "%s.contrib.%s" % (root_namespace, module_name)
+    contrib_module_name_old = f"{root_namespace}.contrib.{module_name}"
     contrib_module_old = sys.modules[contrib_module_name_old]
     submodule_dict = {}
     for op_name_prefix in _OP_NAME_PREFIX_LIST:
         submodule_dict[op_name_prefix] =\
-            sys.modules["%s.%s.%s" % (root_namespace, module_name, op_name_prefix[1:-1])]
+            sys.modules[f"{root_namespace}.{module_name}.{op_name_prefix[1:-1]}"]
     for name in op_names:
         hdl = OpHandle()
         check_call(_LIB.NNGetOpHandle(c_str(name), ctypes.byref(hdl)))
@@ -676,7 +676,7 @@ def _init_op_module(root_namespace, module_name, make_op_func):
             if op_name_prefix != '_random_' or name.endswith('_like'):
                 func_name = name[len(op_name_prefix):]
                 cur_module = submodule_dict[op_name_prefix]
-                module_name_local = "%s.%s.%s" % (root_namespace, module_name, op_name_prefix[1:-1])
+                module_name_local = f"{root_namespace}.{module_name}.{op_name_prefix[1:-1]}"
             else:
                 func_name = name
                 cur_module = module_internal
@@ -760,7 +760,7 @@ def _generate_op_module_signature(root_namespace, module_name, op_code_gen_func)
         """Write the proper __all__ based on available operators."""
         module_file.write(os.linesep)
         module_file.write(os.linesep)
-        all_str = '__all__ = [' + ', '.join(["'%s'"%s for s in module_all_list]) + ']'
+        all_str = '__all__ = [' + ', '.join([f"'{s}'" for s in module_all_list]) + ']'
         module_file.write(all_str)
 
     plist = ctypes.POINTER(ctypes.c_char_p)()
@@ -774,15 +774,14 @@ def _generate_op_module_signature(root_namespace, module_name, op_code_gen_func)
         if not _is_np_op(op_name):
             op_names.append(op_name)
 
-    module_op_file = get_module_file("%s.%s.op" % (root_namespace, module_name))
+    module_op_file = get_module_file(f"{root_namespace}.{module_name}.op")
     module_op_all = []
-    module_internal_file = get_module_file("%s.%s._internal"%(root_namespace, module_name))
+    module_internal_file = get_module_file(f"{root_namespace}.{module_name}._internal")
     module_internal_all = []
     submodule_dict = {}
     for op_name_prefix in _OP_NAME_PREFIX_LIST:
         submodule_dict[op_name_prefix] =\
-            (get_module_file("%s.%s.%s" % (root_namespace, module_name,
-                                           op_name_prefix[1:-1])), [])
+            (get_module_file(f"{root_namespace}.{module_name}.{op_name_prefix[1:-1]}"), [])
     for name in op_names:
         hdl = OpHandle()
         check_call(_LIB.NNGetOpHandle(c_str(name), ctypes.byref(hdl)))
@@ -914,26 +913,26 @@ def _init_np_op_module(root_module_name, np_module_name, mx_module_name, make_op
 
     if mx_module_name is None:
         # register np/npx ops for imperative programming
-        op_module_name = "%s.%s._op" % (root_module_name, np_module_name)  # e.g. mxnet.numpy._op
-        op_submodule_name = "%s.%s" % (root_module_name, np_module_name)  # e.g. mxnet.numpy.random
+        op_module_name = f"{root_module_name}.{np_module_name}._op" # e.g. mxnet.numpy._op
+        op_submodule_name = f"{root_module_name}.{np_module_name}" # e.g. mxnet.numpy.random
     elif mx_module_name in ('ndarray', 'symbol'):
         # register numpy internal ops and np/npx ops for use in Gluon
         # np internal ops are registered in mxnet.ndarray/symbol.numpy._internal
         # np ops are registered in mxnet.ndarray/symbol.numpy._op
         # npx ops are registered in mxnet.ndarray/symbol.numpy_extension._op
-        op_module_name = "%s.%s.%s" % (root_module_name, mx_module_name, np_module_name)
+        op_module_name = f"{root_module_name}.{mx_module_name}.{np_module_name}"
         if op_name_prefix != _NP_INTERNAL_OP_PREFIX:
             op_module_name += '._op'
         # e.g. mxnet.symbol.numpy.random
-        op_submodule_name = "%s.%s.%s" % (root_module_name, mx_module_name, np_module_name)
+        op_submodule_name = f"{root_module_name}.{mx_module_name}.{np_module_name}"
     else:
         raise ValueError('unsupported mxnet module {}'.format(mx_module_name))
-    op_submodule_name += '.%s'
+    op_submodule_name += '.{}'
 
     op_module = sys.modules[op_module_name]
     submodule_dict = {}
     for submodule_name in submodule_name_list:
-        submodule_dict[submodule_name] = sys.modules[op_submodule_name % submodule_name[1:-1]]
+        submodule_dict[submodule_name] = sys.modules[op_submodule_name.format(submodule_name[1:-1])]
     for name in op_names:
         hdl = OpHandle()
         check_call(_LIB.NNGetOpHandle(c_str(name), ctypes.byref(hdl)))
@@ -941,7 +940,7 @@ def _init_np_op_module(root_module_name, np_module_name, mx_module_name, make_op
         if len(submodule_name) > 0:
             func_name = name[(len(op_name_prefix) + len(submodule_name)):]
             cur_module = submodule_dict[submodule_name]
-            module_name_local = op_submodule_name % submodule_name[1:-1]
+            module_name_local = op_submodule_name.format(submodule_name[1:-1])
         else:
             func_name = name[len(op_name_prefix):]
             cur_module = op_module
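
One template in the hunks above deliberately stays deferred: an f-string is evaluated where it appears, so a module-name pattern that is filled in later, inside a loop, has to remain a plain '{}' template completed with str.format. A minimal sketch of the pattern:

    op_submodule_name = 'mxnet.ndarray.numpy' + '.{}'   # placeholder kept for later
    for submodule in ('random', 'linalg'):
        # substituted only here, once the submodule name is known
        print(op_submodule_name.format(submodule))
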
diff --git a/python/mxnet/contrib/quantization.py b/python/mxnet/contrib/quantization.py
index 64942353db..6bef0c5c17 100644
--- a/python/mxnet/contrib/quantization.py
+++ b/python/mxnet/contrib/quantization.py
@@ -226,7 +226,7 @@ class _LayerHistogramCollector(CalibrationCollector):
             return
         arr = arr.copyto(cpu()).asnumpy()
         if self.logger:
-            self.logger.debug("Collecting layer %s histogram of shape %s" % (name, arr.shape))
+            self.logger.debug(f"Collecting layer {name} histogram of shape {arr.shape}")
         min_range = np.min(arr)
         max_range = np.max(arr)
         th = max(abs(min_range), abs(max_range))
@@ -289,7 +289,7 @@ class _LayerHistogramCollector(CalibrationCollector):
         assert isinstance(hist_dict, dict)
         if logger is not None:
             logger.info('Calculating optimal thresholds for quantization using KL divergence'
-                        ' with num_quantized_bins=%d' % num_quantized_bins)
+                        f' with num_quantized_bins={num_quantized_bins}')
         th_dict = {}
         # copy hist_dict keys since the keys() only returns a view in python3
         layer_names = list(hist_dict.keys())
@@ -333,8 +333,7 @@ class _LayerOutputMinMaxCollector(CalibrationCollector):
         else:
             self.min_max_dict[name] = (min_range, max_range)
         if self.logger:
-            self.logger.debug("Collecting layer %s min_range=%f, max_range=%f"
-                              % (name, min_range, max_range))
+            self.logger.debug(f"Collecting layer {name} min_range={min_range}, max_range={max_range}")
 
 
 def _calibrate_quantized_sym(qsym, min_max_dict):
@@ -364,8 +363,7 @@ def _calibrate_quantized_sym(qsym, min_max_dict):
 
 def _collect_layer_statistics(sym_block, data, collector, num_inputs, num_calib_batches=None, logger=None):
     if not isinstance(data, mx.gluon.data.DataLoader):
-        raise ValueError('Only supports data as a type of DataLoader, while received type %s'
-                         % str(type(data)))
+        raise ValueError(f'Only supports data as a type of DataLoader, while received type {str(type(data))}')
     sym_block.register_op_hook(collector.collect, monitor_all=True)
     num_batches = 0
     for batch in data:
@@ -377,7 +375,7 @@ def _collect_layer_statistics(sym_block, data, collector, num_inputs, num_calib_
         if num_calib_batches is not None and num_batches >= num_calib_batches:
             break
     if logger is not None:
-        logger.info("Collected statistics from %d batches" % (num_batches))
+        logger.info(f"Collected statistics from {num_batches} batches")
     return num_batches
 
 
@@ -498,24 +496,24 @@ def quantize_model(sym, arg_params, aux_params, data_names=('data',),
     if not isinstance(excluded_sym_names, list):
         raise ValueError('excluded_sym_names must be a list of strings representing'
                          ' the names of the symbols that will not be quantized,'
-                         ' while received type %s' % str(type(excluded_sym_names)))
+                         f' while received type {str(type(excluded_sym_names))}')
 
     if excluded_op_names is None:
         excluded_op_names = []
     if not isinstance(excluded_op_names, list):
         raise ValueError('excluded_op_names must be a list of strings representing'
                          ' the names of the operators that will not be quantized,'
-                         ' while received type %s' % str(type(excluded_op_names)))
+                         f' while received type {str(type(excluded_op_names))}')
 
     if logger:
         os.environ['MXNET_QUANTIZATION_VERBOSE'] = '1'
         logger.info('Quantizing symbol')
     if quantized_dtype not in ('int8', 'uint8', 'auto'):
-        raise ValueError('unknown quantized_dtype %s received,'
-                         ' expected `int8`, `uint8` or `auto`' % quantized_dtype)
+        raise ValueError(f'unknown quantized_dtype {quantized_dtype} received,'
+                         ' expected `int8`, `uint8` or `auto`')
     if quantize_granularity not in ('tensor-wise', 'channel-wise'):
-        raise ValueError('unkonwn quantize_granularity %s received,'
-                         ' expected `tensor-wise` or `channel-wise`.' % quantize_granularity)
+        raise ValueError(f'unknown quantize_granularity {quantize_granularity} received,'
+                         ' expected `tensor-wise` or `channel-wise`.')
     qsym, calib_layers = _quantize_symbol(sym, device, excluded_symbols=excluded_sym_names,
                                           excluded_operators=excluded_op_names,
                                           offline_params=list(arg_params.keys()),
@@ -525,12 +523,12 @@ def quantize_model(sym, arg_params, aux_params, data_names=('data',),
     min_max_dict = {}
     if calib_mode is not None and calib_mode != 'none':
         if not isinstance(device, Device):
-            raise ValueError('currently only supports single device, while received %s' % str(device))
+            raise ValueError(f'currently only supports single device, while received {str(device)}')
         if calib_data is None:
-            raise ValueError('calib_data must be provided when calib_mode=%s' % calib_mode)
+            raise ValueError(f'calib_data must be provided when calib_mode={calib_mode}')
         if not isinstance(calib_data, mx.gluon.data.DataLoader):
-            raise ValueError('calib_data must be of DataLoader type when calib_mode=%s,'
-                             ' while received type %s' % (calib_mode, str(type(calib_data))))
+            raise ValueError(f'calib_data must be of DataLoader type when calib_mode={calib_mode},'
+                             f' while received type {str(type(calib_data))}')
 
         inputs = [mx.sym.var(dname) for dname in data_names]
         param_dict = arg_params
@@ -548,14 +546,13 @@ def quantize_model(sym, arg_params, aux_params, data_names=('data',),
                                                     logger=logger)
 
         else:
-            raise ValueError('unknown calibration mode %s received,'
-                             ' expected `none`, `naive`, or `entropy`' % calib_mode)
+            raise ValueError(f'unknown calibration mode {calib_mode} received,'
+                             ' expected `none`, `naive`, or `entropy`')
 
         num_batches = _collect_layer_statistics(sym_block, calib_data, collector,
                                                 len(inputs), num_calib_batches, logger)
         if logger:
-            logger.info('Collected layer output min/max values from FP32 model using %d batches'
-                        % num_batches)
+            logger.info(f'Collected layer output min/max values from FP32 model using {num_batches} batches')
             logger.info('Performing calibration post collecting operations')
 
         min_max_dict = collector.post_collect()
@@ -593,7 +590,7 @@ def quantize_model_onednn(sym, arg_params, aux_params, data_names=('data',),
         A tuple of quantized symbol, quantized arg_params, and aux_params.
     """
     if not isinstance(device, Device):
-        raise ValueError('currently only supports single device, while received %s' % str(device))
+        raise ValueError(f'currently only supports single device, while received {str(device)}')
     if device.device_type != 'cpu':
         raise ValueError(
             'quantize_model_onednn only support Intel cpu platform with oneDNN Backend')
@@ -675,18 +672,18 @@ def quantize_graph(sym, arg_params, aux_params, device=cpu(),
     if not isinstance(excluded_sym_names, list):
         raise ValueError('excluded_sym_names must be a list of strings representing'
                          ' the names of the symbols that will not be quantized,'
-                         ' while received type %s' % str(type(excluded_sym_names)))
+                         f' while received type {str(type(excluded_sym_names))}')
     if not isinstance(device, Device):
-        raise ValueError('currently only supports single device, while received %s' % str(device))
+        raise ValueError(f'currently only supports single device, while received {str(device)}')
     if logger:
         os.environ['MXNET_QUANTIZATION_VERBOSE'] = '1'
         logger.info('Quantizing graph')
     if quantized_dtype not in ('int8', 'uint8', 'auto'):
-        raise ValueError('unknown quantized_dtype %s received,'
-                         ' expected `int8`, `uint8` or `auto`' % quantized_dtype)
+        raise ValueError(f'unknown quantized_dtype {quantized_dtype} received,'
+                         ' expected `int8`, `uint8` or `auto`')
     if quantize_granularity not in ('tensor-wise', 'channel-wise'):
-        raise ValueError('unkonwn quantize_granularity %s received,'
-                         ' expected `tensor-wise` or `channel-wise`.' % quantize_granularity)
+        raise ValueError(f'unknown quantize_granularity {quantize_granularity} received,'
+                         ' expected `tensor-wise` or `channel-wise`.')
     qsym, calib_layers = _quantize_symbol(sym, device, excluded_symbols=excluded_sym_names,
                                           excluded_operators=excluded_op_names,
                                           offline_params=list(arg_params.keys()),
@@ -711,7 +708,7 @@ def quantize_graph(sym, arg_params, aux_params, device=cpu(),
         elif calib_mode == 'custom' and LayerOutputCollector is not None:
             if not isinstance(LayerOutputCollector, CalibrationCollector):
                 raise ValueError('LayerOutputCollecotr must be a subclass of a CalibrationCollector class,'
-                                 ' but it is %s' % LayerOutputCollector.__class__)
+                                 f' but it is {LayerOutputCollector.__class__}')
             collector = LayerOutputCollector
 
             # Inject layer names that need calibration to collector
@@ -725,8 +722,8 @@ def quantize_graph(sym, arg_params, aux_params, device=cpu(),
                 logger.info(
                     'Create a custom layer output minmax collector for calibration')
         else:
-            raise ValueError('unknown calibration mode %s received,'
-                             ' expected `none`, `naive`, `entropy` or `custom`' % calib_mode)
+            raise ValueError(f'unknown calibration mode {calib_mode} received,'
+                             ' expected `none`, `naive`, `entropy` or `custom`')
         if logger:
             logger.info('Collector created, please use set_monitor_callback'
                         ' to collect calibration information.')
@@ -783,8 +780,8 @@ def calib_graph(qsym, arg_params, aux_params, collector,
             min_max_dict = collector.post_collect()
 
         else:
-            raise ValueError('unknown calibration mode %s received,'
-                             ' expected `none`, `naive`, `entropy` or `custom`' % calib_mode)
+            raise ValueError(f'unknown calibration mode {calib_mode} received,'
+                             ' expected `none`, `naive`, `entropy` or `custom`')
         qsym = _calibrate_quantized_sym(qsym, min_max_dict)
     else:
         raise ValueError('Please set calibration mode to naive, entropy or custom (with custom CalibrationCollector)')
@@ -940,7 +937,7 @@ def quantize_net(network, quantized_dtype='auto', quantize_mode='full', quantize
             if layers.name.find(name_match) != -1:
                 exclude_layers.append(layers.name)
     if logger:
-        logger.info('These layers have been excluded %s' % exclude_layers)
+        logger.info(f'These layers have been excluded {exclude_layers}')
 
     qsym, qarg_params, aux_params, collector, _ = quantize_graph(
         sym=symnet, arg_params=args, aux_params=auxs, device=device,
@@ -952,10 +949,10 @@ def quantize_net(network, quantized_dtype='auto', quantize_mode='full', quantize
     if calib_mode is not None and calib_mode != 'none':
         if not isinstance(device, Device):
             raise ValueError(
-                'currently only supports single device, while received %s' % str(device))
+                f'currently only supports single device, while received {str(device)}')
         if calib_data is None:
             raise ValueError(
-                'calib_data must be provided when calib_mode=%s' % calib_mode)
+                f'calib_data must be provided when calib_mode={calib_mode}')
         if calib_mode in ['naive', 'entropy', 'custom']:
             inputs = _multilist_iterator(data_descs, lambda dd: mx.sym.var(dd.name))
             calib_net = SymbolBlock(symnet, inputs)
@@ -968,8 +965,7 @@ def quantize_net(network, quantized_dtype='auto', quantize_mode='full', quantize
                                                     num_calib_batches, logger)
 
             if logger:
-                logger.info('Collected layer output values from FP32 model using %d batches'
-                            % num_batches)
+                logger.info(f'Collected layer output values from FP32 model using {num_batches} batches')
 
             qsym, qarg_params, aux_params = calib_graph(
                 qsym=qsym, arg_params=args, aux_params=auxs, collector=collector,
@@ -983,8 +979,8 @@ def quantize_net(network, quantized_dtype='auto', quantize_mode='full', quantize
     for k, v in net.collect_params().items():
         v.grad_req = 'null'
 
-    all_params = {('arg:%s' % k): v.as_in_context(cpu()) for k, v in qarg_params.items()}
-    all_params.update({('aux:%s' % k): v.as_in_context(cpu()) for k, v in aux_params.items()})
+    all_params = {f'arg:{k}': v.as_in_context(cpu()) for k, v in qarg_params.items()}
+    all_params.update({f'aux:{k}': v.as_in_context(cpu()) for k, v in aux_params.items()})
     net.load_dict(all_params, cast_dtype=True, dtype_source='saved')
     net.optimize_for(data_nd, backend=backend, skip_infer=True)
     return net
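
Several messages in this file concatenate an f-string with a plain literal across lines; adjacent string literals are merged at compile time, and only the piece containing placeholders needs the f prefix. A minimal sketch with an illustrative value:

    quantized_dtype = 'int4'  # illustrative value
    msg = (f'unknown quantized_dtype {quantized_dtype} received,'
           ' expected `int8`, `uint8` or `auto`')
    print(msg)
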
diff --git a/python/mxnet/contrib/tensorboard.py b/python/mxnet/contrib/tensorboard.py
index 8a4b591b28..b32bcf20c4 100644
--- a/python/mxnet/contrib/tensorboard.py
+++ b/python/mxnet/contrib/tensorboard.py
@@ -68,5 +68,5 @@ class LogMetricsCallback(object):
         name_value = param.eval_metric.get_name_value()
         for name, value in name_value:
             if self.prefix is not None:
-                name = '%s-%s' % (self.prefix, name)
+                name = f'{self.prefix}-{name}'
             self.summary_writer.add_scalar(name, value, global_step=param.epoch)
diff --git a/python/mxnet/contrib/text/embedding.py b/python/mxnet/contrib/text/embedding.py
index 952b664c63..c08ef84521 100644
--- a/python/mxnet/contrib/text/embedding.py
+++ b/python/mxnet/contrib/text/embedding.py
@@ -120,10 +120,10 @@ def get_pretrained_file_names(embedding_name=None):
 
     if embedding_name is not None:
         if embedding_name not in text_embedding_reg:
-            raise KeyError('Cannot find `embedding_name` %s. Use '
+            raise KeyError(f'Cannot find `embedding_name` {embedding_name}. Use '
                            '`get_pretrained_file_names('
                            'embedding_name=None).keys()` to get all the valid embedding '
-                           'names.' % embedding_name)
+                           'names.')
         return list(text_embedding_reg[embedding_name].pretrained_file_name_sha1.keys())
     else:
         return {embedding_name: list(embedding_cls.pretrained_file_name_sha1.keys())
@@ -259,9 +259,9 @@ class _TokenEmbedding(vocab.Vocabulary):
                 line_num += 1
                 elems = line.rstrip().split(elem_delim)
 
-                assert len(elems) > 1, 'At line %d of the pre-trained text embedding file: the ' \
-                                       'data format of the pre-trained token embedding file %s ' \
-                                       'is unexpected.' % (line_num, pretrained_file_path)
+                assert len(elems) > 1, f'At line {line_num} of the pre-trained text embedding file: the ' \
+                                       f'data format of the pre-trained token embedding file {pretrained_file_path} ' \
+                                       'is unexpected.'
 
                 token, elems = elems[0], [float(i) for i in elems[1:]]
 
@@ -269,14 +269,13 @@ class _TokenEmbedding(vocab.Vocabulary):
                     loaded_unknown_vec = elems
                     tokens.add(self.unknown_token)
                 elif token in tokens:
-                    warnings.warn('At line %d of the pre-trained token embedding file: the '
-                                  'embedding vector for token %s has been loaded and a duplicate '
-                                  'embedding for the  same token is seen and skipped.' %
-                                  (line_num, token))
+                    warnings.warn(f'At line {line_num} of the pre-trained token embedding file: the '
+                                  f'embedding vector for token {token} has been loaded and a duplicate '
+                                  'embedding for the same token is seen and skipped.')
                 elif len(elems) == 1:
-                    warnings.warn('At line %d of the pre-trained text embedding file: token %s '
-                                  'with 1-dimensional vector %s is likely a header and is '
-                                  'skipped.' % (line_num, token, elems))
+                    warnings.warn(f'At line {line_num} of the pre-trained text embedding file: token {token} '
+                                  f'with 1-dimensional vector {elems} is likely a header and is '
+                                  'skipped.')
                 else:
                     if vec_len is None:
                         vec_len = len(elems)
@@ -285,10 +284,9 @@ class _TokenEmbedding(vocab.Vocabulary):
                         all_elems.extend([0] * vec_len)
                     else:
                         assert len(elems) == vec_len, \
-                            'At line %d of the pre-trained token embedding file: the dimension ' \
-                            'of token %s is %d but the dimension of previous tokens is %d. ' \
-                            'Dimensions of all the tokens must be the same.' \
-                            % (line_num, token, len(elems), vec_len)
+                            f'At line {line_num} of the pre-trained token embedding file: the dimension ' \
+                            f'of token {token} is {len(elems)} but the dimension of previous tokens is {vec_len}. ' \
+                            'Dimensions of all the tokens must be the same.'
                     all_elems.extend(elems)
                     self._idx_to_token.append(token)
                     self._token_to_idx[token] = len(self._idx_to_token) - 1
@@ -450,10 +448,10 @@ class _TokenEmbedding(vocab.Vocabulary):
             if token in self.token_to_idx:
                 indices.append(self.token_to_idx[token])
             else:
-                raise ValueError('Token %s is unknown. To update the embedding vector for an '
+                raise ValueError(f'Token {token} is unknown. To update the embedding vector for an '
                                  'unknown token, please specify it explicitly as the '
-                                 '`unknown_token` %s in `tokens`. This is to avoid unintended '
-                                 'updates.' % (token, self.idx_to_token[C.UNKNOWN_IDX]))
+                                 f'`unknown_token` {self.idx_to_token[C.UNKNOWN_IDX]} in `tokens`. '
+                                 'This is to avoid unintended updates.')
 
         array_fn = _mx_np.array if is_np_array() else nd.array
         self._idx_to_vec[array_fn(indices)] = new_vectors
@@ -471,10 +469,8 @@ class _TokenEmbedding(vocab.Vocabulary):
 
         embedding_name = cls.__name__.lower()
         if pretrained_file_name not in cls.pretrained_file_name_sha1:
-            raise KeyError('Cannot find pretrained file %s for token embedding %s. Valid '
-                           'pretrained files for embedding %s: %s' %
-                           (pretrained_file_name, embedding_name, embedding_name,
-                            ', '.join(cls.pretrained_file_name_sha1.keys())))
+            raise KeyError(f'Cannot find pretrained file {pretrained_file_name} for token embedding {embedding_name}. Valid '
+                           f'pretrained files for embedding {embedding_name}: {", ".join(cls.pretrained_file_name_sha1.keys())}')
 
 
 @register
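
The KeyError message above embeds `", ".join(...)` inside an f-string; before Python 3.12, the expression part must use a different quote character than the enclosing f-string. A minimal sketch:

    names = ['glove', 'fasttext']  # illustrative values
    print(f'valid files: {", ".join(names)}')    # inner double quotes, outer single
    # f'valid files: {', '.join(names)}'         # SyntaxError before Python 3.12
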
diff --git a/python/mxnet/contrib/text/vocab.py b/python/mxnet/contrib/text/vocab.py
index f034e48d21..2872887c48 100644
--- a/python/mxnet/contrib/text/vocab.py
+++ b/python/mxnet/contrib/text/vocab.py
@@ -209,7 +209,7 @@ class Vocabulary(object):
         tokens = []
         for idx in indices:
             if not isinstance(idx, int) or idx > max_idx:
-                raise ValueError('Token index %d in the provided `indices` is invalid.' % idx)
+                raise ValueError(f'Token index {idx} in the provided `indices` is invalid.')
             tokens.append(self.idx_to_token[idx])
 
         return tokens[0] if to_reduce else tokens
diff --git a/python/mxnet/device.py b/python/mxnet/device.py
index 3a69ee7bd7..a2e1d3c9c7 100644
--- a/python/mxnet/device.py
+++ b/python/mxnet/device.py
@@ -103,7 +103,7 @@ class Device:
             self.device_id == other.device_id
 
     def __str__(self):
-        return '%s(%d)' % (self.device_type, self.device_id)
+        return f'{self.device_type}({self.device_id})'
 
     def __repr__(self):
         return self.__str__()
diff --git a/python/mxnet/executor.py b/python/mxnet/executor.py
index a50b145e8a..e859151e0b 100644
--- a/python/mxnet/executor.py
+++ b/python/mxnet/executor.py
@@ -373,7 +373,7 @@ class Executor:
                 dst = self.arg_dict[name]
                 array.astype(dst.dtype).copyto(dst)
             elif not allow_extra_params:
-                raise ValueError('Find name \"%s\" that is not in the arguments' % name)
+                raise ValueError(f'Find name "{name}" that is not in the arguments')
 
         if aux_params is None:
             return
@@ -383,4 +383,4 @@ class Executor:
                 dst = self.aux_dict[name]
                 array.astype(dst.dtype).copyto(dst)
             elif not allow_extra_params:
-                raise ValueError('Find name %s that is not in the auxiliary states' % name)
+                raise ValueError(f'Find name {name} that is not in the auxiliary states')
diff --git a/python/mxnet/gluon/block.py b/python/mxnet/gluon/block.py
index cff346b9f4..0b56a326f2 100644
--- a/python/mxnet/gluon/block.py
+++ b/python/mxnet/gluon/block.py
@@ -59,7 +59,7 @@ def _block_scope(block):
     if counter is not None:
         count = counter.get(name, 0)
         counter[name] = count + 1
-        name = '%s%d'%(name, count)
+        name = f'{name}{count}'
     counter_token = _naming_counter.set({})
     prefix_token = _prefix.set(_prefix.get() + name + '_')
     with _name.Prefix(_prefix.get()):
@@ -464,7 +464,7 @@ class Block:
         else:
             filename = None
         params = self.collect_params()
-        error_str = "file: %s" % (filename) if filename else "param_dict"
+        error_str = f"file: {filename}" if filename else "param_dict"
         loaded = {k[4:] if k.startswith('arg:') or k.startswith('aux:') else k: v \
                   for k, v in param_dict.items()}
 
@@ -475,18 +475,16 @@ class Block:
 
             for name, param in params.items():
                 assert any(p in loaded for p in params_inv[param]), \
-                    "Parameter '%s' is missing in '%s', which contains parameters: %s. " \
-                    "Set allow_missing=True to ignore missing parameters."%(
-                        name, error_str, _brief_print_list(loaded.keys()))
+                    f"Parameter '{name}' is missing in '{error_str}', which contains parameters: {_brief_print_list(loaded.keys())}. " \
+                    "Set allow_missing=True to ignore missing parameters."
 
         if device is None:
             device = _device.current_device()
         for name in loaded:
             if not ignore_extra and name not in params:
                 raise ValueError(
-                    "Parameter '%s' loaded from '%s' is not present in Dict, " \
-                    "which contains parameters %s. Set ignore_extra=True to ignore. "%(
-                        name, error_str, _brief_print_list(params.keys())))
+                    f"Parameter '{name}' loaded from '{error_str}' is not present in Dict, " \
+                    f"which contains parameters {_brief_print_list(params.keys())}. Set ignore_extra=True to ignore. ")
             if name in params:
                 param = loaded[name]
                 if isinstance(param, np.ndarray):
@@ -948,7 +946,7 @@ class Block:
                 class_name = block.__class__.__name__
                 block_idx = len(summary) - 1
 
-                m_key = '%s-%i' % (class_name, block_idx+1)
+                m_key = f'{class_name}-{block_idx+1}'
                 summary[m_key] = OrderedDict()
                 summary[m_key]['output_shape'] = _get_shape_str(outputs)
 
@@ -1140,20 +1138,20 @@ class HybridBlock(Block):
         expected_names = set(input_names)
         for name in expected_names:
             assert name in param_names or name in data_names, \
-                "Unknown input to HybridBlock: %s" %name
+                f"Unknown input to HybridBlock: {name}"
 
         used_data_names = [i for i in data_names if i in expected_names]
         if len(used_data_names) != len(data_names):
-            unused = ', '.join(['%d-th'%i for name, i in data_names.items()
+            unused = ', '.join([f'{i}-th' for name, i in data_names.items()
                                 if name not in expected_names])
-            warnings.warn("The %s input to HybridBlock is not used by any "
-                          "computation. Is this intended?"%unused, stacklevel=4)
+            warnings.warn(f"The {unused} input to HybridBlock is not used by "
+                          "any computation. Is this intended?", stacklevel=4)
 
         used_param_names = [i for i in param_names if i in expected_names]
         if len(used_param_names) != len(param_names):
             unused = ', '.join(list(param_names - set(used_param_names)))
-            warnings.warn("Parameter %s is not used by any computation. "
-                          "Is this intended?"%unused, stacklevel=4)
+            warnings.warn(f"Parameter {unused} is not used by any computation. "
+                          "Is this intended?", stacklevel=4)
 
         args, _ = _flatten(args, "input")
         try:
@@ -1404,9 +1402,8 @@ class HybridBlock(Block):
         if not isinstance(block, HybridBlock):
             raise ValueError(
                 "Children of HybridBlock must also be HybridBlock, " \
-                "but %s has type %s. If you are using Sequential, " \
-                "please try HybridSequential instead."%(
-                    str(block), str(type(block))))
+                f"but {str(block)} has type {str(type(block))}. If you are using Sequential, " \
+                "please try HybridSequential instead.")
         super(HybridBlock, self).register_child(block, name)
         if self._active:
             warnings.warn("Currently the model has been hybridized. Automatically deactivate the hybridization \
@@ -1549,8 +1546,9 @@ class HybridBlock(Block):
         for var in sym.get_inputs():
             if var.name in rename_map:
                 var._set_attr(name=rename_map[var.name])
-
-        sym_filename = '%s-symbol.json' % (path if path is not None else "")
+
+        path_string = path if path is not None else ""
+        sym_filename = f'{path_string}-symbol.json'
         if path is not None:
             sym.save(sym_filename, remove_amp_cast=remove_amp_cast)
 
@@ -1566,8 +1564,8 @@ class HybridBlock(Block):
                         warnings.warn('Parameter "{name}" is not found in the graph. '
                                       .format(name=name), stacklevel=3)
                     else:
-                        arg_dict['aux:%s'%name] = param._reduce()
-        params_filename = '%s-%04d.params'%((path if path is not None else ""), epoch)
+                        arg_dict[f'aux:{name}'] = param._reduce()
+        params_filename = f'{path_string}-{epoch:04d}.params'
 
         if path is not None:
             if is_np_array():
@@ -1788,7 +1786,7 @@ class SymbolBlock(HybridBlock):
         input_names = set()
         for i in syms:
             assert len(i.get_internals().list_outputs()) == 1, \
-                "Input symbols must be variable, but %s is an output of operators"%str(i)
+                f"Input symbols must be variable, but {str(i)} is an output of operators"
             input_names.add(i.name)
 
         # check if any symbol is row_sparse
@@ -1797,8 +1795,8 @@ class SymbolBlock(HybridBlock):
         for i in out:
             for j in i.get_internals():
                 assert(j.attr("__storage_type__") != str(row_sparse_storage)), \
-                    "SymbolBlock doesn't support Parameter '%s' because its storage " \
-                    "type is 'row_sparse'." % j.name
+                    f"SymbolBlock doesn't support Parameter '{j.name}' because its storage " \
+                    "type is 'row_sparse'."
         if len(out) > 1:
             out = symbol.Group(out, _check_same_symbol_type(out))
         else:
@@ -1866,7 +1864,7 @@ class SymbolBlock(HybridBlock):
 
         assert isinstance(x, Symbol), \
             "HybridBlock requires the first argument to forward be either " \
-            "Symbol or NDArray, but got %s"%type(x)
+            f"Symbol or NDArray, but got {type(x)}"
         args, in_fmt = _flatten([x] + list(args), "input")
         assert in_fmt == self._in_format, "Invalid input format"
         ret = copy.copy(self._cached_graph[1])
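
For reference, the conversion pattern applied throughout this hunk maps printf-style specs one-to-one onto f-string format specs. A minimal standalone sketch (values made up for illustration):

    # printf-style formatting and the equivalent f-strings
    class_name, block_idx = "Dense", 0
    epoch = 7

    assert '%s-%i' % (class_name, block_idx + 1) == f'{class_name}-{block_idx + 1}'
    # '%04d' zero-pads to width 4; the f-string spec after ':' does the same
    assert '%04d' % epoch == f'{epoch:04d}' == '0007'
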
diff --git a/python/mxnet/gluon/contrib/estimator/estimator.py b/python/mxnet/gluon/contrib/estimator/estimator.py
index 7b212c03c3..0a8e879ee8 100644
--- a/python/mxnet/gluon/contrib/estimator/estimator.py
+++ b/python/mxnet/gluon/contrib/estimator/estimator.py
@@ -167,9 +167,9 @@ class Estimator(object):
                                  "refer to mxnet.Device:{}".format(devices))
             for device in devices:
                 assert device in available_gpus or str(device).startswith('cpu'), \
-                    "%s is not available, please make sure " \
-                    "your device is in one of: mx.cpu(), %s" % \
-                    (device, ", ".join([str(device) for device in available_gpus]))
+                    "{} is not available, please make sure " \
+                    "your device is in one of: mx.cpu(), {}".format(
+                        device, ', '.join([str(device) for device in available_gpus]))
         else:
             # provide default device
             if gpus > 0:
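
This hunk keeps str.format for the multi-line assert message rather than an f-string. An f-string spelling would also work, since a single-quoted expression can sit inside a double-quoted f-string; a sketch with made-up values, not part of the commit:

    device = "gpu(3)"
    available_gpus = ["gpu(0)", "gpu(1)"]
    # 'd' avoids shadowing the outer 'device', unlike the original comprehension
    msg = (f"{device} is not available, please make sure "
           f"your device is in one of: mx.cpu(), {', '.join(str(d) for d in available_gpus)}")
    print(msg)
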
diff --git a/python/mxnet/gluon/contrib/estimator/event_handler.py b/python/mxnet/gluon/contrib/estimator/event_handler.py
index 99a50d567f..ea37449960 100644
--- a/python/mxnet/gluon/contrib/estimator/event_handler.py
+++ b/python/mxnet/gluon/contrib/estimator/event_handler.py
@@ -279,11 +279,11 @@ class LoggingHandler(TrainBegin, TrainEnd, EpochBegin, EpochEnd, BatchBegin, Bat
 
     def train_end(self, estimator, *args, **kwargs):
         train_time = time.time() - self.train_start
-        msg = 'Train finished using total %ds with %d epochs. ' % (train_time, self.current_epoch)
+        msg = f'Train finished using total {train_time:.0f}s with {self.current_epoch} epochs. '
         # log every result in train stats including train/validation loss & metrics
         for metric in self.metrics:
             name, value = metric.get()
-            msg += '%s: %.4f, ' % (name, value)
+            msg += f'{name}: {value:.4f}, '
         estimator.logger.info(msg.rstrip(', '))
 
     def batch_begin(self, estimator, *args, **kwargs):
@@ -293,17 +293,17 @@ class LoggingHandler(TrainBegin, TrainEnd, EpochBegin, EpochEnd, BatchBegin, Bat
     def batch_end(self, estimator, *args, **kwargs):
         if isinstance(self.log_interval, int):
             batch_time = time.time() - self.batch_start
-            msg = '[Epoch %d][Batch %d]' % (self.current_epoch, self.batch_index)
+            msg = f'[Epoch {self.current_epoch}][Batch {self.batch_index}]'
             self.processed_samples += kwargs['batch'][0].shape[0]
-            msg += '[Samples %s] ' % (self.processed_samples)
+            msg += f'[Samples {self.processed_samples}] '
             self.log_interval_time += batch_time
             if self.batch_index % self.log_interval == 0:
-                msg += 'time/interval: %.3fs ' % self.log_interval_time
+                msg += f'time/interval: {self.log_interval_time:.3f}s '
                 self.log_interval_time = 0
                 for metric in self.metrics:
                     # only log current training loss & metric after each interval
                     name, value = metric.get()
-                    msg += '%s: %.4f, ' % (name, value)
+                    msg += f'{name}: {value:.4f}, '
                 estimator.logger.info(msg.rstrip(', '))
         self.batch_index += 1
 
@@ -324,10 +324,10 @@ class LoggingHandler(TrainBegin, TrainEnd, EpochBegin, EpochEnd, BatchBegin, Bat
     def epoch_end(self, estimator, *args, **kwargs):
         if isinstance(self.log_interval, int) or self.log_interval == 'epoch':
             epoch_time = time.time() - self.epoch_start
-            msg = '[Epoch %d] Finished in %.3fs, ' % (self.current_epoch, epoch_time)
+            msg = f'[Epoch {self.current_epoch}] Finished in {epoch_time:.3f}s, '
             for monitor in self.metrics:
                 name, value = monitor.get()
-                msg += '%s: %.4f, ' % (name, value)
+                msg += f'{name}: {value:.4f}, '
             estimator.logger.info(msg.rstrip(', '))
         self.current_epoch += 1
         self.batch_index = 0
@@ -404,10 +404,10 @@ class CheckpointHandler(TrainBegin, BatchEnd, EpochEnd):
         self.saved_checkpoints = []
         if self.save_best:
             if mode not in ['auto', 'min', 'max']:
-                warnings.warn('ModelCheckpoint mode %s is unknown, '
+                warnings.warn(f'ModelCheckpoint mode {mode} is unknown, '
                              'fallback to auto mode. CheckpointHandler will use '
                              'max mode for f1 and accuracy metric comparison and '
-                              'use min mode other wise' % (mode),
+                              'use min mode otherwise',
                               RuntimeWarning)
                 mode = 'auto'
 
@@ -477,12 +477,11 @@ class CheckpointHandler(TrainBegin, BatchEnd, EpochEnd):
         else:
             save_epoch_number = self.current_epoch
             save_batch_number = self.current_batch
-        prefix = "%s-epoch%dbatch%d" % (self.model_prefix, save_epoch_number, save_batch_number)
+        prefix = f"{self.model_prefix}-epoch{save_epoch_number}batch{save_batch_number}"
         self._save_params_and_trainer(estimator, prefix)
         if self.verbose > 0:
-            estimator.logger.info('[Epoch %d] CheckpointHandler: trained total %d batches, '
-                                  'saving model at %s with prefix: %s',
-                                  self.current_epoch, self.current_batch + 1, self.model_dir, prefix)
+            estimator.logger.info(f'[Epoch {self.current_epoch}] CheckpointHandler: trained total {self.current_batch + 1} batches, '
+                                  f'saving model at {self.model_dir} with prefix: {prefix}')
 
         if self.save_best:
             monitor_name, monitor_value = self.monitor.get()
@@ -557,35 +556,33 @@ class CheckpointHandler(TrainBegin, BatchEnd, EpochEnd):
         if self.trained_epoch == -1:
             msg = "CheckpointHandler: No checkpoint found, training from scratch for "
             if estimator.max_batch:
-                msg += "%d batches" % estimator.max_batch
+                msg += f"{estimator.max_batch} batches"
             else:
-                msg += "%d epochs" % estimator.max_epoch
+                msg += f"{estimator.max_epoch} epochs"
             estimator.logger.info(msg)
         else:
-            msg = "CheckpointHandler: Checkpoint resumed from epoch %d batch %d, " \
-                  "continue to train for " % (self.trained_epoch, self.trained_batch)
+            msg = f"CheckpointHandler: Checkpoint resumed from epoch {self.trained_epoch} batch {self.trained_batch}, " \
+                  "continue to train for "
             # change maximum number of epoch or batch to train if resumed from epoch checkpoint
             if estimator.max_epoch:
                 if self.trained_epoch >= estimator.max_epoch - 1:
-                    raise ValueError("Found checkpoint with maximum number of epoch %d reached, please specify "
-                                     "resume_from_checkpoint=False (default value) if you wan to train from scratch."
-                                     % estimator.max_epoch)
+                    raise ValueError(f"Found checkpoint with maximum number of epoch {estimator.max_epoch} reached, please specify "
+                                     "resume_from_checkpoint=False (default value) if you wan to train from scratch.")
                 estimator.max_epoch = estimator.max_epoch - self.trained_epoch - 1
-                msg += "%d epochs " % estimator.max_epoch
+                msg += f"{estimator.max_epoch} epochs "
             if estimator.max_batch:
                 if self.trained_batch >= estimator.max_batch - 1:
-                    raise ValueError("Found checkpoint with maximum number of batch %d reached, please specify"
-                                     "resume_from_checkpoint=False (default value) if you wan to train from scratch."
-                                     % self.trained_batch)
+                    raise ValueError(f"Found checkpoint with maximum number of batch {self.trained_batch} reached, please specify"
+                                     "resume_from_checkpoint=False (default value) if you wan to train from scratch.")
                 estimator.max_batch = estimator.max_batch - self.trained_batch - 1
-                msg += "%d batches " % estimator.max_batch
+                msg += f"{estimator.max_batch} batches "
             # load checkpoint
-            param_file = "%s-epoch%dbatch%d.params" % (self.model_prefix, self.trained_epoch, self.trained_batch)
+            param_file = "{}-epoch{}batch{}.params".format(self.model_prefix, self.trained_epoch, self.trained_batch)
             param_file = os.path.join(self.model_dir, param_file)
-            trainer_file = "%s-epoch%dbatch%d.states" % (self.model_prefix, self.trained_epoch, self.trained_batch)
+            trainer_file = "{}-epoch{}batch{}.states".format(self.model_prefix, self.trained_epoch, self.trained_batch)
             trainer_file = os.path.join(self.model_dir, trainer_file)
-            assert os.path.exists(param_file), "Failed to load checkpoint, %s does not exist" % param_file
-            assert os.path.exists(trainer_file), "Failed to load checkpoint, %s does not exist" % trainer_file
+            assert os.path.exists(param_file), f"Failed to load checkpoint, {param_file} does not exist"
+            assert os.path.exists(trainer_file), f"Failed to load checkpoint, {trainer_file} does not exist"
             estimator.net.load_parameters(param_file, ctx=estimator.device)
             estimator.trainer.load_states(trainer_file)
             estimator.logger.warning(msg)
@@ -655,10 +652,10 @@ class EarlyStoppingHandler(TrainBegin, EpochEnd, TrainEnd):
         self.stop_training = False
 
         if mode not in ['auto', 'min', 'max']:
-            warnings.warn('EarlyStopping mode %s is unknown, '
+            warnings.warn(f'EarlyStopping mode {mode} is unknown, '
                          'fallback to auto mode. EarlyStoppingHandler will use '
                          'max mode for f1 and accuracy metric comparison and '
-                          'use min mode other wise' % (mode),
+                          'use min mode otherwise',
                           RuntimeWarning)
             mode = 'auto'
 
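
Two behavioral footnotes on the conversions in this file. First, '%d' truncates a float to an integer, while a bare {x} in an f-string prints the full float, so a format spec such as :.0f is needed to keep the old one-number rendering (note that :.0f rounds where %d truncates). Second, logging calls can keep %-style arguments so formatting is deferred until the record is actually emitted; an f-string argument is always built. A short sketch:

    import logging

    train_time = 12.7
    assert '%ds' % train_time == '12s'      # %d truncates the float
    assert f'{train_time}s' == '12.7s'      # bare f-string keeps the fraction
    assert f'{train_time:.0f}s' == '13s'    # :.0f rounds instead of truncating

    # %-style logging args are formatted lazily, only if the level is enabled
    logging.getLogger(__name__).debug('took %.3fs', train_time)
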
diff --git a/python/mxnet/gluon/data/_internal.py b/python/mxnet/gluon/data/_internal.py
index 925cf54535..c6401dc2a4 100644
--- a/python/mxnet/gluon/data/_internal.py
+++ b/python/mxnet/gluon/data/_internal.py
@@ -199,13 +199,12 @@ def _make_internal_datasets(handle):
         [py_str(arg_types[i]) for i in range(narg)],
         [py_str(arg_descs[i]) for i in range(narg)])
 
-    doc_str = ('%s\n\n' +
-               '%s\n' +
+    doc_str = (f'{desc.value}\n\n' +
+               f'{param_str}\n' +
                'Returns\n' +
                '-------\n' +
                'MXDataset\n'+
                '    The result dataset.')
-    doc_str = doc_str % (desc.value, param_str)
 
     def creator(*args, **kwargs):
         """Create a dataset.
@@ -243,7 +242,7 @@ def _make_internal_datasets(handle):
             ctypes.byref(dataset_handle)))
 
         if len(args):
-            raise TypeError('%s can only accept keyword arguments' % iter_name)
+            raise TypeError(f'{iter_name} can only accept keyword arguments')
 
         return MXDataset(dataset_handle, **kwargs)
 
@@ -287,13 +286,12 @@ def _make_internal_batchify_functions(handle):
         [py_str(arg_types[i]) for i in range(narg)],
         [py_str(arg_descs[i]) for i in range(narg)])
 
-    doc_str = ('%s\n\n' +
-               '%s\n' +
+    doc_str = (f'{desc.value}\n\n' +
+               f'{param_str}\n' +
                'Returns\n' +
                '-------\n' +
                'MXBatchifyFunction\n'+
                '    The result batchify function.')
-    doc_str = doc_str % (desc.value, param_str)
 
     def creator(*args, **kwargs):
         """Create an iterator.
@@ -331,7 +329,7 @@ def _make_internal_batchify_functions(handle):
             ctypes.byref(batchify_fn_handle)))
 
         if len(args):
-            raise TypeError('%s can only accept keyword arguments' % bf_name)
+            raise TypeError(f'{bf_name} can only accept keyword arguments')
 
         return MXBatchifyFunction(batchify_fn_handle, creator_name=bf_name, **kwargs)
 
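
The doc_str rewrite above joins the pieces with explicit '+'. Adjacent string literals also concatenate implicitly at compile time, and f-prefixed pieces mix freely with plain ones; a sketch with invented values:

    desc_value = "CSVDataset"
    param_str = "Parameters\n----------\n..."

    # implicit concatenation: no '+' needed between adjacent literals
    doc_str = (f'{desc_value}\n\n'
               f'{param_str}\n'
               'Returns\n'
               '-------\n'
               'MXDataset\n'
               '    The result dataset.')
    print(doc_str)
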
diff --git a/python/mxnet/gluon/data/batchify.py b/python/mxnet/gluon/data/batchify.py
index de1f52d8e7..050bf471ef 100644
--- a/python/mxnet/gluon/data/batchify.py
+++ b/python/mxnet/gluon/data/batchify.py
@@ -349,13 +349,13 @@ class Group(object):
         if isinstance(fn, (list, tuple)):
             assert len(args) == 0, 'Input pattern not understood. The input of Group can be ' \
                                    'Group(A, B, C) or Group([A, B, C]) or Group((A, B, C)). ' \
-                                   'Received fn=%s, args=%s' % (str(fn), str(args))
+                                   f'Received fn={str(fn)}, args={str(args)}'
             self._fn = fn
         else:
             self._fn = (fn, ) + args
         for i, ele_fn in enumerate(self._fn):
             assert hasattr(ele_fn, '__call__'), 'Batchify functions must be callable! ' \
-                                                'type(fn[%d]) = %s' % (i, str(type(ele_fn)))
+                                                f'type(fn[{i}]) = {str(type(ele_fn))}'
 
     def __call__(self, data):
         """Batchify the input data.
diff --git a/python/mxnet/gluon/data/dataset.py b/python/mxnet/gluon/data/dataset.py
index 7c45d9e09e..946550b83a 100644
--- a/python/mxnet/gluon/data/dataset.py
+++ b/python/mxnet/gluon/data/dataset.py
@@ -83,7 +83,7 @@ class Dataset(object):
         Dataset
             The result dataset.
         """
-        assert index < num_shards, 'Shard index of out bound: %d out of %d'%(index, num_shards)
+        assert index < num_shards, f'Shard index of out bound: {index} out of {num_shards}'
         assert num_shards > 0, 'Number of shards must be greater than 0'
         assert index >= 0, 'Index must be non-negative'
         length = len(self)
@@ -132,8 +132,7 @@ class Dataset(object):
         """
         from . import Sampler
         if not isinstance(sampler, Sampler):
-            raise TypeError('Invalid sampler type: %s. Expected gluon.data.Sampler instead.'%
-                            type(sampler))
+            raise TypeError(f'Invalid sampler type: {type(sampler)}. Expected gluon.data.Sampler instead.')
         return _SampledDataset(self, sampler)
 
     def transform(self, fn, lazy=True):
@@ -356,8 +355,8 @@ class ArrayDataset(Dataset):
         self._data = []
         for i, data in enumerate(args):
             assert len(data) == self._length, \
-                "All arrays must have the same length; array[0] has length %d " \
-                "while array[%d] has %d." % (self._length, i+1, len(data))
+                f"All arrays must have the same length; array[0] has length {self._length} " \
+                f"while array[{i+1}] has {len(data)}."
             if isinstance(data, ndarray.NDArray) and len(data.shape) == 1:
                 data = data.asnumpy()
             self._data.append(data)
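
One reassurance about the assert conversions in this file and throughout the patch: the message operand of an assert is evaluated only when the condition fails, so an f-string there costs nothing on the success path. A quick demonstration:

    calls = []

    def describe():
        calls.append(1)                 # records that the message was built
        return "details"

    assert True, f"never built: {describe()}"
    assert calls == []                  # message skipped on success

    try:
        assert False, f"built now: {describe()}"
    except AssertionError:
        pass
    assert calls == [1]                 # built exactly once, on failure
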
diff --git a/python/mxnet/gluon/data/sampler.py b/python/mxnet/gluon/data/sampler.py
index 143f54e470..acf49099a9 100644
--- a/python/mxnet/gluon/data/sampler.py
+++ b/python/mxnet/gluon/data/sampler.py
@@ -148,7 +148,7 @@ class BatchSampler(Sampler):
             else:
                 raise ValueError(
                     "last_batch must be one of 'keep', 'discard', or 'rollover', " \
-                    "but got %s"%self._last_batch)
+                    f"but got {self._last_batch}")
 
     def __len__(self):
         if self._last_batch == 'keep':
@@ -159,7 +159,7 @@ class BatchSampler(Sampler):
             return (len(self._prev) + len(self._sampler)) // self._batch_size
         raise ValueError(
             "last_batch must be one of 'keep', 'discard', or 'rollover', " \
-            "but got %s"%self._last_batch)
+            f"but got {self._last_batch}")
 
 
 class IntervalSampler(Sampler):
diff --git a/python/mxnet/gluon/data/vision/datasets.py b/python/mxnet/gluon/data/vision/datasets.py
index 028d846c6b..70e3045e45 100644
--- a/python/mxnet/gluon/data/vision/datasets.py
+++ b/python/mxnet/gluon/data/vision/datasets.py
@@ -327,7 +327,7 @@ class ImageFolderDataset(dataset.Dataset):
         for folder in sorted(os.listdir(root)):
             path = os.path.join(root, folder)
             if not os.path.isdir(path):
-                warnings.warn('Ignoring %s, which is not a directory.'%path, stacklevel=3)
+                warnings.warn(f'Ignoring {path}, which is not a directory.', stacklevel=3)
                 continue
             label = len(self.synsets)
             self.synsets.append(folder)
@@ -335,8 +335,7 @@ class ImageFolderDataset(dataset.Dataset):
                 filename = os.path.join(path, filename)
                 ext = os.path.splitext(filename)[1]
                 if ext.lower() not in self._exts:
-                    warnings.warn('Ignoring %s of type %s. Only support %s'%(
-                        filename, ext, ', '.join(self._exts)))
+                    warnings.warn(f'Ignoring {filename} of type {ext}. Only {", ".join(self._exts)} are supported.')
                     continue
                 self.items.append((filename, label))
 
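
A syntax note relevant to the warning above: before Python 3.12, the expression inside an f-string may not reuse the f-string's own quote character, so a double-quoted ", ".join(...) inside a single-quoted f-string is the standard workaround. A sketch:

    exts = ['.jpg', '.png']

    # f'Only {', '.join(exts)} are supported.'   # SyntaxError before 3.12
    msg = f'Only {", ".join(exts)} are supported.'   # different quote type works
    assert msg == 'Only .jpg, .png are supported.'
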
diff --git a/python/mxnet/gluon/loss.py b/python/mxnet/gluon/loss.py
index 14195b8e09..36a45c4be9 100644
--- a/python/mxnet/gluon/loss.py
+++ b/python/mxnet/gluon/loss.py
@@ -510,9 +510,9 @@ class CTCLoss(Loss):
 
     def __init__(self, layout='NTC', label_layout='NT', weight=None, **kwargs):
         assert layout in ['NTC', 'TNC'],\
-            "Only 'NTC' and 'TNC' layouts for pred are supported. Got: %s" % layout
+            f"Only 'NTC' and 'TNC' layouts for pred are supported. Got: {layout}"
         assert label_layout in ['NT', 'TN'],\
-            "Only 'NT' and 'TN' layouts for label are supported. Got: %s" % label_layout
+            f"Only 'NT' and 'TN' layouts for label are supported. Got: {label_layout}"
         self._layout = layout
         self._label_layout = label_layout
         batch_axis = label_layout.find('N')
@@ -713,8 +713,7 @@ class LogisticLoss(Loss):
         super(LogisticLoss, self).__init__(weight, batch_axis, **kwargs)
         self._label_format = label_format
         if self._label_format not in ["signed", "binary"]:
-            raise ValueError("label_format can only be signed or binary, received %s."
-                             % label_format)
+            raise ValueError(f"label_format can only be signed or binary, received {label_format}.")
 
     def forward(self, pred, label, sample_weight=None):
         label = npx.reshape_like(label, pred)
diff --git a/python/mxnet/gluon/metric.py b/python/mxnet/gluon/metric.py
index 974ce0cd6b..67b52b9a4b 100644
--- a/python/mxnet/gluon/metric.py
+++ b/python/mxnet/gluon/metric.py
@@ -482,7 +482,7 @@ class TopKAccuracy(EvalMetric):
             output_names=output_names, label_names=label_names)
         self.top_k = top_k
         assert(self.top_k > 1), 'Please use Accuracy if top_k is no more than 1'
-        self.name += '_%d' % self.top_k
+        self.name += f'_{self.top_k}'
 
     def update(self, labels, preds):
         """Updates the internal evaluation result.
@@ -537,7 +537,7 @@ def predict_with_threshold(pred, threshold=0.5):
     elif isinstance(threshold, (numpy.ndarray, ndarray.ndarray.NDArray)):
         num_classes = pred.shape[-1]
         assert threshold.shape[-1] == num_classes, \
-                "shape mismatch: %s vs. %s"%(pred.shape[-1], threshold.shape[-1])
+                f"shape mismatch: {pred.shape[-1]} vs. {threshold.shape[-1]}"
         return pred > threshold
     else:
         raise ValueError("{} is a wrong type for threshold!".format(type(threshold)))
@@ -1411,7 +1411,7 @@ class CrossEntropy(EvalMetric):
         num = 0
         for label, pred in zip(labels, preds):
             assert label.size == pred.size/pred.shape[-1], \
-                "shape mismatch: %s vs. %s"%(label.shape, pred.shape)
+                f"shape mismatch: {label.shape} vs. {pred.shape}"
             label = label.reshape((label.size,))
             if self.from_logits:
                 pred = npx.softmax(pred, axis=self.axis)
@@ -1792,7 +1792,7 @@ class CustomMetric(EvalMetric):
         if name is None:
             name = feval.__name__
             if name.find('<') != -1:
-                name = 'custom(%s)' % name
+                name = f'custom({name})'
         super(CustomMetric, self).__init__(
             name, feval=feval,
             allow_extra_outputs=allow_extra_outputs,
diff --git a/python/mxnet/gluon/model_zoo/vision/__init__.py b/python/mxnet/gluon/model_zoo/vision/__init__.py
index cbce5190a7..e6176f488c 100644
--- a/python/mxnet/gluon/model_zoo/vision/__init__.py
+++ b/python/mxnet/gluon/model_zoo/vision/__init__.py
@@ -147,6 +147,5 @@ def get_model(name, **kwargs):
     name = name.lower()
     if name not in models:
         raise ValueError(
-            'Model %s is not supported. Available options are\n\t%s' % (
-                name, '\n\t'.join(sorted(models.keys()))))
+            "Model {} is not supported. Available options are\n\t{}".format(name, '\n\t'.join(sorted(models.keys()))))
     return models[name](**kwargs)
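
This is one of the few places the patch keeps str.format, and for a reason: before Python 3.12 an f-string expression may not contain a backslash, so '\n\t'.join(...) cannot be interpolated directly. A sketch of the two usual workarounds (model names invented):

    models = {'resnet18_v1': None, 'alexnet': None}

    # f"...{'\n\t'.join(sorted(models))}"   # SyntaxError before Python 3.12

    # 1) keep str.format for the piece that needs the backslash
    msg = "Available options are\n\t{}".format('\n\t'.join(sorted(models)))

    # 2) hoist the join into a variable, then interpolate the variable
    options = '\n\t'.join(sorted(models))
    assert msg == f"Available options are\n\t{options}"
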
diff --git a/python/mxnet/gluon/model_zoo/vision/densenet.py b/python/mxnet/gluon/model_zoo/vision/densenet.py
index 088596d96a..a323413dc9 100644
--- a/python/mxnet/gluon/model_zoo/vision/densenet.py
+++ b/python/mxnet/gluon/model_zoo/vision/densenet.py
@@ -142,7 +142,7 @@ def get_densenet(num_layers, pretrained=False, device=cpu(),
     net = DenseNet(num_init_features, growth_rate, block_config, **kwargs)
     if pretrained:
         from ..model_store import get_model_file
-        net.load_parameters(get_model_file('densenet%d'%(num_layers), root=root), device=device)
+        net.load_parameters(get_model_file(f'densenet{num_layers}', root=root), device=device)
     return net
 
 def densenet121(**kwargs):
diff --git a/python/mxnet/gluon/model_zoo/vision/mobilenet.py b/python/mxnet/gluon/model_zoo/vision/mobilenet.py
index 4b21a90051..f6fca8f2eb 100644
--- a/python/mxnet/gluon/model_zoo/vision/mobilenet.py
+++ b/python/mxnet/gluon/model_zoo/vision/mobilenet.py
@@ -215,7 +215,7 @@ def get_mobilenet(multiplier, pretrained=False, device=cpu(),
         if version_suffix in ('1.00', '0.50'):
             version_suffix = version_suffix[:-1]
         net.load_parameters(
-            get_model_file('mobilenet%s' % version_suffix, root=root), device=device)
+            get_model_file(f'mobilenet{version_suffix}', root=root), device=device)
     return net
 
 
@@ -248,7 +248,7 @@ def get_mobilenet_v2(multiplier, pretrained=False, device=cpu(),
         if version_suffix in ('1.00', '0.50'):
             version_suffix = version_suffix[:-1]
         net.load_parameters(
-            get_model_file('mobilenetv2_%s' % version_suffix, root=root), device=device)
+            get_model_file(f'mobilenetv2_{version_suffix}', root=root), device=device)
     return net
 
 
diff --git a/python/mxnet/gluon/model_zoo/vision/resnet.py b/python/mxnet/gluon/model_zoo/vision/resnet.py
index fe0aa68667..01f1083189 100644
--- a/python/mxnet/gluon/model_zoo/vision/resnet.py
+++ b/python/mxnet/gluon/model_zoo/vision/resnet.py
@@ -379,17 +379,16 @@ def get_resnet(version, num_layers, pretrained=False, device=cpu(),
         Location for keeping the model parameters.
     """
     assert num_layers in resnet_spec, \
-        "Invalid number of layers: %d. Options are %s"%(
-            num_layers, str(resnet_spec.keys()))
+        f"Invalid number of layers: {num_layers}. Options are {str(resnet_spec.keys())}"
     block_type, layers, channels = resnet_spec[num_layers]
     assert version >= 1 and version <= 2, \
-        "Invalid resnet version: %d. Options are 1 and 2."%version
+        f"Invalid resnet version: {version}. Options are 1 and 2."
     resnet_class = resnet_net_versions[version-1]
     block_class = resnet_block_versions[version-1][block_type]
     net = resnet_class(block_class, layers, channels, **kwargs)
     if pretrained:
         from ..model_store import get_model_file
-        net.load_parameters(get_model_file('resnet%d_v%d'%(num_layers, version),
+        net.load_parameters(get_model_file(f'resnet{num_layers}_v{version}',
                                            root=root), device=device)
     return net
 
diff --git a/python/mxnet/gluon/model_zoo/vision/squeezenet.py b/python/mxnet/gluon/model_zoo/vision/squeezenet.py
index 571e6dc38a..355cfbff30 100644
--- a/python/mxnet/gluon/model_zoo/vision/squeezenet.py
+++ b/python/mxnet/gluon/model_zoo/vision/squeezenet.py
@@ -134,7 +134,7 @@ def get_squeezenet(version, pretrained=False, device=cpu(),
     net = SqueezeNet(version, **kwargs)
     if pretrained:
         from ..model_store import get_model_file
-        net.load_parameters(get_model_file('squeezenet%s'%version, root=root), device=device)
+        net.load_parameters(get_model_file(f'squeezenet{version}', root=root), device=device)
     return net
 
 @wrap_ctx_to_device_func
diff --git a/python/mxnet/gluon/model_zoo/vision/vgg.py b/python/mxnet/gluon/model_zoo/vision/vgg.py
index dd657a6f75..88a928c252 100644
--- a/python/mxnet/gluon/model_zoo/vision/vgg.py
+++ b/python/mxnet/gluon/model_zoo/vision/vgg.py
@@ -116,7 +116,7 @@ def get_vgg(num_layers, pretrained=False, device=cpu(),
     if pretrained:
         from ..model_store import get_model_file
         batch_norm_suffix = '_bn' if kwargs.get('batch_norm') else ''
-        net.load_parameters(get_model_file('vgg%d%s'%(num_layers, batch_norm_suffix),
+        net.load_parameters(get_model_file(f'vgg{num_layers}{batch_norm_suffix}',
                                            root=root), device=device)
     return net
 
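
Braces in an f-string accept arbitrary expressions, so the conditional suffix used in this hunk could equally be computed inline; a sketch, not a further change:

    num_layers, batch_norm = 16, True

    name = f"vgg{num_layers}{'_bn' if batch_norm else ''}"
    assert name == 'vgg16_bn'
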
diff --git a/python/mxnet/gluon/nn/basic_layers.py b/python/mxnet/gluon/nn/basic_layers.py
index 883b714b16..4622f562c2 100644
--- a/python/mxnet/gluon/nn/basic_layers.py
+++ b/python/mxnet/gluon/nn/basic_layers.py
@@ -95,8 +95,8 @@ class Sequential(Block):
         """
         if self._children and all(isinstance(c(), HybridBlock) for c in self._children.values()):
             warnings.warn(
-                "All children of this Sequential layer '%s'\n are HybridBlocks. Consider "
-                "using HybridSequential for the best performance."%repr(self), stacklevel=2)
+                f"All children of this Sequential layer '{repr(self)}'\n are HybridBlocks. Consider "
+                "using HybridSequential for the best performance.", stacklevel=2)
         super(Sequential, self).hybridize(active, **kwargs)
 
 
@@ -858,7 +858,7 @@ class Lambda(Block):
             elif hasattr(npx, function):
                 self._func_impl = getattr(npx, function)
             else:
-                raise Exception("Function name %s is not found in np/npx." % function)
+                raise Exception(f"Function name {function} is not found in np/npx.")
             self._func_name = function
         elif callable(function):
             self._func_impl = function
@@ -907,7 +907,7 @@ class HybridLambda(HybridBlock):
             elif hasattr(npx, function):
                 self._func = getattr(npx, function)
             else:
-                raise Exception("Function name %s is not found in np/npx." % function)
+                raise Exception(f"Function name {function} is not found in np/npx.")
             self._func_name = function
         elif callable(function):
             self._func = function
diff --git a/python/mxnet/gluon/parameter.py b/python/mxnet/gluon/parameter.py
index 1b396490a7..df910a698d 100644
--- a/python/mxnet/gluon/parameter.py
+++ b/python/mxnet/gluon/parameter.py
@@ -130,9 +130,9 @@ class Parameter(object):
         # sparse related storage type information
         valid_stypes = ['default', 'row_sparse', 'csr']
         assert grad_stype in valid_stypes, "grad_stype for Parameter must be " \
-            "one of 'default', 'row_sparse', or 'csr', but got '%s'" % (grad_stype)
+            f"one of 'default', 'row_sparse', or 'csr', but got '{grad_stype}'"
         assert stype in valid_stypes, "stype for Parameter must be " \
-            "one of 'default', 'row_sparse', or 'csr', but got '%s'" % (stype)
+            f"one of 'default', 'row_sparse', or 'csr', but got '{stype}'"
         self._grad_stype = grad_stype
         self._stype = stype
 
@@ -151,7 +151,7 @@ class Parameter(object):
     @grad_req.setter
     def grad_req(self, req):
         assert req in ['write', 'add', 'null'], \
-            "grad_req must be one of 'write', 'add', or 'null', but got '%s'"%req
+            f"grad_req must be one of 'write', 'add', or 'null', but got '{req}'"
         if not self._differentiable:
             req = 'null'
         if self._grad_req == req:
@@ -200,8 +200,8 @@ class Parameter(object):
 
         assert len(self._shape) == len(new_shape) and \
             all(j in (-1, 0, i) for i, j in zip(new_shape, self._shape)), \
-            "Expected shape %s is incompatible with given shape %s for Parameter %s."%(
-                str(new_shape), str(self._shape), str(self.name))  # -1 means unknown dim size in np_shape mode
+            f"Expected shape {str(new_shape)} is incompatible with given shape {str(self._shape)} for Parameter {str(self.name)}." 
+            # -1 means unknown dim size in np_shape mode
 
         self._shape = new_shape
 
@@ -210,9 +210,8 @@ class Parameter(object):
         # trainer cannot be replaced for sparse params
         if self._stype != 'default' and self._trainer and trainer and self._trainer() is not trainer:
             raise RuntimeError(
-                "Failed to set the trainer for Parameter '%s' because it was already set. " \
-                "More than one trainers for a %s Parameter is not supported." \
-                %(self.name, self._stype))
+                f"Failed to set the trainer for Parameter '{self.name}' because it was already set. " \
+                f"More than one trainers for a {self._stype} Parameter is not supported.")
         if trainer is not None:
             self._trainer = weakref.ref(trainer)
         else:
@@ -233,33 +232,32 @@ class Parameter(object):
                 if idx is not None:
                     return arr_list[idx]
             raise RuntimeError(
-                "Parameter '%s' was not initialized on device %s. "
-                "It was only initialized on %s."%(
-                    self.name, str(device), str(self._device_list)))
+                f"Parameter '{self.name}' was not initialized on device {str(device)}. "
+                f"It was only initialized on {str(self._device_list)}.")
         if self._deferred_init:
             raise DeferredInitializationError(
-                "Parameter '%s' has not been initialized yet because initialization was " \
+                f"Parameter '{self.name}' has not been initialized yet because initialization was " \
                 "deferred. Actual initialization happens during the first forward pass. " \
                 "Please pass one batch of data through the network before accessing Parameters. " \
                 "You can also avoid deferred initialization by specifying in_units, " \
-                "num_features, etc., for network layers."%(self.name))
+                "num_features, etc., for network layers.")
         raise RuntimeError(
-            "Parameter '%s' has not been initialized. Note that " \
+            f"Parameter '{self.name}' has not been initialized. Note that " \
             "you should initialize parameters and create Trainer " \
             "with Block.collect_params() instead of Block.params " \
             "because the later does not include Parameters of " \
-            "nested child Blocks"%(self.name))
+            "nested child Blocks")
 
     @wrap_ctx_to_device_func
     def _get_row_sparse(self, arr_list, device, row_id):
         """ Get row_sparse data from row_sparse parameters based on row_id. """
         # get row sparse params based on row ids
         if not isinstance(row_id, ndarray.NDArray):
-            raise TypeError("row_id must have NDArray type, but %s is given"%(type(row_id)))
+            raise TypeError(f"row_id must have NDArray type, but {type(row_id)} is given")
         trainer = self._trainer() if self._trainer else None
         if not trainer:
-            raise RuntimeError("Cannot get row_sparse data for Parameter '%s' when no " \
-                               "Trainer is created with it."%self.name)
+            raise RuntimeError(f"Cannot get row_sparse data for Parameter '{self.name}' when no " \
+                               "Trainer is created with it.")
         results = self._check_and_get(arr_list, device)
 
         # fetch row sparse params from the trainer
@@ -289,9 +287,8 @@ class Parameter(object):
             unknown_dim_size = -1 if is_np_shape() else 0
             for self_dim, data_dim in zip(self.shape, data.shape):
                 assert self_dim in (unknown_dim_size, data_dim), \
-                    "Failed loading Parameter '%s' from saved params: " \
-                    "shape incompatible expected %s vs saved %s"%(
-                        self.name, str(self.shape), str(data.shape))
+                    f"Failed loading Parameter '{self.name}' from saved params: " \
+                    f"shape incompatible expected {str(self.shape)} vs saved {str(data.shape)}"
             self.shape = tuple(i if i != unknown_dim_size else j
                                for i, j in zip(self.shape, data.shape))
         if self.dtype:
@@ -302,10 +299,9 @@ class Parameter(object):
                     self.dtype = data.dtype
             else:
                 assert self.dtype == data.dtype, \
-                "Failed loading Parameter '%s' from saved params: " \
-                "dtype incompatible expected %s vs saved %s. " \
-                "Set cast_dtype=True to cast the dtype of saved params."%(
-                    self.name, str(self.dtype), str(data.dtype))
+                f"Failed loading Parameter '{self.name}' from saved params: " \
+                f"dtype incompatible expected {str(self.dtype)} vs saved {str(data.dtype)}. " \
+                "Set cast_dtype=True to cast the dtype of saved params."
         if self._stype != data.stype:
             data = data.tostype(self._stype)
         if isinstance(device, Device):
@@ -313,18 +309,16 @@ class Parameter(object):
         if self._data is None:
             if self._deferred_init:
                 assert device is None or set(device) == set(self._deferred_init[1]), \
-                    "Failed to load Parameter '%s' on %s because it was " \
-                    "previous initialized on %s."%(
-                        self.name, str(device), str(self.list_device()))
+                    f"Failed to load Parameter '{self.name}' on {str(device)} because it was " \
+                    f"previous initialized on {str(self.list_device())}."
                 device = self._deferred_init[1]
             elif device is None:
                 device = [cpu()]
             self._init_impl(data, device)
         else:
             assert device is None or set(device) == set(self.list_device()), \
-                "Failed to load Parameter '%s' on %s because it was " \
-                "previous initialized on %s."%(
-                    self.name, str(device), str(self.list_device()))
+                f"Failed to load Parameter '{self.name}' on {str(device)} because it was " \
+                f"previous initialized on {str(self.list_device())}."
             self.set_data(data)
         self._deferred_init = ()
 
@@ -336,10 +330,9 @@ class Parameter(object):
         self._deferred_init = ()
 
         assert shape_is_known(self.shape), \
-            "Cannot initialize Parameter '%s' because it has " \
-            "invalid shape: %s. Please specify in_units, " \
-            "in_channels, etc for `Block`s."%(
-                self.name, str(self.shape))
+            f"Cannot initialize Parameter '{self.name}' because it has " \
+            f"invalid shape: {str(self.shape)}. Please specify in_units, " \
+            "in_channels, etc for `Block`s."
 
         with autograd.pause(), dc.context(False):
             if data is None:
@@ -409,8 +402,8 @@ class Parameter(object):
             data = ndarray.zeros(self.shape, stype='row_sparse', ctx=device)
             trainer = self._trainer() if self._trainer else None
             if not trainer:
-                raise RuntimeError("Cannot reduce row_sparse data for Parameter '%s' when no " \
-                                   "Trainer is created with it."%self.name)
+                raise RuntimeError(f"Cannot reduce row_sparse data for Parameter '{self.name}' when no " \
+                                   "Trainer is created with it.")
             trainer._row_sparse_pull(self, data, all_row_ids, full_idx=True)
         return data
 
@@ -460,8 +453,8 @@ class Parameter(object):
         <NDArray 2x2 @gpu(1)>
         """
         if self._data is not None and not force_reinit:
-            warnings.warn("Parameter '%s' is already initialized, ignoring. " \
-                          "Set force_reinit=True to re-initialize."%self.name,
+            warnings.warn(f"Parameter '{self.name}' is already initialized, ignoring. " \
+                          "Set force_reinit=True to re-initialize.",
                           stacklevel=2)
             return
         self._data = self._grad = None
@@ -478,8 +471,8 @@ class Parameter(object):
             if self._allow_deferred_init:
                 self._deferred_init = (init, device, default_init, None)
                 return
-            raise ValueError("Cannot initialize Parameter '%s' because it has " \
-                             "invalid shape: %s."%(self.name, str(self.shape)))
+            raise ValueError(f"Cannot initialize Parameter '{self.name}' because it has " \
+                             f"invalid shape: {str(self.shape)}.")
 
         self._deferred_init = (init, device, default_init, None)
         self._finish_deferred_init()
@@ -505,8 +498,8 @@ class Parameter(object):
             init, _, default_init, data = self._deferred_init
             self._deferred_init = (init, device, default_init, data)
         else:
-            raise ValueError("Cannot reset device for Parameter '%s' because it "
-                             "has not been initialized."%self.name)
+            raise ValueError(f"Cannot reset device for Parameter '{self.name}' because it "
+                             "has not been initialized.")
 
     def reset_ctx(self, ctx):
         """This function has been deprecated. Please refer to ``Parameter.reset_device``."""
@@ -520,7 +513,7 @@ class Parameter(object):
 
         if self._data is None:
             assert self._deferred_init, \
-                "Parameter '%s' has not been initialized"%self.name
+                f"Parameter '{self.name}' has not been initialized"
             self._deferred_init = self._deferred_init[:3] + (data,)
             return
 
@@ -548,9 +541,8 @@ class Parameter(object):
         NDArray on row_id's device
         """
         if self._stype != 'row_sparse':
-            raise RuntimeError("Cannot return a copy of Parameter %s via row_sparse_data() " \
-                               "because its storage type is %s. Please use data() instead." \
-                               %(self.name, self._stype))
+            raise RuntimeError(f"Cannot return a copy of Parameter {self.name} via row_sparse_data() " \
+                               f"because its storage type is {self._stype}. Please use data() instead.")
         return self._get_row_sparse(self._data, row_id.device, row_id)
 
     def list_row_sparse_data(self, row_id):
@@ -568,9 +560,9 @@ class Parameter(object):
         list of NDArrays
         """
         if self._stype != 'row_sparse':
-            raise RuntimeError("Cannot return copies of Parameter '%s' on all devices via " \
-                               "list_row_sparse_data() because its storage type is %s. Please " \
-                               "use data() instead." % (self.name, self._stype))
+            raise RuntimeError(f"Cannot return copies of Parameter '{self.name}' on all devices via " \
+                               f"list_row_sparse_data() because its storage type is {self._stype}. Please " \
+                               "use data() instead.")
         return self._get_row_sparse(self._data, list, row_id)
 
     @wrap_ctx_to_device_func
@@ -589,9 +581,8 @@ class Parameter(object):
         NDArray on device
         """
         if self._stype != 'default':
-            raise RuntimeError("Cannot return a copy of Parameter '%s' on device %s via data() " \
-                               "because its storage type is %s. Please use row_sparse_data() " \
-                               "instead." % (self.name, str(device), self._stype))
+            raise RuntimeError(f"Cannot return a copy of Parameter '{self.name}' on device {str(device)} via data() " \
+                               f"because its storage type is {self._stype}. Please use row_sparse_data() instead.")
         data = self._check_and_get(self._data, device)
         dc.set_variable(data, self.var())
         return data
@@ -606,9 +597,9 @@ class Parameter(object):
         list of NDArrays
         """
         if self._stype != 'default':
-            raise RuntimeError("Cannot return copies of Parameter '%s' on all devices via " \
-                               "list_data() because its storage type is %s. Please use " \
-                               "row_sparse_data() instead." % (self.name, self._stype))
+            raise RuntimeError(f"Cannot return copies of Parameter '{self.name}' on all devices via " \
+                               f"list_data() because its storage type is {self._stype}. Please use " \
+                               "row_sparse_data() instead.")
         return self._check_and_get(self._data, list)
 
     def grad(self, device=None):
@@ -621,8 +612,8 @@ class Parameter(object):
         """
         if self._data is not None and self._grad is None:
             raise RuntimeError(
-                "Cannot get gradient array for Parameter '%s' " \
-                "because grad_req='null'"%(self.name))
+                f"Cannot get gradient array for Parameter '{self.name}' " \
+                "because grad_req='null'")
         return self._check_and_get(self._grad, device)
 
     def list_grad(self):
@@ -630,8 +621,8 @@ class Parameter(object):
         as :py:meth:`values`."""
         if self._data is not None and self._grad is None:
             raise RuntimeError(
-                "Cannot get gradient array for Parameter '%s' " \
-                "because grad_req='null'"%(self.name))
+                f"Cannot get gradient array for Parameter '{self.name}' " \
+                "because grad_req='null'")
         return self._check_and_get(self._grad, list)
 
     def list_ctx(self):
@@ -645,7 +636,7 @@ class Parameter(object):
         if self._data is None:
             if self._deferred_init:
                 return self._deferred_init[1]
-            raise RuntimeError("Parameter '%s' has not been initialized"%self.name)
+            raise RuntimeError(f"Parameter '{self.name}' has not been initialized")
         return self._device_list
 
     def zero_grad(self):
@@ -714,10 +705,9 @@ class Parameter(object):
                     continue
 
                 assert v is None or v == existing, \
-                    "Cannot retrieve Parameter '%s' because desired attribute " \
-                    "does not match with stored for attribute '%s': " \
-                    "desired '%s' vs stored '%s'."%(
-                        self.name, k, str(v), str(getattr(self, k)))
+                    f"Cannot retrieve Parameter '{self.name}' because desired attribute " \
+                    f"does not match with stored for attribute '{k}': " \
+                    f"desired '{str(v)}' vs stored '{str(getattr(self, k))}'."
             else:
                 setattr(self, k, v)
 
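
Many of the converted messages in this file keep explicit str(...) calls inside the braces. f-strings already apply the equivalent of str() to interpolated values, and the !r conversion produces the quoted repr that several messages assemble by hand; a sketch, not a further change to the patch:

    shape = (2, 3)
    grad_stype = 'row_sparse'

    # {str(x)} and {x} render identically, so str() in the braces is redundant
    assert f"shape {str(shape)}" == f"shape {shape}"

    # manual quoting '{x}' can be written {x!r}
    assert f"got '{grad_stype}'" == f"got {grad_stype!r}"
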
diff --git a/python/mxnet/gluon/rnn/conv_rnn_cell.py b/python/mxnet/gluon/rnn/conv_rnn_cell.py
index 7ac6db4049..1dec2458aa 100644
--- a/python/mxnet/gluon/rnn/conv_rnn_cell.py
+++ b/python/mxnet/gluon/rnn/conv_rnn_cell.py
@@ -71,7 +71,7 @@ class _BaseConvRNNCell(HybridRecurrentCell):
         self._h2h_kernel = (h2h_kernel,) * dims if isinstance(h2h_kernel, numeric_types) \
                            else h2h_kernel
         assert all(k % 2 == 1 for k in self._h2h_kernel), \
-            "Only support odd number, get h2h_kernel= %s" % str(h2h_kernel)
+            f"Only support odd number, get h2h_kernel= {str(h2h_kernel)}"
         self._h2h_dilate = (h2h_dilate,) * dims if isinstance(h2h_dilate, numeric_types) \
                            else h2h_dilate
 
diff --git a/python/mxnet/gluon/rnn/rnn_cell.py b/python/mxnet/gluon/rnn/rnn_cell.py
index d189e39758..1619919b3c 100644
--- a/python/mxnet/gluon/rnn/rnn_cell.py
+++ b/python/mxnet/gluon/rnn/rnn_cell.py
@@ -901,7 +901,7 @@ class ModifierCell(HybridRecurrentCell):
     """
     def __init__(self, base_cell):
         assert not base_cell._modified, \
-            "Cell %s is already modified. One cell cannot be modified twice"%base_cell.name
+            f"Cell {base_cell.name} is already modified. One cell cannot be modified twice"
         base_cell._modified = True
         super(ModifierCell, self).__init__()
         self.base_cell = base_cell
diff --git a/python/mxnet/gluon/rnn/rnn_layer.py b/python/mxnet/gluon/rnn/rnn_layer.py
index 42fabf0a5c..dab8f8dd9d 100644
--- a/python/mxnet/gluon/rnn/rnn_layer.py
+++ b/python/mxnet/gluon/rnn/rnn_layer.py
@@ -41,7 +41,7 @@ class _RNNLayer(HybridBlock):
                  dtype, use_sequence_length=False, **kwargs):
         super(_RNNLayer, self).__init__(**kwargs)
         assert layout in ('TNC', 'NTC'), \
-            "Invalid layout %s; must be one of ['TNC' or 'NTC']"%layout
+            f"Invalid layout {layout}; must be one of ['TNC' or 'NTC']"
         self._hidden_size = hidden_size
         self._projection_size = projection_size if projection_size else None
         self._num_layers = num_layers
@@ -149,8 +149,7 @@ class _RNNLayer(HybridBlock):
         for state, info in zip(states, self.state_info(batch_size)):
             if state.shape != info['shape']:
                 raise ValueError(
-                    "Invalid recurrent state shape. Expecting %s, got %s."%(
-                        str(info['shape']), str(state.shape)))
+                    f"Invalid recurrent state shape. Expecting {str(info['shape'])}, got {str(state.shape)}.")
         out = self._forward_kernel(inputs, states, sequence_length)
 
         # out is (output, state)
diff --git a/python/mxnet/gluon/trainer.py b/python/mxnet/gluon/trainer.py
index afbe3e49aa..f0527b8c2a 100644
--- a/python/mxnet/gluon/trainer.py
+++ b/python/mxnet/gluon/trainer.py
@@ -86,7 +86,7 @@ class Trainer(object):
         if not isinstance(params, (list, tuple)):
             raise ValueError(
                 "First argument must be a list or dict of Parameters, " \
-                "got %s."%(type(params)))
+                f"got {type(params)}.")
         self._params = []
         # parameters to initialize on the kvstore
         self._contains_sparse_weight = False
@@ -96,7 +96,7 @@ class Trainer(object):
             if not isinstance(param, Parameter):
                 raise ValueError(
                     "First argument must be a list or dict of Parameters, " \
-                    "got list of %s."%(type(param)))
+                    f"got list of {type(param)}.")
             if param._uuid in self._param2idx:
                 # Shared parameters have same uuid; only need to store one of the shared versions
                 continue
@@ -138,8 +138,8 @@ class Trainer(object):
             device = param.list_device()
             assert devices is None or devices == device, \
                 "All Parameters must be initialized on the same set of devices, " \
-                "but Parameter %s is initialized on %s while previous Parameters " \
-                "are initialized on %s."%(param.name, str(device), str(devices))
+                f"but Parameter {param.name} is initialized on {str(device)} while previous Parameters " \
+                f"are initialized on {str(devices)}."
             devices = device
         return devices
 
@@ -464,13 +464,12 @@ class Trainer(object):
                 for data in param._check_and_get(param._data, list):
                     if not data._fresh_grad:
                         raise UserWarning(
-                            "Gradient of Parameter `%s` on device %s has not been updated "
+                            f"Gradient of Parameter `{param.name}` on device {str(data.device)} has not been updated "
                             "by backward since last `step`. This could mean a bug in your "
                             "model that made it only use a subset of the Parameters (Blocks) "
                             "for this iteration. If you are intentionally only using a subset, "
                             "call step with ignore_stale_grad=True to suppress this "
-                            "warning and skip updating of Parameters with stale gradient" \
-                            %(param.name, str(data.device)))
+                            "warning and skip updating of Parameters with stale gradient")
 
             if self._kvstore and self._update_on_kvstore:
                 continue
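
When a long message spans continuation lines, as in the trainer hunk above, every piece that interpolates needs its own f prefix; a piece without one is kept verbatim, braces and all. A small sketch of the pitfall:

    name, device = 'dense0_weight', 'gpu(0)'

    broken = (f"Parameter {name} is initialized on "
              "{device} while previous Parameters are not.")   # missing f
    assert '{device}' in broken                                # literal braces survive

    fixed = (f"Parameter {name} is initialized on "
             f"{device} while previous Parameters are not.")
    assert 'gpu(0)' in fixed
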
diff --git a/python/mxnet/gluon/utils.py b/python/mxnet/gluon/utils.py
index 36ce4b0bfd..5e22db9270 100644
--- a/python/mxnet/gluon/utils.py
+++ b/python/mxnet/gluon/utils.py
@@ -64,10 +64,9 @@ def split_data(data, num_slice, batch_axis=0, even_split=True):
     size = data.shape[batch_axis]
     if even_split and size % num_slice != 0:
         raise ValueError(
-            "data with shape %s cannot be evenly split into %d slices along axis %d. " \
-            "Use a batch size that's multiple of %d or set even_split=False to allow " \
-            "uneven partitioning of data."%(
-                str(data.shape), num_slice, batch_axis, num_slice))
+            f"data with shape {str(data.shape)} cannot be evenly split into {num_slice} slices " \
+            f"along axis {batch_axis}. Use a batch size that's multiple of {num_slice} " \
+            f"or set even_split=False to allow uneven partitioning of data.")
 
     n_each_section, extras = divmod(size, num_slice)
     section_sizes = [0] + (extras * [n_each_section + 1] +
@@ -392,7 +391,7 @@ def _brief_print_list(lst, limit=7):
     if len(lst) > limit:
         return _brief_print_list(lst[:limit//2], limit) + ', ..., ' + \
             _brief_print_list(lst[-limit//2:], limit)
-    return ', '.join(["'%s'"%str(i) for i in lst])
+    return ', '.join([f"'{str(i)}'" for i in lst])
 
 
 class HookHandle(object):
diff --git a/python/mxnet/image/detection.py b/python/mxnet/image/detection.py
index e62fb67ce8..257e6ed29a 100644
--- a/python/mxnet/image/detection.py
+++ b/python/mxnet/image/detection.py
@@ -693,7 +693,7 @@ class ImageDetIter(ImageIter):
     def _check_valid_label(self, label):
         """Validate label and its shape."""
         if len(label.shape) != 2 or label.shape[1] < 5:
-            msg = "Label with shape (1+, 5+) required, %s received." % str(label)
+            msg = f"Label with shape (1+, 5+) required, {str(label)} received."
             raise RuntimeError(msg)
         valid_label = np.where(np.logical_and(label[:, 0] >= 0, label[:, 3] > label[:, 1],
                                               label[:, 4] > label[:, 2]))[0]
@@ -730,8 +730,7 @@ class ImageDetIter(ImageIter):
         header_width = int(raw[0])
         obj_width = int(raw[1])
         if (raw.size - header_width) % obj_width != 0:
-            msg = "Label shape %s inconsistent with annotation width %d." \
-                %(str(raw.shape), obj_width)
+            msg = f"Label shape {str(raw.shape)} inconsistent with annotation width {obj_width}."
             raise RuntimeError(msg)
         out = np.reshape(raw[header_width:], (-1, obj_width))
         # remove bad ground-truths
@@ -848,12 +847,10 @@ class ImageDetIter(ImageIter):
         if not len(label_shape) == 2:
             raise ValueError('label_shape should have length 2')
         if label_shape[0] < self.label_shape[0]:
-            msg = 'Attempts to reduce label count from %d to %d, not allowed.' \
-                % (self.label_shape[0], label_shape[0])
+            msg = f'Attempts to reduce label count from {self.label_shape[0]} to {label_shape[0]}, not allowed.'
             raise ValueError(msg)
         if label_shape[1] != self.provide_label[0][1][2]:
-            msg = 'label_shape object width inconsistent: %d vs %d.' \
-                % (self.provide_label[0][1][2], label_shape[1])
+            msg = f'label_shape object width inconsistent: {self.provide_label[0][1][2]} vs {label_shape[1]}.'
             raise ValueError(msg)
 
     def draw_next(self, color=None, thickness=2, mean=None, std=None, clip=True,
diff --git a/python/mxnet/image/image.py b/python/mxnet/image/image.py
index 4ce4139a0a..645fa8821e 100644
--- a/python/mxnet/image/image.py
+++ b/python/mxnet/image/image.py
@@ -350,7 +350,7 @@ def _get_interp_method(interp, sizes=()):
     if interp == 10:
         return random.randint(0, 4)
     if interp not in (0, 1, 2, 3, 4):
-        raise ValueError('Unknown interp method %d' % interp)
+        raise ValueError(f'Unknown interp method {interp}')
     return interp
 
 
diff --git a/python/mxnet/initializer.py b/python/mxnet/initializer.py
index 987b357f57..0767af760d 100644
--- a/python/mxnet/initializer.py
+++ b/python/mxnet/initializer.py
@@ -262,10 +262,10 @@ class Initializer(object):
 
     def _init_default(self, name, _):
         raise ValueError(
-            'Unknown initialization pattern for %s. ' \
+            f'Unknown initialization pattern for {name}. ' \
             'Default initialization is now limited to '\
             '"weight", "bias", "gamma" (1.0), and "beta" (0.0).' \
-            'Please use mx.sym.Variable(init=mx.init.*) to set initialization pattern' % name)
+            ' Please use mx.sym.Variable(init=mx.init.*) to set initialization pattern.')
 
     def __eq__(self, other):
         if not isinstance(other, Initializer):
@@ -345,15 +345,14 @@ class Load(object):
     def __call__(self, name, arr):
         if name in self.param:
             assert arr.shape == self.param[name].shape, \
-                'Parameter %s cannot be initialized from loading. '%name + \
-                'Shape mismatch, target %s vs loaded %s'%(str(arr.shape),
-                                                          self.param[name].shape)
+                f'Parameter {name} cannot be initialized from loading. ' + \
+                f'Shape mismatch, target {str(arr.shape)} vs loaded {self.param[name].shape}'
             arr[:] = self.param[name]
             if self.verbose:
                 logging.info('Initialized %s by loading', name)
         else:
             assert self.default_init is not None, \
-                "Cannot Initialize %s. Not found in loaded param "%name + \
+                f"Cannot Initialize {name}. Not found in loaded param " + \
                 "and no default Initializer is provided."
             self.default_init(name, arr)
             if self.verbose:
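
A note on the multi-line messages above: adjacent string literals are
concatenated at compile time, so only the fragments that interpolate a
value need the f prefix; plain fragments can stay as-is. A minimal
sketch (hypothetical name, not from Initializer):

    name = 'fc1_weight'
    msg = (f'Unknown initialization pattern for {name}. '
           'Default initialization is now limited to '
           '"weight", "bias", "gamma" (1.0), and "beta" (0.0).')
    assert msg.startswith('Unknown initialization pattern for fc1_weight.')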
diff --git a/python/mxnet/io/io.py b/python/mxnet/io/io.py
index b0a01290c4..c3dc985137 100644
--- a/python/mxnet/io/io.py
+++ b/python/mxnet/io/io.py
@@ -72,8 +72,7 @@ class DataDesc(namedtuple('DataDesc', ['name', 'shape'])):
         return ret
 
     def __repr__(self):
-        return "DataDesc[%s,%s,%s,%s]" % (self.name, self.shape, self.dtype,
-                                          self.layout)
+        return f"DataDesc[{self.name},{self.shape},{self.dtype},{self.layout}]"
 
     @staticmethod
     def get_batch_axis(layout):
@@ -955,13 +954,12 @@ def _make_io_iterator(handle):
         [py_str(arg_types[i]) for i in range(narg)],
         [py_str(arg_descs[i]) for i in range(narg)])
 
-    doc_str = ('%s\n\n' +
-               '%s\n' +
+    doc_str = (f'{desc.value}\n\n' +
+               f'{param_str}\n' +
                'Returns\n' +
                '-------\n' +
                'MXDataIter\n'+
                '    The result iterator.')
-    doc_str = doc_str % (desc.value, param_str)
 
     def creator(*args, **kwargs):
         """Create an iterator.
@@ -1002,7 +1000,7 @@ def _make_io_iterator(handle):
             ctypes.byref(iter_handle)))
 
         if len(args):
-            raise TypeError('%s can only accept keyword arguments' % iter_name)
+            raise TypeError(f'{iter_name} can only accept keyword arguments')
 
         return MXDataIter(iter_handle, **kwargs)
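
The doc_str change above also drops the trailing
"doc_str = doc_str % (desc.value, param_str)" pass, because f-strings
interpolate where the literal is written. A sketch with stand-in values
(desc and param_str are placeholders, not the real MXDataIter metadata):

    desc = 'An example iterator.'
    param_str = 'Parameters\n----------'
    doc_str = (f'{desc}\n\n'
               f'{param_str}\n'
               'Returns\n'
               '-------\n'
               'MXDataIter\n'
               '    The result iterator.')
    assert doc_str.startswith('An example iterator.\n\n')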
 
diff --git a/python/mxnet/io/utils.py b/python/mxnet/io/utils.py
index 55ba34aea4..7785a97e53 100644
--- a/python/mxnet/io/utils.py
+++ b/python/mxnet/io/utils.py
@@ -45,7 +45,7 @@ def _init_data(data, allow_empty, default_name):
             data = OrderedDict([(default_name, data[0])])  # pylint: disable=redefined-variable-type
         else:
             data = OrderedDict(  # pylint: disable=redefined-variable-type
-                [('_%d_%s' % (i, default_name), d) for i, d in enumerate(data)])
+                [(f'_{i}_{default_name}', d) for i, d in enumerate(data)])
     if not isinstance(data, dict):
         raise TypeError("Input must be NDArray, numpy.ndarray, h5py.Dataset " +
                         "a list of them or dict with them as values")
@@ -54,7 +54,7 @@ def _init_data(data, allow_empty, default_name):
             try:
                 data[k] = array(v)
             except:
-                raise TypeError(("Invalid type '%s' for %s, " % (type(v), k)) +
+                raise TypeError((f"Invalid type '{type(v)}' for {k}, ") +
                                 "should be NDArray, numpy.ndarray or h5py.Dataset")
 
     return list(sorted(data.items()))
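
The generated default names behave as before; a self-contained sketch
of what _init_data now produces for a list input (the string elements
stand in for NDArrays):

    from collections import OrderedDict
    default_name = 'data'
    arrays = ['a0', 'a1', 'a2']
    named = OrderedDict((f'_{i}_{default_name}', d) for i, d in enumerate(arrays))
    assert list(named) == ['_0_data', '_1_data', '_2_data']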
diff --git a/python/mxnet/kvstore/base.py b/python/mxnet/kvstore/base.py
index 39e84f52b2..f22fe1b8d9 100644
--- a/python/mxnet/kvstore/base.py
+++ b/python/mxnet/kvstore/base.py
@@ -234,11 +234,9 @@ class KVStoreBase(object):
         assert(isinstance(klass, type))
         name = klass.__name__.lower()
         if name in KVStoreBase.kv_registry:
-            warnings.warn('WARNING: New kvstore %s.%s is overriding '
-                          'existing kvstore %s.%s' %
-                          (klass.__module__, klass.__name__,
-                           KVStoreBase.kv_registry[name].__module__,
-                           KVStoreBase.kv_registry[name].__name__))
+            warnings.warn(f'WARNING: New kvstore {klass.__module__}.{klass.__name__} is overriding '
+                          'existing kvstore '
+                          f'{KVStoreBase.kv_registry[name].__module__}.{KVStoreBase.kv_registry[name].__name__}')
         KVStoreBase.kv_registry[name] = klass
         return klass
 
diff --git a/python/mxnet/kvstore/kvstore_server.py b/python/mxnet/kvstore/kvstore_server.py
index f7fd5350f7..d91a651160 100644
--- a/python/mxnet/kvstore/kvstore_server.py
+++ b/python/mxnet/kvstore/kvstore_server.py
@@ -58,8 +58,7 @@ class KVStoreServer(object):
                     raise
                 self.kvstore.set_optimizer(optimizer)
             else:
-                print("server %d, unknown command (%d, %s)" % (
-                    self.kvstore.rank, cmd_id, cmd_body))
+                print(f"server {self.kvstore.rank}, unknown command ({cmd_id}, {cmd_body})")
         return server_controller
 
     def run(self):
diff --git a/python/mxnet/library.py b/python/mxnet/library.py
index 22528a08dc..0cacfca82c 100644
--- a/python/mxnet/library.py
+++ b/python/mxnet/library.py
@@ -54,14 +54,14 @@ def load(path, verbose=True):
 
     #check if path exists
     if not os.path.exists(path):
-        raise MXNetError("load path %s does NOT exist" % path)
+        raise MXNetError(f"load path {path} does NOT exist")
     #check if path is an absolute path
     if not os.path.isabs(path):
-        raise MXNetError("load path %s is not an absolute path" % path)
+        raise MXNetError(f"load path {path} is not an absolute path")
     #check if path is to a library file
     _, file_ext = os.path.splitext(path)
     if not file_ext in ['.so', '.dll']:
-        raise MXNetError("load path %s is NOT a library file" % path)
+        raise MXNetError(f"load path {path} is NOT a library file")
 
     verbose_val = 1 if verbose else 0
     byt_obj = path.encode('utf-8')
diff --git a/python/mxnet/lr_scheduler.py b/python/mxnet/lr_scheduler.py
index 436085620a..f4375a14fc 100644
--- a/python/mxnet/lr_scheduler.py
+++ b/python/mxnet/lr_scheduler.py
@@ -63,7 +63,7 @@ class LRScheduler(object):
         elif self.warmup_mode == 'constant':
             return self.warmup_begin_lr
         else:
-            raise ValueError("Invalid warmup mode %s"%self.warmup_mode)
+            raise ValueError(f"Invalid warmup mode {self.warmup_mode}")
 
     def __call__(self, num_update):
         """Return a new learning rate.
diff --git a/python/mxnet/model.py b/python/mxnet/model.py
index e634aef256..051e1fa369 100644
--- a/python/mxnet/model.py
+++ b/python/mxnet/model.py
@@ -63,8 +63,8 @@ def _create_sparse_kvstore(kvstore):
     elif isinstance(kvstore, str):
         kv = kvs.create(kvstore)
     else:
-        raise TypeError("Cannot create '%s' KVStore with row_sparse parameters. "
-                        "The type must be KVStore or str." % kvstore)
+        raise TypeError(f"Cannot create '{kvstore}' KVStore with row_sparse parameters. "
+                        "The type must be KVStore or str.")
     assert kv.is_capable(kvs.KVStoreBase.OPTIMIZER), \
         "KVStore with sparse weight requires optimizer support. " \
         "However, type(kv) does not support optimizer. " \
@@ -209,23 +209,23 @@ def save_checkpoint(prefix, epoch, symbol, arg_params, aux_params, remove_amp_ca
     - ``prefix-epoch.params`` will be saved for parameters.
     """
     if symbol is not None:
-        symbol.save('%s-symbol.json' % prefix, remove_amp_cast=remove_amp_cast)
+        symbol.save(f'{prefix}-symbol.json', remove_amp_cast=remove_amp_cast)
 
-    save_dict = {('arg:%s' % k) : v.as_in_context(cpu()) for k, v in arg_params.items()}
-    save_dict.update({('aux:%s' % k) : v.as_in_context(cpu()) for k, v in aux_params.items()})
-    param_name = '%s-%04d.params' % (prefix, epoch)
+    save_dict = {f'arg:{k}': v.as_in_context(cpu()) for k, v in arg_params.items()}
+    save_dict.update({f'aux:{k}': v.as_in_context(cpu()) for k, v in aux_params.items()})
+    param_name = f'{prefix}-{epoch:04}.params'
     nd.save(param_name, save_dict)
-    logging.info('Saved checkpoint to \"%s\"', param_name)
+    logging.info('Saved checkpoint to "%s"', param_name)
 
 
 def load_params(prefix, epoch):
     """Load params from a file
     """
-    save_dict = nd.load("%s-%04d.params" % (prefix, epoch))
+    save_dict = nd.load(f'{prefix}-{epoch:04}.params')
     arg_params = {}
     aux_params = {}
     if not save_dict:
-        logging.warning("Params file '%s' is empty", '%s-%04d.params' % (prefix, epoch))
+        logging.warning("Params file '%s' is empty", f'{prefix}-{epoch:04}.params')
         return (arg_params, aux_params)
     for k, v in save_dict.items():
         tp, name = k.split(":", 1)
@@ -259,6 +259,6 @@ def load_checkpoint(prefix, epoch):
     - Symbol will be loaded from ``prefix-symbol.json``.
     - Parameters will be loaded from ``prefix-epoch.params``.
     """
-    symbol = sym.load('%s-symbol.json' % prefix)
+    symbol = sym.load(f'{prefix}-symbol.json')
     arg_params, aux_params = load_params(prefix, epoch)
     return (symbol, arg_params, aux_params)
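
On the checkpoint names above: for integers the f-string spec
{epoch:04} zero-pads exactly like the old %04d (the fully explicit
spelling would be {epoch:04d}). A quick check:

    prefix, epoch = 'resnet', 7
    assert '%s-%04d.params' % (prefix, epoch) == f'{prefix}-{epoch:04}.params'
    assert f'{epoch:04}' == f'{epoch:04d}' == '0007'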
diff --git a/python/mxnet/name.py b/python/mxnet/name.py
index 59e4f6b39a..a80c2b8b63 100644
--- a/python/mxnet/name.py
+++ b/python/mxnet/name.py
@@ -54,7 +54,7 @@ class NameManager:
             return name
         if hint not in self._counter:
             self._counter[hint] = 0
-        name = '%s%d' % (hint, self._counter[hint])
+        name = f'{hint}{self._counter[hint]}'
         self._counter[hint] += 1
         return name
 
diff --git a/python/mxnet/ndarray/contrib.py b/python/mxnet/ndarray/contrib.py
index 5e1de94c0d..8d45783f2c 100644
--- a/python/mxnet/ndarray/contrib.py
+++ b/python/mxnet/ndarray/contrib.py
@@ -109,8 +109,8 @@ def _flatten(args, inout_str):
         return [args], int(0)
 
     assert isinstance(args, (list, tuple)), \
-        "%s must be (nested) list of NDArray, " \
-        "but got %s of type %s"%(inout_str, str(args), str(type(args)))
+        f"{inout_str} must be (nested) list of NDArray, " \
+        f"but got {str(args)} of type {str(type(args))}"
     flat = []
     fmts = []
     for i in args:
@@ -128,7 +128,7 @@ def _regroup(args, fmt):
 
     assert isinstance(args, (list, tuple)), \
         "output must be (nested) list of NDArray, " \
-        "but got %s of type %s"%(str(args), str(type(args)))
+        f"but got {str(args)} of type {str(type(args))}"
     ret = []
     for i in fmt:
         res, args = _regroup(args, i)
@@ -327,7 +327,7 @@ def while_loop(cond, func, loop_vars, max_iterations=None):
         try:
             inputs = type_(inputs)
         except:
-            raise ValueError("Cannot convert %s to python %s" % (name, type_.__name__))
+            raise ValueError(f"Cannot convert {name} to python {type_.__name__}")
         return inputs
 
     def _func_wrapper(loop_vars):
@@ -389,8 +389,8 @@ def while_loop(cond, func, loop_vars, max_iterations=None):
             stacked_outputs.append(ndarray.op.concat(*items, dim=0))
         except ValueError:
             raise ValueError("\n".join(
-                ["Shapes of %d-th elements in step_outputs are inconsistent, which are:" % i_th] +
-                ["  Step %d, shape is %s" % (i, str(x.shape)) for i, x in enumerate(items)]
+                [f"Shapes of {i_th}-th elements in step_outputs are inconsistent, which are:"] +
+                [f"  Step {i}, shape is {str(x.shape)}" for i, x in enumerate(items)]
             ))
     if out_fmt is not None:
         stacked_outputs, _ = _regroup(stacked_outputs, out_fmt)
@@ -455,7 +455,7 @@ def cond(pred, then_func, else_func):
         try:
             inputs = type_(inputs)
         except:
-            raise ValueError("Cannot convert %s to python %s" % (name, type_.__name__))
+            raise ValueError(f"Cannot convert {name} to python {type_.__name__}")
         return inputs
 
     branch = _to_python_scalar(pred, bool, "pred")
diff --git a/python/mxnet/ndarray/ndarray.py b/python/mxnet/ndarray/ndarray.py
index 0e6432a999..25f0fba3e6 100644
--- a/python/mxnet/ndarray/ndarray.py
+++ b/python/mxnet/ndarray/ndarray.py
@@ -300,10 +300,8 @@ fixed-size items.
     def __repr__(self):
         """Returns a string representation of the array."""
         if self._alive:
-            shape_info = 'x'.join(['%d' % x for x in self.shape])
-            return '\n%s\n<%s %s @%s>' % (str(self.asnumpy()),
-                                          self.__class__.__name__,
-                                          shape_info, self.ctx)
+            shape_info = 'x'.join([f'{x}' for x in self.shape])
+            return f'\n{str(self.asnumpy())}\n<{self.__class__.__name__} {shape_info} @{self.ctx}>'
         else:
             return '<FREED {}>'.format(self.__class__.__name__)
 
@@ -334,7 +332,7 @@ fixed-size items.
         elif isinstance(other, numeric_types):
             return _internal._plus_scalar(self, float(other), out=self)
         else:
-            raise TypeError('type %s not supported' % str(type(other)))
+            raise TypeError(f'type {str(type(other))} not supported')
 
     def __radd__(self, other):
         return self.__add__(other)
@@ -352,7 +350,7 @@ fixed-size items.
         elif isinstance(other, numeric_types):
             return _internal._minus_scalar(self, float(other), out=self)
         else:
-            raise TypeError('type %s not supported' % str(type(other)))
+            raise TypeError(f'type {str(type(other))} not supported')
 
     def __rsub__(self, other):
         """x.__rsub__(y) <=> y-x <=> mx.nd.subtract(y, x) """
@@ -375,7 +373,7 @@ fixed-size items.
         elif isinstance(other, numeric_types):
             return _internal._mul_scalar(self, float(other), out=self)
         else:
-            raise TypeError('type %s not supported' % str(type(other)))
+            raise TypeError(f'type {str(type(other))} not supported')
 
     def __rmul__(self, other):
         return self.__mul__(other)
@@ -397,7 +395,7 @@ fixed-size items.
         elif isinstance(other, numeric_types):
             return _internal._div_scalar(self, float(other), out=self)
         else:
-            raise TypeError('type %s not supported' % str(type(other)))
+            raise TypeError(f'type {str(type(other))} not supported')
 
     def __truediv__(self, other):
         return divide(self, other)
@@ -425,7 +423,7 @@ fixed-size items.
         elif isinstance(other, numeric_types):
             return _internal._mod_scalar(self, float(other), out=self)
         else:
-            raise TypeError('type %s not supported' % str(type(other)))
+            raise TypeError(f'type {str(type(other))} not supported')
 
     def __pow__(self, other):
         """x.__pow__(y) <=> x**y <=> mx.nd.power(x,y) """
@@ -1374,11 +1372,10 @@ fixed-size items.
                 source_array = np.array(source_array, dtype=self.dtype)
             except:
                 raise TypeError('array must consist of array-like data,' +
-                                'type %s is not supported' % str(type(array)))
+                                f' type {str(type(array))} is not supported')
         source_array = np.asarray(source_array, dtype=self.dtype, order='C')
         if source_array.shape != self.shape:
-            raise ValueError('Shape inconsistent: expected %s vs got %s'%(
-                str(source_array.shape), str(self.shape)))
+            raise ValueError(f'Shape inconsistent: expected {str(source_array.shape)} vs got {str(self.shape)}')
         check_call(_LIB.MXNDArraySyncCopyFromCPU(
             self.handle,
             source_array.ctypes.data_as(ctypes.c_void_p),
@@ -1442,8 +1439,7 @@ fixed-size items.
             length = self.shape[0]
             idx += length
             if idx < 0:
-                raise IndexError('index %d is out of bounds for axis 0 with size %d'
-                                 % (idx-length, length))
+                raise IndexError(f'index {idx-length} is out of bounds for axis 0 with size {length}')
         if _int64_enabled():
             check_call(_LIB.MXNDArrayAt64(
                 self.handle, ctypes.c_int64(idx), ctypes.byref(handle)))
@@ -2987,7 +2983,7 @@ fixed-size items.
         """
         if stype == 'csr' and len(self.shape) != 2:
             raise ValueError("To convert to a CSR, the NDArray should be 2 Dimensional. Current "
-                             "shape is %s" % str(self.shape))
+                             f"shape is {str(self.shape)}")
 
         return op.cast_storage(self, stype=stype)
 
@@ -3334,7 +3330,7 @@ def _get_broadcast_shape(shape1, shape2):
     i = max(length1, length2) - 1
     for a, b in zip(shape1[::-1], shape2[::-1]):
         if a != 1 and b != 1 and a != b:
-            raise ValueError('shape1=%s is not broadcastable to shape2=%s' % (shape1, shape2))
+            raise ValueError(f'shape1={shape1} is not broadcastable to shape2={shape2}')
         shape[i] = b if a == 1 else a
         i -= 1
     return tuple(shape)
@@ -3506,12 +3502,12 @@ def moveaxis(tensor, source, destination):
         source = np.core.numeric.normalize_axis_tuple(source, tensor.ndim)
     except IndexError:
         raise ValueError('Source should verify 0 <= source < tensor.ndim'
-                         'Got %d' % source)
+                         f'. Got {source}')
     try:
         destination = np.core.numeric.normalize_axis_tuple(destination, tensor.ndim)
     except IndexError:
-        raise ValueError('Destination should verify 0 <= destination < tensor.ndim (%d).'
-                         % tensor.ndim, 'Got %d' % destination)
+        raise ValueError(f'Destination should verify 0 <= destination < tensor.ndim ({tensor.ndim}). '
+                         f'Got {destination}')
 
     if len(source) != len(destination):
         raise ValueError('`source` and `destination` arguments must have '
@@ -3671,7 +3667,7 @@ def _ufunc_helper(lhs, rhs, fn_array, fn_scalar, lfn_scalar, rfn_scalar=None):
     elif isinstance(rhs, NDArray):
         return fn_array(lhs, rhs)
     else:
-        raise TypeError('type %s not supported' % str(type(rhs)))
+        raise TypeError(f'type {str(type(rhs))} not supported')
 #pylint: enable= too-many-arguments, no-member, protected-access
 
 
diff --git a/python/mxnet/ndarray/numpy/_op.py b/python/mxnet/ndarray/numpy/_op.py
index 0cc1a71672..1c42723b64 100644
--- a/python/mxnet/ndarray/numpy/_op.py
+++ b/python/mxnet/ndarray/numpy/_op.py
@@ -9485,7 +9485,7 @@ def pad(x, pad_width, mode='constant', **kwargs): # pylint: disable=too-many-arg
         # Make sure have allowed kwargs appropriate for mode
         for key in kwargs:
             if key not in allowedkwargs[mode]:
-                raise ValueError('%s keyword not in allowed keywords %s' %(key, allowedkwargs[mode]))
+                raise ValueError(f'{key} keyword not in allowed keywords {allowedkwargs[mode]}')
 
     unsupported_kwargs = set(kwargs) - set(allowedkwargs[mode])
     if unsupported_kwargs:
diff --git a/python/mxnet/ndarray/numpy_extension/control_flow.py b/python/mxnet/ndarray/numpy_extension/control_flow.py
index e3b66e03d8..f5c3879756 100644
--- a/python/mxnet/ndarray/numpy_extension/control_flow.py
+++ b/python/mxnet/ndarray/numpy_extension/control_flow.py
@@ -405,7 +405,7 @@ def while_loop(cond, func, loop_vars, max_iterations=None, name="while_loop"):
         try:
             inputs = type_(inputs)
         except:
-            raise ValueError("Cannot convert %s to python %s" % (name, type_.__name__))
+            raise ValueError(f"Cannot convert {name} to python {type_.__name__}")
         return inputs
 
     def _cond_wrapper(loop_vars):
@@ -543,7 +543,7 @@ def while_loop(cond, func, loop_vars, max_iterations=None, name="while_loop"):
         _union_inputs(cond_g, func_g)
     for i_th, loc in enumerate(func_var_locs, 1):
         if loc == -1:
-            raise ValueError("The %d-th loop_var doesn't involve into the computation" % i_th)
+            raise ValueError(f"The {i_th}-th loop_var doesn't involve into the computation")
     result = _api_internal.while_loop(
         cond_g.handle,
         func_g.handle,
diff --git a/python/mxnet/ndarray/random.py b/python/mxnet/ndarray/random.py
index 1e7955f39d..bc83d83259 100644
--- a/python/mxnet/ndarray/random.py
+++ b/python/mxnet/ndarray/random.py
@@ -34,7 +34,7 @@ def _random_helper(random, sampler, params, shape, dtype, ctx, out, kwargs):
         for i in params[1:]:
             assert isinstance(i, NDArray), \
                 "Distribution parameters must all have the same type, but got " \
-                "both %s and %s."%(type(params[0]), type(i))
+                f"both {type(params[0])} and {type(i)}."
         return sampler(*params, shape=shape, dtype=dtype, out=out, **kwargs)
     elif isinstance(params[0], numeric_types):
         if ctx is None:
@@ -44,11 +44,11 @@ def _random_helper(random, sampler, params, shape, dtype, ctx, out, kwargs):
         for i in params[1:]:
             assert isinstance(i, numeric_types), \
                 "Distribution parameters must all have the same type, but got " \
-                "both %s and %s."%(type(params[0]), type(i))
+                f"both {type(params[0])} and {type(i)}."
         return random(*params, shape=shape, dtype=dtype, ctx=ctx, out=out, **kwargs)
 
     raise ValueError("Distribution parameters must be either NDArray or numbers, "
-                     "but got %s."%type(params[0]))
+                     f"but got {type(params[0])}.")
 
 
 def uniform(low=0, high=1, shape=_Null, dtype=_Null, ctx=None, out=None, **kwargs):
diff --git a/python/mxnet/ndarray/register.py b/python/mxnet/ndarray/register.py
index 08ba647245..19791bc7ca 100644
--- a/python/mxnet/ndarray/register.py
+++ b/python/mxnet/ndarray/register.py
@@ -155,19 +155,19 @@ def _generate_ndarray_function_code(handle, op_name, func_name, signature_only=F
         name, atype = arg_names[i], arg_types[i]
         if name == 'dtype':
             dtype_name = name
-            signature.append('%s=_Null'%name)
+            signature.append(f'{name}=_Null')
         elif atype.startswith('NDArray') or atype.startswith('Symbol'):
             assert not arr_name, \
                 "Op can only have one argument with variable " \
                 "size and it must be the last argument."
             if atype.endswith('[]'):
-                ndsignature.append('*%s'%name)
+                ndsignature.append(f'*{name}')
                 arr_name = name
             else:
-                ndsignature.append('%s=None'%name)
+                ndsignature.append(f'{name}=None')
                 ndarg_names.append(name)
         else:
-            signature.append('%s=_Null'%name)
+            signature.append(f'{name}=_Null')
             kwarg_names.append(name)
     signature.append('out=None')
     signature.append('name=None')
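
The f-strings in this file assemble Python source for the generated
operator wrappers. A sketch of the kind of signature text the loop
builds (the argument names are hypothetical, not a real op):

    arg_names = ['data', 'axis', 'dtype']
    ndsignature = [f'{name}=None' for name in arg_names[:1]]    # NDArray inputs
    signature = [f'{name}=_Null' for name in arg_names[1:]]     # keyword attributes
    sig = ', '.join(ndsignature + signature + ['out=None', 'name=None'])
    assert sig == 'data=None, axis=_Null, dtype=_Null, out=None, name=None'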
diff --git a/python/mxnet/ndarray/sparse.py b/python/mxnet/ndarray/sparse.py
index 6e5393a0aa..29a56413e5 100644
--- a/python/mxnet/ndarray/sparse.py
+++ b/python/mxnet/ndarray/sparse.py
@@ -125,10 +125,9 @@ class BaseSparseNDArray(NDArray):
 
     def __repr__(self):
         """Returns a string representation of the sparse array."""
-        shape_info = 'x'.join(['%d' % x for x in self.shape])
+        shape_info = 'x'.join([f'{x}' for x in self.shape])
         # The data content is not displayed since the array usually has big shape
-        return '\n<%s %s @%s>' % (self.__class__.__name__,
-                                  shape_info, self.context)
+        return f'\n<{self.__class__.__name__} {shape_info} @{self.context}>'
 
     def __add__(self, other):
         return add(self, other)
@@ -450,7 +449,7 @@ class CSRNDArray(BaseSparseNDArray):
                 tmp = _array(value)
                 tmp.copyto(self)
             else:
-                raise TypeError('type %s not supported' % str(type(value)))
+                raise TypeError(f'type {str(type(value))} not supported')
         else:
             assert(isinstance(key, (int, tuple)))
             raise Exception('CSRNDArray only supports [:] for assignment')
@@ -713,7 +712,7 @@ class RowSparseNDArray(BaseSparseNDArray):
                 tmp = _array(value)
                 tmp.copyto(self)
             else:
-                raise TypeError('type %s not supported' % str(type(value)))
+                raise TypeError(f'type {str(type(value))} not supported')
         else:
             assert(isinstance(key, (int, tuple)))
             raise TypeError('RowSparseNDArray only supports [:] for assignment')
@@ -1201,7 +1200,7 @@ def _ndarray_cls(handle, writable=True, stype=_STORAGE_TYPE_UNDEFINED):
     elif stype == _STORAGE_TYPE_ROW_SPARSE:
         return RowSparseNDArray(handle, writable=writable)
     else:
-        raise Exception("unknown storage type: %s"%stype)
+        raise Exception(f"unknown storage type: {stype}")
 
 
 _set_ndarray_class(_ndarray_cls)
diff --git a/python/mxnet/ndarray_doc.py b/python/mxnet/ndarray_doc.py
index d696e71d81..468fd98890 100644
--- a/python/mxnet/ndarray_doc.py
+++ b/python/mxnet/ndarray_doc.py
@@ -38,17 +38,16 @@ def _build_doc(func_name,
     param_str = _build_param_doc(arg_names, arg_types, arg_desc)
     # if key_var_num_args:
     #     desc += '\nThis function support variable length of positional input.'
-    doc_str = ('%s\n\n' +
-               '%s\n' +
+    doc_str = (f'{desc}\n\n' +
+               f'{param_str}\n' +
                'out : NDArray, optional\n' +
                '    The output NDArray to hold the result.\n\n'+
                'Returns\n' +
                '-------\n' +
                'out : NDArray or list of NDArrays\n' +
                '    The output of this function.')
-    doc_str = doc_str % (desc, param_str)
     extra_doc = "\n" + '\n'.join([x.__doc__ for x in type.__subclasses__(NDArrayDoc)
-                                  if x.__name__ == '%sDoc' % func_name])
+                                  if x.__name__ == f'{func_name}Doc'])
     doc_str += _re.sub(_re.compile("    "), "", extra_doc)
     doc_str = _re.sub('NDArray-or-Symbol', 'NDArray', doc_str)
 
diff --git a/python/mxnet/numpy/function_base.py b/python/mxnet/numpy/function_base.py
index 69052765dd..d8c52195ac 100644
--- a/python/mxnet/numpy/function_base.py
+++ b/python/mxnet/numpy/function_base.py
@@ -91,8 +91,7 @@ def meshgrid(*xi, **kwargs):
     indexing = kwargs.pop('indexing', 'xy')
 
     if kwargs:
-        raise TypeError("meshgrid() got an unexpected keyword argument '%s'"
-                        % (list(kwargs)[0],))
+        raise TypeError(f"meshgrid() got an unexpected keyword argument '{list(kwargs)[0]}'")
 
     if indexing not in ['xy', 'ij']:
         raise ValueError(
diff --git a/python/mxnet/numpy/multiarray.py b/python/mxnet/numpy/multiarray.py
index 35644e8429..ed96795ffa 100644
--- a/python/mxnet/numpy/multiarray.py
+++ b/python/mxnet/numpy/multiarray.py
@@ -638,7 +638,7 @@ class ndarray(NDArray):  # pylint: disable=invalid-name
         elif isinstance(value, ndarray):
             _npi.boolean_mask_assign_tensor(data=self, mask=key, value=value, start_axis=0, out=self)
         else:
-            raise NotImplementedError('type %s is not supported.'%(type(value)))
+            raise NotImplementedError(f'type {type(value)} is not supported.')
 
     # pylint: disable=too-many-return-statements
     def __getitem__(self, key):
diff --git a/python/mxnet/onnx/mx2onnx/_export_onnx.py b/python/mxnet/onnx/mx2onnx/_export_onnx.py
index 78941351e0..53c5ce6297 100644
--- a/python/mxnet/onnx/mx2onnx/_export_onnx.py
+++ b/python/mxnet/onnx/mx2onnx/_export_onnx.py
@@ -77,9 +77,9 @@ class MXNetGraph(object):
         op = str(node["op"])
         opset_version = kwargs.get("opset_version", onnx_opset_version())
         if opset_version < 12:
-            logging.warning('Your ONNX op set version is %s, '  % str(opset_version) +
-                            'which is lower than then lowest tested op set (12), please consider '
-                            'updating ONNX')
+            logging.warning('Your ONNX op set version is %s, '
+                            'which is lower than the lowest tested op set (12), please consider '
+                            'updating ONNX', opset_version)
             opset_version = 12
         # Fallback to older opset versions if op is not registered in current version
         convert_func = None
@@ -91,7 +91,7 @@ class MXNetGraph(object):
 
         # The conversion logic is not implemented
         if convert_func is None:
-            raise AttributeError("No conversion function registered for op type %s yet." % op)
+            raise AttributeError(f"No conversion function registered for op type {op} yet.")
 
         ret = convert_func(node, **kwargs)
         # in case the conversion function does not specify the returned dtype, we just return None
@@ -368,11 +368,11 @@ class MXNetGraph(object):
                             if nodename in graph_outputs:
                                 graph_output_names.append(nodename)
                                 if verbose:
                                     logging.info("Output node is: %s", nodename)
                     elif isinstance(converted_node, TensorProto):
                         raise ValueError("Did not expect TensorProto")
                     else:
-                        raise ValueError("node is of an unrecognized type: %s" % type(node))
+                        raise ValueError(f"node is of an unrecognized type: {type(node)}")
 
                     all_processed_nodes.append(converted_node)
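
Logging calls are the one place this change deliberately avoids
f-strings: logging interpolates its %s arguments lazily, only when the
record is actually emitted, which is why the warning above keeps the
('... %s ...', value) form. A minimal sketch:

    import logging
    opset_version = 11
    # Lazy: formatted only if the WARNING level is enabled.
    logging.warning('Your ONNX op set version is %s, please consider updating ONNX',
                    opset_version)
    # An f-string (or .format) here would format eagerly even when the
    # logger discards the record.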
 
diff --git a/python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset12.py b/python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset12.py
index 7058cbd651..311bc1b68f 100644
--- a/python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset12.py
+++ b/python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset12.py
@@ -61,7 +61,7 @@ def parse_helper(attrs, attrs_name, alt_value=None):
             dims = eval(attrs_str)
             return dims
         else:
-            raise AttributeError("Malformed %s dimensions: %s" % (attrs_name, str(attrs_str)))
+            raise AttributeError(f"Malformed {attrs_name} dimensions: {str(attrs_str)}")
     return alt_value
 
 def transform_padding(pad_width):
@@ -537,7 +537,7 @@ def convert_activation(node, **kwargs):
         )
     else:
         raise AttributeError(
-            "Activation %s not implemented or recognized in the converter" % act_type
+            f"Activation {act_type} not implemented or recognized in the converter"
         )
 
     return [node]
diff --git a/python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset13.py b/python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset13.py
index 4b09253c32..580d7aaf00 100644
--- a/python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset13.py
+++ b/python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset13.py
@@ -62,7 +62,7 @@ def parse_helper(attrs, attrs_name, alt_value=None):
             dims = eval(attrs_str)
             return dims
         else:
-            raise AttributeError("Malformed %s dimensions: %s" % (attrs_name, str(attrs_str)))
+            raise AttributeError(f"Malformed {attrs_name} dimensions: {str(attrs_str)}")
     return alt_value
 
 def transform_padding(pad_width):
diff --git a/python/mxnet/operator.py b/python/mxnet/operator.py
index 10b3aaf360..ae82252352 100644
--- a/python/mxnet/operator.py
+++ b/python/mxnet/operator.py
@@ -306,7 +306,7 @@ class NDArrayOp(PythonOp):
                                                         writable=False))
                 self.forward(in_data=tensors[0], out_data=tensors[1])
             except Exception:
-                print('Error in NDArrayOp.forward: %s' % traceback.format_exc())
+                print(f'Error in NDArrayOp.forward: {traceback.format_exc()}')
                 return False
             return True
 
@@ -324,7 +324,7 @@ class NDArrayOp(PythonOp):
                 self.backward(in_data=tensors[0], out_data=tensors[1],
                               in_grad=tensors[2], out_grad=tensors[3])
             except Exception:
-                print('Error in NDArrayOp.backward: %s' % traceback.format_exc())
+                print(f'Error in NDArrayOp.backward: {traceback.format_exc()}')
                 return False
             return True
 
@@ -347,7 +347,7 @@ class NDArrayOp(PythonOp):
                                             POINTER(mx_int))
                     tensor_dims[i] = len(rshape[i])
             except Exception:
-                print('Error in NDArrayOp.infer_shape: %s' % traceback.format_exc())
+                print(f'Error in NDArrayOp.infer_shape: {traceback.format_exc()}')
                 return False
             return True
 
@@ -359,7 +359,7 @@ class NDArrayOp(PythonOp):
                 ret = c_array(c_char_p, ret)
                 out[0] = cast(ret, POINTER(POINTER(c_char)))
             except Exception:
-                print('Error in NDArrayOp.list_outputs: %s' % traceback.format_exc())
+                print(f'Error in NDArrayOp.list_outputs: {traceback.format_exc()}')
                 return False
             return True
 
@@ -371,7 +371,7 @@ class NDArrayOp(PythonOp):
                 ret = c_array(c_char_p, ret)
                 out[0] = cast(ret, POINTER(POINTER(c_char)))
             except Exception:
-                print('Error in NDArrayOp.list_arguments: %s' % traceback.format_exc())
+                print(f'Error in NDArrayOp.list_arguments: {traceback.format_exc()}')
                 return False
             return True
 
@@ -386,7 +386,7 @@ class NDArrayOp(PythonOp):
                 rdeps = cast(c_array_buf(c_int, array('i', rdeps)), c_int_p)
                 deps[0] = rdeps
             except Exception:
-                print('Error in NDArrayOp.declare_backward_dependency: %s' % traceback.format_exc())
+                print(f'Error in NDArrayOp.declare_backward_dependency: {traceback.format_exc()}')
                 return False
             return True
 
@@ -565,9 +565,9 @@ class CustomOpProp(object):
         for i, stype in enumerate(in_stype):
             assert stype == _STORAGE_TYPE_ID_TO_STR[_STORAGE_TYPE_DEFAULT], \
             "Default infer_storage_type implementation doesnt allow non default stypes: " \
-            "found non default stype '%s' for in_stype[%d]. Please implement " \
+            f"found non default stype '{stype}' for in_stype[{i}]. Please implement " \
             "infer_storage_type and infer_storage_type_backward interface " \
-            "in your custom operator if you have non-default input/output stypes" % (stype, i)
+            "in your custom operator if you have non-default input/output stypes"
         return in_stype, \
                [_STORAGE_TYPE_ID_TO_STR[_STORAGE_TYPE_DEFAULT]]*len(self.list_outputs()), \
                [_STORAGE_TYPE_ID_TO_STR[_STORAGE_TYPE_DEFAULT]]*len(self.list_auxiliary_states())
@@ -610,17 +610,17 @@ class CustomOpProp(object):
         for i, stype in enumerate(ograd_stype):
             assert stype == _STORAGE_TYPE_ID_TO_STR[_STORAGE_TYPE_DEFAULT], \
             "Default infer_storage_type_backward implementation doesnt allow non default stypes: " \
-             "found non default stype '%s' for ograd_stype[%d]. Please implement " \
+             f"found non default stype '{stype}' for ograd_stype[{i}]. Please implement " \
              "infer_storage_type and infer_storage_type_backward interface " \
-             "in your custom operator if you have non-default output gradient stypes" % (stype, i)
+             "in your custom operator if you have non-default output gradient stypes"
         for i, stype in enumerate(igrad_stype):
             if stype == _STORAGE_TYPE_ID_TO_STR[_STORAGE_TYPE_UNDEFINED]:
                 stype = _STORAGE_TYPE_ID_TO_STR[_STORAGE_TYPE_DEFAULT]
             assert stype == _STORAGE_TYPE_ID_TO_STR[_STORAGE_TYPE_DEFAULT], \
             "Default infer_storage_type_backward implementation doesnt allow non default stypes: " \
-            "found non default stype '%s' for igrad_stype[%d]. Please implement " \
+            f"found non default stype '{stype}' for igrad_stype[{i}]. Please implement " \
             "infer_storage_type and infer_storage_type_backward interface " \
-            "in your custom operator if you have non-default input gradient stypes" % (stype, i)
+            "in your custom operator if you have non-default input gradient stypes"
         stype_lists = [ograd_stype, in_stype, out_stype, igrad_stype, aux_stype]
         for stype_list in stype_lists:
             stype_list[:] = len(stype_list) * [_STORAGE_TYPE_ID_TO_STR[_STORAGE_TYPE_DEFAULT]]
@@ -762,14 +762,14 @@ def register(reg_name):
                     else:
                         raise AssertionError("infer_shape must return 2 or 3 lists")
                     assert len(oshape) == n_out, \
-                        "InferShape Error: expecting %d entries in returned output " \
-                        "shapes, got %d."%(n_out, len(oshape))
+                        f"InferShape Error: expecting {n_out} entries in returned output " \
+                        f"shapes, got {len(oshape)}."
                     assert len(ishape) == n_in, \
-                        "InferShape Error: expecting %d entries in returned input " \
-                        "shapes, got %d."%(n_in, len(ishape))
+                        f"InferShape Error: expecting {n_in} entries in returned input " \
+                        f"shapes, got {len(ishape)}."
                     assert len(ashape) == n_aux, \
-                        "InferShape Error: expecting %d entries in returned aux state " \
-                        "shapes, got %d."%(n_aux, len(ashape))
+                        f"InferShape Error: expecting {n_aux} entries in returned aux state " \
+                        f"shapes, got {len(ashape)}."
                     rshape = list(ishape) + list(oshape) + list(ashape)
                     for i in range(n_in+n_out+n_aux):
                         tensor_shapes[i] = cast(c_array_buf(mx_int,
@@ -779,7 +779,7 @@ def register(reg_name):
 
                     infer_shape_entry._ref_holder = [tensor_shapes]
                 except Exception:
-                    print('Error in %s.infer_shape: %s' % (reg_name, traceback.format_exc()))
+                    print(f'Error in {reg_name}.infer_shape: {traceback.format_exc()}')
                     return False
                 return True
 
@@ -804,25 +804,25 @@ def register(reg_name):
                     else:
                         raise AssertionError("infer_storage_type_backward must return 4 or 5 lists")
                     assert len(ret[0]) == len(tensors[0]), \
-                        "InferStorageTypeBackward Error: expecting == %d " \
+                        f"InferStorageTypeBackward Error: expecting == {len(tensors[0])} " \
                         "entries in returned output gradient " \
-                        "stypes, got %d."%(len(tensors[0]), len(ret[0]))
+                        f"stypes, got {len(ret[0])}."
                     assert len(ret[1]) == len(tensors[1]), \
-                        "InferStorageTypeBackward Error: expecting == %d " \
+                        f"InferStorageTypeBackward Error: expecting == {len(tensors[1])} " \
                         "entries in returned input stypes, " \
-                        "got %d."%(len(tensors[1]), len(ret[1]))
+                        f"got {len(ret[1])}."
                     assert len(ret[2]) == len(tensors[2]), \
-                        "InferStorageTypeBackward Error: expecting == %d " \
+                        f"InferStorageTypeBackward Error: expecting == {len(tensors[2])} " \
                         "entries in returned output stypes, " \
-                        "got %d."%(len(tensors[2]), len(ret[2]))
+                        f"got {len(ret[2])}."
                     assert len(ret[3]) == len(tensors[3]), \
-                        "InferStorageTypeBackward Error: expecting == %d " \
+                        f"InferStorageTypeBackward Error: expecting == {len(tensors[3])} " \
                         "entries in returned input gradient stypes, " \
-                        "got %d."%(len(tensors[3]), len(ret[3]))
+                        f"got {len(ret[3])}."
                     assert len(ret[4]) == len(tensors[4]), \
-                        "InferStorageTypeBackward Error: expecting == %d " \
+                        f"InferStorageTypeBackward Error: expecting == {len(tensors[4])} " \
                         "entries in returned aux stypes, " \
-                        "got %d."%(len(tensors[4]), len(ret[4]))
+                        f"got {len(ret[4])}."
                     rstype = []
                     for ret_list in ret:
                         rstype.extend(ret_list)
@@ -831,16 +831,15 @@ def register(reg_name):
                         assert stype != _STORAGE_TYPE_ID_TO_STR[_STORAGE_TYPE_UNDEFINED], \
                             "stype should not be undefined"
                         assert stype in _STORAGE_TYPE_STR_TO_ID, \
-                            "Provided stype: %s is not valid " \
-                            "valid stypes are %s, %s, %s"%(stype,
-                                                           _STORAGE_TYPE_ID_TO_STR[_STORAGE_TYPE_DEFAULT],
-                                                           _STORAGE_TYPE_ID_TO_STR[_STORAGE_TYPE_ROW_SPARSE],
-                                                           _STORAGE_TYPE_ID_TO_STR[_STORAGE_TYPE_CSR])
+                            f"Provided stype: {stype} is not valid " \
+                            "valid stypes are {}, {}, {}".format(_STORAGE_TYPE_ID_TO_STR[_STORAGE_TYPE_DEFAULT],
+                                                                 _STORAGE_TYPE_ID_TO_STR[_STORAGE_TYPE_ROW_SPARSE],
+                                                                 _STORAGE_TYPE_ID_TO_STR[_STORAGE_TYPE_CSR])
                         tensor_stypes[i] = _STORAGE_TYPE_STR_TO_ID[stype]
 
                     infer_storage_type_backward_entry._ref_holder = [tensor_stypes]
                 except Exception:
-                    print('Error in %s.infer_type: %s' % (reg_name, traceback.format_exc()))
+                    print(f'Error in {reg_name}.infer_type: {traceback.format_exc()}')
                     return False
                 return True
 
@@ -863,20 +862,20 @@ def register(reg_name):
                         raise AssertionError("infer_storage_type must return 2 or 3 lists")
 
                     assert len(ostype) == n_out, \
-                        "InferStorageType Error: expecting %d entries in returned output " \
-                        "stypes, got %d."%(n_out, len(ostype))
+                        f"InferStorageType Error: expecting {n_out} entries in returned output " \
+                        f"stypes, got {len(ostype)}."
                     assert len(istype) == n_in, \
-                        "InferStorageType Error: expecting %d entries in returned input " \
-                        "stypes, got %d."%(n_in, len(istype))
+                        f"InferStorageType Error: expecting {n_in} entries in returned input " \
+                        f"stypes, got {len(istype)}."
                     assert len(astype) == n_aux, \
-                        "InferStorageType Error: expecting %d entries in returned aux state " \
-                        "stypes, got %d."%(n_aux, len(astype))
+                        f"InferStorageType Error: expecting {n_aux} entries in returned aux state " \
+                        f"stypes, got {len(astype)}."
                     rtype = list(istype) + list(ostype) + list(astype)
                     for i, dtype in enumerate(rtype):
                         tensor_stypes[i] = _STORAGE_TYPE_STR_TO_ID[dtype]
                     infer_storage_type_entry._ref_holder = [tensor_stypes]
                 except Exception:
-                    print('Error in %s.infer_type: %s' % (reg_name, traceback.format_exc()))
+                    print(f'Error in {reg_name}.infer_type: {traceback.format_exc()}')
                     return False
                 return True
 
@@ -898,21 +897,21 @@ def register(reg_name):
                     else:
                         raise AssertionError("infer_type must return 2 or 3 lists")
                     assert len(otype) == n_out, \
-                        "InferType Error: expecting %d entries in returned output " \
-                        "types, got %d."%(n_out, len(otype))
+                        f"InferType Error: expecting {n_out} entries in returned output " \
+                        f"types, got {len(otype)}."
                     assert len(itype) == n_in, \
-                        "InferType Error: expecting %d entries in returned input " \
-                        "types, got %d."%(n_in, len(itype))
+                        f"InferType Error: expecting {n_in} entries in returned input " \
+                        f"types, got {len(itype)}."
                     assert len(atype) == n_aux, \
-                        "InferType Error: expecting %d entries in returned aux state " \
-                        "types, got %d."%(n_aux, len(atype))
+                        f"InferType Error: expecting {n_aux} entries in returned aux state " \
+                        f"types, got {len(atype)}."
                     rtype = list(itype) + list(otype) + list(atype)
                     for i, dtype in enumerate(rtype):
                         tensor_types[i] = dtype_np_to_mx(dtype)
 
                     infer_type_entry._ref_holder = [tensor_types]
                 except Exception:
-                    print('Error in %s.infer_type: %s' % (reg_name, traceback.format_exc()))
+                    print(f'Error in {reg_name}.infer_type: {traceback.format_exc()}')
                     return False
                 return True
 
@@ -926,7 +925,7 @@ def register(reg_name):
 
                     list_outputs_entry._ref_holder = [out]
                 except Exception:
-                    print('Error in %s.list_outputs: %s' % (reg_name, traceback.format_exc()))
+                    print(f'Error in {reg_name}.list_outputs: {traceback.format_exc()}')
                     return False
                 return True
 
@@ -940,7 +939,7 @@ def register(reg_name):
 
                     list_arguments_entry._ref_holder = [out]
                 except Exception:
-                    print('Error in %s.list_arguments: %s' % (reg_name, traceback.format_exc()))
+                    print(f'Error in {reg_name}.list_arguments: {traceback.format_exc()}')
                     return False
                 return True
 
@@ -955,7 +954,7 @@ def register(reg_name):
                     list_auxiliary_states_entry._ref_holder = [out]
                 except Exception:
                     tb = traceback.format_exc()
-                    print('Error in %s.list_auxiliary_states: %s' % (reg_name, tb))
+                    print(f'Error in {reg_name}.list_auxiliary_states: {tb}')
                     return False
                 return True
 
@@ -976,7 +975,7 @@ def register(reg_name):
                     declare_backward_dependency_entry._ref_holder = [deps]
                 except Exception:
                     tb = traceback.format_exc()
-                    print('Error in %s.declare_backward_dependency: %s' % (reg_name, tb))
+                    print(f'Error in {reg_name}.declare_backward_dependency: {tb}')
                     return False
                 return True
 
@@ -1010,7 +1009,7 @@ def register(reg_name):
                                            in_data=tensors[0], out_data=tensors[1],
                                            aux=tensors[4])
                         except Exception:
-                            print('Error in CustomOp.forward: %s' % traceback.format_exc())
+                            print(f'Error in CustomOp.forward: {traceback.format_exc()}')
                             return False
                         return True
 
@@ -1049,7 +1048,7 @@ def register(reg_name):
                                             in_grad=tensors[2], out_grad=tensors[3],
                                             aux=tensors[4])
                         except Exception:
-                            print('Error in CustomOp.backward: %s' % traceback.format_exc())
+                            print(f'Error in CustomOp.backward: {traceback.format_exc()}')
                             return False
                         return True
 
@@ -1060,7 +1059,7 @@ def register(reg_name):
                         try:
                             del _registry.ref_holder[cur]
                         except Exception:
-                            print('Error in CustomOp.delete: %s' % traceback.format_exc())
+                            print(f'Error in CustomOp.delete: {traceback.format_exc()}')
                             return False
                         return True
 
@@ -1077,7 +1076,7 @@ def register(reg_name):
                     op._ref_holder = [ret]
                     _registry.ref_holder[cur] = op
                 except Exception:
-                    print('Error in %s.create_operator: %s' % (reg_name, traceback.format_exc()))
+                    print(f'Error in {reg_name}.create_operator: {traceback.format_exc()}')
                     return False
                 return True
 
@@ -1088,7 +1087,7 @@ def register(reg_name):
                 try:
                     del _registry.ref_holder[cur]
                 except Exception:
-                    print('Error in CustomOpProp.delete: %s' % traceback.format_exc())
+                    print(f'Error in CustomOpProp.delete: {traceback.format_exc()}')
                     return False
                 return True
 
diff --git a/python/mxnet/optimizer/optimizer.py b/python/mxnet/optimizer/optimizer.py
index 640fd90da0..18572dd364 100644
--- a/python/mxnet/optimizer/optimizer.py
+++ b/python/mxnet/optimizer/optimizer.py
@@ -156,11 +156,8 @@ class Optimizer(object):
         assert(isinstance(klass, type))
         name = klass.__name__.lower()
         if name in Optimizer.opt_registry:
-            warnings.warn('WARNING: New optimizer %s.%s is overriding '
-                          'existing optimizer %s.%s' %
-                          (klass.__module__, klass.__name__,
-                           Optimizer.opt_registry[name].__module__,
-                           Optimizer.opt_registry[name].__name__))
+            warnings.warn(f'WARNING: New optimizer {klass.__module__}.{klass.__name__} is overriding '
+                          f'existing optimizer {Optimizer.opt_registry[name].__module__}.{Optimizer.opt_registry[name].__name__}')
         Optimizer.opt_registry[name] = klass
         return klass
 
@@ -196,7 +193,7 @@ class Optimizer(object):
         if name.lower() in Optimizer.opt_registry:
             return Optimizer.opt_registry[name.lower()](**kwargs)
         else:
-            raise ValueError('Cannot find optimizer %s' % name)
+            raise ValueError(f'Cannot find optimizer {name}')
 
     @property
     def learning_rate(self):
diff --git a/python/mxnet/recordio.py b/python/mxnet/recordio.py
index b1d972e9cf..9de47ea393 100644
--- a/python/mxnet/recordio.py
+++ b/python/mxnet/recordio.py
@@ -78,7 +78,7 @@ class MXRecordIO(object):
             check_call(_LIB.MXRecordIOReaderCreate(self.uri, ctypes.byref(self.handle)))
             self.writable = False
         else:
-            raise ValueError("Invalid flag %s"%self.flag)
+            raise ValueError(f"Invalid flag {self.flag}")
         # pylint: disable=not-callable
         # It's bug from pylint(astroid). See https://github.com/PyCQA/pylint/issues/1699
         self.pid = current_process().pid
@@ -336,7 +336,7 @@ class MXIndexedRecordIO(MXRecordIO):
         key = self.key_type(idx)
         pos = self.tell()
         self.write(buf)
-        self.fidx.write('%s\t%d\n'%(str(key), pos))
+        self.fidx.write(f'{str(key)}\t{pos}\n')
         self.idx[key] = pos
         self.keys.append(key)
 
diff --git a/python/mxnet/registry.py b/python/mxnet/registry.py
index 4b1260b075..fc2711411b 100644
--- a/python/mxnet/registry.py
+++ b/python/mxnet/registry.py
@@ -66,21 +66,19 @@ def get_register_func(base_class, nickname):
     def register(klass, name=None):
         """Register functions"""
         assert issubclass(klass, base_class), \
-            "Can only register subclass of %s"%base_class.__name__
+             f"Can only register subclass of {base_class.__name__}"
         if name is None:
             name = klass.__name__
         name = name.lower()
         if name in registry:
             warnings.warn(
-                "\033[91mNew %s %s.%s registered with name %s is"
-                "overriding existing %s %s.%s\033[0m"%(
-                    nickname, klass.__module__, klass.__name__, name,
-                    nickname, registry[name].__module__, registry[name].__name__),
+                f"\033[91mNew {nickname} {klass.__module__}.{klass.__name__} registered with name {name} is"
+                f"overriding existing {nickname} {registry[name].__module__}.{registry[name].__name__}\033[0m",
                 UserWarning, stacklevel=2)
         registry[name] = klass
         return klass
 
-    register.__doc__ = "Register %s to the %s factory"%(nickname, nickname)
+    register.__doc__ = f"Register {nickname} to the {nickname} factory"
     return register
 
 
@@ -139,13 +137,13 @@ def get_create_func(base_class, nickname):
 
         if isinstance(name, base_class):
             assert len(args) == 0 and len(kwargs) == 0, \
-                "%s is already an instance. Additional arguments are invalid"%(nickname)
+                f"{nickname} is already an instance. Additional arguments are invalid"
             return name
 
         if isinstance(name, dict):
             return create(**name)
 
-        assert isinstance(name, string_types), "%s must be of string type"%nickname
+        assert isinstance(name, string_types), f"{nickname} must be of string type"
 
         if name.startswith('['):
             assert not args and not kwargs
@@ -158,18 +156,17 @@ def get_create_func(base_class, nickname):
 
         name = name.lower()
         assert name in registry, \
-            "%s is not registered. Please register with %s.register first"%(
-                str(name), nickname)
+            f"{str(name)} is not registered. Please register with {nickname}.register first"
         return registry[name](*args, **kwargs)
 
-    create.__doc__ = """Create a %s instance from config.
+    create.__doc__ = f"""Create a {nickname} instance from config.
 
 Parameters
 ----------
-%s : str or %s instance
+{nickname} : str or {base_class.__name__} instance
     class name of desired instance. If is a instance,
     it will be returned directly.
 **kwargs : dict
-    arguments to be passed to constructor"""%(nickname, nickname, base_class.__name__)
+    arguments to be passed to constructor"""
 
     return create
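
Note the trailing space before the closing quote in the first warning
fragment above: with implicit concatenation, a missing trailing space
silently fuses words across the fragment boundary. A quick
demonstration (hypothetical values):

    nickname, name = 'optimizer', 'sgd'
    bad = (f"New {nickname} registered with name {name} is"
           "overriding an existing one")
    good = (f"New {nickname} registered with name {name} is "
            "overriding an existing one")
    assert 'isoverriding' in bad and 'is overriding' in good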
diff --git a/python/mxnet/rtc.py b/python/mxnet/rtc.py
index 296e546e69..bcd1ad50bb 100644
--- a/python/mxnet/rtc.py
+++ b/python/mxnet/rtc.py
@@ -148,14 +148,14 @@ class CudaModule(object):
             match = pattern.match(sanitized_arg)
             if not match or match.groups()[1] == 'const':
                 raise ValueError(
-                    'Invalid function prototype "%s". Must be in the '
-                    'form of "(const) type (*) (name)"'%sanitized_arg)
+                    f'Invalid function prototype "{sanitized_arg}". Must be in the '
+                    'form of "(const) type (*) (name)"')
             is_const.append(bool(match.groups()[0]))
             dtype = match.groups()[1]
             is_ndarray.append(bool(match.groups()[2]))
             if dtype not in _DTYPE_CPP_TO_NP:
                 raise TypeError(
-                    "Unsupported kernel argument type %s. Supported types are: %s." % (
+                    "Unsupported kernel argument type {}. Supported types are: {}.".format(
                         sanitized_arg, ','.join(_DTYPE_CPP_TO_NP.keys())))
             dtypes.append(dtype_np_to_mx(_DTYPE_CPP_TO_NP[dtype]))
 
@@ -205,20 +205,17 @@ class CudaKernel(object):
         assert len(grid_dims) == 3, "grid_dims must be a tuple of 3 integers"
         assert len(block_dims) == 3, "block_dims must be a tuple of 3 integers"
         assert len(args) == len(self._dtypes), \
-            "CudaKernel(%s) expects %d arguments but got %d"%(
-                self._name, len(self._dtypes), len(args))
+            f"CudaKernel({self._name}) expects {len(self._dtypes)} arguments but got {len(args)}"
         void_args = []
         ref_holder = []
         for i, (arg, is_nd, dtype) in enumerate(zip(args, self._is_ndarray, self._dtypes)):
             if is_nd:
                 assert isinstance(arg, NDArray), \
-                    "The %d-th argument is expected to be a NDArray but got %s"%(
-                        i, type(arg))
+                    f"The {i}-th argument is expected to be a NDArray but got {type(arg)}"
                 void_args.append(arg.handle)
             else:
                 assert isinstance(arg, numeric_types), \
-                    "The %d-th argument is expected to be a number, but got %s"%(
-                        i, type(arg))
+                    f"The {i}-th argument is expected to be a number, but got {type(arg)}"
                 ref_holder.append(np.array(arg, dtype=dtype))
                 void_args.append(ref_holder[-1].ctypes.data_as(ctypes.c_void_p))
 
diff --git a/python/mxnet/symbol/contrib.py b/python/mxnet/symbol/contrib.py
index 3b98c429aa..241f2ac42e 100644
--- a/python/mxnet/symbol/contrib.py
+++ b/python/mxnet/symbol/contrib.py
@@ -82,7 +82,7 @@ def rand_zipfian(true_classes, num_sampled, range_max):
     >>> exp_count_sample.eval(true_cls=mx.nd.array([3]))[0].asnumpy()
     array([0.22629439, 0.12453879, 0.12453879, 0.12453879])
     """
-    assert(isinstance(true_classes, Symbol)), "unexpected type %s" % type(true_classes)
+    assert(isinstance(true_classes, Symbol)), f"unexpected type {type(true_classes)}"
     log_range = math.log(range_max + 1)
     rand = uniform(0, log_range, shape=(num_sampled,), dtype='float64')
     # make sure sampled_classes are in the range of [0, range_max)
@@ -105,8 +105,8 @@ def _flatten(args, inout_str):
         return [args], int(length)
 
     assert isinstance(args, (list, tuple)), \
-        "%s must be (nested) list of Symbol, " \
-        "but got %s of type %s"%(inout_str, str(args), str(type(args)))
+        f"{inout_str} must be (nested) list of Symbol, " \
+        f"but got {str(args)} of type {str(type(args))}"
     flat = []
     fmts = []
     for i in args:
@@ -124,7 +124,7 @@ def _regroup(args, fmt):
 
     assert isinstance(args, (list, tuple)), \
         "output must be (nested) list of Symbol, " \
-        "but got %s of type %s"%(str(args), str(type(args)))
+        f"but got {str(args)} of type {str(type(args))}"
     ret = []
     for i in fmt:
         res, args = _regroup(args, i)
@@ -345,8 +345,7 @@ def foreach(body, data, init_states, name="foreach"):
 
     remain_locs = []
     for in_name in subg_input_names:
-        assert in_name in gin_names, "The input variable %s can't be found in graph inputs: %s" \
-                % (in_name, str(gin_names))
+        assert in_name in gin_names, f"The input variable {in_name} can't be found in graph inputs: {str(gin_names)}"
         if in_name in cut_var_names:
             ordered_ins.append(cut_var_map[in_name])
             remain_locs.append(subg_input_names.index(in_name))
@@ -452,7 +451,7 @@ def while_loop(cond, func, loop_vars, max_iterations=None, name="while_loop"):
         try:
             inputs = type_(inputs)
         except:
-            raise ValueError("Cannot convert %s to python %s" % (name, type_.__name__))
+            raise ValueError(f"Cannot convert {name} to python {type_.__name__}")
         return inputs
 
     def _cond_wrapper(loop_vars):
@@ -577,7 +576,7 @@ def while_loop(cond, func, loop_vars, max_iterations=None, name="while_loop"):
         _union_inputs(cond_g, func_g)
     for i_th, loc in enumerate(func_var_locs, 1):
         if loc == -1:
-            raise ValueError("The %d-th loop_var doesn't involve into the computation" % i_th)
+            raise ValueError(f"The {i_th}-th loop_var doesn't involve into the computation")
     result = symbol._internal._while_loop(
         cond_g,
         func_g,
diff --git a/python/mxnet/symbol/numpy/_symbol.py b/python/mxnet/symbol/numpy/_symbol.py
index 3a029db83f..d88746ce0e 100644
--- a/python/mxnet/symbol/numpy/_symbol.py
+++ b/python/mxnet/symbol/numpy/_symbol.py
@@ -163,9 +163,9 @@ class _Symbol(Symbol):
         if self._alive:
             if self.num_outputs > 1:
                 name = ', '.join([str(ele_sym) for ele_sym in self])
-                return '<%s group [%s]>' % (self.__class__.__name__, name)
+                return f'<{self.__class__.__name__} group [{name}]>'
             else:
-                return '<%s %s>' % (self.__class__.__name__, self.name)
+                return f'<{self.__class__.__name__} {self.name}>'
         else:
             return '<FREED {}>'.format(self.__class__.__name__)
 
@@ -1618,7 +1618,7 @@ def _ufunc_helper(lhs, rhs, fn_array, fn_scalar, lfn_scalar, rfn_scalar=None, ou
     elif isinstance(rhs, Symbol):
         return fn_array(lhs, rhs, out=out)
     else:
-        raise TypeError('type %s not supported' % str(type(rhs)))
+        raise TypeError(f'type {str(type(rhs))} not supported')
 #pylint: enable= too-many-arguments, no-member, protected-access
 
 
@@ -7837,7 +7837,7 @@ def pad(x, pad_width, mode='constant', **kwargs): # pylint: disable=too-many-arg
         # Make sure have allowed kwargs appropriate for mode
         for key in kwargs:
             if key not in allowedkwargs[mode]:
-                raise ValueError('%s keyword not in allowed keywords %s' %(key, allowedkwargs[mode]))
+                raise ValueError(f'{key} keyword not in allowed keywords {allowedkwargs[mode]}')
 
     unsupported_kwargs = set(kwargs) - set(allowedkwargs[mode])
     if unsupported_kwargs:
diff --git a/python/mxnet/symbol/random.py b/python/mxnet/symbol/random.py
index 827ec40c4b..fdfa5f56bd 100644
--- a/python/mxnet/symbol/random.py
+++ b/python/mxnet/symbol/random.py
@@ -32,17 +32,17 @@ def _random_helper(random, sampler, params, shape, dtype, kwargs):
         for i in params[1:]:
             assert isinstance(i, Symbol), \
                 "Distribution parameters must all have the same type, but got " \
-                "both %s and %s."%(type(params[0]), type(i))
+                f"both {type(params[0])} and {type(i)}."
         return sampler(*params, shape=shape, dtype=dtype, **kwargs)
     elif isinstance(params[0], numeric_types):
         for i in params[1:]:
             assert isinstance(i, numeric_types), \
                 "Distribution parameters must all have the same type, but got " \
-                "both %s and %s."%(type(params[0]), type(i))
+                f"both {type(params[0])} and {type(i)}."
         return random(*params, shape=shape, dtype=dtype, **kwargs)
 
     raise ValueError("Distribution parameters must be either Symbol or numbers, "
-                     "but got %s."%type(params[0]))
+                     f"but got {type(params[0])}.")
 
 
 def uniform(low=0, high=1, shape=_Null, dtype=_Null, **kwargs):
diff --git a/python/mxnet/symbol/register.py b/python/mxnet/symbol/register.py
index 282a91d1c8..c82e350b1b 100644
--- a/python/mxnet/symbol/register.py
+++ b/python/mxnet/symbol/register.py
@@ -123,19 +123,19 @@ def _generate_symbol_function_code(handle, op_name, func_name, signature_only=Fa
         name, atype = arg_names[i], arg_types[i]
         if name == 'dtype':
             dtype_name = name
-            signature.append('%s=_Null'%name)
+            signature.append(f'{name}=_Null')
         elif atype.startswith('NDArray') or atype.startswith('Symbol'):
             assert not arr_name, \
                 "Op can only have one argument with variable " \
                 "size and it must be the last argument."
             if atype.endswith('[]'):
-                ndsignature.append('*%s'%name)
+                ndsignature.append(f'*{name}')
                 arr_name = name
             else:
-                ndsignature.append('%s=None'%name)
+                ndsignature.append(f'{name}=None')
                 ndarg_names.append(name)
         else:
-            signature.append('%s=_Null'%name)
+            signature.append(f'{name}=_Null')
             kwarg_names.append(name)
     #signature.append('is_train=False')
     signature.append('name=None')
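
For context on these generated fragments: the signature lists built above are presumably joined later into the def line of an emitted wrapper function. A hypothetical sketch of that joining step (names invented for illustration):

    ndsignature = ['*data', 'weight=None']
    signature = ['dtype=_Null', 'name=None']
    # NDArray/Symbol arguments come first, then the keyword-only attributes.
    code = f"def convolution({', '.join(ndsignature + signature)}):"
    assert code == "def convolution(*data, weight=None, dtype=_Null, name=None):"
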
diff --git a/python/mxnet/symbol/symbol.py b/python/mxnet/symbol/symbol.py
index 6634ce41e5..eaa015f55b 100644
--- a/python/mxnet/symbol/symbol.py
+++ b/python/mxnet/symbol/symbol.py
@@ -77,9 +77,9 @@ class Symbol(SymbolBase):
             name = self.name
             if name is None:
                 name = ', '.join([i.name for i in self])
-                return '<%s group [%s]>' % (self.__class__.__name__, name)
+                return f'<{self.__class__.__name__} group [{name}]>'
             else:
-                return '<%s %s>' % (self.__class__.__name__, name)
+                return f'<{self.__class__.__name__} {name}>'
         else:
             return '<FREED {}>'.format(self.__class__.__name__)
 
@@ -120,7 +120,7 @@ class Symbol(SymbolBase):
         if isinstance(other, Number):
             return _internal._PlusScalar(self, scalar=other)
         else:
-            raise TypeError('type %s not supported' % str(type(other)))
+            raise TypeError(f'type {str(type(other))} not supported')
 
     def __bool__(self):
         raise NotImplementedForSymbol(self.__bool__, 'bool')
@@ -143,7 +143,7 @@ class Symbol(SymbolBase):
         if isinstance(other, Number):
             return _internal._MinusScalar(self, scalar=other)
         else:
-            raise TypeError('type %s not supported' % str(type(other)))
+            raise TypeError(f'type {str(type(other))} not supported')
 
     def __isub__(self, other):
         raise NotImplementedForSymbol(self.__isub__, '-=', other)
@@ -166,7 +166,7 @@ class Symbol(SymbolBase):
         if isinstance(other, Number):
             return _internal._RMinusScalar(self, scalar=other)
         else:
-            raise TypeError('type %s not supported' % str(type(other)))
+            raise TypeError(f'type {str(type(other))} not supported')
 
     def __mul__(self, other):
         """x.__mul__(y) <=> x*y
@@ -178,7 +178,7 @@ class Symbol(SymbolBase):
         if isinstance(other, Number):
             return _internal._MulScalar(self, scalar=other)
         else:
-            raise TypeError('type %s not supported' % str(type(other)))
+            raise TypeError(f'type {str(type(other))} not supported')
 
     def __imul__(self, other):
         raise NotImplementedForSymbol(self.__imul__, '*=', other)
@@ -196,7 +196,7 @@ class Symbol(SymbolBase):
         if isinstance(other, Number):
             return _internal._DivScalar(self, scalar=other)
         else:
-            raise TypeError('type %s not supported' % str(type(other)))
+            raise TypeError(f'type {str(type(other))} not supported')
 
     def __rdiv__(self, other):
         """x.__rdiv__(y) <=> y/x
@@ -216,7 +216,7 @@ class Symbol(SymbolBase):
         if isinstance(other, Number):
             return _internal._RDivScalar(self, scalar=other)
         else:
-            raise TypeError('type %s not supported' % str(type(other)))
+            raise TypeError(f'type {str(type(other))} not supported')
 
     def __mod__(self, other):
         """x.__mod__(y) <=> x%y
@@ -228,7 +228,7 @@ class Symbol(SymbolBase):
         if isinstance(other, Number):
             return _internal._ModScalar(self, scalar=other)
         else:
-            raise TypeError('type %s not supported' % str(type(other)))
+            raise TypeError(f'type {str(type(other))} not supported')
 
     def __rmod__(self, other):
         """x.__rmod__(y) <=> y%x
@@ -248,7 +248,7 @@ class Symbol(SymbolBase):
         if isinstance(other, Number):
             return _internal._RModScalar(self, scalar=other)
         else:
-            raise TypeError('type %s not supported' % str(type(other)))
+            raise TypeError(f'type {str(type(other))} not supported')
 
     def __idiv__(self, other):
         raise NotImplementedForSymbol(self.__idiv__, '/=', other)
@@ -272,7 +272,7 @@ class Symbol(SymbolBase):
         if isinstance(other, Number):
             return _internal._PowerScalar(self, scalar=other)
         else:
-            raise TypeError('type %s not supported' % str(type(other)))
+            raise TypeError(f'type {str(type(other))} not supported')
 
     def __rpow__(self, other):
         """x.__rpow__(y) <=> y ** x"""
@@ -281,7 +281,7 @@ class Symbol(SymbolBase):
         elif isinstance(other, Number):
             return _internal._rpower_scalar(self, scalar=other)
         else:
-            raise TypeError('type %s not supported' % str(type(other)))
+            raise TypeError(f'type {str(type(other))} not supported')
 
     def __neg__(self):
         """x.__neg__() <=> -x
@@ -340,7 +340,7 @@ class Symbol(SymbolBase):
         if isinstance(other, numeric_types):
             return _internal._equal_scalar(self, scalar=other)
         else:
-            raise TypeError('type %s not supported' % str(type(other)))
+            raise TypeError(f'type {str(type(other))} not supported')
 
     def __ne__(self, other):
         """x.__ne__(y) <=> x!=y
@@ -352,7 +352,7 @@ class Symbol(SymbolBase):
         if isinstance(other, numeric_types):
             return _internal._not_equal_scalar(self, scalar=other)
         else:
-            raise TypeError('type %s not supported' % str(type(other)))
+            raise TypeError(f'type {str(type(other))} not supported')
 
     def __gt__(self, other):
         """x.__gt__(y) <=> x>y
@@ -364,7 +364,7 @@ class Symbol(SymbolBase):
         if isinstance(other, numeric_types):
             return _internal._greater_scalar(self, scalar=other)
         else:
-            raise TypeError('type %s not supported' % str(type(other)))
+            raise TypeError(f'type {str(type(other))} not supported')
 
     def __ge__(self, other):
         """x.__ge__(y) <=> x>=y
@@ -376,7 +376,7 @@ class Symbol(SymbolBase):
         if isinstance(other, numeric_types):
             return _internal._greater_equal_scalar(self, scalar=other)
         else:
-            raise TypeError('type %s not supported' % str(type(other)))
+            raise TypeError(f'type {str(type(other))} not supported')
 
     def __lt__(self, other):
         """x.__lt__(y) <=> x<y
@@ -388,7 +388,7 @@ class Symbol(SymbolBase):
         if isinstance(other, numeric_types):
             return _internal._lesser_scalar(self, scalar=other)
         else:
-            raise TypeError('type %s not supported' % str(type(other)))
+            raise TypeError(f'type {str(type(other))} not supported')
 
     def __le__(self, other):
         """x.__le__(y) <=> x<=y
@@ -400,7 +400,7 @@ class Symbol(SymbolBase):
         if isinstance(other, numeric_types):
             return _internal._lesser_equal_scalar(self, scalar=other)
         else:
-            raise TypeError('type %s not supported' % str(type(other)))
+            raise TypeError(f'type {str(type(other))} not supported')
 
     def __getstate__(self):
         handle = self.handle
@@ -544,10 +544,10 @@ class Symbol(SymbolBase):
             for i, name in enumerate(output_names):
                 if name == index:
                     if idx is not None:
-                        raise ValueError('There are multiple outputs with name \"%s\"' % index)
+                        raise ValueError(f'There are multiple outputs with name \"{index}\"')
                     idx = i
             if idx is None:
-                raise ValueError('Cannot find output that matches name \"%s\"' % index)
+                raise ValueError(f'Cannot find output that matches name \"{index}\"')
             index = idx
 
         if not isinstance(index, int):
@@ -950,7 +950,7 @@ class Symbol(SymbolBase):
                         if len(unknowns) >= 10:
                             unknowns.append('...')
                             break
-                        unknowns.append('%s: %s' % (name, str(dtype)))
+                        unknowns.append(f'{name}: {str(dtype)}')
                 warnings.warn(
                     "Cannot decide type for the following arguments. " +
                     "Consider providing them as input:\n\t" +
@@ -959,9 +959,9 @@ class Symbol(SymbolBase):
         except MXNetError:
             print("infer_type error. Arguments:")
             for i, arg in enumerate(args):
-                print("  #%d: %s" % (i, arg))
+                print(f"  #{i}: {arg}")
             for k, v in kwargs.items():
-                print("  %s: %s" % (k, v))
+                print(f"  {k}: {v}")
             raise
 
     def infer_type_partial(self, *args, **kwargs):
@@ -1137,7 +1137,7 @@ class Symbol(SymbolBase):
                         if len(unknowns) >= 10:
                             unknowns.append('...')
                             break
-                        unknowns.append('%s: %s' % (name, str(shape)))
+                        unknowns.append(f'{name}: {str(shape)}')
                 warnings.warn(
                     "Cannot decide shape for the following arguments " +
                     "(0s in shape means unknown dimensions). " +
@@ -1147,9 +1147,9 @@ class Symbol(SymbolBase):
         except MXNetError:
             print("infer_shape error. Arguments:")
             for i, arg in enumerate(args):
-                print("  #%d: %s" % (i, arg))
+                print(f"  #{i}: {arg}")
             for k, v in kwargs.items():
-                print("  %s: %s" % (k, v))
+                print(f"  {k}: {v}")
             raise
 
     def infer_shape_partial(self, *args, **kwargs):
@@ -1215,7 +1215,7 @@ class Symbol(SymbolBase):
                 if s is not None:
                     if not isinstance(s, tuple):
                         raise TypeError("Arguments need to be shapes (tuple), "
-                                        "but argument %d is %s." % (i, type(s)))
+                                        f"but argument {i} is {type(s)}.")
                     sdata.extend(s)
                 indptr.append(len(sdata))
         else:
@@ -1223,7 +1223,7 @@ class Symbol(SymbolBase):
             for k, v in kwargs.items():
                 if not isinstance(v, tuple):
                     raise TypeError("Arguments need to be shapes (tuple), "
-                                    "but '%s' is %s." % (k, type(v)))
+                                    f"but '{k}' is {type(v)}.")
                 str_keys.append(k)
                 sdata.extend(v)
                 indptr.append(len(sdata))
@@ -1437,7 +1437,7 @@ class Symbol(SymbolBase):
         arg_arrays = []
         if isinstance(args, list):
             if len(args) != len(arg_names):
-                raise ValueError('Length of %s does not match the number of arguments' % arg_key)
+                raise ValueError(f'Length of {arg_key} does not match the number of arguments')
             for narr in args:
                 if narr is None and allow_missing:
                     arg_handles.append(None)
@@ -1459,7 +1459,7 @@ class Symbol(SymbolBase):
                         arg_handles.append(None)
                         arg_arrays.append(None)
                     else:
-                        raise ValueError('key `%s` is missing in `%s`' % (name, arg_key))
+                        raise ValueError(f'key `{name}` is missing in `{arg_key}`')
         else:
             raise TypeError('Only accept list of NDArrays or dict of str to NDArray')
         return c_array(NDArrayHandle, arg_handles), arg_arrays
@@ -2747,9 +2747,9 @@ def var(name, attr=None, shape=None, lr_mult=None, wd_mult=None, dtype=None,
         if k.startswith('__') and k.endswith('__'):
             attr[k] = str(v)
         else:
-            raise ValueError('Attribute name=%s is not supported.'
+            raise ValueError(f'Attribute name={k} is not supported.'
                              ' Additional attributes must start and end with double underscores,'
-                             ' e.g, __yourattr__' % k)
+                             ' e.g., __yourattr__')
     ret._set_attr(**attr)
     return ret
 
@@ -2898,7 +2898,7 @@ def pow(base, exp):
     if isinstance(base, Number) and isinstance(exp, Number):
         return base**exp
     else:
-        raise TypeError('types (%s, %s) not supported' % (str(type(base)), str(type(exp))))
+        raise TypeError(f'types ({str(type(base))}, {str(type(exp))}) not supported')
 
 
 def power(base, exp):
@@ -2979,7 +2979,7 @@ def maximum(left, right):
     if isinstance(left, Number) and isinstance(right, Number):
         return left if left > right else right
     else:
-        raise TypeError('types (%s, %s) not supported' % (str(type(left)), str(type(right))))
+        raise TypeError(f'types ({str(type(left))}, {str(type(right))}) not supported')
 
 
 # pylint: disable=no-member
@@ -3023,7 +3023,7 @@ def minimum(left, right):
     if isinstance(left, Number) and isinstance(right, Number):
         return left if left < right else right
     else:
-        raise TypeError('types (%s, %s) not supported' % (str(type(left)), str(type(right))))
+        raise TypeError(f'types ({str(type(left))}, {str(type(right))}) not supported')
 
 
 # pylint: disable=no-member
@@ -3068,7 +3068,7 @@ def hypot(left, right):
     if isinstance(left, Number) and isinstance(right, Number):
         return _numpy.hypot(left, right)
     else:
-        raise TypeError('types (%s, %s) not supported' % (str(type(left)), str(type(right))))
+        raise TypeError(f'types ({str(type(left))}, {str(type(right))}) not supported')
 
 
 def eye(N, M=0, k=0, dtype=None, **kwargs):
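
An aside on the pattern running through the symbol.py operator hunks above: each dunder raises TypeError for unsupported operands, whereas the general Python convention is to return NotImplemented so the interpreter can still try the other operand's reflected method. A minimal sketch of that convention (hypothetical class, not MXNet code):

    class Scalar:
        def __init__(self, v):
            self.v = v
        def __add__(self, other):
            if isinstance(other, (int, float)):
                return Scalar(self.v + other)
            return NotImplemented  # lets Python try other.__radd__ next

    assert (Scalar(1) + 2).v == 3
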
diff --git a/python/mxnet/symbol_doc.py b/python/mxnet/symbol_doc.py
index 8f86510164..d62a41a4ee 100644
--- a/python/mxnet/symbol_doc.py
+++ b/python/mxnet/symbol_doc.py
@@ -69,17 +69,16 @@ def _build_doc(func_name,
     param_str = _build_param_doc(arg_names, arg_types, arg_desc)
     if key_var_num_args:
         desc += '\nThis function supports a variable number of positional inputs.'
-    doc_str = ('%s\n\n' +
-               '%s\n' +
+    doc_str = (f'{desc}\n\n' +
+               f'{param_str}\n' +
                'name : string, optional.\n' +
                '    Name of the resulting symbol.\n\n' +
                'Returns\n' +
                '-------\n' +
                'Symbol\n' +
                '    The result symbol.')
-    doc_str = doc_str % (desc, param_str)
     extra_doc = "\n" + '\n'.join([x.__doc__ for x in type.__subclasses__(SymbolDoc)
-                                  if x.__name__ == '%sDoc' % func_name])
+                                  if x.__name__ == f'{func_name}Doc'])
     doc_str += _re.sub(_re.compile("    "), "", extra_doc)
     doc_str = _re.sub('NDArray-or-Symbol', 'Symbol', doc_str)
     return doc_str
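
The rebuilt header above still glues its pieces with +; since an f-string is an ordinary string literal, the same expression also works with implicit adjacent-literal concatenation, which avoids the runtime + operations. A sketch with stand-in values:

    desc, param_str = "Elementwise add.", "lhs : Symbol\nrhs : Symbol"
    doc_str = (f"{desc}\n\n"
               f"{param_str}\n"
               "name : string, optional.\n"
               "    Name of the resulting symbol.")
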
diff --git a/python/mxnet/test_utils.py b/python/mxnet/test_utils.py
index d2fe560480..c647af5743 100644
--- a/python/mxnet/test_utils.py
+++ b/python/mxnet/test_utils.py
@@ -236,11 +236,10 @@ def _validate_csr_generation_inputs(num_rows, num_cols, density,
 
     if distribution == "powerlaw":
         if total_nnz < 2 * num_rows:
-            raise ValueError("not supported for this density: %s"
-                             " for this shape (%s, %s)"
+            raise ValueError(f"not supported for this density: {density}"
+                             f" for this shape ({num_rows}, {num_cols})"
                              " Please keep :"
-                             " num_rows * num_cols * density >= 2 * num_rows"
-                             % (density, num_rows, num_cols))
+                             " num_rows * num_cols * density >= 2 * num_rows")
 
 
 def shuffle_csr_column_indices(csr):
@@ -332,8 +331,8 @@ def _get_powerlaw_dataset_csr(num_rows, num_cols, density=0.1, dtype=None):
         col_max = col_max * 2
 
     if unused_nnz > 0:
-        raise ValueError("not supported for this density: %s"
-                         " for this shape (%s,%s)" % (density, num_rows, num_cols))
+        raise ValueError(f"not supported for this density: {density}"
+                         f" for this shape ({num_rows},{num_cols})")
 
     return mx.nd.array(output_arr).tostype("csr")
 
@@ -443,7 +442,7 @@ def rand_sparse_ndarray(shape, stype, density=None, dtype=None, distribution=Non
     distribution = "uniform" if distribution is None else distribution
     if stype == 'row_sparse':
         assert (distribution == "uniform"), \
-               "Distribution %s not supported for row_sparse" % (distribution)
+               f"Distribution {distribution} not supported for row_sparse"
         # sample index
         if rsp_indices is not None:
             indices = rsp_indices
@@ -476,7 +475,7 @@ def rand_sparse_ndarray(shape, stype, density=None, dtype=None, distribution=Non
             csr = _get_powerlaw_dataset_csr(shape[0], shape[1], density=density, dtype=dtype).as_in_context(ctx)
             return csr, (csr.indptr, csr.indices, csr.data)
         else:
-            assert(False), "Distribution not supported: %s" % (distribution)
+            assert(False), f"Distribution not supported: {distribution}"
             return False
     else:
         assert(False), "unknown storage type"
@@ -650,9 +649,8 @@ def locationError(a, b, index, names, maxError=False):
         Flag indicating that maximum error is reporting.
     """
     maximum = "maximum " if maxError else ""
-    return "Location of %serror: %s, %s=%.8f, %s=%.8f" \
-            % (maximum, str(index), names[0], a[index], names[1], b[index])
-
+    return f"Location of {maximum} error: {str(index)}, {names[0]}={a[index]:.8f}, {names[1]}={b[index]:.8f}"
 def assert_almost_equal(a, b, rtol=None, atol=None, names=('a', 'b'), equal_nan=False,
                         use_broadcast=True, mismatches=(10, 10)):
     """Test that two numpy arrays are almost equal. Raise exception message if not.
@@ -716,7 +713,7 @@ def assert_almost_equal(a, b, rtol=None, atol=None, names=('a', 'b'), equal_nan=
         i = 1
         while i <= a.size:
             if i <= mismatches[0]:
-                print("%3d: Error %f  %s" %(i, rel, locationError(a, b, index, names)))
+                print(f"{i:3d}: Error {rel}  {locationError(a, b, index, names)}")
 
             aTmp[index] = bTmp[index] = 0
             if almost_equal(aTmp, bTmp, rtol, atol, equal_nan=equal_nan):
@@ -729,11 +726,11 @@ def assert_almost_equal(a, b, rtol=None, atol=None, names=('a', 'b'), equal_nan=
                 break
 
         mismatchDegree = "at least " if mismatches[1] > 0 and i > mismatches[1] else ""
-        errMsg = "Error %f exceeds tolerance rtol=%e, atol=%e (mismatch %s%f%%).\n%s" % \
-                 (relErr, rtol, atol, mismatchDegree, 100*i/a.size, \
-                  locationError(a, b, indexErr, names, maxError=True))
+        errMsg = f"Error {relErr} exceeds tolerance rtol={rtol:e}, atol={atol:e} " \
+                 f"(mismatch {mismatchDegree}{100*i/a.size}%).\n" \
+                 f"{locationError(a, b, indexErr, names, maxError=True)}"
     else:
-        errMsg = "Error %f exceeds tolerance rtol=%e, atol=%e.\n" % (rel, rtol, atol)
+        errMsg = f"Error {rel} exceeds tolerance rtol={rtol:e}, atol={atol:e}.\n"
 
     np.set_printoptions(threshold=4, suppress=True)
     msg = npt.build_err_msg([a, b], err_msg=errMsg)
@@ -787,7 +784,7 @@ def assert_almost_equal_with_err(a, b, rtol=None, atol=None, etol=None,
             i = 1
             while i <= a.size:
                 if i <= mismatches[0]:
-                    print("%3d: Error %f  %s" %(i, rel, locationError(a, b, index, names)))
+                    print(f"{i:3d}: Error {rel}  {locationError(a, b, index, names)}")
 
                 aTmp[index] = bTmp[index] = 0
                 if almost_equal(aTmp, bTmp, rtol, atol, equal_nan=equal_nan):
@@ -800,9 +797,9 @@ def assert_almost_equal_with_err(a, b, rtol=None, atol=None, etol=None,
                     break
 
             mismatchDegree = "at least " if mismatches[1] > 0 and i > mismatches[1] else ""
-            errMsg = "Error %f exceeds tolerance rtol=%e, atol=%e (mismatch %s%f%%).\n%s" % \
-                    (relErr, rtol, atol, mismatchDegree, 100*i/a.size, \
-                    locationError(a, b, indexErr, names, maxError=True))
+            errMsg = f"Error {relErr} exceeds tolerance rtol={rtol:e}, atol={atol:e} " \
+                     f"(mismatch {mismatchDegree}{100*i/a.size}%).\n" \
+                     f"{locationError(a, b, indexErr, names, maxError=True)}"
             np.set_printoptions(threshold=4, suppress=True)
             msg = npt.build_err_msg([a, b], err_msg=errMsg)
             raise AssertionError(msg)
@@ -894,8 +891,7 @@ def _parse_location(sym, location, ctx, dtype=default_dtype()):
     if isinstance(location, dict):
         if set(location.keys()) != set(sym.list_arguments()):
             raise ValueError("Symbol arguments and keys of the given location do not match."
-                             "symbol args:%s, location.keys():%s"
-                             % (str(set(sym.list_arguments())), str(set(location.keys()))))
+                             f"symbol args:{str(set(sym.list_arguments()))}, location.keys():{str(set(location.keys()))}")
     else:
         location = {k: v for k, v in zip(sym.list_arguments(), location)}
     location = {k: mx.nd.array(v, ctx=ctx, dtype=v.dtype if dtype == "asnumpy" else dtype) \
@@ -957,9 +953,7 @@ def _parse_aux_states(sym, aux_states, ctx, dtype=default_dtype()):
         if isinstance(aux_states, dict):
             if set(aux_states.keys()) != set(sym.list_auxiliary_states()):
                 raise ValueError("Symbol aux_states names and given aux_states do not match."
-                                 "symbol aux_names:%s, aux_states.keys:%s"
-                                 % (str(set(sym.list_auxiliary_states())),
-                                    str(set(aux_states.keys()))))
+                                 f"symbol aux_names:{str(set(sym.list_auxiliary_states()))}, aux_states.keys:{str(set(aux_states.keys()))}")
         elif isinstance(aux_states, (list, tuple)):
             aux_names = sym.list_auxiliary_states()
             aux_states = {k:v for k, v in zip(aux_names, aux_states)}
@@ -1156,7 +1150,7 @@ def check_numeric_gradient(sym, location, aux_states=None, numeric_eps=None, rto
     inps = executor.arg_arrays
     if len(inps) != len(location):
         raise ValueError("Executor arg_arrays and and location len do not match."
-                         "Got %d inputs and %d locations"%(len(inps), len(location)))
+                         f"Got {len(inps)} inputs and {len(location)} locations")
 
     executor.forward(is_train=True)
     assert len(executor.outputs) == 1
@@ -1179,16 +1173,16 @@ def check_numeric_gradient(sym, location, aux_states=None, numeric_eps=None, rto
         sym_grad = symbolic_grads[name]
         if grad_req[name] == 'write':
             assert_almost_equal(fd_grad, sym_grad, rtol, atol,
-                                ("NUMERICAL_%s"%name, "BACKWARD_%s"%name))
+                                (f"NUMERICAL_{name}", f"BACKWARD_{name}"))
         elif grad_req[name] == 'add':
             if isinstance(sym_grad, mx.nd.NDArray):
                 sym_grad = sym_grad.asnumpy()
             assert_almost_equal(fd_grad, sym_grad - orig_grad, rtol, atol,
-                                ("NUMERICAL_%s"%name, "BACKWARD_%s"%name))
+                                (f"NUMERICAL_{name}", f"BACKWARD_{name}"))
         elif grad_req[name] == 'null':
             assert sym_grad is None
         else:
-            raise ValueError("Invalid grad_req %s for argument %s"%(grad_req[name], name))
+            raise ValueError(f"Invalid grad_req {grad_req[name]} for argument {name}")
 
 
 def check_symbolic_forward(sym, location, expected, rtol=None, atol=None,
@@ -1270,7 +1264,7 @@ def check_symbolic_forward(sym, location, expected, rtol=None, atol=None,
     outputs = executor.outputs
     for output_name, expect, output in zip(sym.list_outputs(), expected, outputs):
         assert_almost_equal(expect, output, rtol, atol,
-                            ("EXPECTED_%s"%output_name, "FORWARD_%s"%output_name),
+                            (f"EXPECTED_{output_name}", f"FORWARD_{output_name}"),
                             equal_nan=equal_nan)
     return executor.outputs
 
@@ -1399,19 +1393,19 @@ def check_symbolic_backward(sym, location, out_grads, expected, rtol=None, atol=
     for name in expected:
         if grad_req[name] == 'write':
             assert_almost_equal(expected[name], grads[name], rtol, atol,
-                                ("EXPECTED_%s"%name, "BACKWARD_%s"%name),
+                                (f"EXPECTED_{name}", f"BACKWARD_{name}"),
                                 equal_nan=equal_nan)
         elif grad_req[name] == 'add':
             grad = grads[name].asnumpy() if isinstance(grads[name], mx.nd.NDArray) else grads[name]
             assert_almost_equal(expected[name], grad - args_grad_npy[name],
-                                rtol, atol, ("EXPECTED_%s"%name, "BACKWARD_%s"%name),
+                                rtol, atol, (f"EXPECTED_{name}", f"BACKWARD_{name}"),
                                 equal_nan=equal_nan)
         elif grad_req[name] == 'null':
             assert_almost_equal(args_grad_npy[name], grads[name],
-                                rtol, atol, ("EXPECTED_%s"%name, "BACKWARD_%s"%name),
+                                rtol, atol, (f"EXPECTED_{name}", f"BACKWARD_{name}"),
                                 equal_nan=equal_nan)
         else:
-            raise ValueError("Invalid grad_req %s for argument %s"%(grad_req[name], name))
+            raise ValueError(f"Invalid grad_req {grad_req[name]} for argument {name}")
     return args_grad_data
 
 def check_speed(sym, location=None, ctx=None, N=20, grad_req=None, typ="whole",
@@ -1448,7 +1442,7 @@ def check_speed(sym, location=None, ctx=None, N=20, grad_req=None, typ="whole",
         location = {k: np.random.normal(size=arr.shape, scale=1.0) for k, arr in
                     exe.arg_dict.items()}
     else:
-        assert isinstance(location, dict), "Expect dict, get \"location\"=%s" %str(location)
+        assert isinstance(location, dict), f'Expect dict, get "location"={str(location)}'
         exe = sym._simple_bind(grad_req=grad_req, ctx=ctx,
                                **{k: v.shape for k, v in location.items()})
 
@@ -1617,7 +1611,7 @@ def check_consistency(sym, ctx_list, scale=1.0, grad_req='write',
             try:
                 assert_almost_equal(arr, gtarr, rtol=rtol, atol=atol, equal_nan=equal_nan)
             except AssertionError as e:
-                print('Predict Err: ctx %d vs ctx %d at %s'%(i, gt_idx, name))
+                print(f'Predict Err: ctx {i} vs ctx {gt_idx} at {name}')
                 traceback.print_exc()
                 if raise_on_err:
                     raise e
@@ -1748,7 +1742,7 @@ def download(url, fname=None, dirname=None, overwrite=False, retries=5):
         # pylint: disable=W0703
         try:
             r = requests.get(url, stream=True)
-            assert r.status_code == 200, "failed to open %s" % url
+            assert r.status_code == 200, f"failed to open {url}"
             with open(fname, 'wb') as f:
                 for chunk in r.iter_content(chunk_size=1024):
                     if chunk: # filter out keep-alive new chunks
@@ -2225,10 +2219,9 @@ def verify_generator(generator, buckets, probs, nsamples=1000000, nrepeat=5, suc
         expected_freq_l.append(expected_freq)
     success_num = (np.array(cs_ret_l) > alpha).sum()
     if success_num < nrepeat * success_rate:
-        raise AssertionError("Generator test fails, Chi-square p=%s, obs_freq=%s, expected_freq=%s."
-                             "\nbuckets=%s, probs=%s"
-                             % (str(cs_ret_l), str(obs_freq_l), str(expected_freq_l),
-                                str(buckets), str(probs)))
+        raise AssertionError(f"Generator test fails, Chi-square p={str(cs_ret_l)}, "
+                             f"obs_freq={str(obs_freq_l)}, expected_freq={str(expected_freq_l)}."
+                             f"\nbuckets={str(buckets)}, probs={str(probs)}")
     return cs_ret_l
 
 
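Most of the printf-style conversion specs in this file map one-to-one onto f-string format specs, which is what makes hunks like the tolerance messages above mechanical. A small self-check sketch:

    rtol, i, v = 1e-5, 7, 0.123456789
    assert "%e" % rtol == f"{rtol:e}"    # scientific notation
    assert "%3d" % i == f"{i:3d}"        # width-padded integer
    assert "%.8f" % v == f"{v:.8f}"      # fixed precision
    # One behavioral difference: a bare {x} formats via str(), not %f's fixed
    # six decimals, so "%f" % 0.1 ('0.100000') != f"{0.1}" ('0.1').
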
diff --git a/python/mxnet/util.py b/python/mxnet/util.py
index 3a190d7aa2..919e18107c 100644
--- a/python/mxnet/util.py
+++ b/python/mxnet/util.py
@@ -765,7 +765,7 @@ def numpy_fallback(func):
         else:
             if new_device is None:
                 new_device = device
-            assert device == new_device, "inconsistent device %s and %s" % (str(device), str(new_device))
+            assert device == new_device, f"inconsistent device {str(device)} and {str(new_device)}"
             return device
 
     def _as_official_np_array(object):
diff --git a/python/setup.py b/python/setup.py
index ce1130ba86..ca2920ca33 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -87,8 +87,8 @@ def config_cython():
             if not fn.endswith(".pyx"):
                 continue
             ret.append(Extension(
-                "mxnet.%s.%s" % (subdir, fn[:-4]),
-                ["mxnet/cython/%s" % fn],
+                f"mxnet.{subdir}.{fn[:-4]}",
+                [f"mxnet/cython/{fn}"],
                 include_dirs=["../include/", "../3rdparty/tvm/nnvm/include"],
                 library_dirs=library_dirs,
                 libraries=libraries,
@@ -100,8 +100,8 @@ def config_cython():
             if not fn.endswith(".pyx"):
                 continue
             ret.append(Extension(
-                "mxnet._ffi.%s.%s" % (subdir, fn[:-4]),
-                ["mxnet/_ffi/_cython/%s" % fn],
+                f"mxnet._ffi.{subdir}.{fn[:-4]}",
+                [f"mxnet/_ffi/_cython/{fn}"],
                 include_dirs=["../include/", "../3rdparty/tvm/nnvm/include"],
                 library_dirs=library_dirs,
                 libraries=libraries,
diff --git a/tests/nightly/TestDoc/doc_spell_checker.py b/tests/nightly/TestDoc/doc_spell_checker.py
index 53941c3d4d..206960d29f 100644
--- a/tests/nightly/TestDoc/doc_spell_checker.py
+++ b/tests/nightly/TestDoc/doc_spell_checker.py
@@ -175,8 +175,8 @@ if __name__ == "__main__":
             spell_check_res = DOC_PARSER.get_res()[0]
             grammar_check_res = DOC_PARSER.get_res()[1]
             if len(spell_check_res) > 0:
-                print("%s has typo:" % os.path.join(root, read_file))
-                print("%s\n" % spell_check_res)
+                print(f"{os.path.join(root, read_file)} has typo:")
+                print(f"{spell_check_res}\n")
                 ALL_CLEAR = False
     if ALL_CLEAR:
         print("No typo is found.")
diff --git a/tests/nightly/model_backwards_compatibility_check/common.py b/tests/nightly/model_backwards_compatibility_check/common.py
index 4f621bb8aa..dd4fc7e863 100644
--- a/tests/nightly/model_backwards_compatibility_check/common.py
+++ b/tests/nightly/model_backwards_compatibility_check/common.py
@@ -105,10 +105,10 @@ def get_top_level_folders_in_bucket(s3client, bucket_name):
     result = bucket.meta.client.list_objects(Bucket=bucket.name, Delimiter=backslash)
     folder_list = list()
     if 'CommonPrefixes' not in result:
-        logging.error('No trained models found in S3 bucket : %s for this file. '
-                      'Please train the models and run inference again' % bucket_name)
-        raise Exception("No trained models found in S3 bucket : %s for this file. "
-                        "Please train the models and run inference again" % bucket_name)
+        logging.error('No trained models found in S3 bucket : {} for this file. '
+                      'Please train the models and run inference again'.format(bucket_name))
+        raise Exception("No trained models found in S3 bucket : {} for this file. "
+                        "Please train the models and run inference again".format(bucket_name))
         return folder_list
     for obj in result['CommonPrefixes']:
         folder_name = obj['Prefix'].strip(backslash)
@@ -121,10 +121,10 @@ def get_top_level_folders_in_bucket(s3client, bucket_name):
         folder_list.append(obj['Prefix'].strip(backslash))
 
     if len(folder_list) == 0:
-        logging.error('No trained models found in S3 bucket : %s for this file. '
-                      'Please train the models and run inference again' % bucket_name)
-        raise Exception("No trained models found in S3 bucket : %s for this file. "
-                        "Please train the models and run inference again" % bucket_name)
+        logging.error('No trained models found in S3 bucket : {} for this file. '
+                      'Please train the models and run inference again'.format(bucket_name))
+        raise Exception("No trained models found in S3 bucket : {} for this file. "
+                        "Please train the models and run inference again".format(bucket_name))
     return folder_list
 
 
diff --git a/tests/nightly/model_backwards_compatibility_check/model_backwards_compat_inference.py b/tests/nightly/model_backwards_compatibility_check/model_backwards_compat_inference.py
index c57c0f483e..1206bdab2c 100644
--- a/tests/nightly/model_backwards_compatibility_check/model_backwards_compat_inference.py
+++ b/tests/nightly/model_backwards_compatibility_check/model_backwards_compat_inference.py
@@ -21,13 +21,13 @@ from .common import *
 
 def test_lenet_gluon_load_params_api():
     model_name = 'lenet_gluon_save_params_api'
-    logging.info('Performing inference for model/API %s' % model_name)
+    logging.info(f'Performing inference for model/API {model_name}')
 
     for folder in get_top_level_folders_in_bucket(s3, model_bucket_name):
-        logging.info('Fetching files for MXNet version : %s and model %s' % (folder, model_name))
+        logging.info(f'Fetching files for MXNet version : {folder} and model {model_name}')
         model_files = download_model_files_from_s3(model_name, folder)
         if len(model_files) == 0:
-            logging.warn('No training files found for %s for MXNet version : %s' % (model_name, folder))
+            logging.warn(f'No training files found for {model_name} for MXNet version : {folder}')
             continue
 
         data = mx.npx.load(''.join([model_name, '-data']))
@@ -40,18 +40,18 @@ def test_lenet_gluon_load_params_api():
         assert_almost_equal(old_inference_results.asnumpy(), output.asnumpy(), rtol=rtol_default, atol=atol_default)
         clean_model_files(model_files, model_name)
         logging.info('=================================')
-    logging.info('Assertion passed for model : %s' % model_name)
+    logging.info(f'Assertion passed for model : {model_name}')
 
 
 def test_lenet_gluon_hybrid_imports_api():
     model_name = 'lenet_gluon_hybrid_export_api'
-    logging.info('Performing inference for model/API %s' % model_name)
+    logging.info(f'Performing inference for model/API {model_name}')
 
     for folder in get_top_level_folders_in_bucket(s3, model_bucket_name):
-        logging.info('Fetching files for MXNet version : %s and model %s' % (folder, model_name))
+        logging.info(f'Fetching files for MXNet version : {folder} and model {model_name}')
         model_files = download_model_files_from_s3(model_name, folder)
         if len(model_files) == 0:
-            logging.warn('No training files found for %s for MXNet version : %s' % (model_name, folder))
+            logging.warn(f'No training files found for {model_name} for MXNet version : {folder}')
             continue
             # Load the model and perform inference
         data = mx.npx.load(''.join([model_name, '-data']))
@@ -63,7 +63,7 @@ def test_lenet_gluon_hybrid_imports_api():
         assert_almost_equal(old_inference_results.asnumpy(), output.asnumpy(), rtol=rtol_default, atol=atol_default)
         clean_model_files(model_files, model_name)
         logging.info('=================================')
-    logging.info('Assertion passed for model : %s' % model_name)
+    logging.info(f'Assertion passed for model : {model_name}')
 
 
 def test_lstm_gluon_load_parameters_api():
@@ -71,18 +71,18 @@ def test_lstm_gluon_load_parameters_api():
     # since it uses save_parameters and load_parameters API
 
     if compare_versions(str(mxnet_version), '1.2.1') < 0:
-        logging.warn('Found MXNet version %s and exiting because this version does not contain save_parameters'
-                     ' and load_parameters functions' % str(mxnet_version))
+        logging.warn(f'Found MXNet version {str(mxnet_version)} and exiting because this version does not contain save_parameters'
+                     ' and load_parameters functions')
         return
 
     model_name = 'lstm_gluon_save_parameters_api'
-    logging.info('Performing inference for model/API %s and model' % model_name)
+    logging.info(f'Performing inference for model/API {model_name}')
 
     for folder in get_top_level_folders_in_bucket(s3, model_bucket_name):
-        logging.info('Fetching files for MXNet version : %s' % folder)
+        logging.info(f'Fetching files for MXNet version : {folder}')
         model_files = download_model_files_from_s3(model_name, folder)
         if len(model_files) == 0:
-            logging.warn('No training files found for %s for MXNet version : %s' % (model_name, folder))
+            logging.warn(f'No training files found for {model_name} for MXNet version : {folder}')
             continue
 
         data = mx.npx.load(''.join([model_name, '-data']))
@@ -95,7 +95,7 @@ def test_lstm_gluon_load_parameters_api():
         assert_almost_equal(old_inference_results.asnumpy(), output.asnumpy(), rtol=rtol_default, atol=atol_default)
         clean_model_files(model_files, model_name)
         logging.info('=================================')
-    logging.info('Assertion passed for model : %s' % model_name)
+    logging.info(f'Assertion passed for model : {model_name}')
 
 
 if __name__ == '__main__':
diff --git a/tests/nightly/model_backwards_compatibility_check/model_backwards_compat_train.py b/tests/nightly/model_backwards_compatibility_check/model_backwards_compat_train.py
index 1a817611c4..bc49aeb9a1 100644
--- a/tests/nightly/model_backwards_compatibility_check/model_backwards_compat_train.py
+++ b/tests/nightly/model_backwards_compatibility_check/model_backwards_compat_train.py
@@ -23,7 +23,7 @@ from .common import *
 def train_lenet_gluon_save_params_api():
     model_name = 'lenet_gluon_save_params_api'
     create_model_folder(model_name)
-    logging.info('Saving files for model %s' % model_name)
+    logging.info(f'Saving files for model {model_name}')
     net = Net()
     weights = mx.initializer.Xavier(magnitude=2.57)
     net.initialize(weights, ctx=[mx.cpu(0)])
@@ -41,7 +41,7 @@ def train_lenet_gluon_save_params_api():
 @mx.util.use_np
 def train_lenet_gluon_hybrid_export_api():
     model_name = 'lenet_gluon_hybrid_export_api'
-    logging.info('Saving files for model %s' % model_name)
+    logging.info(f'Saving files for model {model_name}')
     create_model_folder(model_name)
     net = HybridNet()
     weights = mx.initializer.Xavier(magnitude=2.57)
@@ -70,12 +70,12 @@ def train_lstm_gluon_save_parameters_api():
     # If this code is being run on version >= 1.2.1 only then execute it,
     # since it uses save_parameters and load_parameters API
     if compare_versions(str(mxnet_version), '1.2.1') < 0:
-        logging.warn('Found MXNet version %s and exiting because this version does not contain save_parameters'
-                     ' and load_parameters functions' % str(mxnet_version))
+        logging.warn(f'Found MXNet version {str(mxnet_version)} and exiting because this version does not contain save_parameters'
+                     ' and load_parameters functions')
         return
 
     model_name = 'lstm_gluon_save_parameters_api'
-    logging.info('Saving files for model %s' % model_name)
+    logging.info(f'Saving files for model {model_name}')
     create_model_folder(model_name)
     net = SimpleLSTMModel()
     weights = mx.initializer.Xavier(magnitude=2.57)
diff --git a/tests/python/doctest/test_docstring.py b/tests/python/doctest/test_docstring.py
index 23a29588c5..169774d39a 100644
--- a/tests/python/doctest/test_docstring.py
+++ b/tests/python/doctest/test_docstring.py
@@ -29,13 +29,12 @@ def import_into(globs, module, names=None, error_on_overwrite=True):
     mod_names = dir(module)
     if names is not None:
         for name in names:
-            assert name in mod_names, '%s not found in %s' % (
-                    name, module)
+            assert name in mod_names, f'{name} not found in {module}'
         mod_names = names
 
     for name in mod_names:
         if name in globs and globs[name] is not getattr(module, name):
-            error_msg = 'Attempting to overwrite definition of %s' % name
+            error_msg = f'Attempting to overwrite definition of {name}'
             if error_on_overwrite:
                 raise RuntimeError(error_msg)
             logging.warning('%s', error_msg)
diff --git a/tests/python/gpu/test_extensions_gpu.py b/tests/python/gpu/test_extensions_gpu.py
index 339446e511..99bd82eab8 100644
--- a/tests/python/gpu/test_extensions_gpu.py
+++ b/tests/python/gpu/test_extensions_gpu.py
@@ -42,13 +42,13 @@ def test_custom_op_gpu():
         elif os.path.exists(os.path.join(base_path, 'build/'+lib)):
             fname = os.path.join(base_path, 'build/'+lib)
         else:
-            raise MXNetError("library %s not found " % lib)
+            raise MXNetError(f"library {lib} not found ")
     elif (os.name=='nt'):
         lib = 'libcustomop_gpu_lib.dll'
         if os.path.exists('windows_package\\lib\\'+lib):
             fname = 'windows_package\\lib\\'+lib
         else:
-            raise MXNetError("library %s not found " % lib)
+            raise MXNetError(f"library {lib} not found ")
 
     fname = os.path.abspath(fname)
     # load the library containing gemm custom operators
@@ -100,7 +100,7 @@ def test_external_op():
     lib = 'libexternal_lib.so'
     fname = os.path.join(base_path,'example/extensions/lib_external_ops/build/'+lib)
     if not os.path.exists(fname):
-        raise MXNetError("library %s not found " % lib)
+        raise MXNetError(f"library {lib} not found ")
 
     fname = os.path.abspath(fname)
     mx.library.load(fname, False)
diff --git a/tests/python/gpu/test_gluon_model_zoo_gpu.py b/tests/python/gpu/test_gluon_model_zoo_gpu.py
index 4e4d3c6e95..98e92335f2 100644
--- a/tests/python/gpu/test_gluon_model_zoo_gpu.py
+++ b/tests/python/gpu/test_gluon_model_zoo_gpu.py
@@ -44,7 +44,7 @@ def download_data():
 def test_inference(model_name):
     batch_size = 10
     download_data()
-    eprint('testing inference on %s'%model_name)
+    eprint(f'testing inference on {model_name}')
 
     data_shape = (3, 224, 224) if 'inception' not in model_name else (3, 299, 299)
     dataIter = mx.io.ImageRecordIter(
@@ -128,7 +128,7 @@ def test_training():
     softmax_cross_entropy = mx.gluon.loss.SoftmaxCrossEntropyLoss()
 
     for model_name in all_models:
-        eprint('testing %s'%model_name)
+        eprint(f'testing {model_name}')
         #data = mx.nd.random.uniform(shape=(100, 3, 224, 224))
 
         # This is to create a model and run the model once to initialize
diff --git a/tests/python/gpu/test_numpy_fallback.py b/tests/python/gpu/test_numpy_fallback.py
index de8bbe0618..3a9869cff5 100644
--- a/tests/python/gpu/test_numpy_fallback.py
+++ b/tests/python/gpu/test_numpy_fallback.py
@@ -88,11 +88,11 @@ def test_np_fallback_decorator():
     for fallback_out, onp_out in zip(fallback_ret, onp_ret):
         if isinstance(fallback_out, (list, tuple)):
             for fallback_item, onp_item in zip(fallback_out, onp_out):
-                assert fallback_item.device == mx.device.current_device(), "incorrect output device %s vs desired %s" % (str(fallback_item.device), str(mx.device.current_device()))
+                assert fallback_item.device == mx.device.current_device(), f"incorrect output device {str(fallback_item.device)} vs desired {str(mx.device.current_device())}"
                 assert isinstance(fallback_item, np.ndarray)
                 assert_almost_equal(fallback_item.asnumpy(), onp_item, rtol=1e-3, atol=1e-5, equal_nan=False)
         else:
-            assert fallback_out.device == mx.device.current_device(), "incorrect output device %s vs desired %s" % (str(fallback_out.device), str(mx.device.current_device()))
+            assert fallback_out.device == mx.device.current_device(), f"incorrect output device {str(fallback_out.device)} vs desired {str(mx.device.current_device())}"
             assert isinstance(fallback_out, np.ndarray)
             assert_almost_equal(fallback_out.asnumpy(), onp_out, rtol=1e-3, atol=1e-5, equal_nan=False)
 
diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py
index 3870841641..0d9ab70e04 100644
--- a/tests/python/gpu/test_operator_gpu.py
+++ b/tests/python/gpu/test_operator_gpu.py
@@ -1980,8 +1980,8 @@ def test_kernel_error_checking():
     try:
         mpctx = mp.get_context('spawn')
     except:
-        print('SKIP: python%s.%s lacks the required process fork-exec support ... ' %
-              sys.version_info[0:2], file=sys.stderr, end='')
+        print(f'SKIP: python{sys.version_info[0]}.{sys.version_info[1]} lacks the required process fork-exec support ... ',
+              file=sys.stderr, end='')
     else:
         with discard_stderr():
             for f in [kernel_error_check_imperative, kernel_error_check_symbolic]:
@@ -1989,7 +1989,7 @@ def test_kernel_error_checking():
                 p.start()
                 p.join()
                 assert p.exitcode != 0,\
-                    "Expected a synchronous kernel error from %s(), none seen." % f.__name__
+                    f"Expected a synchronous kernel error from {f.__name__}(), none seen."
 
 def test_incorrect_gpu():
     # Try setting dev_id to a really big number
diff --git a/tests/python/gpu/test_profiler_gpu.py b/tests/python/gpu/test_profiler_gpu.py
index 79720897cd..8fc009fe46 100644
--- a/tests/python/gpu/test_profiler_gpu.py
+++ b/tests/python/gpu/test_profiler_gpu.py
@@ -85,7 +85,7 @@ def test_gpu_memory_profiler_symbolic():
     # tensordot:in_arg:A,8388608,0,8388608,0
     # tensordot:in_arg:B,33554432,0,33554432,0
 
-    with open('gpu_memory_profile-pid_%d.csv' % (os.getpid()), mode='r') as csv_file:
+    with open(f'gpu_memory_profile-pid_{os.getpid()}.csv', mode='r') as csv_file:
         csv_reader = csv.DictReader(csv_file)
         for row in csv_reader:
             print(",".join(list(row.values())))
@@ -160,12 +160,12 @@ def test_gpu_memory_profiler_gluon():
 
     # We are only checking for weight parameters here, also making sure that
     # there is no unknown entries in the memory profile.
-    with open('gpu_memory_profile-pid_%d.csv' % (os.getpid()), mode='r') as csv_file:
+    with open(f'gpu_memory_profile-pid_{os.getpid()}.csv', mode='r') as csv_file:
         csv_reader = csv.DictReader(csv_file)
         for row in csv_reader:
             print(",".join(list(row.values())))
         for param in model.collect_params().values():
-            expected_arg_name = "%sin_arg:" % param.var().attr('__profiler_scope__') + \
+            expected_arg_name = f"{param.var().attr('__profiler_scope__')}in_arg:" + \
                                 param.name
             expected_arg_size = str(4 * np.prod(param.shape))
             csv_file.seek(0)
diff --git a/tests/python/onnx/test_models.py b/tests/python/onnx/test_models.py
index 1a4fec271a..987c8c7f49 100644
--- a/tests/python/onnx/test_models.py
+++ b/tests/python/onnx/test_models.py
@@ -33,12 +33,12 @@ def test_resnet50_v2(tmp_path):
         model.hybridize(static_alloc=True)
         out = model(inp)
 
-        prefix = "%s/resnet50" % tmp_path
+        prefix = f"{tmp_path}/resnet50"
         model.export(prefix)
 
-        sym_file = "%s-symbol.json" % prefix
-        params_file = "%s-0000.params" % prefix
-        onnx_file = "%s.onnx" % prefix
+        sym_file = f"{prefix}-symbol.json"
+        params_file = f"{prefix}-0000.params"
+        onnx_file = f"{prefix}.onnx"
     
         dynamic_input_shapes = [('batch', 3, 224, 224)]
         input_shapes = [(1, 3, 224, 224)]
diff --git a/tests/python/train/test_autograd.py b/tests/python/train/test_autograd.py
index d54c764f91..37b3cd18c6 100644
--- a/tests/python/train/test_autograd.py
+++ b/tests/python/train/test_autograd.py
@@ -88,7 +88,7 @@ def test_autograd(tmpdir):
                 metric.update(labels, outputs)
             name, acc = metric.get()
             metric.reset()
-            print('training acc at epoch %d: %s=%f'%(i, name, acc))
+            print(f'training acc at epoch {i}: {name}={acc}')
 
 
     net1 = get_net()
diff --git a/tests/python/unittest/common.py b/tests/python/unittest/common.py
index 963abe3bf8..0503073080 100644
--- a/tests/python/unittest/common.py
+++ b/tests/python/unittest/common.py
@@ -47,7 +47,7 @@ def assertRaises(expected_exception, func, *args, **kwargs):
         pass
     else:
         # Did not raise exception
-        assert False, "%s did not raise %s" % (func.__name__, expected_exception.__name__)
+        assert False, f"{func.__name__} did not raise {expected_exception.__name__}"
 
 
 def default_logger():
@@ -201,8 +201,8 @@ def run_in_spawned_process(func, env, *args):
     try:
         mpctx = mp.get_context('spawn')
     except:
-        print('SKIP: python%s.%s lacks the required process fork-exec support ... ' %
-              sys.version_info[0:2], file=sys.stderr, end='')
+        print(f'SKIP: python{sys.version_info[0]}.{sys.version_info[1]} lacks the required process fork-exec support ... ',
+              file=sys.stderr, end='')
         return False
     else:
         seed = np.random.randint(0,1024*1024*1024)
@@ -211,7 +211,7 @@ def run_in_spawned_process(func, env, *args):
             p = mpctx.Process(target=func, args=(seed,)+args)
             p.start()
             p.join()
-            assert p.exitcode == 0, "Non-zero exit code %d from %s()." % (p.exitcode, func.__name__)
+            assert p.exitcode == 0, f"Non-zero exit code {p.exitcode} from {func.__name__}()."
     return True
 
 
diff --git a/tests/python/unittest/test_extensions.py b/tests/python/unittest/test_extensions.py
index 2c4ac9d4b3..b64354ab92 100644
--- a/tests/python/unittest/test_extensions.py
+++ b/tests/python/unittest/test_extensions.py
@@ -42,13 +42,13 @@ def test_custom_op():
         elif os.path.exists(os.path.join(base_path,'build/'+lib)):
             fname = os.path.join(base_path,'build/'+lib)
         else:
-            raise MXNetError("library %s not found " % lib)
+            raise MXNetError(f"library {lib} not found ")
     elif (os.name=='nt'):
         lib = 'libcustomop_lib.dll'
         if os.path.exists('windows_package\\lib\\'+lib):
             fname = 'windows_package\\lib\\'+lib
         else:
-            raise MXNetError("library %s not found " % lib)
+            raise MXNetError(f"library {lib} not found ")
 
     fname = os.path.abspath(fname)
     # load the library containing gemm custom operators
@@ -109,7 +109,7 @@ def test_subgraph():
             # plain cmake build when run in the CI
             fname = os.path.join(base_path, 'build/'+lib)
         else:
-            raise MXNetError("library %s not found " % lib)
+            raise MXNetError(f"library {lib} not found ")
     elif (os.name=='nt'):
         lib = 'libsubgraph_lib.dll'
         if os.path.exists('windows_package\\lib\\'+lib):
@@ -117,7 +117,7 @@ def test_subgraph():
             fname = 'windows_package\\lib\\'+lib
         else:
             # plain cmake build when run in the CI
-            raise MXNetError("library %s not found " % lib)
+            raise MXNetError(f"library {lib} not found ")
 
     fname = os.path.abspath(fname)
     mx.library.load(fname)
@@ -204,7 +204,7 @@ def test_external_op():
     lib = 'libexternal_lib.so'
     fname = os.path.join(base_path,'example/extensions/lib_external_ops/build/'+lib)
     if not os.path.exists(fname):
-        raise MXNetError("library %s not found " % lib)
+        raise MXNetError(f"library {lib} not found ")
 
     fname = os.path.abspath(fname)
     mx.library.load(fname, False)
diff --git a/tests/python/unittest/test_gluon_model_zoo.py b/tests/python/unittest/test_gluon_model_zoo.py
index a9181a865a..e1be887f49 100644
--- a/tests/python/unittest/test_gluon_model_zoo.py
+++ b/tests/python/unittest/test_gluon_model_zoo.py
@@ -45,7 +45,7 @@ def test_models(model_name):
     test_pretrain = model_name in pretrained_to_test
     model = get_model(model_name, pretrained=test_pretrain, root='model/')
     data_shape = (2, 3, 224, 224) if 'inception' not in model_name else (2, 3, 299, 299)
-    eprint('testing forward for %s' % model_name)
+    eprint(f'testing forward for {model_name}')
     print(model)
     if not test_pretrain:
         model.initialize()
diff --git a/tests/python/unittest/test_gluon_rnn.py b/tests/python/unittest/test_gluon_rnn.py
index ac38b73ac4..2a48ba8f86 100644
--- a/tests/python/unittest/test_gluon_rnn.py
+++ b/tests/python/unittest/test_gluon_rnn.py
@@ -244,10 +244,10 @@ def test_stack():
     for i in range(5):
         if i==1:
             continue
-        assert '%d.h2h_weight'%i in keys
-        assert '%d.h2h_bias'%i in keys
-        assert '%d.i2h_weight'%i in keys
-        assert '%d.i2h_bias'%i in keys
+        assert f'{i}.h2h_weight' in keys
+        assert f'{i}.h2h_bias' in keys
+        assert f'{i}.i2h_weight' in keys
+        assert f'{i}.i2h_bias' in keys
     assert '1.base_cell.h2h_weight' in keys
     assert '1.base_cell.h2h_bias' in keys
     assert '1.base_cell.i2h_weight' in keys
@@ -274,10 +274,10 @@ def test_hybridstack():
     for i in range(5):
         if i==1:
             continue
-        assert '%d.h2h_weight'%i in keys
-        assert '%d.h2h_bias'%i in keys
-        assert '%d.i2h_weight'%i in keys
-        assert '%d.i2h_bias'%i in keys
+        assert f'{i}.h2h_weight' in keys
+        assert f'{i}.h2h_bias' in keys
+        assert f'{i}.i2h_weight' in keys
+        assert f'{i}.i2h_bias' in keys
     assert '1.base_cell.h2h_weight' in keys
     assert '1.base_cell.h2h_bias' in keys
     assert '1.base_cell.i2h_weight' in keys
diff --git a/tests/python/unittest/test_memory_opt.py b/tests/python/unittest/test_memory_opt.py
index fbc06f1ae9..e4b1be1e21 100644
--- a/tests/python/unittest/test_memory_opt.py
+++ b/tests/python/unittest/test_memory_opt.py
@@ -56,11 +56,11 @@ def test_mlp_attn():
     num_steps = 5
     in_arg_shapes = {'x': (num_steps, num_hidden,)}
     for i in range(num_steps):
-        y = mx.sym.Variable("y_t%d"%i)
-        tmp.append(mx.sym.broadcast_add(x, y, name="broadcast_add%d"%i))
+        y = mx.sym.Variable(f"y_t{i}")
+        tmp.append(mx.sym.broadcast_add(x, y, name=f"broadcast_add{i}"))
         z.append(mx.sym.Activation(tmp[-1], act_type='tanh',
-                                   name="activation%d"%i))
-        in_arg_shapes["y_t%d"%i] = (1, num_hidden,)
+                                   name=f"activation{i}"))
+        in_arg_shapes[f"y_t{i}"] = (1, num_hidden,)
     z = mx.sym.Group(z)
     exec = z._simple_bind(mx.cpu(), 'write', **in_arg_shapes)
 
diff --git a/tests/python/unittest/test_ndarray.py b/tests/python/unittest/test_ndarray.py
index a032ea5ce8..7e610d2071 100644
--- a/tests/python/unittest/test_ndarray.py
+++ b/tests/python/unittest/test_ndarray.py
@@ -389,7 +389,7 @@ def test_ndarray_saveload(save_fn):
         for x, y in zip(data, data2 if save_fn is mx.nd.save else data2.values()):
             assert np.sum(x.asnumpy() != y.asnumpy()) == 0
         # test save/load as dict
-        dmap = {'ndarray xx %s' % i : x for i, x in enumerate(data)}
+        dmap = {f'ndarray xx {i}' : x for i, x in enumerate(data)}
         if save_fn is mx.nd.save:
             save_fn(fname, dmap)
         else:
@@ -466,7 +466,7 @@ def test_buffer_load():
                 # test garbage values
                 assertRaises(mx.base.MXNetError,  mx.nd.load_frombuffer, buf_data[:-10])
             # test load_buffer as dict
-            dmap = {'ndarray xx %s' % i : x for i, x in enumerate(data)}
+            dmap = {f'ndarray xx {i}' : x for i, x in enumerate(data)}
             fname = os.path.join(tmpdir, 'dict_{0}.param'.format(repeat))
             mx.nd.save(fname, dmap)
             with open(fname, 'rb') as dfile:
@@ -664,8 +664,8 @@ def test_reduce():
             if type(ndarray_ret) is mx.ndarray.NDArray:
                 ndarray_ret = ndarray_ret.asnumpy()
             assert (ndarray_ret.shape == numpy_ret.shape) or \
-                   (ndarray_ret.shape == (1,) and numpy_ret.shape == ()), "nd:%s, numpy:%s" \
-                                                         %(ndarray_ret.shape, numpy_ret.shape)
+                   (ndarray_ret.shape == (1,) and numpy_ret.shape == ()), \
+                   f"nd:{ndarray_ret.shape}, numpy:{numpy_ret.shape}"
             if check_dtype:
                 assert ndarray_ret.dtype == numpy_ret.dtype,\
                         (ndarray_ret.dtype, numpy_ret.dtype)
diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py
index c2c3341b6c..1fcef46660 100644
--- a/tests/python/unittest/test_numpy_op.py
+++ b/tests/python/unittest/test_numpy_op.py
@@ -8543,7 +8543,7 @@ def test_np_take():
         elif axis == 4:
             grad_in[:, :, :, :, idx] += 1.0
         else:
-            raise ValueError("axis %d is not supported..." % axis)
+            raise ValueError(f"axis {axis} is not supported...")
 
     def check_output_n_grad(data_shape, idx_shape, axis, mode):
         data_real = onp.random.normal(size=data_shape).astype('float32')
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index 07cf2e8436..2fbc57064c 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -166,7 +166,7 @@ def np_softmax(x, axis=-1, temperature=1.0):
 
 def check_elementwise_sum_with_shape(shape, n):
     # forward
-    inputs = [mx.symbol.Variable('arg%d' % i) for i in range(n)]
+    inputs = [mx.symbol.Variable(f'arg{i}') for i in range(n)]
     out = mx.symbol.ElementWiseSum(*inputs, name='esum')
     arr = [mx.nd.empty(shape) for i in range(n)]
     arr_grad = [mx.nd.empty(shape) for i in range(n)]
@@ -208,7 +208,7 @@ def check_concat_with_shape(shapes, dimension, skip_second):
     for shape in shapes:
         target_dim += shape[dimension]
 
-    inputs = [mx.symbol.Variable('arg%d' % i) for i in range(n)]
+    inputs = [mx.symbol.Variable(f'arg{i}') for i in range(n)]
     out = mx.symbol.Concat(*inputs, name='conc',dim=dimension)
     arr = [mx.nd.empty(shape) for shape in shapes]
     for i in range(n):
@@ -1362,15 +1362,15 @@ def test_deconvolution_forward_with_bias(shape, num_filter, num_group, kernel, p
 
 
 def check_nearest_upsampling_with_shape(shapes, scale, root_scale):
-    arr = {'arg_%d'%i: mx.random.uniform(-10.0, 10.0, shape, ctx=mx.cpu()).copyto(default_device()) for i, shape in zip(range(len(shapes)), shapes)}
-    arr_grad = {'arg_%d'%i: mx.nd.zeros(shape) for i, shape in zip(range(len(shapes)), shapes)}
+    arr = {f'arg_{i}': mx.random.uniform(-10.0, 10.0, shape, ctx=mx.cpu()).copyto(default_device()) for i, shape in zip(range(len(shapes)), shapes)}
+    arr_grad = {f'arg_{i}': mx.nd.zeros(shape) for i, shape in zip(range(len(shapes)), shapes)}
 
-    up = mx.sym.UpSampling(*[mx.sym.Variable('arg_%d'%i) for i in range(len(shapes))], sample_type='nearest', scale=root_scale)
+    up = mx.sym.UpSampling(*[mx.sym.Variable(f'arg_{i}') for i in range(len(shapes))], sample_type='nearest', scale=root_scale)
     exe = up._bind(default_device(), args=arr, args_grad=arr_grad)
     exe.forward(is_train=True)
     exe.backward(exe.outputs)
     for k in range(len(shapes)):
-        name = 'arg_%d'%k
+        name = f'arg_{k}'
         assert_allclose(arr[name].asnumpy()*root_scale**2*scale**(2*k), arr_grad[name].asnumpy(), rtol=1e-4)
 
 
@@ -2365,21 +2365,18 @@ def test_reshape_new(src_shape, shape_args, reverse, dst_shape):
     net = mx.sym.fromjson(js)
     _, output_shape, __ = net.infer_shape(data=src_shape)
     assert output_shape[0] == dst_shape, \
-        'Src Shape = %s, Shape Arguments = %s, Reverse = %s, Dst Shape = %s, ' \
-        'Output Shape = %s' %(str(src_shape), str(shape_args), str(reverse),
-                              str(dst_shape), str(output_shape[0]))
+        f'Src Shape = {str(src_shape)}, Shape Arguments = {str(shape_args)}, Reverse = {str(reverse)}, Dst Shape = {str(dst_shape)}, ' \
+        f'Output Shape = {str(output_shape[0])}'
     dat_npy = np.random.rand(*src_shape)
     grad_npy = np.random.rand(*dst_shape)
     exe = net._simple_bind(default_device(), data=src_shape)
     exe.arg_dict['data'][:] = dat_npy
     exe.forward(is_train=True)
     assert np.square(exe.outputs[0].asnumpy() - dat_npy.reshape(dst_shape)).mean() < 1E-7, \
-        'Src Shape = %s, Shape Arguments = %s, Reverse = %s, Dst Shape = %s'\
-        %(str(src_shape), str(shape_args), str(reverse), str(dst_shape))
+        f'Src Shape = {str(src_shape)}, Shape Arguments = {str(shape_args)}, Reverse = {str(reverse)}, Dst Shape = {str(dst_shape)}'
     exe.backward(out_grads=mx.nd.array(grad_npy))
     assert np.square(exe.grad_dict['data'].asnumpy() - grad_npy.reshape(src_shape)).mean() < 1E-7, \
-        'Src Shape = %s, Shape Arguments = %s, Reverse = %s, Dst Shape = %s'\
-        %(str(src_shape), str(shape_args), str(reverse), str(dst_shape))
+        f'Src Shape = {str(src_shape)}, Shape Arguments = {str(shape_args)}, Reverse = {str(reverse)}, Dst Shape = {str(dst_shape)}'
 
     for i in range(len(src_shape)):
         holdout_src_shape = list(src_shape)
@@ -2389,13 +2386,11 @@ def test_reshape_new(src_shape, shape_args, reverse, dst_shape):
         net = mx.sym.elemwise_add(net.reshape(shape_args, reverse=reverse), mx.sym.ones(shape=dst_shape))
         input_shape, output_shape, __ = net.infer_shape(data=holdout_src_shape)
         assert output_shape[0] == dst_shape, \
-            'Holdout Src Shape = %s, Shape Arguments = %s, Reverse = %s, Dst Shape = %s, ' \
-            'Output Shape = %s' %(str(holdout_src_shape), str(shape_args), str(reverse),
-                                  str(dst_shape), str(output_shape[0]))
+            f'Holdout Src Shape = {str(holdout_src_shape)}, Shape Arguments = {str(shape_args)}, ' \
+            f'Reverse = {str(reverse)}, Dst Shape = {str(dst_shape)}, Output Shape = {str(output_shape[0])}'
         assert input_shape[0] == src_shape, \
-            'Holdout Src Shape = %s, Shape Arguments = %s, Reverse = %s, Dst Shape = %s, ' \
-            'Output Shape = %s' %(str(holdout_src_shape), str(shape_args), str(reverse),
-                                  str(dst_shape), str(output_shape[0]))
+            f'Holdout Src Shape = {str(holdout_src_shape)}, Shape Arguments = {str(shape_args)}, ' \
+            f'Reverse = {str(reverse)}, Dst Shape = {str(dst_shape)}, Output Shape = {str(output_shape[0])}'
 
 def test_reshape_old():
     net = mx.sym.Variable("data")
@@ -2426,8 +2421,8 @@ def test_reshape_like():
         _, output_shape, __ = net.infer_shape(lhs=lhs_shape, rhs=rhs_shape)
 
         assert output_shape[0] == dst_shape, \
-            'LHS Shape = %s, RHS Shape = %s, lhs_begin = %s, lhs_end = %s, rhs_begin= %s, rhs_end= %s'\
-            %(str(lhs_shape), str(rhs_shape), str(lbeg), str(lend), str(rbeg), str(rend))
+            f'LHS Shape = {str(lhs_shape)}, RHS Shape = {str(rhs_shape)}, lhs_begin = {str(lbeg)}, ' \
+            f'lhs_end = {str(lend)}, rhs_begin = {str(rbeg)}, rhs_end = {str(rend)}'
 
         lhs_npy = np.random.rand(*lhs_shape)
         rhs_npy = np.random.rand(*rhs_shape)
@@ -2438,12 +2433,12 @@ def test_reshape_like():
         exe.arg_dict['rhs'][:] = rhs_npy
         exe.forward(is_train=True)
         assert np.square(exe.outputs[0].asnumpy() - lhs_npy.reshape(dst_shape)).mean() < 1E-7, \
-            'LHS Shape = %s, RHS Shape = %s, lhs_begin = %s, lhs_end = %s, rhs_begin= %s, rhs_end= %s'\
-            %(str(lhs_shape), str(rhs_shape), str(lbeg), str(lend), str(rbeg), str(rend))
+            f'LHS Shape = {str(lhs_shape)}, RHS Shape = {str(rhs_shape)}, lhs_begin = {str(lbeg)}, ' \
+            f'lhs_end = {str(lend)}, rhs_begin = {str(rbeg)}, rhs_end = {str(rend)}'
         exe.backward(out_grads=mx.nd.array(grad_npy))
         assert np.square(exe.grad_dict['lhs'].asnumpy() - grad_npy.reshape(lhs_shape)).mean() < 1E-7, \
-            'LHS Shape = %s, RHS Shape = %s, lhs_begin = %s, lhs_end = %s, rhs_begin= %s, rhs_end= %s'\
-            %(str(lhs_shape), str(rhs_shape), str(lbeg), str(lend), str(rbeg), str(rend))
+            f'LHS Shape = {str(lhs_shape)}, RHS Shape = {str(rhs_shape)}, lhs_begin = {str(lbeg)}, ' \
+            f'lhs_end = {str(lend)}, rhs_begin = {str(rbeg)}, rhs_end = {str(rend)}'
     # Test new api (Using shape)
     test_cases = [
         [(30,), (15,2,4), 0, None, 0, 2, (15,2)],
@@ -3231,17 +3226,15 @@ def test_correlation():
         if arg_type1[0] != np.dtype(dtype) and arg_type1[1] != np.dtype(dtype) and out_type1[0] != np.dtype(dtype):
             msg = npt.npt.build_err_msg([a, b],
                                         err_msg="Inferred type from a is not as expected, "
-                                                "Expected :%s %s %s, Got: %s %s %s"
-                                                % (dtype, dtype, dtype, arg_type1[0], arg_type1[1], out_type1[0]),
-                                                names=['a', 'b'])
+                                                f"Expected :{dtype} {dtype} {dtype}, Got: {arg_type1[0]} {arg_type1[1]} {out_type1[0]}",
+                                        names=['a', 'b'])
             raise AssertionError(msg)
         arg_type2, out_type2, _ = corr.infer_type(b=dtype)
         if arg_type2[0] != np.dtype(dtype) and arg_type2[1] != np.dtype(dtype) and out_type2[0] != np.dtype(dtype):
             msg = npt.npt.build_err_msg([a, b],
                                         err_msg="Inferred type from b is not as expected, "
-                                                "Expected :%s %s %s, Got: %s %s %s"
-                                                % (dtype, dtype, dtype, arg_type1[0], arg_type1[1], out_type1[0]),
-                                                names=['a', 'b'])
+                                                f"Expected :{dtype} {dtype} {dtype}, Got: {arg_type2[0]} {arg_type2[1]} {out_type2[0]}",
+                                        names=['a', 'b'])
             raise AssertionError(msg)
 
     for dtype in ['float16', 'float32']:
@@ -4238,7 +4231,7 @@ def test_take(mode, out_of_range, data_ndim, idx_ndim):
         elif axis == 4:
             grad_in[:, :, :, :, idx] += 1.0
         else:
-            raise ValueError("axis %d is not supported..." % axis)
+            raise ValueError(f"axis {axis} is not supported...")
             
     for axis in range(-data_ndim, data_ndim):
             data_shape = ()
@@ -4278,7 +4271,7 @@ def test_take(mode, out_of_range, data_ndim, idx_ndim):
                     return
                 else:
                     # Did not raise exception
-                    assert False, "did not raise %s" % MXNetError.__name__
+                    assert False, f"did not raise {MXNetError.__name__}"
 
             assert_almost_equal(exe.outputs[0], np.take(data_real, idx_real, axis=axis, mode=mode))
 
@@ -6653,7 +6646,7 @@ def test_stack():
         dshape = [random.randint(1, 5) for _ in range(ndim)]
         inputs = [np.random.uniform(size=dshape) for _ in range(nin)]
         output = np.stack(inputs, axis=axis)
-        sym_ins = [mx.sym.var('x%d'%i) for i in range(nin)]
+        sym_ins = [mx.sym.var(f'x{i}') for i in range(nin)]
         out = mx.sym.stack(*sym_ins, axis=axis)
         check_symbolic_forward(out, inputs, [output])
         check_numeric_gradient(out, inputs)
@@ -7747,7 +7740,7 @@ def allclose_function(contexts):
                         a_g = a_ctx.asnumpy()
                         b_g = b_ctx.asnumpy()
 
-                    print('\n *** Violations found on %s, but not on %s side  ***' % (v_ctx, v_cmp))
+                    print(f'\n *** Violations found on {v_ctx}, but not on {v_cmp} side  ***')
                     frmt = "                 a[{0:d}]:                 b[{0:d}]:"  \
                            "          abs(a[{0:d}]-b[{0:d}]) - atol + rtol*abs(b[{0:d}]):"
 
@@ -7760,11 +7753,11 @@ def allclose_function(contexts):
                     idx_flat = np.asarray(np.where(bad_indexes.flatten() == True)).flatten()
                     for i in range(len(a_values[0])):
                         flat_idx = idx_flat[i]
-                        print('{}:  index = {}   flat_index = {}'.format('%4d'%i, idx[i], flat_idx))
+                        print(f'{i:4d}:  index = {idx[i]}   flat_index = {flat_idx}')
                         print(frmt.format(flat_idx))
                         for j in range(2):
                             diff = np.abs(a_values[j][i]-b_values[j][i]) - atol + rtol*abs(b_values[j][i])
-                            print('{}:  {}  {}              {}'.format('%6s'%v_ctx, a_values[j][i], b_values[j][i], diff))
+                            print(f'{v_ctx:6s}:  {a_values[j][i]}  {b_values[j][i]}              {diff}')
 
 
             if num_ctx == 1:
@@ -7937,7 +7930,7 @@ def test_op_roi_align():
             Input data types to compare
         '''
         assert dtype_a == dtype_b,\
-            TypeError('Unmatched data types: %s vs %s' % (dtype_a, dtype_b))
+            TypeError(f'Unmatched data types: {dtype_a} vs {dtype_b}')
 
     def bilinear_interpolate(bottom, height, width, y, x):
         if y < -1.0 or y > height or x < -1.0 or x > width:
@@ -7986,10 +7979,10 @@ def test_op_roi_align():
         PH, PW = pooled_size
         assert rois.ndim == 2,\
             ValueError(
-                'The ndim of rois should be 2 rather than %d' % rois.ndim)
+                f'The ndim of rois should be 2 rather than {rois.ndim}')
         assert rois.shape[1] == 5,\
             ValueError(
-                'The length of the axis 1 of rois should be 5 rather than %d' % rois.shape[1])
+                f'The length of the axis 1 of rois should be 5 rather than {rois.shape[1]}')
         assert_same_dtype(data.dtype, T)
         assert_same_dtype(rois.dtype, T)
 
@@ -8107,7 +8100,7 @@ def test_op_rroi_align():
             Input data types to compare
         '''
         assert dtype_a == dtype_b,\
-            TypeError('Unmatched data types: %s vs %s' % (dtype_a, dtype_b))
+            TypeError(f'Unmatched data types: {dtype_a} vs {dtype_b}')
 
     def bilinear_interpolate(bottom, height, width, y, x):
         if y < -1.0 or y > height or x < -1.0 or x > width:
@@ -8153,10 +8146,10 @@ def test_op_rroi_align():
         PH, PW = pooled_size
         assert rois.ndim == 2,\
             ValueError(
-                'The ndim of rois should be 2 rather than %d' % rois.ndim)
+                f'The ndim of rois should be 2 rather than {rois.ndim}')
         assert rois.shape[1] == 6,\
             ValueError(
-                'The length of the axis 1 of rois should be 6 rather than %d' % rois.shape[1])
+                f'The length of the axis 1 of rois should be 6 rather than {rois.shape[1]}')
         assert_same_dtype(data.dtype, T)
         assert_same_dtype(rois.dtype, T)
 
diff --git a/tests/python/unittest/test_profiler.py b/tests/python/unittest/test_profiler.py
index d3d3eafad6..f33e508b8e 100644
--- a/tests/python/unittest/test_profiler.py
+++ b/tests/python/unittest/test_profiler.py
@@ -96,7 +96,7 @@ def test_profile_create_domain_dept():
 def test_profile_task():
     def makeParams():
         objects = tuple('foo' for _ in range(50))
-        template = ''.join('{%d}' % i for i in range(len(objects)))
+        template = ''.join(f'{{{i}}}' for i in range(len(objects)))
         return template, objects
 
     def get_log():
@@ -122,7 +122,7 @@ def test_profile_task():
 def test_profile_frame():
     def makeParams():
         objects = tuple('foo' for _ in range(50))
-        template = ''.join('{%d}' % i for i in range(len(objects)))
+        template = ''.join(f'{{{i}}}' for i in range(len(objects)))
         return template, objects
 
     def get_log():
@@ -149,7 +149,7 @@ def test_profile_frame():
 def test_profile_event(do_enable_profiler=True):
     def makeParams():
         objects = tuple('foo' for _ in range(50))
-        template = ''.join('{%d}' % i for i in range(len(objects)))
+        template = ''.join(f'{{{i}}}' for i in range(len(objects)))
         return template, objects
 
     def get_log():
@@ -189,7 +189,7 @@ def test_profile_tune_pause_resume():
 def test_profile_counter(do_enable_profiler=True):
     def makeParams():
         objects = tuple('foo' for _ in range(50))
-        template = ''.join('{%d}' % i for i in range(len(objects)))
+        template = ''.join(f'{{{i}}}' for i in range(len(objects)))
         return template, objects
 
     def get_log(counter):
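
The triple-brace conversions above are easy to misread: inside an f-string,
'{{' and '}}' are literal braces, so f'{{{i}}}' is a literal '{', the value of
i, then a literal '}'. A standalone check:

    i = 3
    assert '{%d}' % i == f'{{{i}}}' == '{3}'
    assert ''.join(f'{{{i}}}' for i in range(3)) == '{0}{1}{2}'
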
diff --git a/tests/python/unittest/test_random.py b/tests/python/unittest/test_random.py
index cb3eae3976..3f892693ff 100644
--- a/tests/python/unittest/test_random.py
+++ b/tests/python/unittest/test_random.py
@@ -245,10 +245,10 @@ def check_with_device(device, dtype):
         mx.random.seed(128)
         ret2 = ndop(*args, **params).asnumpy()
         assert same(ret1, ret2), \
-                "ndarray test: `%s` should give the same result with the same seed" % name
+                f"ndarray test: `{name}` should give the same result with the same seed"
 
         for check_name, check_func, tol in symbdic['checks']:
-            assert np.abs(check_func(ret1, params)) < tol, "ndarray test: %s check for `%s` did not pass" % (check_name, name)
+            assert np.abs(check_func(ret1, params)) < tol, f"ndarray test: {check_name} check for `{name}` did not pass"
 
         # check multi-distribution sampling
         if 'inputs' not in symbdic: continue  # randn does not support multi-distribution sampling
@@ -263,13 +263,13 @@ def check_with_device(device, dtype):
         mx.random.seed(128)
         ret2 = ndop(*args, **params).asnumpy()
         assert same(ret1, ret2), \
-                "ndarray test: `%s` should give the same result with the same seed" % name
+                f"ndarray test: `{name}` should give the same result with the same seed"
         for i in range(2):
             for j in range(2):
                 stats = {k : v[i][j] for k, v in symbdic['inputs']}
                 for check_name, check_func, tol in symbdic['checks']:
                     err = np.abs(check_func(ret2[i,j], stats))
-                    assert err < tol, "%f vs %f: symbolic test: %s check for `%s` did not pass" % (err, tol, check_name, name)
+                    assert err < tol, f"{err} vs {tol}: symbolic test: {check_name} check for `{name}` did not pass"
 
         # check symbolic
         symbol = symbdic['symbol']
@@ -291,11 +291,11 @@ def check_with_device(device, dtype):
         yexec.forward()
         un2 = (yexec.outputs[0] - x).copyto(device)
         assert same(un1.asnumpy(), un2.asnumpy()), \
-                "symbolic test: `%s` should give the same result with the same seed" % name
+                f"symbolic test: `{name}` should give the same result with the same seed"
 
         ret1 = un1.asnumpy()
         for check_name, check_func, tol in symbdic['checks']:
-            assert np.abs(check_func(ret1, params)) < tol, "symbolic test: %s check for `%s` did not pass" % (check_name, name)
+            assert np.abs(check_func(ret1, params)) < tol, f"symbolic test: {check_name} check for `{name}` did not pass"
         if name.endswith('_like'): continue
 
         # check multi-distribution sampling
@@ -324,7 +324,7 @@ def check_with_device(device, dtype):
                    params.update({ symbdic['inputs'][1][0] : symbdic['inputs'][1][1][i][j] })
                 samples = un1[i,j]
                 for check_name, check_func, tol in symbdic['checks']:
-                    assert np.abs(check_func(samples, params)) < tol, "symbolic test: %s check for `%s` did not pass" % (check_name, name)
+                    assert np.abs(check_func(samples, params)) < tol, f"symbolic test: {check_name} check for `{name}` did not pass"
 
         if 'pdfsymbol' not in symbdic: continue  # randn not tested for pdf
 
@@ -812,11 +812,11 @@ def test_with_random_seed():
 
     def check_same(x, y, name):
         assert same(x, y), \
-            "%s rng should give the same result with the same seed" % name
+            f"{name} rng should give the same result with the same seed"
 
     def check_diff(x, y, name):
         assert not same(x, y), \
-            "%s rng should give different results with different seeds" % name
+            f"{name} rng should give different results with different seeds"
 
     # generate python, numpy and mxnet datasets with the given seed
     def gen_data(seed=None):
diff --git a/tests/python/unittest/test_sparse_ndarray.py b/tests/python/unittest/test_sparse_ndarray.py
index 6a338ec3b1..750505c1eb 100644
--- a/tests/python/unittest/test_sparse_ndarray.py
+++ b/tests/python/unittest/test_sparse_ndarray.py
@@ -566,7 +566,7 @@ def test_sparse_nd_save_load(save_fn):
     for x, y in zip(data_list1, data_list2):
         assert same(x.asnumpy(), y.asnumpy())
 
-    data_map1 = {'ndarray xx %s' % i: x for i, x in enumerate(data_list1)}
+    data_map1 = {f'ndarray xx {i}': x for i, x in enumerate(data_list1)}
     if save_fn is mx.nd.save:
         save_fn(fname, data_map1)
     else:
diff --git a/tests/python/unittest/test_sparse_operator.py b/tests/python/unittest/test_sparse_operator.py
index d82d1925ca..72ba80164e 100644
--- a/tests/python/unittest/test_sparse_operator.py
+++ b/tests/python/unittest/test_sparse_operator.py
@@ -1700,7 +1700,7 @@ def test_sparse_storage_fallback():
 def test_sparse_elementwise_sum():
     def check_sparse_elementwise_sum_with_shape(stypes, shape, n):
         # forward
-        inputs = [mx.symbol.Variable('arg%d' % i) for i in range(n)]
+        inputs = [mx.symbol.Variable(f'arg{i}') for i in range(n)]
         out = mx.symbol.sparse.add_n(*inputs, name='esum')
         arr = []
         arr_grad = [mx.nd.empty(shape, stype=stype) for stype in stypes]
diff --git a/tools/bandwidth/measure.py b/tools/bandwidth/measure.py
index cd4f0fe843..851d32fb74 100644
--- a/tools/bandwidth/measure.py
+++ b/tools/bandwidth/measure.py
@@ -94,7 +94,7 @@ def run(network, optimizer, gpus, kv_store, image_shape, disp_batches,
     shapes = get_shapes(symbol, data_shape)
 
     size = float(sum([reduce(lambda x,y : x*y, s, 1) for s in shapes])) * 4 / 1e6
-    logging.info('num of arrays = %d, total size = %f MB' % (len(shapes), size))
+    logging.info(f'num of arrays = {len(shapes)}, total size = {size} MB')
 
     for i, s in enumerate(shapes):
         kv.init(i, mx.nd.zeros(s))
@@ -136,8 +136,7 @@ def run(network, optimizer, gpus, kv_store, image_shape, disp_batches,
                 # 0 is used for warmup, ignored
                 r = Results(iter=b, time=toc, error=err,
                             bandwidth=size*2*(len(devs)-1)/len(devs)/toc/1e3)
-                logging.info('iter %d, %f sec, %f GB/sec per gpu, error %f' % (
-                    r.iter, r.time, r.bandwidth, r.error))
+                logging.info(f'iter {r.iter}, {r.time} sec, {r.bandwidth} GB/sec per gpu, error {r.error}')
                 res.append(r)
             toc = 0
     return res
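
The measure.py hunks above pass f-strings straight to logging.info, which is
fine for a benchmark script but evaluates the message eagerly even when the
INFO level is filtered out; %-style arguments handed to the logging call
itself are formatted lazily. A sketch of both forms, with placeholder values:

    import logging

    shapes, size = [(10, 10), (20, 5)], 0.8
    # eager: the f-string is built before logging decides whether to emit
    logging.info(f'num of arrays = {len(shapes)}, total size = {size} MB')
    # lazy: formatting is deferred until the record is actually emitted
    logging.info('num of arrays = %d, total size = %f MB', len(shapes), size)
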
diff --git a/tools/im2rec.py b/tools/im2rec.py
index da3a1dddc8..b20b3adef1 100644
--- a/tools/im2rec.py
+++ b/tools/im2rec.py
@@ -84,10 +84,10 @@ def write_list(path_out, image_list):
     """
     with open(path_out, 'w') as fout:
         for i, item in enumerate(image_list):
-            line = '%d\t' % item[0]
+            line = f'{item[0]}\t'
             for j in item[2:]:
-                line += '%f\t' % j
-            line += '%s\n' % item[1]
+                line += f'{j}\t'
+            line += f'{item[1]}\n'
             fout.write(line)
 
 def make_list(args):
@@ -106,7 +106,7 @@ def make_list(args):
     for i in range(args.chunks):
         chunk = image_list[i * chunk_size:(i + 1) * chunk_size]
         if args.chunks > 1:
-            str_chunk = '_%d' % i
+            str_chunk = f'_{i}'
         else:
             str_chunk = ''
         sep = int(chunk_size * args.train_ratio)
@@ -138,12 +138,12 @@ def read_list(path_in):
             line_len = len(line)
             # check the data format of .lst file
             if line_len < 3:
-                print('lst should have at least has three parts, but only has %s parts for %s' % (line_len, line))
+                print(f'lst should have at least three parts, but only has {line_len} parts for {line}')
                 continue
             try:
                 item = [int(line[0])] + [line[-1]] + [float(i) for i in line[1:-1]]
             except Exception as e:
-                print('Parsing lst met error for %s, detail: %s' % (line, e))
+                print(f'Parsing lst met error for {line}, detail: {e}')
                 continue
             yield item
 
@@ -179,11 +179,11 @@ def image_encode(args, i, item, q_out):
         img = cv2.imread(fullpath, args.color)
     except:
         traceback.print_exc()
-        print('imread error trying to load file: %s ' % fullpath)
+        print(f'imread error trying to load file: {fullpath} ')
         q_out.put((i, None, item))
         return
     if img is None:
-        print('imread read blank (None) image for file: %s' % fullpath)
+        print(f'imread read blank (None) image for file: {fullpath}')
         q_out.put((i, None, item))
         return
     if args.center_crop:
@@ -205,7 +205,7 @@ def image_encode(args, i, item, q_out):
         q_out.put((i, s, item))
     except Exception as e:
         traceback.print_exc()
-        print('pack_img error on file: %s' % fullpath, e)
+        print(f'pack_img error on file: {fullpath}', e)
         q_out.put((i, None, item))
         return
 
@@ -390,4 +390,4 @@ if __name__ == '__main__':
                             pre_time = cur_time
                         cnt += 1
         if not count:
-            print('Did not find and list file with prefix %s'%args.prefix)
+            print(f'Did not find any list file with prefix {args.prefix}')
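
One behavioral note on the write_list conversion above: '%f' % j always prints
six decimal places, whereas a bare f'{j}' falls back to str(), so float labels
may be written differently after this change. If byte-identical .lst output
matters, the ':f' spec reproduces the old form. A standalone illustration with
an arbitrary value:

    j = 0.5
    assert '%f' % j == '0.500000'
    assert f'{j}' == '0.5'           # plain str() conversion
    assert f'{j:f}' == '0.500000'    # ':f' matches the old %f output
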
diff --git a/tools/kill-mxnet.py b/tools/kill-mxnet.py
index 321b2b82af..e95f414da8 100644
--- a/tools/kill-mxnet.py
+++ b/tools/kill-mxnet.py
@@ -23,7 +23,7 @@ import os, sys
 import subprocess
 
 if len(sys.argv) != 4:
-  print("usage: %s <hostfile> <user> <prog>" % sys.argv[0])
+  print(f"usage: {sys.argv[0]} <hostfile> <user> <prog>")
   sys.exit(1)
 
 host_file = sys.argv[1]
@@ -45,7 +45,7 @@ with open(host_file, "r") as f:
     if ':' in host:
       host = host[:host.index(':')]
     print(host)
-    subprocess.Popen(["ssh", "-oStrictHostKeyChecking=no", "%s" % host, kill_cmd],
+    subprocess.Popen(["ssh", "-oStrictHostKeyChecking=no", f"{host}", kill_cmd],
             shell=False,
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE)
diff --git a/tools/launch.py b/tools/launch.py
index 117dab69b1..f021eda3b0 100644
--- a/tools/launch.py
+++ b/tools/launch.py
@@ -111,7 +111,7 @@ def main():
             from dmlc_tracker import sge
             sge.submit(args)
         else:
-            raise RuntimeError('Unknown submission cluster type %s' % args.cluster)
+            raise RuntimeError(f'Unknown submission cluster type {args.cluster}')
     else:
         if args.cluster == 'ssh':
             from dmlc_tracker import ssh
@@ -120,7 +120,7 @@ def main():
             from dmlc_tracker import mpi
             mpi.submit(args)
         else:
-            raise RuntimeError('Unknown submission cluster type %s' % args.cluster)
+            raise RuntimeError(f'Unknown submission cluster type {args.cluster}')
 
 
 def signal_handler(signal, frame):
diff --git a/tools/parse_log.py b/tools/parse_log.py
index 406919a058..dcca6fdc64 100644
--- a/tools/parse_log.py
+++ b/tools/parse_log.py
@@ -66,10 +66,10 @@ if args.format == 'markdown':
     print("| epoch | " + " | ".join(['train-'+s for s in args.metric_names]) + " | " + " | ".join(['val-'+s for s in args.metric_names]) + " | time |")
     print("| --- "*(len(res)+1) + "|")
     for k, v in data.items():
-        print("| %2d | " % (k+1)\
-              + " | ".join(["%f" % (v[2*j]/v[2*j+1]) for j in range(2*len(args.metric_names))])\
-              + " | %.1f |" % (v[-2]/v[-1]))
+        print(f"| {k+1:2d} | "\
+              + " | ".join([f"{(v[2*j]/v[2*j+1])}" for j in range(2*len(args.metric_names))])\
+              + f" | {v[-2]/v[-1]:.1f} |")
 elif args.format == 'none':
     print("\t".join(['epoch'] + ['train-' + s for s in args.metric_names] + ['val-' + s for s in args.metric_names] + ['time']))
     for k, v in data.items():
-        print("\t".join(["%2d" % (k+1)] + ["%f" % (v[2*j]/v[2*j+1]) for j in range(2*len(args.metric_names))] + ["%.1f" % (v[-2]/v[-1])]))
+        print("\t".join([f"{k+1:2d}"] + [f"{v[2*j]/v[2*j+1]}" for j in range(2*len(args.metric_names))] + [f"{v[-2]/v[-1]:.1f}"]))
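
The parse_log.py rewrite shows that an f-string replacement field can hold a
full expression followed by a format spec, as in {k+1:2d} and
{v[-2]/v[-1]:.1f}; note that the bare {v[2*j]/v[2*j+1]} field, like the
write_list change above, drops %f's fixed six-decimal precision. A minimal
sketch with invented values:

    k, v = 0, [1.0, 2.0, 12.34, 2.0]
    assert f'{k+1:2d}' == ' 1'            # expression, then 2-wide int spec
    assert f'{v[-2]/v[-1]:.1f}' == '6.2'  # indexing and division inline
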
diff --git a/tools/rec2idx.py b/tools/rec2idx.py
index 0219d2fc2a..e36125f77b 100644
--- a/tools/rec2idx.py
+++ b/tools/rec2idx.py
@@ -84,7 +84,7 @@ class IndexCreator(mx.recordio.MXRecordIO):
             if cont is None:
                 break
             key = self.key_type(counter)
-            self.fidx.write('%s\t%d\n'%(str(key), pos))
+            self.fidx.write(f'{str(key)}\t{pos}\n')
             counter = counter + 1
 
 def parse_args():