You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2021/04/13 16:08:45 UTC

[GitHub] [spark] xinrong-databricks commented on a change in pull request #32139: [WIP][SPARK-35032][PYTHON] Port Koalas Index unit tests into PySpark

xinrong-databricks commented on a change in pull request #32139:
URL: https://github.com/apache/spark/pull/32139#discussion_r612586633



##########
File path: python/pyspark/pandas/tests/indexes/test_base.py
##########
@@ -0,0 +1,2308 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import inspect
+import unittest
+from distutils.version import LooseVersion
+from datetime import datetime
+
+import numpy as np
+import pandas as pd
+import pyspark
+
+import pyspark.pandas as ps
+from pyspark.pandas.exceptions import PandasNotImplementedError
+from pyspark.pandas.missing.indexes import (
+    MissingPandasLikeCategoricalIndex,
+    MissingPandasLikeDatetimeIndex,
+    MissingPandasLikeIndex,
+    MissingPandasLikeMultiIndex,
+)
+from pyspark.pandas.testing.utils import ReusedSQLTestCase, TestUtils, SPARK_CONF_ARROW_ENABLED
+
+
+class IndexesTest(ReusedSQLTestCase, TestUtils):
+    @property
+    def pdf(self):
+        # pandas fixture, rebuilt on every access: two integer columns under an
+        # integer index with gaps whose last label (9) occurs three times.
+        index_labels = [0, 1, 3, 5, 6, 8, 9, 9, 9]
+        column_data = {
+            "a": [1, 2, 3, 4, 5, 6, 7, 8, 9],
+            "b": [4, 5, 6, 3, 2, 1, 0, 0, 0],
+        }
+        return pd.DataFrame(column_data, index=index_labels)
+
+    @property
+    def kdf(self):
+        # pandas-on-Spark counterpart of `self.pdf`, converted anew on each access.
+        pandas_frame = self.pdf
+        return ps.from_pandas(pandas_frame)
+
+    def test_index_basic(self):
+        # For each flavour of pandas index, the converted frame must expose an
+        # equal index through an index class of the same name.
+        def random_block():
+            return np.random.randn(10, 5)
+
+        pandas_frames = [
+            pd.DataFrame(random_block(), index=np.random.randint(100, size=10)),
+            pd.DataFrame(
+                random_block(), index=np.random.randint(100, size=10).astype(np.int32)
+            ),
+            pd.DataFrame(random_block(), index=np.random.randn(10)),
+            pd.DataFrame(random_block(), index=np.random.randn(10).astype(np.float32)),
+            pd.DataFrame(random_block(), index=list("abcdefghij")),
+            pd.DataFrame(
+                random_block(), index=pd.date_range("2011-01-01", freq="D", periods=10)
+            ),
+            pd.DataFrame(random_block(), index=pd.Categorical(list("abcdefghij"))),
+            pd.DataFrame(random_block(), columns=list("abcde")).set_index(["a", "b"]),
+        ]
+
+        for pandas_frame in pandas_frames:
+            spark_frame = ps.from_pandas(pandas_frame)
+            expected_index = pandas_frame.index
+            actual_index = spark_frame.index
+            self.assert_eq(actual_index, expected_index)
+            self.assert_eq(type(actual_index).__name__, type(expected_index).__name__)
+
+    def test_index_from_series(self):
+        # Index constructors fed a pandas-on-Spark Series are compared with the
+        # pandas constructors fed the equivalent pandas Series.
+        pandas_series = pd.Series([1, 2, 3], name="a", index=[10, 20, 30])
+        spark_series = ps.from_pandas(pandas_series)
+
+        self.assert_eq(ps.Index(spark_series), pd.Index(pandas_series))
+        self.assert_eq(
+            ps.Index(spark_series, dtype="float"), pd.Index(pandas_series, dtype="float")
+        )
+        self.assert_eq(ps.Index(spark_series, name="x"), pd.Index(pandas_series, name="x"))
+
+        expected_int = pd.Int64Index(pandas_series)
+        expected_float = pd.Float64Index(pandas_series)
+        if LooseVersion(pd.__version__) < LooseVersion("1.1"):
+            # For pandas older than 1.1 the expected indexes are renamed to "a".
+            expected_int = expected_int.rename("a")
+            expected_float = expected_float.rename("a")
+        self.assert_eq(ps.Int64Index(spark_series), expected_int)
+        self.assert_eq(ps.Float64Index(spark_series), expected_float)
+
+        # Same check for a datetime-valued series.
+        pandas_series = pd.Series(
+            [datetime(2021, 3, 1), datetime(2021, 3, 2)], name="x", index=[10, 20]
+        )
+        spark_series = ps.from_pandas(pandas_series)
+
+        self.assert_eq(ps.Index(spark_series), pd.Index(pandas_series))
+        self.assert_eq(ps.DatetimeIndex(spark_series), pd.DatetimeIndex(pandas_series))
+
+    def test_index_from_index(self):
+        # Index constructors fed an existing pandas-on-Spark index are compared
+        # with the pandas constructors fed the equivalent pandas index.
+        pandas_index = pd.Index([1, 2, 3], name="a")
+        spark_index = ps.from_pandas(pandas_index)
+
+        self.assert_eq(ps.Index(spark_index), pd.Index(pandas_index))
+        self.assert_eq(
+            ps.Index(spark_index, dtype="float"), pd.Index(pandas_index, dtype="float")
+        )
+        self.assert_eq(ps.Index(spark_index, name="x"), pd.Index(pandas_index, name="x"))
+
+        self.assert_eq(ps.Int64Index(spark_index), pd.Int64Index(pandas_index))
+        self.assert_eq(ps.Float64Index(spark_index), pd.Float64Index(pandas_index))
+
+        # Same check starting from a DatetimeIndex.
+        pandas_index = pd.DatetimeIndex(["2021-03-01", "2021-03-02"])
+        spark_index = ps.from_pandas(pandas_index)
+
+        self.assert_eq(ps.Index(spark_index), pd.Index(pandas_index))
+        self.assert_eq(ps.DatetimeIndex(spark_index), pd.DatetimeIndex(pandas_index))
+
+    def test_index_getattr(self):
+        # An unknown attribute on an index raises AttributeError naming it.
+        missing_attr = "databricks"
+        pattern = "'.*Index' object has no attribute '{}'".format(missing_attr)
+
+        int_index = self.kdf.index
+        with self.assertRaisesRegex(AttributeError, pattern):
+            int_index.__getattr__(missing_attr)
+
+        with self.assertRaisesRegex(AttributeError, pattern):
+            ps.from_pandas(pd.date_range("2011-01-01", freq="D", periods=10)).__getattr__(
+                missing_attr
+            )
+
+    def test_multi_index_getattr(self):
+        # An unknown attribute on a MultiIndex raises AttributeError naming it.
+        level_values = [[1, 1, 2, 2], ["red", "blue", "red", "blue"]]
+        pandas_midx = pd.MultiIndex.from_arrays(level_values, names=("number", "color"))
+        pandas_frame = pd.DataFrame(np.random.randn(4, 5), pandas_midx)
+        spark_midx = ps.from_pandas(pandas_frame).index
+
+        missing_attr = "databricks"
+        pattern = "'MultiIndex' object has no attribute '{}'".format(missing_attr)
+        with self.assertRaisesRegex(AttributeError, pattern):
+            spark_midx.__getattr__(missing_attr)
+
+    def test_to_series(self):
+        # Compares Index.to_series() against pandas for an unnamed index, a named
+        # index, a tuple-named index, an index produced by arithmetic, and a
+        # two-level MultiIndex; finally checks that a list name is rejected.
+        pidx = self.pdf.index
+        kidx = self.kdf.index
+
+        self.assert_eq(kidx.to_series(), pidx.to_series())
+        self.assert_eq(kidx.to_series(name="a"), pidx.to_series(name="a"))
+
+        # With name
+        pidx.name = "Koalas"
+        kidx.name = "Koalas"
+        self.assert_eq(kidx.to_series(), pidx.to_series())
+        self.assert_eq(kidx.to_series(name=("x", "a")), pidx.to_series(name=("x", "a")))
+
+        # With tupled name
+        pidx.name = ("x", "a")
+        kidx.name = ("x", "a")
+        self.assert_eq(kidx.to_series(), pidx.to_series())
+        self.assert_eq(kidx.to_series(name="a"), pidx.to_series(name="a"))
+
+        # Index derived from an arithmetic expression
+        self.assert_eq((kidx + 1).to_series(), (pidx + 1).to_series())
+
+        # MultiIndex obtained by appending column "b" to the existing index
+        pidx = self.pdf.set_index("b", append=True).index
+        kidx = self.kdf.set_index("b", append=True).index
+
+        # The MultiIndex comparison runs with the Arrow setting switched off.
+        with self.sql_conf({SPARK_CONF_ARROW_ENABLED: False}):
+            self.assert_eq(kidx.to_series(), pidx.to_series())
+            self.assert_eq(kidx.to_series(name="a"), pidx.to_series(name="a"))
+
+        # A list is rejected as a Series name.
+        expected_error_message = "Series.name must be a hashable type"
+        with self.assertRaisesRegex(TypeError, expected_error_message):
+            kidx.to_series(name=["x", "a"])
+
+    def test_to_frame(self):
+        # Compares Index.to_frame() / MultiIndex.to_frame() against pandas, with
+        # and without `index=False`, and (on pandas >= 0.24) with the `name`
+        # argument, including non-string names and invalid names.
+        pidx = self.pdf.index
+        kidx = self.kdf.index
+
+        # Unnamed single-level index
+        self.assert_eq(kidx.to_frame(), pidx.to_frame())
+        self.assert_eq(kidx.to_frame(index=False), pidx.to_frame(index=False))
+
+        # Named single-level index
+        pidx.name = "a"
+        kidx.name = "a"
+
+        self.assert_eq(kidx.to_frame(), pidx.to_frame())
+        self.assert_eq(kidx.to_frame(index=False), pidx.to_frame(index=False))
+
+        if LooseVersion(pd.__version__) >= LooseVersion("0.24"):
+            # The `name` argument is added in pandas 0.24.
+            self.assert_eq(kidx.to_frame(name="x"), pidx.to_frame(name="x"))
+            self.assert_eq(
+                kidx.to_frame(index=False, name="x"), pidx.to_frame(index=False, name="x"),
+            )
+
+            # A list name is a TypeError for a single-level index.
+            self.assertRaises(TypeError, lambda: kidx.to_frame(name=["x"]))
+
+            # non-string name
+            self.assert_eq(kidx.to_frame(name=10), pidx.to_frame(name=10))
+            self.assert_eq(kidx.to_frame(name=("x", 10)), pidx.to_frame(name=("x", 10)))
+
+        # Two-level MultiIndex obtained by appending column "b"
+        pidx = self.pdf.set_index("b", append=True).index
+        kidx = self.kdf.set_index("b", append=True).index
+
+        self.assert_eq(kidx.to_frame(), pidx.to_frame())
+        self.assert_eq(kidx.to_frame(index=False), pidx.to_frame(index=False))
+
+        if LooseVersion(pd.__version__) >= LooseVersion("0.24"):
+            # The `name` argument is added in pandas 0.24.
+            self.assert_eq(kidx.to_frame(name=["x", "y"]), pidx.to_frame(name=["x", "y"]))
+            self.assert_eq(kidx.to_frame(name=("x", "y")), pidx.to_frame(name=("x", "y")))
+            self.assert_eq(
+                kidx.to_frame(index=False, name=["x", "y"]),
+                pidx.to_frame(index=False, name=["x", "y"]),
+            )
+
+            # For a MultiIndex a scalar name is a TypeError and a list of the
+            # wrong length is a ValueError.
+            self.assertRaises(TypeError, lambda: kidx.to_frame(name="x"))
+            self.assertRaises(ValueError, lambda: kidx.to_frame(name=["x"]))
+
+            # non-string names
+            self.assert_eq(kidx.to_frame(name=[10, 20]), pidx.to_frame(name=[10, 20]))
+            self.assert_eq(kidx.to_frame(name=("x", 10)), pidx.to_frame(name=("x", 10)))
+            self.assert_eq(
+                kidx.to_frame(name=[("x", 10), ("y", 20)]),
+                pidx.to_frame(name=[("x", 10), ("y", 20)]),
+            )
+
+    def test_index_names(self):
+        # Checks `name` / `names` on a single-level index: the initial value,
+        # renaming and clearing through the index object (compared with the
+        # owning frame and a series selected from it), and the errors raised
+        # for invalid names.
+        kdf = self.kdf
+        self.assertIsNone(kdf.index.name)
+
+        idx = pd.Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], name="x")
+        pdf = pd.DataFrame(np.random.randn(10, 5), index=idx, columns=list("abcde"))
+        kdf = ps.from_pandas(pdf)
+
+        # Series selected before the renames below; their index names are
+        # compared again after each rename.
+        pser = pdf.a
+        kser = kdf.a
+
+        self.assertEqual(kdf.index.name, pdf.index.name)
+        self.assertEqual(kdf.index.names, pdf.index.names)
+
+        # Rename through the index objects.
+        pidx = pdf.index
+        kidx = kdf.index
+        pidx.name = "renamed"
+        kidx.name = "renamed"
+        self.assertEqual(kidx.name, pidx.name)
+        self.assertEqual(kidx.names, pidx.names)
+        self.assert_eq(kidx, pidx)
+        self.assertEqual(kdf.index.name, pdf.index.name)
+        self.assertEqual(kdf.index.names, pdf.index.names)
+        self.assertEqual(kser.index.names, pser.index.names)
+
+        # Clear the name again.
+        pidx.name = None
+        kidx.name = None
+        self.assertEqual(kidx.name, pidx.name)
+        self.assertEqual(kidx.names, pidx.names)
+        self.assert_eq(kidx, pidx)
+        self.assertEqual(kdf.index.name, pdf.index.name)
+        self.assertEqual(kdf.index.names, pdf.index.names)
+        self.assertEqual(kser.index.names, pser.index.names)
+
+        # `names` must be list-like ...
+        with self.assertRaisesRegex(ValueError, "Names must be a list-like"):
+            kidx.names = "hi"
+
+        # ... and its length must equal the number of index levels.
+        expected_error_message = "Length of new names must be {}, got {}".format(
+            kdf._internal.index_level, len(["0", "1"])
+        )
+        with self.assertRaisesRegex(ValueError, expected_error_message):
+            kidx.names = ["0", "1"]
+
+        # Lists are rejected as an index name, both at construction time and
+        # on assignment.
+        expected_error_message = "Index.name must be a hashable type"
+        with self.assertRaisesRegex(TypeError, expected_error_message):
+            ps.Index([1, 2, 3], name=["0", "1"])
+        with self.assertRaisesRegex(TypeError, expected_error_message):
+            kidx.name = ["renamed"]
+        with self.assertRaisesRegex(TypeError, expected_error_message):
+            kidx.name = ["0", "1"]
+        with self.assertRaisesRegex(TypeError, expected_error_message):
+            ps.Index([(1, 2), (3, 4)], names=["a", ["b"]])
+
+    def test_multi_index_names(self):
+        # Assigning `names` on a MultiIndex must mirror pandas (including a None
+        # level name), while reading or assigning the singular `name` raises
+        # PandasNotImplementedError.
+        arrays = [[1, 1, 2, 2], ["red", "blue", "red", "blue"]]
+        idx = pd.MultiIndex.from_arrays(arrays, names=("number", "color"))
+        pdf = pd.DataFrame(np.random.randn(4, 5), idx)
+        kdf = ps.from_pandas(pdf)
+
+        self.assertEqual(kdf.index.names, pdf.index.names)
+
+        pidx = pdf.index
+        kidx = kdf.index
+        pidx.names = ["renamed_number", "renamed_color"]
+        kidx.names = ["renamed_number", "renamed_color"]
+        self.assertEqual(kidx.names, pidx.names)
+
+        pidx.names = ["renamed_number", None]
+        kidx.names = ["renamed_number", None]
+        self.assertEqual(kidx.names, pidx.names)
+        # No fallback for PySpark < 2.4 is needed: this test ships inside PySpark
+        # itself, at a version that already contains pyspark.pandas.
+        self.assert_eq(kidx, pidx)
+
+        with self.assertRaises(PandasNotImplementedError):
+            kidx.name
+        with self.assertRaises(PandasNotImplementedError):
+            kidx.name = "renamed"
+
+    def test_index_rename(self):
+        # Compares Index.rename() against pandas: non-inplace renames (string,
+        # non-string and None names), an inplace rename, and the TypeError for
+        # a list name. After each step the owning frames' index names are
+        # compared as well.
+        pdf = pd.DataFrame(
+            np.random.randn(10, 5), index=pd.Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], name="x")
+        )
+        kdf = ps.from_pandas(pdf)
+
+        pidx = pdf.index
+        kidx = kdf.index
+
+        # Non-inplace rename: compare the result, then the frames' index names.
+        self.assert_eq(kidx.rename("y"), pidx.rename("y"))
+        self.assert_eq(kdf.index.names, pdf.index.names)
+
+        # non-string names
+        self.assert_eq(kidx.rename(0), pidx.rename(0))
+        self.assert_eq(kidx.rename(("y", 0)), pidx.rename(("y", 0)))
+
+        # Inplace rename on both sides.
+        kidx.rename("z", inplace=True)
+        pidx.rename("z", inplace=True)
+
+        self.assert_eq(kidx, pidx)
+        self.assert_eq(kdf.index.names, pdf.index.names)
+
+        self.assert_eq(kidx.rename(None), pidx.rename(None))
+        self.assert_eq(kdf.index.names, pdf.index.names)
+
+        # A list is rejected as the name of a single-level index.
+        self.assertRaises(TypeError, lambda: kidx.rename(["x", "y"]))
+
+    def test_multi_index_rename(self):
+        # Compares MultiIndex.rename() against pandas: non-inplace renames
+        # (string, non-string and None names), an inplace rename, and the
+        # errors for a scalar name, None, or a list of the wrong length.
+        arrays = [[1, 1, 2, 2], ["red", "blue", "red", "blue"]]
+        idx = pd.MultiIndex.from_arrays(arrays, names=("number", "color"))
+        pdf = pd.DataFrame(np.random.randn(4, 5), idx)
+        kdf = ps.from_pandas(pdf)
+
+        pmidx = pdf.index
+        kmidx = kdf.index
+
+        # Non-inplace rename: compare the result, then the frames' index names.
+        self.assert_eq(kmidx.rename(["n", "c"]), pmidx.rename(["n", "c"]))
+        self.assert_eq(kdf.index.names, pdf.index.names)
+
+        # non-string names
+        self.assert_eq(kmidx.rename([0, 1]), pmidx.rename([0, 1]))
+        self.assert_eq(
+            kmidx.rename([("x", "a"), ("y", "b")]), pmidx.rename([("x", "a"), ("y", "b")])
+        )
+
+        # Inplace rename on both sides.
+        kmidx.rename(["num", "col"], inplace=True)
+        pmidx.rename(["num", "col"], inplace=True)
+
+        self.assert_eq(kmidx, pmidx)
+        self.assert_eq(kdf.index.names, pdf.index.names)
+
+        self.assert_eq(kmidx.rename([None, None]), pmidx.rename([None, None]))
+        self.assert_eq(kdf.index.names, pdf.index.names)
+
+        # Scalar or None names are a TypeError; a list of the wrong length is
+        # a ValueError.
+        self.assertRaises(TypeError, lambda: kmidx.rename("number"))
+        self.assertRaises(TypeError, lambda: kmidx.rename(None))
+        self.assertRaises(ValueError, lambda: kmidx.rename(["number"]))
+
+    def test_multi_index_levshape(self):
+        # `levshape` of a three-level MultiIndex agrees with pandas.
+        pandas_midx = pd.MultiIndex.from_tuples([("a", "x", 1), ("b", "y", 2)])
+        spark_midx = ps.from_pandas(pandas_midx)
+
+        expected_shape = pandas_midx.levshape
+        self.assertEqual(expected_shape, spark_midx.levshape)
+
+    def test_index_unique(self):
+        # Index.unique() returns each label once; invalid `level` arguments
+        # raise IndexError (out-of-range position) or KeyError (unknown name).
+        kidx = self.kdf.index
+
+        # here the output is different than pandas in terms of order
+        expected = [0, 1, 3, 5, 6, 8, 9]
+
+        self.assert_eq(expected, sorted(kidx.unique().to_pandas()))
+        self.assert_eq(expected, sorted(kidx.unique(level=0).to_pandas()))
+
+        expected = [1, 2, 4, 6, 7, 9, 10]
+        self.assert_eq(expected, sorted((kidx + 1).unique().to_pandas()))
+
+        # The patterns are literal: the parentheses are escaped so the
+        # requested level name is actually verified, and no stray `*`
+        # quantifier is left to loosen the match.
+        with self.assertRaisesRegex(IndexError, "Too many levels"):
+            kidx.unique(level=1)
+
+        with self.assertRaisesRegex(KeyError, r"Requested level \(hi\)"):
+            kidx.unique(level="hi")
+
+    def test_multi_index_copy(self):
+        # A copied MultiIndex equals the copy made by pandas.
+        level_values = [[1, 1, 2, 2], ["red", "blue", "red", "blue"]]
+        pandas_midx = pd.MultiIndex.from_arrays(level_values, names=("number", "color"))
+        pandas_frame = pd.DataFrame(np.random.randn(4, 5), pandas_midx)
+        spark_frame = ps.from_pandas(pandas_frame)
+
+        actual = spark_frame.index.copy()
+        expected = pandas_frame.index.copy()
+        self.assert_eq(actual, expected)
+
+    def test_drop_duplicates(self):
+        # drop_duplicates() agrees with pandas once both results are sorted,
+        # for a plain index and for one produced by arithmetic.
+        pandas_index = pd.Index([4, 2, 4, 1, 4, 3])
+        spark_index = ps.from_pandas(pandas_index)
+
+        actual = spark_index.drop_duplicates().sort_values()
+        expected = pandas_index.drop_duplicates().sort_values()
+        self.assert_eq(actual, expected)
+
+        shifted_actual = (spark_index + 1).drop_duplicates().sort_values()
+        shifted_expected = (pandas_index + 1).drop_duplicates().sort_values()
+        self.assert_eq(shifted_actual, shifted_expected)
+
+    def test_dropna(self):
+        # dropna() agrees with pandas for a plain index and for one produced
+        # by arithmetic.
+        pandas_index = pd.Index([np.nan, 2, 4, 1, np.nan, 3])
+        spark_index = ps.from_pandas(pandas_index)
+
+        actual = spark_index.dropna()
+        expected = pandas_index.dropna()
+        self.assert_eq(actual, expected)
+
+        shifted_actual = (spark_index + 1).dropna()
+        shifted_expected = (pandas_index + 1).dropna()
+        self.assert_eq(shifted_actual, shifted_expected)
+
+    def test_index_symmetric_difference(self):
+        # symmetric_difference() agrees with pandas (after sorting) for
+        # Index/Index and MultiIndex/MultiIndex pairs; combining an Index with
+        # a MultiIndex raises NotImplementedError.
+        pidx1 = pd.Index([1, 2, 3, 4])
+        pidx2 = pd.Index([2, 3, 4, 5])
+        kidx1 = ps.from_pandas(pidx1)
+        kidx2 = ps.from_pandas(pidx2)
+
+        self.assert_eq(
+            kidx1.symmetric_difference(kidx2).sort_values(),
+            pidx1.symmetric_difference(pidx2).sort_values(),
+        )
+        self.assert_eq(
+            (kidx1 + 1).symmetric_difference(kidx2).sort_values(),
+            (pidx1 + 1).symmetric_difference(pidx2).sort_values(),
+        )
+
+        # Two MultiIndexes that differ only in the first label of level 0.
+        pmidx1 = pd.MultiIndex(
+            [["lama", "cow", "falcon"], ["speed", "weight", "length"]],
+            [[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 0, 0, 0, 1, 2, 0, 1, 2]],
+        )
+        pmidx2 = pd.MultiIndex(
+            [["koalas", "cow", "falcon"], ["speed", "weight", "length"]],
+            [[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 0, 0, 0, 1, 2, 0, 1, 2]],
+        )
+        kmidx1 = ps.from_pandas(pmidx1)
+        kmidx2 = ps.from_pandas(pmidx2)
+
+        self.assert_eq(
+            kmidx1.symmetric_difference(kmidx2).sort_values(),
+            pmidx1.symmetric_difference(pmidx2).sort_values(),
+        )
+
+        idx = ps.Index(["a", "b", "c"])
+        midx = ps.MultiIndex.from_tuples([("a", "x"), ("b", "y"), ("c", "z")])
+
+        # Literal prefix match; a trailing `*` would make the final "t" optional.
+        with self.assertRaisesRegex(NotImplementedError, "Doesn't support"):
+            idx.symmetric_difference(midx)
+
+    def test_multi_index_symmetric_difference(self):
+        # symmetric_difference() of two equal MultiIndexes agrees with pandas;
+        # combining a MultiIndex with a plain Index raises NotImplementedError.
+        idx = ps.Index(["a", "b", "c"])
+        midx = ps.MultiIndex.from_tuples([("a", "x"), ("b", "y"), ("c", "z")])
+        midx_ = ps.MultiIndex.from_tuples([("a", "x"), ("b", "y"), ("c", "z")])
+
+        self.assert_eq(
+            midx.symmetric_difference(midx_),
+            midx.to_pandas().symmetric_difference(midx_.to_pandas()),
+        )
+
+        # Literal prefix match; a trailing `*` would make the final "t" optional.
+        with self.assertRaisesRegex(NotImplementedError, "Doesn't support"):
+            midx.symmetric_difference(idx)
+
+    def test_missing(self):
+        # Every member registered on the MissingPandasLike*Index classes must
+        # raise PandasNotImplementedError, with a "not implemented" or
+        # "is deprecated" message, when reached through a real index of the
+        # corresponding type.
+        kdf = ps.DataFrame(
+            {
+                "a": [1, 2, 3],
+                "b": [4, 5, 6],
+                "c": pd.date_range("2011-01-01", freq="D", periods=3),
+                "d": pd.Categorical(["a", "b", "c"]),
+            }
+        )
+
+        # Each case: (class listing the missing members, set_index() keys that
+        # yield an index of that type, whether deprecated properties are
+        # verified). Deprecated properties are only verified for Index and
+        # MultiIndex.
+        cases = [
+            (MissingPandasLikeIndex, "a", True),
+            (MissingPandasLikeMultiIndex, ["a", "b"], True),
+            (MissingPandasLikeDatetimeIndex, "c", False),
+            (MissingPandasLikeCategoricalIndex, "d", False),
+        ]
+
+        for missing_class, keys, _ in cases:
+            self._check_missing_functions(missing_class, kdf.set_index(keys).index)
+
+        for missing_class, keys, check_deprecated in cases:
+            self._check_missing_properties(
+                missing_class, kdf.set_index(keys).index, check_deprecated
+            )
+
+    def _check_missing_functions(self, missing_class, kidx):
+        # Calls every unsupported/deprecated function listed on `missing_class`
+        # on `kidx` and checks the error type and message.
+        patterns = {
+            "unsupported_function": "method.*Index.*{}.*not implemented( yet\\.|\\. .+)",
+            "deprecated_function": "method.*Index.*{}.*is deprecated",
+        }
+        for name, func in inspect.getmembers(missing_class, inspect.isfunction):
+            pattern = patterns.get(func.__name__)
+            if pattern is None:
+                continue
+            # subTest keeps one failing member from hiding the remaining ones.
+            with self.subTest(missing_class=missing_class.__name__, function=name):
+                with self.assertRaisesRegex(PandasNotImplementedError, pattern.format(name)):
+                    getattr(kidx, name)()
+
+    def _check_missing_properties(self, missing_class, kidx, check_deprecated):
+        # Reads every unsupported (and, if `check_deprecated`, deprecated)
+        # property listed on `missing_class` from `kidx` and checks the error
+        # type and message.
+        patterns = {
+            "unsupported_property": "property.*Index.*{}.*not implemented( yet\\.|\\. .+)",
+        }
+        if check_deprecated:
+            patterns["deprecated_property"] = "property.*Index.*{}.*is deprecated"
+
+        members = inspect.getmembers(missing_class, lambda o: isinstance(o, property))
+        for name, prop in members:
+            pattern = patterns.get(prop.fget.__name__)
+            if pattern is None:
+                continue
+            with self.subTest(missing_class=missing_class.__name__, property=name):
+                with self.assertRaisesRegex(PandasNotImplementedError, pattern.format(name)):
+                    getattr(kidx, name)
+
+    def test_index_has_duplicates(self):
+        # `has_duplicates` on single-level indexes; each case is
+        # (index labels, index name, expected result).
+        cases = [
+            (("a", "b", "c"), None, False),
+            (("a", "a", "c"), "ks", True),
+            ((1, 3, 3), "ks", True),
+            ((1, 2, 3), None, False),
+        ]
+
+        for labels, index_name, expected in cases:
+            pandas_index = pd.Index(labels, name=index_name)
+            pandas_frame = pd.DataFrame({"a": [1, 2, 3]}, index=pandas_index)
+            spark_frame = ps.from_pandas(pandas_frame)
+
+            self.assertEqual(spark_frame.index.has_duplicates, expected)
+
+    def test_multiindex_has_duplicates(self):
+        # `has_duplicates` on two-level indexes; each case is
+        # (per-level label lists, expected result).
+        cases = [
+            ([list("abc"), list("edf")], False),
+            ([list("aac"), list("edf")], False),
+            ([list("aac"), list("eef")], True),
+            ([[1, 4, 4], [4, 6, 6]], True),
+        ]
+
+        for level_values, expected in cases:
+            pandas_frame = pd.DataFrame({"a": [1, 2, 3]}, index=level_values)
+            spark_frame = ps.from_pandas(pandas_frame)
+
+            self.assertEqual(spark_frame.index.has_duplicates, expected)
+
+    def test_multi_index_not_supported(self):
+        # any() and all() raise TypeError on a MultiIndex.
+        spark_frame = ps.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
+
+        with self.assertRaisesRegex(TypeError, "cannot perform any with this index type"):
+            midx = spark_frame.set_index(["a", "b"]).index
+            midx.any()
+
+        with self.assertRaisesRegex(TypeError, "cannot perform all with this index type"):
+            midx = spark_frame.set_index(["a", "b"]).index
+            midx.all()
+
+    def test_index_nlevels(self):
+        # A single-level index reports one level.
+        pandas_index = pd.Index(["a", "b", "c"])
+        pandas_frame = pd.DataFrame({"a": [1, 2, 3]}, index=pandas_index)
+        spark_index = ps.from_pandas(pandas_frame).index
+
+        self.assertEqual(spark_index.nlevels, 1)
+
+    def test_multiindex_nlevel(self):
+        # An index built from two label lists reports two levels.
+        level_values = [list("abc"), list("def")]
+        pandas_frame = pd.DataFrame({"a": [1, 2, 3]}, index=level_values)
+        spark_index = ps.from_pandas(pandas_frame).index
+
+        self.assertEqual(spark_index.nlevels, 2)
+
+    def test_multiindex_from_arrays(self):
+        # MultiIndex.from_arrays() builds the same index as pandas.
+        level_values = [["a", "a", "b", "b"], ["red", "blue", "red", "blue"]]
+
+        expected = pd.MultiIndex.from_arrays(level_values)
+        actual = ps.MultiIndex.from_arrays(level_values)
+
+        self.assert_eq(expected, actual)
+
+    def test_multiindex_swaplevel(self):
+        # Compares MultiIndex.swaplevel() against pandas for unnamed, fully
+        # named and partially named levels, addressed by position (including
+        # negative positions) or by name, then checks the errors for unknown
+        # levels.
+        pidx = pd.MultiIndex.from_arrays([["a", "b"], [1, 2]])
+        kidx = ps.from_pandas(pidx)
+        self.assert_eq(pidx.swaplevel(0, 1), kidx.swaplevel(0, 1))
+
+        pidx = pd.MultiIndex.from_arrays([["a", "b"], [1, 2]], names=["word", "number"])
+        kidx = ps.from_pandas(pidx)
+        self.assert_eq(pidx.swaplevel(0, 1), kidx.swaplevel(0, 1))
+
+        pidx = pd.MultiIndex.from_arrays([["a", "b"], [1, 2]], names=["word", None])
+        kidx = ps.from_pandas(pidx)
+        self.assert_eq(pidx.swaplevel(-2, -1), kidx.swaplevel(-2, -1))
+        self.assert_eq(pidx.swaplevel(0, 1), kidx.swaplevel(0, 1))
+        self.assert_eq(pidx.swaplevel("word", 1), kidx.swaplevel("word", 1))
+
+        # The error cases below use the last index (levels named "word" and None).
+        with self.assertRaisesRegex(IndexError, "Too many levels: Index"):
+            kidx.swaplevel(-3, "word")
+        with self.assertRaisesRegex(IndexError, "Too many levels: Index"):
+            kidx.swaplevel(0, 2)
+        with self.assertRaisesRegex(IndexError, "Too many levels: Index"):
+            kidx.swaplevel(0, -3)
+        with self.assertRaisesRegex(KeyError, "Level work not found"):
+            kidx.swaplevel(0, "work")
+
+    def test_multiindex_droplevel(self):
+        # Checks MultiIndex.droplevel(): first the errors for unknown levels or
+        # for removing every level, then agreement with pandas when dropping
+        # levels by position, by name, and by non-string name.
+        pidx = pd.MultiIndex.from_tuples(
+            [("a", "x", 1), ("b", "y", 2)], names=["level1", "level2", "level3"]
+        )
+        kidx = ps.from_pandas(pidx)
+        # Position out of range
+        with self.assertRaisesRegex(IndexError, "Too many levels: Index has only 3 levels, not 5"):
+            kidx.droplevel(4)
+
+        # Unknown level name
+        with self.assertRaisesRegex(KeyError, "Level level4 not found"):
+            kidx.droplevel("level4")
+
+        # Unknown tuple level name
+        with self.assertRaisesRegex(KeyError, "Level.*level3.*level4.*not found"):
+            kidx.droplevel([("level3", "level4")])
+
+        # Four entries requested on a three-level index
+        with self.assertRaisesRegex(
+            ValueError,
+            "Cannot remove 4 levels from an index with 3 levels: at least one "
+            "level must be left.",
+        ):
+            kidx.droplevel([0, 0, 1, 2])
+
+        # All three levels requested
+        with self.assertRaisesRegex(
+            ValueError,
+            "Cannot remove 3 levels from an index with 3 levels: at least one "
+            "level must be left.",
+        ):
+            kidx.droplevel([0, 1, 2])
+
+        # Valid drops, by position and by name, given as scalar, list or tuple
+        self.assert_eq(pidx.droplevel(0), kidx.droplevel(0))
+        self.assert_eq(pidx.droplevel([0, 1]), kidx.droplevel([0, 1]))
+        self.assert_eq(pidx.droplevel((0, 1)), kidx.droplevel((0, 1)))
+        self.assert_eq(pidx.droplevel([0, "level2"]), kidx.droplevel([0, "level2"]))
+        self.assert_eq(pidx.droplevel((0, "level2")), kidx.droplevel((0, "level2")))
+
+        # non-string names
+        pidx = pd.MultiIndex.from_tuples([("a", "x", 1), ("b", "y", 2)], names=[1.0, 2.0, 3.0])
+        kidx = ps.from_pandas(pidx)
+        self.assert_eq(pidx.droplevel(1.0), kidx.droplevel(1.0))
+        self.assert_eq(pidx.droplevel([0, 2.0]), kidx.droplevel([0, 2.0]))
+
+    def test_index_fillna(self):
+        # fillna() with a scalar agrees with pandas (approximate comparison),
+        # also on a renamed index; a list fill value raises TypeError.
+        pandas_index = pd.Index([1, 2, None])
+        spark_index = ps.from_pandas(pandas_index)
+
+        self.assert_eq(pandas_index.fillna(0), spark_index.fillna(0), almost=True)
+
+        named_pandas = pandas_index.rename("name")
+        named_spark = spark_index.rename("name")
+        self.assert_eq(named_pandas.fillna(0), named_spark.fillna(0), almost=True)
+
+        with self.assertRaisesRegex(TypeError, "Unsupported type list"):
+            spark_index.fillna([1, 2])
+
+    def test_index_drop(self):
+        pidx = pd.Index([1, 2, 3])
+        kidx = ps.from_pandas(pidx)
+
+        self.assert_eq(pidx.drop(1), kidx.drop(1))
+        self.assert_eq(pidx.drop([1, 2]), kidx.drop([1, 2]))
+        self.assert_eq((pidx + 1).drop([2, 3]), (kidx + 1).drop([2, 3]))
+
+    def test_multiindex_drop(self):
+        pidx = pd.MultiIndex.from_tuples(
+            [("a", "x"), ("b", "y"), ("c", "z")], names=["level1", "level2"]
+        )
+        kidx = ps.from_pandas(pidx)
+        self.assert_eq(pidx.drop("a"), kidx.drop("a"))
+        self.assert_eq(pidx.drop(["a", "b"]), kidx.drop(["a", "b"]))
+        self.assert_eq(pidx.drop(["x", "y"], level=1), kidx.drop(["x", "y"], level=1))
+        self.assert_eq(pidx.drop(["x", "y"], level="level2"), kidx.drop(["x", "y"], level="level2"))
+
+        pidx.names = ["lv1", "lv2"]
+        kidx.names = ["lv1", "lv2"]
+        self.assert_eq(pidx.drop(["x", "y"], level="lv2"), kidx.drop(["x", "y"], level="lv2"))
+
+        self.assertRaises(IndexError, lambda: kidx.drop(["a", "b"], level=2))
+        self.assertRaises(KeyError, lambda: kidx.drop(["a", "b"], level="level"))
+
+        kidx.names = ["lv", "lv"]
+        self.assertRaises(ValueError, lambda: kidx.drop(["x", "y"], level="lv"))
+
+    def test_sort_values(self):
+        pidx = pd.Index([-10, -100, 200, 100])
+        kidx = ps.from_pandas(pidx)
+
+        self.assert_eq(pidx.sort_values(), kidx.sort_values())
+        self.assert_eq(pidx.sort_values(ascending=False), kidx.sort_values(ascending=False))
+
+        pidx.name = "koalas"
+        kidx.name = "koalas"
+
+        self.assert_eq(pidx.sort_values(), kidx.sort_values())
+        self.assert_eq(pidx.sort_values(ascending=False), kidx.sort_values(ascending=False))
+
+        pidx = pd.MultiIndex.from_tuples([("a", "x", 1), ("b", "y", 2), ("c", "z", 3)])
+        kidx = ps.from_pandas(pidx)
+
+        pidx.names = ["hello", "koalas", "goodbye"]
+        kidx.names = ["hello", "koalas", "goodbye"]
+
+        self.assert_eq(pidx.sort_values(), kidx.sort_values())
+        self.assert_eq(pidx.sort_values(ascending=False), kidx.sort_values(ascending=False))
+
+    def test_index_drop_duplicates(self):
+        pidx = pd.Index([1, 1, 2])
+        kidx = ps.from_pandas(pidx)
+        self.assert_eq(pidx.drop_duplicates().sort_values(), kidx.drop_duplicates().sort_values())
+
+        pidx = pd.MultiIndex.from_tuples([(1, 1), (1, 1), (2, 2)], names=["level1", "level2"])
+        kidx = ps.from_pandas(pidx)
+        self.assert_eq(pidx.drop_duplicates().sort_values(), kidx.drop_duplicates().sort_values())
+
+    def test_index_sort(self):
+        idx = ps.Index([1, 2, 3, 4, 5])
+        midx = ps.MultiIndex.from_tuples([("a", "x", 1), ("b", "y", 2)])
+
+        with self.assertRaisesRegex(
+            TypeError, "cannot sort an Index object in-place, use sort_values instead"
+        ):
+            idx.sort()
+        with self.assertRaisesRegex(
+            TypeError, "cannot sort an Index object in-place, use sort_values instead"
+        ):
+            midx.sort()
+
+    def test_multiindex_isna(self):
+        kidx = ps.MultiIndex.from_tuples([("a", "x", 1), ("b", "y", 2), ("c", "z", 3)])
+
+        with self.assertRaisesRegex(NotImplementedError, "isna is not defined for MultiIndex"):
+            kidx.isna()
+
+        with self.assertRaisesRegex(NotImplementedError, "isna is not defined for MultiIndex"):
+            kidx.isnull()
+
+        with self.assertRaisesRegex(NotImplementedError, "notna is not defined for MultiIndex"):
+            kidx.notna()
+
+        with self.assertRaisesRegex(NotImplementedError, "notna is not defined for MultiIndex"):
+            kidx.notnull()
+
+    def test_index_nunique(self):
+        pidx = pd.Index([1, 1, 2, None])
+        kidx = ps.from_pandas(pidx)
+
+        self.assert_eq(pidx.nunique(), kidx.nunique())
+        self.assert_eq(pidx.nunique(dropna=True), kidx.nunique(dropna=True))
+
+    def test_multiindex_nunique(self):
+        kidx = ps.MultiIndex.from_tuples([("a", "x", 1), ("b", "y", 2), ("c", "z", 3)])
+        with self.assertRaisesRegex(NotImplementedError, "notna is not defined for MultiIndex"):
+            kidx.notnull()
+
+    def test_multiindex_rename(self):
+        pidx = pd.MultiIndex.from_tuples([("a", "x", 1), ("b", "y", 2), ("c", "z", 3)])
+        kidx = ps.from_pandas(pidx)
+
+        pidx = pidx.rename(list("ABC"))
+        kidx = kidx.rename(list("ABC"))
+        self.assert_eq(pidx, kidx)
+
+        pidx = pidx.rename(["my", "name", "is"])
+        kidx = kidx.rename(["my", "name", "is"])
+        self.assert_eq(pidx, kidx)
+
+    def test_multiindex_set_names(self):
+        pidx = pd.MultiIndex.from_tuples([("a", "x", 1), ("b", "y", 2), ("c", "z", 3)])
+        kidx = ps.from_pandas(pidx)
+
+        pidx = pidx.set_names(["set", "new", "names"])
+        kidx = kidx.set_names(["set", "new", "names"])
+        self.assert_eq(pidx, kidx)
+
+        pidx.set_names(["set", "new", "names"], inplace=True)
+        kidx.set_names(["set", "new", "names"], inplace=True)
+        self.assert_eq(pidx, kidx)
+
+        pidx = pidx.set_names("first", level=0)
+        kidx = kidx.set_names("first", level=0)
+        self.assert_eq(pidx, kidx)
+
+        pidx = pidx.set_names("second", level=1)
+        kidx = kidx.set_names("second", level=1)
+        self.assert_eq(pidx, kidx)
+
+        pidx = pidx.set_names("third", level=2)
+        kidx = kidx.set_names("third", level=2)
+        self.assert_eq(pidx, kidx)
+
+        pidx.set_names("first", level=0, inplace=True)
+        kidx.set_names("first", level=0, inplace=True)
+        self.assert_eq(pidx, kidx)
+
+        pidx.set_names("second", level=1, inplace=True)
+        kidx.set_names("second", level=1, inplace=True)
+        self.assert_eq(pidx, kidx)
+
+        pidx.set_names("third", level=2, inplace=True)
+        kidx.set_names("third", level=2, inplace=True)
+        self.assert_eq(pidx, kidx)
+
+    def test_multiindex_from_tuples(self):
+        tuples = [(1, "red"), (1, "blue"), (2, "red"), (2, "blue")]
+        pidx = pd.MultiIndex.from_tuples(tuples)
+        kidx = ps.MultiIndex.from_tuples(tuples)
+
+        self.assert_eq(pidx, kidx)
+
+    def test_multiindex_from_product(self):
+        iterables = [[0, 1, 2], ["green", "purple"]]
+        pidx = pd.MultiIndex.from_product(iterables)
+        kidx = ps.MultiIndex.from_product(iterables)
+
+        self.assert_eq(pidx, kidx)
+
+    def test_multiindex_tuple_column_name(self):
+        column_labels = pd.MultiIndex.from_tuples([("a", "x"), ("a", "y"), ("b", "z")])
+        pdf = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=column_labels)
+        pdf.set_index(("a", "x"), append=True, inplace=True)
+        kdf = ps.from_pandas(pdf)
+        self.assert_eq(pdf, kdf)
+
+    def test_len(self):
+        pidx = pd.Index(range(10000))
+        kidx = ps.from_pandas(pidx)
+
+        self.assert_eq(len(pidx), len(kidx))
+
+        pidx = pd.MultiIndex.from_tuples([("a", "x", 1), ("b", "y", 2), ("c", "z", 3)])
+        kidx = ps.MultiIndex.from_tuples([("a", "x", 1), ("b", "y", 2), ("c", "z", 3)])
+
+        self.assert_eq(len(pidx), len(kidx))
+
+    def test_delete(self):
+        pidx = pd.Index([10, 9, 8, 7, 6, 7, 8, 9, 10])
+        kidx = ps.from_pandas(pidx)
+
+        self.assert_eq(pidx.delete(8).sort_values(), kidx.delete(8).sort_values())
+        self.assert_eq(pidx.delete(-9).sort_values(), kidx.delete(-9).sort_values())
+        self.assert_eq(pidx.delete([-9, 0, 8]).sort_values(), kidx.delete([-9, 0, 8]).sort_values())
+
+        with self.assertRaisesRegex(IndexError, "index 9 is out of bounds for axis 0 with size 9"):
+            kidx.delete([0, 9])
+        with self.assertRaisesRegex(
+            IndexError, "index -10 is out of bounds for axis 0 with size 9"
+        ):
+            kidx.delete([-10, 0])
+        with self.assertRaisesRegex(IndexError, "index 9 is out of bounds for axis 0 with size 9"):
+            kidx.delete(9)
+        with self.assertRaisesRegex(
+            IndexError, "index -10 is out of bounds for axis 0 with size 9"
+        ):
+            kidx.delete(-10)
+
+        # MultiIndex
+        pidx = pd.MultiIndex.from_tuples([("a", "x", 1), ("b", "y", 2), ("c", "z", 3)])
+        kidx = ps.MultiIndex.from_tuples([("a", "x", 1), ("b", "y", 2), ("c", "z", 3)])
+
+        self.assert_eq(pidx.delete(2).sort_values(), kidx.delete(2).sort_values())
+        self.assert_eq(pidx.delete(-3).sort_values(), kidx.delete(-3).sort_values())
+        self.assert_eq(pidx.delete([-3, 0, 2]).sort_values(), kidx.delete([-3, 0, 2]).sort_values())
+
+        with self.assertRaisesRegex(IndexError, "index 3 is out of bounds for axis 0 with size 3"):
+            kidx.delete([0, 3])
+        with self.assertRaisesRegex(IndexError, "index -4 is out of bounds for axis 0 with size 3"):
+            kidx.delete([-4, 0])
+        with self.assertRaisesRegex(IndexError, "index 3 is out of bounds for axis 0 with size 3"):
+            kidx.delete(3)
+        with self.assertRaisesRegex(IndexError, "index -4 is out of bounds for axis 0 with size 3"):
+            kidx.delete(-4)
+
+    def test_append(self):
+        # Index
+        pidx = pd.Index(range(10000))
+        kidx = ps.from_pandas(pidx)
+
+        self.assert_eq(pidx.append(pidx), kidx.append(kidx))
+
+        # Index with name
+        pidx1 = pd.Index(range(10000), name="a")
+        pidx2 = pd.Index(range(10000), name="b")
+        kidx1 = ps.from_pandas(pidx1)
+        kidx2 = ps.from_pandas(pidx2)
+
+        self.assert_eq(pidx1.append(pidx2), kidx1.append(kidx2))
+
+        self.assert_eq(pidx2.append(pidx1), kidx2.append(kidx1))
+
+        # Index from DataFrame
+        pdf1 = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["a", "b", "c"])
+        pdf2 = pd.DataFrame({"a": [7, 8, 9], "d": [10, 11, 12]}, index=["x", "y", "z"])
+        kdf1 = ps.from_pandas(pdf1)
+        kdf2 = ps.from_pandas(pdf2)
+
+        pidx1 = pdf1.set_index("a").index
+        pidx2 = pdf2.set_index("d").index
+        kidx1 = kdf1.set_index("a").index
+        kidx2 = kdf2.set_index("d").index
+
+        self.assert_eq(pidx1.append(pidx2), kidx1.append(kidx2))
+
+        self.assert_eq(pidx2.append(pidx1), kidx2.append(kidx1))
+
+        # Index from DataFrame with MultiIndex columns
+        pdf1 = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+        pdf2 = pd.DataFrame({"a": [7, 8, 9], "d": [10, 11, 12]})
+        pdf1.columns = pd.MultiIndex.from_tuples([("a", "x"), ("b", "y")])
+        pdf2.columns = pd.MultiIndex.from_tuples([("a", "x"), ("d", "y")])
+        kdf1 = ps.from_pandas(pdf1)
+        kdf2 = ps.from_pandas(pdf2)
+
+        pidx1 = pdf1.set_index(("a", "x")).index
+        pidx2 = pdf2.set_index(("d", "y")).index
+        kidx1 = kdf1.set_index(("a", "x")).index
+        kidx2 = kdf2.set_index(("d", "y")).index
+
+        self.assert_eq(pidx1.append(pidx2), kidx1.append(kidx2))
+
+        self.assert_eq(pidx2.append(pidx1), kidx2.append(kidx1))
+
+        # MultiIndex
+        pmidx = pd.MultiIndex.from_tuples([("a", "x", 1), ("b", "y", 2), ("c", "z", 3)])
+        kmidx = ps.from_pandas(pmidx)
+
+        self.assert_eq(pmidx.append(pmidx), kmidx.append(kmidx))
+
+        # MultiIndex with names
+        pmidx1 = pd.MultiIndex.from_tuples(
+            [("a", "x", 1), ("b", "y", 2), ("c", "z", 3)], names=["x", "y", "z"]
+        )
+        pmidx2 = pd.MultiIndex.from_tuples(
+            [("a", "x", 1), ("b", "y", 2), ("c", "z", 3)], names=["p", "q", "r"]
+        )
+        kmidx1 = ps.from_pandas(pmidx1)
+        kmidx2 = ps.from_pandas(pmidx2)
+
+        self.assert_eq(pmidx1.append(pmidx2), kmidx1.append(kmidx2))
+
+        self.assert_eq(pmidx2.append(pmidx1), kmidx2.append(kmidx1))
+
+        self.assert_eq(pmidx1.append(pmidx2).names, kmidx1.append(kmidx2).names)
+
+        self.assert_eq(pmidx1.append(pmidx2).names, kmidx1.append(kmidx2).names)
+
+        # append() between Index & MultiIndex is currently not supported
+        expected_error_message = r"append\(\) between Index & MultiIndex currently is not supported"
+        with self.assertRaisesRegex(NotImplementedError, expected_error_message):
+            kidx.append(kmidx)
+        with self.assertRaisesRegex(NotImplementedError, expected_error_message):
+            kmidx.append(kidx)
+
+    def test_argmin(self):
+        pidx = pd.Index([100, 50, 10, 20, 30, 60, 0, 50, 0, 100, 100, 100, 20, 0, 0])
+        kidx = ps.from_pandas(pidx)
+
+        self.assert_eq(pidx.argmin(), kidx.argmin())
+
+        # MultiIndex
+        kidx = ps.MultiIndex.from_tuples([("a", "x", 1), ("b", "y", 2), ("c", "z", 3)])
+        with self.assertRaisesRegex(
+            TypeError, "reduction operation 'argmin' not allowed for this dtype"
+        ):
+            kidx.argmin()
+
+    def test_argmax(self):
+        pidx = pd.Index([100, 50, 10, 20, 30, 60, 0, 50, 0, 100, 100, 100, 20, 0, 0])
+        kidx = ps.from_pandas(pidx)
+
+        self.assert_eq(pidx.argmax(), kidx.argmax())
+
+        # MultiIndex
+        kidx = ps.MultiIndex.from_tuples([("a", "x", 1), ("b", "y", 2), ("c", "z", 3)])
+        with self.assertRaisesRegex(
+            TypeError, "reduction operation 'argmax' not allowed for this dtype"
+        ):
+            kidx.argmax()
+
+    def test_min(self):
+        pidx = pd.Index([3, 2, 1])
+        kidx = ps.from_pandas(pidx)
+
+        self.assert_eq(pidx.min(), kidx.min())
+
+        # MultiIndex
+        pmidx = pd.MultiIndex.from_tuples([("a", "x", 1), ("b", "y", 2)])
+        kmidx = ps.from_pandas(pmidx)
+
+        self.assert_eq(pmidx.min(), kmidx.min())
+
+        pidx = pd.DatetimeIndex(["2021-02-01", "2021-01-01", "2021-04-01", "2021-03-01"])
+        kidx = ps.from_pandas(pidx)
+
+        self.assert_eq(pidx.min(), kidx.min())
+
+    def test_max(self):
+        pidx = pd.Index([3, 2, 1])
+        kidx = ps.from_pandas(pidx)
+
+        self.assert_eq(pidx.max(), kidx.max())
+
+        # MultiIndex
+        pmidx = pd.MultiIndex.from_tuples([("a", "x", 1), ("b", "y", 2)])
+        kmidx = ps.from_pandas(pmidx)
+
+        self.assert_eq(pmidx.max(), kmidx.max())
+
+        pidx = pd.DatetimeIndex(["2021-02-01", "2021-01-01", "2021-04-01", "2021-03-01"])
+        kidx = ps.from_pandas(pidx)
+
+        self.assert_eq(pidx.max(), kidx.max())
+
+    def test_monotonic(self):
+        # Test is_monotonic_increasing & is_monotonic_decreasing for MultiIndex.
+        # Since the behavior for null values was changed in pandas >= 1.0.0,
+        # several cases are tested differently.
+        datas = []
+
+        # increasing / decreasing ordered each index level with string
+        datas.append([("w", "a"), ("x", "b"), ("y", "c"), ("z", "d")])
+        datas.append([("w", "d"), ("x", "c"), ("y", "b"), ("z", "a")])
+        datas.append([("z", "a"), ("y", "b"), ("x", "c"), ("w", "d")])
+        datas.append([("z", "d"), ("y", "c"), ("x", "b"), ("w", "a")])
+        # mixed order each index level with string
+        datas.append([("z", "a"), ("x", "b"), ("y", "c"), ("w", "d")])
+        datas.append([("z", "a"), ("y", "c"), ("x", "b"), ("w", "d")])
+
+        # increasing / decreasing ordered each index level with integer
+        datas.append([(1, 100), (2, 200), (3, 300), (4, 400), (5, 500)])
+        datas.append([(1, 500), (2, 400), (3, 300), (4, 200), (5, 100)])
+        datas.append([(5, 100), (4, 200), (3, 300), (2, 400), (1, 500)])
+        datas.append([(5, 500), (4, 400), (3, 300), (2, 200), (1, 100)])
+        # mixed order each index level with integer
+        datas.append([(1, 500), (3, 400), (2, 300), (4, 200), (5, 100)])
+        datas.append([(1, 100), (2, 300), (3, 200), (4, 400), (5, 500)])
+
+        # integer / negative mixed tests
+        datas.append([("a", -500), ("b", -400), ("c", -300), ("d", -200), ("e", -100)])
+        datas.append([("e", -500), ("d", -400), ("c", -300), ("b", -200), ("a", -100)])
+        datas.append([(-5, "a"), (-4, "b"), (-3, "c"), (-2, "d"), (-1, "e")])
+        datas.append([(-5, "e"), (-4, "d"), (-3, "c"), (-2, "b"), (-1, "a")])
+        datas.append([(-5, "e"), (-3, "d"), (-2, "c"), (-4, "b"), (-1, "a")])
+        datas.append([(-5, "e"), (-4, "c"), (-3, "b"), (-2, "d"), (-1, "a")])
+
+        # boolean type tests
+        datas.append([(True, True), (True, True)])
+        datas.append([(True, True), (True, False)])
+        datas.append([(True, False), (True, True)])
+        datas.append([(False, True), (False, True)])
+        datas.append([(False, True), (False, False)])
+        datas.append([(False, False), (False, True)])
+        datas.append([(True, True), (False, True)])
+        datas.append([(True, True), (False, False)])
+        datas.append([(True, False), (False, True)])
+        datas.append([(False, True), (True, True)])
+        datas.append([(False, True), (True, False)])
+        datas.append([(False, False), (True, True)])
+
+        # duplicated index value tests
+        datas.append([("x", "d"), ("y", "c"), ("y", "b"), ("z", "a")])
+        datas.append([("x", "d"), ("y", "b"), ("y", "c"), ("z", "a")])
+
+        # more depth tests
+        datas.append([("x", "d", "o"), ("y", "c", "p"), ("y", "c", "q"), ("z", "a", "r")])
+        datas.append([("x", "d", "o"), ("y", "c", "q"), ("y", "c", "p"), ("z", "a", "r")])
+
+        # None type tests (None type is treated as False from pandas >= 1.1.4)
+        # Refer to https://github.com/pandas-dev/pandas/issues/37220
+        datas.append([(1, 100), (2, 200), (None, 300), (4, 400), (5, 500)])
+        datas.append([(1, 100), (2, 200), (None, None), (4, 400), (5, 500)])
+        datas.append([("x", "d"), ("y", "c"), ("y", None), ("z", "a")])
+        datas.append([("x", "d"), ("y", "c"), ("y", "b"), (None, "a")])
+        datas.append([("x", "d"), ("y", "b"), ("y", "c"), (None, "a")])
+        datas.append([("x", "d", "o"), ("y", "c", "p"), ("y", "c", None), ("z", "a", "r")])
+
+        for data in datas:
+            with self.subTest(data=data):
+                pmidx = pd.MultiIndex.from_tuples(data)
+                kmidx = ps.from_pandas(pmidx)
+                self.assert_eq(kmidx.is_monotonic_increasing, pmidx.is_monotonic_increasing)
+                self.assert_eq(kmidx.is_monotonic_decreasing, pmidx.is_monotonic_decreasing)
+
+        # The datas below return different results depending on the pandas version,
+        # because the handling of null values was changed in pandas >= 1.1.4.
+        # Since Koalas follows the latest pandas, all of them should return `False`.
+        datas = []
+        datas.append([(1, 100), (2, 200), (3, None), (4, 400), (5, 500)])
+        datas.append([(1, None), (2, 200), (3, 300), (4, 400), (5, 500)])
+        datas.append([(1, 100), (2, 200), (3, 300), (4, 400), (5, None)])
+        datas.append([(False, None), (True, True)])
+        datas.append([(None, False), (True, True)])
+        datas.append([(False, False), (True, None)])
+        datas.append([(False, False), (None, True)])
+        datas.append([("x", "d"), ("y", None), ("y", None), ("z", "a")])
+        datas.append([("x", "d", "o"), ("y", "c", None), ("y", "c", None), ("z", "a", "r")])
+        datas.append([(1, 100), (2, 200), (3, 300), (4, 400), (None, 500)])
+        datas.append([(1, 100), (2, 200), (3, 300), (4, 400), (None, None)])
+        datas.append([(5, 100), (4, 200), (3, None), (2, 400), (1, 500)])
+        datas.append([(5, None), (4, 200), (3, 300), (2, 400), (1, 500)])
+        datas.append([(5, 100), (4, 200), (3, None), (2, 400), (1, 500)])
+        datas.append([(5, 100), (4, 200), (3, 300), (2, 400), (1, None)])
+        datas.append([(True, None), (True, True)])
+        datas.append([(None, True), (True, True)])
+        datas.append([(True, True), (None, True)])
+        datas.append([(True, True), (True, None)])
+        datas.append([(None, 100), (2, 200), (3, 300), (4, 400), (5, 500)])
+        datas.append([(None, None), (2, 200), (3, 300), (4, 400), (5, 500)])
+        datas.append([("x", "d"), ("y", None), ("y", "c"), ("z", "a")])
+        datas.append([("x", "d", "o"), ("y", "c", None), ("y", "c", "q"), ("z", "a", "r")])
+
+        for data in datas:
+            with self.subTest(data=data):
+                pmidx = pd.MultiIndex.from_tuples(data)
+                kmidx = ps.from_pandas(pmidx)
+                if LooseVersion(pd.__version__) < LooseVersion("1.1.4"):
+                    self.assert_eq(kmidx.is_monotonic_increasing, False)
+                    self.assert_eq(kmidx.is_monotonic_decreasing, False)
+                else:
+                    self.assert_eq(kmidx.is_monotonic_increasing, pmidx.is_monotonic_increasing)
+                    self.assert_eq(kmidx.is_monotonic_decreasing, pmidx.is_monotonic_decreasing)
+
+        # The datas below are tested another way since they cannot be arguments for
+        # `MultiIndex.from_tuples` in pandas >= 1.1.0.
+        # See https://github.com/databricks/koalas/pull/1688#issuecomment-667156560 for details.
+        if LooseVersion(pd.__version__) < LooseVersion("1.1.0"):
+            pmidx = pd.MultiIndex.from_tuples(
+                [(-5, None), (-4, None), (-3, None), (-2, None), (-1, None)]
+            )
+            kmidx = ps.from_pandas(pmidx)
+            self.assert_eq(kmidx.is_monotonic_increasing, False)
+            self.assert_eq(kmidx.is_monotonic_decreasing, False)
+
+            pmidx = pd.MultiIndex.from_tuples(
+                [(None, "e"), (None, "c"), (None, "b"), (None, "d"), (None, "a")]
+            )
+            kmidx = ps.from_pandas(pmidx)
+            self.assert_eq(kmidx.is_monotonic_increasing, False)
+            self.assert_eq(kmidx.is_monotonic_decreasing, False)
+
+            pmidx = pd.MultiIndex.from_tuples(
+                [(None, None), (None, None), (None, None), (None, None), (None, None)]
+            )
+            kmidx = ps.from_pandas(pmidx)
+            self.assert_eq(kmidx.is_monotonic_increasing, False)
+            self.assert_eq(kmidx.is_monotonic_decreasing, False)
+
+            pmidx = pd.MultiIndex.from_tuples([(None, None)])
+            kmidx = ps.from_pandas(pmidx)
+            self.assert_eq(kmidx.is_monotonic_increasing, False)
+            self.assert_eq(kmidx.is_monotonic_decreasing, False)
+        else:
+            # Disable the test cases below because pandas returns `True` or `False` randomly.

Review comment:
       Please refer to https://github.com/apache/spark/runs/2329674423 for the test failure.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org