Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2022/05/12 04:25:43 UTC

[GitHub] [spark] Yikun commented on a diff in pull request #36509: [SPARK-38961][PYTHON][DOCS] Enhance to automatically generate the pandas API support list

Yikun commented on code in PR #36509:
URL: https://github.com/apache/spark/pull/36509#discussion_r870928853


##########
python/pyspark/pandas/supported_api_gen.py:
##########
@@ -0,0 +1,356 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Generate 'Supported pandas APIs' documentation file
+"""
+import os
+from enum import Enum, unique
+from inspect import getmembers, isclass, isfunction, signature
+from typing import Any, Callable, Dict, List, Set, TextIO, Tuple
+
+import pyspark.pandas as ps
+import pyspark.pandas.groupby as psg
+import pyspark.pandas.window as psw
+from pyspark.find_spark_home import _find_spark_home
+
+import pandas as pd
+import pandas.core.groupby as pdg
+import pandas.core.window as pdw
+
+MAX_MISSING_PARAMS_SIZE = 5
+COMMON_PARAMETER_SET = {"kwargs", "args", "cls"}
+MODULE_GROUP_MATCH = [(pd, ps), (pdw, psw), (pdg, psg)]
+
+SPARK_HOME = _find_spark_home()
+TARGET_RST_FILE = os.path.join(
+    SPARK_HOME, "python/docs/source/user_guide/pandas_on_spark/supported_pandas_api.rst"
+)
+RST_HEADER = """
+=====================
+Supported pandas APIs
+=====================
+
+.. currentmodule:: pyspark.pandas
+
+The following table shows the pandas APIs that are implemented or not implemented in pandas API on
+Spark.
+
+Some pandas APIs do not support the full set of parameters, so the third column shows the missing
+parameters for each API.
+
+'Y' in the second column means it's implemented including all of its parameters.
+'N' means it's not implemented yet.
+'P' means it's partially implemented, with some parameters missing.
+
+If there is a non-implemented pandas API or parameter you want, you can create an `Apache Spark
+JIRA <https://issues.apache.org/jira/projects/SPARK/summary>`__ to request it or to contribute it
+yourself.
+
+The API list is updated based on the `latest pandas official API
+reference <https://pandas.pydata.org/docs/reference/index.html#>`__.
+
+All implemented APIs listed here are distributed except the ones that require local
+computation by design. For example, `DataFrame.to_numpy() <https://spark.apache.org
+/docs/latest/api/python/reference/pyspark.pandas/api/pyspark.pandas.DataFrame.
+to_numpy.html>`__ requires collecting the data to the driver side.
+
+"""
+
+
+@unique
+class Implemented(Enum):
+    IMPLEMENTED = "Y"
+    NOT_IMPLEMENTED = "N"
+    PARTIALLY_IMPLEMENTED = "P"
+
+
+class SupportedStatus:
+    """
+    SupportedStatus class that defines a supported status for a specific pandas API
+    """
+
+    def __init__(self, implemented: str, missing: str = ""):
+        self.implemented = implemented
+        self.missing = missing
+
+
+def generate_supported_api() -> None:
+    """
+    Generate supported APIs status dictionary.
+
+    Write supported APIs documentation.
+    """
+    all_supported_status: Dict[Tuple[str, str], Dict[str, SupportedStatus]] = {}
+    for pd_module_group, ps_module_group in MODULE_GROUP_MATCH:
+        pd_modules = get_pd_modules(pd_module_group)
+        update_all_supported_status(
+            all_supported_status, pd_modules, pd_module_group, ps_module_group
+        )
+    write_rst(all_supported_status)
+
+
+def create_supported_by_module(
+    module_name: str, pd_module_group: Any, ps_module_group: Any
+) -> Dict[str, SupportedStatus]:
+    """
+    Retrieves the supported status of a pandas module.
+
+    Parameters
+    ----------
+    module_name : str
+        Class name that exists in the path of the module.
+    pd_module_group : Any
+        Specific path of importable pandas module.
+    ps_module_group: Any
+        Specific path of importable pyspark.pandas module.
+    """
+    pd_module = getattr(pd_module_group, module_name) if module_name else pd_module_group
+    try:
+        ps_module = getattr(ps_module_group, module_name) if module_name else ps_module_group
+    except AttributeError:
+        # module not implemented
+        return {}
+
+    pd_funcs = dict([m for m in getmembers(pd_module, isfunction) if not m[0].startswith("_")])
+    if not pd_funcs:
+        return {}
+
+    ps_funcs = dict([m for m in getmembers(ps_module, isfunction) if not m[0].startswith("_")])
+
+    return organize_by_implementation_status(
+        module_name, pd_funcs, ps_funcs, pd_module_group, ps_module_group
+    )
+
+
+def organize_by_implementation_status(
+    module_name: str,
+    pd_funcs: Dict[str, Callable],
+    ps_funcs: Dict[str, Callable],
+    pd_module_group: Any,
+    ps_module_group: Any,
+) -> Dict[str, SupportedStatus]:
+    """
+    Check the implementation status and parameters of both modules.
+
+    Parameters
+    ----------
+    module_name : str
+        Class name that exists in the path of the module.
+    pd_funcs: Dict[str, Callable]
+        function name and function object mapping of pandas module.
+    ps_funcs: Dict[str, Callable]
+        function name and function object mapping of pyspark.pandas module.
+    pd_module_group : Any
+        Specific path of importable pandas module.
+    ps_module_group: Any
+        Specific path of importable pyspark.pandas module.
+    """
+    pd_dict = {}
+    for pd_func_name, pd_func in pd_funcs.items():
+        ps_func = ps_funcs.get(pd_func_name)
+        if ps_func:
+            missing_set = (
+                set(signature(pd_func).parameters)
+                - set(signature(ps_func).parameters)
+                - COMMON_PARAMETER_SET
+            )
+            if missing_set:
+                # partially implemented
+                pd_dict[pd_func_name] = SupportedStatus(
+                    Implemented.PARTIALLY_IMPLEMENTED.value,
+                    transform_missing(
+                        module_name,
+                        pd_func_name,
+                        missing_set,
+                        pd_module_group.__name__,
+                        ps_module_group.__name__,
+                    ),
+                )
+            else:
+                # implemented including all of its parameters
+                pd_dict[pd_func_name] = SupportedStatus(Implemented.IMPLEMENTED.value)
+        else:
+            # not implemented yet
+            pd_dict[pd_func_name] = SupportedStatus(Implemented.NOT_IMPLEMENTED.value)
+    return pd_dict
+
+
+def transform_missing(
+    module_name: str,
+    pd_func_name: str,
+    missing_set: Set[str],
+    pd_module_path: str,
+    ps_module_path: str,
+) -> str:
+    """
+    Transform missing parameters into table information string.
+
+    Parameters
+    ----------
+    module_name : str
+        Class name that exists in the path of the module.
+    pd_func_name : str
+        Name of pandas API.
+    missing_set : Set[str]
+        A set of parameters not yet implemented.
+    pd_module_path : str
+        Path string of pandas module.
+    ps_module_path : str
+        Path string of pyspark.pandas module.
+
+    Examples
+    --------
+    >>> transform_missing("DataFrame", "add", {"axis", "fill_value", "level"},
+    ...                     "pandas.DataFrame", "pyspark.pandas.DataFrame")
+    '``axis`` , ``fill_value`` , ``level``'
+    """
+    missing_str = " , ".join(f"``{x}``" for x in sorted(missing_set)[:MAX_MISSING_PARAMS_SIZE])
+    if len(missing_set) > MAX_MISSING_PARAMS_SIZE:
+        module_dot_func = f"{module_name}.{pd_func_name}" if module_name else pd_func_name
+        additional_str = (
+            " and more. See the "
+            f"`{pd_module_path}.{module_dot_func} "
+            "<https://pandas.pydata.org/docs/reference/api/"
+            f"{pd_module_path}.{module_dot_func}.html>`__ and "
+            f"`{ps_module_path}.{module_dot_func} "
+            "<https://spark.apache.org/docs/latest/api/python/reference/pyspark.pandas/api/"
+            f"{ps_module_path}.{module_dot_func}.html>`__ for detail."
+        )
+        missing_str += additional_str
+    return missing_str
+
+
+def get_pd_modules(pd_module_group: Any) -> List[str]:
+    """
+    Returns a sorted list of pandas member names from the pandas module path.
+
+    Parameters
+    ----------
+    pd_module_group : Any
+        Specific path of importable pandas module.
+    """
+    return sorted([m[0] for m in getmembers(pd_module_group, isclass) if not m[0].startswith("_")])
+
+
+def update_all_supported_status(
+    all_supported_status: Dict[Tuple[str, str], Dict[str, SupportedStatus]],
+    pd_modules: List[str],
+    pd_module_group: Any,
+    ps_module_group: Any,
+) -> None:
+    """
+    Updates supported status across multiple module paths.
+
+    Parameters
+    ----------
+    all_supported_status: Dict[Tuple[str, str], Dict[str, SupportedStatus]]
+        Data that stores the supported status across multiple module paths.
+    pd_modules: List[str]
+        Name list of pandas modules.
+    pd_module_group : Any
+        Specific path of importable pandas module.
+    ps_module_group: Any
+        Specific path of importable pyspark.pandas module.
+    """
+    pd_modules += [""]  # for General Function APIs
+    for module_name in pd_modules:
+        supported_status = create_supported_by_module(module_name, pd_module_group, ps_module_group)
+        if supported_status:
+            all_supported_status[(module_name, ps_module_group.__name__)] = supported_status
+
+
+def write_table(
+    module_name: str,
+    module_path: str,
+    supported_status: Dict[str, SupportedStatus],
+    w_fd: TextIO,
+) -> None:
+    """
+    Write table by using Sphinx list-table directive.
+    """
+    lines = []
+    lines.append("Supported ")
+    if module_name:
+        lines.append(module_name)
+    else:
+        lines.append("General Function")
+    lines.append(" APIs\n")
+    lines.append("-" * 100)
+    lines.append("\n")
+    lines.append(f".. currentmodule:: {module_path}")
+    if module_name:
+        lines.append(f".{module_name}\n")
+    else:
+        lines.append("\n")
+    lines.append("\n")
+    lines.append(".. list-table::\n")
+    lines.append("    :header-rows: 1\n")
+    lines.append("\n")
+    lines.append("    * - API\n")
+    lines.append("      - Implemented\n")
+    lines.append("      - Missing parameters\n")
+    for func_str, status in supported_status.items():
+        func_str = escape_func_str(func_str)
+        if status.implemented == Implemented.NOT_IMPLEMENTED.value:
+            lines.append(f"    * - {func_str}\n")
+        else:
+            lines.append(f"    * - :func:`{func_str}`\n")
+        lines.append(f"      - {status.implemented}\n")
+        lines.append("      - \n") if not status.missing else lines.append(
+            f"      - {status.missing}\n"
+        )
+    w_fd.writelines(lines)
+
+
+def escape_func_str(func_str: str) -> str:

Review Comment:
   Looks like we need some special handling in here for:
   - Links to aliases, such as `Dataframe.sub/subtract`
   - Links to a parent method, such as the `Dataframe.ewm` and `SeriesGroupBy` docs
   
   Otherwise, the generated links have some problems. Feel free to do it in a separate PR.
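   For illustration, one way such special-casing might look (the override map below is hypothetical, not the actual fix):
   
   ```python
   # Hypothetical mapping from an API name to the name its documentation actually
   # lives under, so aliases (e.g. DataFrame.subtract -> DataFrame.sub) and methods
   # documented on a parent class still yield a valid link target.
   DOC_TARGET_OVERRIDES = {
       ("DataFrame", "subtract"): ("DataFrame", "sub"),
       ("SeriesGroupBy", "ewm"): ("GroupBy", "ewm"),
   }
   
   
   def resolve_doc_target(module_name: str, func_name: str) -> str:
       # Fall back to the original (module, function) pair when no override exists.
       module_name, func_name = DOC_TARGET_OVERRIDES.get(
           (module_name, func_name), (module_name, func_name)
       )
       return f"{module_name}.{func_name}" if module_name else func_name
   ```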



##########
python/pyspark/pandas/supported_api_gen.py:
##########
@@ -0,0 +1,356 @@
+def write_table(

Review Comment:
   ```suggestion
   def _write_table(
   ```
   
   I guess we should mark this method as a private method. It might also help when understanding how this module works. : )
   
   It would be good if you could also rename the other private methods above with a leading `_`.
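   For example (an illustrative sketch only, not the actual module), that would leave a single public entry point with underscore-prefixed helpers:
   
   ```python
   # Sketch of the convention being suggested: one public entry point, private helpers.
   __all__ = ["generate_supported_api"]
   
   
   def generate_supported_api() -> None:
       """Public entry point that drives the doc generation."""
       _write_table()
   
   
   def _write_table() -> None:
       """Private helper; the leading underscore marks it as an implementation detail."""
   ```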



##########
python/pyspark/pandas/supported_api_gen.py:
##########
@@ -0,0 +1,356 @@
+import pandas as pd

Review Comment:
   Another concern: should we validate `pd.__version__` in this `supported_api_gen` to make sure the doc content is consistent?
   
   cc @HyukjinKwon @itholic 
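   A minimal sketch of such a guard (the pinned version and message are made up for illustration):
   
   ```python
   import warnings
   from distutils.version import LooseVersion
   
   import pandas as pd
   
   # Hypothetical pandas release the support list is generated against.
   EXPECTED_PANDAS_VERSION = "1.4.2"
   
   
   def _check_pandas_version() -> None:
       # Warn when the installed pandas differs, so the generated doc
       # always reflects a single known pandas release.
       if LooseVersion(pd.__version__) != LooseVersion(EXPECTED_PANDAS_VERSION):
           warnings.warn(
               f"The supported-API list should be generated with pandas "
               f"{EXPECTED_PANDAS_VERSION}; found {pd.__version__}."
           )
   ```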



##########
python/pyspark/pandas/supported_api_gen.py:
##########
@@ -0,0 +1,356 @@
+    if len(missing_set) > MAX_MISSING_PARAMS_SIZE:

Review Comment:
   nice!



##########
python/pyspark/pandas/supported_api_gen.py:
##########
@@ -0,0 +1,356 @@
+def get_pd_modules(pd_module_group: Any) -> List[str]:

Review Comment:
   ditto



##########
python/pyspark/pandas/supported_api_gen.py:
##########
@@ -0,0 +1,356 @@
+def create_supported_by_module(

Review Comment:
   ```suggestion
   def _create_supported_by_module(
   ```



##########
python/pyspark/pandas/supported_api_gen.py:
##########
@@ -0,0 +1,356 @@
+def organize_by_implementation_status(

Review Comment:
   ```suggestion
   def _organize_by_implementation_status(
   ```



##########
python/pyspark/pandas/supported_api_gen.py:
##########
@@ -0,0 +1,356 @@
+def transform_missing(

Review Comment:
   ```suggestion
   def _transform_missing(
   ```



##########
python/pyspark/pandas/supported_api_gen.py:
##########
@@ -0,0 +1,356 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Generate 'Supported pandas APIs' documentation file
+"""
+import os
+from enum import Enum, unique
+from inspect import getmembers, isclass, isfunction, signature
+from typing import Any, Callable, Dict, List, Set, TextIO, Tuple
+
+import pyspark.pandas as ps
+import pyspark.pandas.groupby as psg
+import pyspark.pandas.window as psw
+from pyspark.find_spark_home import _find_spark_home
+
+import pandas as pd
+import pandas.core.groupby as pdg
+import pandas.core.window as pdw
+
+MAX_MISSING_PARAMS_SIZE = 5
+COMMON_PARAMETER_SET = {"kwargs", "args", "cls"}
+MODULE_GROUP_MATCH = [(pd, ps), (pdw, psw), (pdg, psg)]
+
+SPARK_HOME = _find_spark_home()
+TARGET_RST_FILE = os.path.join(
+    SPARK_HOME, "python/docs/source/user_guide/pandas_on_spark/supported_pandas_api.rst"
+)
+RST_HEADER = """
+=====================
+Supported pandas APIs
+=====================
+
+.. currentmodule:: pyspark.pandas
+
+The following table shows which pandas APIs are implemented in pandas API on Spark and which are
+not.
+
+Some pandas APIs do not support all of their parameters yet, so the third column lists the missing
+parameters for each API.
+
+'Y' in the second column means the API is implemented, including all of its parameters.
+'N' means the API is not implemented yet.
+'P' means the API is partially implemented, with some parameters still missing.
+
+If there is a pandas API or parameter you need that is not implemented yet, you can create an
+`Apache Spark JIRA <https://issues.apache.org/jira/projects/SPARK/summary>`__ to request it or
+contribute it yourself.
+
+The API list is updated based on the `latest pandas official API
+reference <https://pandas.pydata.org/docs/reference/index.html#>`__.
+
+All implemented APIs listed here run in a distributed manner, except the ones that require local
+computation by design. For example, `DataFrame.to_numpy() <https://spark.apache.org
+/docs/latest/api/python/reference/pyspark.pandas/api/pyspark.pandas.DataFrame.
+to_numpy.html>`__ requires collecting the data to the driver side.
+
+"""
+
+
+@unique
+class Implemented(Enum):
+    IMPLEMENTED = "Y"
+    NOT_IMPLEMENTED = "N"
+    PARTIALLY_IMPLEMENTED = "P"
+
+
+class SupportedStatus:
+    """
+    SupportedStatus class that defines a supported status for a specific pandas API
+    """
+
+    def __init__(self, implemented: str, missing: str = ""):
+        self.implemented = implemented
+        self.missing = missing
+
+
+def generate_supported_api() -> None:
+    """
+    Generate supported APIs status dictionary.
+
+    Write supported APIs documentation.
+    """
+    all_supported_status: Dict[Tuple[str, str], Dict[str, SupportedStatus]] = {}
+    for pd_module_group, ps_module_group in MODULE_GROUP_MATCH:
+        pd_modules = get_pd_modules(pd_module_group)
+        update_all_supported_status(
+            all_supported_status, pd_modules, pd_module_group, ps_module_group
+        )
+    write_rst(all_supported_status)
+
+
+def create_supported_by_module(
+    module_name: str, pd_module_group: Any, ps_module_group: Any
+) -> Dict[str, SupportedStatus]:
+    """
+    Retrieves the supported status of a pandas module.
+
+    Parameters
+    ----------
+    module_name : str
+        Class name that exists in the path of the module.
+    pd_module_group : Any
+        Specific path of importable pandas module.
+    ps_module_group: Any
+        Specific path of importable pyspark.pandas module.
+    """
+    pd_module = getattr(pd_module_group, module_name) if module_name else pd_module_group
+    try:
+        ps_module = getattr(ps_module_group, module_name) if module_name else ps_module_group
+    except AttributeError:
+        # module not implemented
+        return {}
+
+    pd_funcs = dict([m for m in getmembers(pd_module, isfunction) if not m[0].startswith("_")])
+    if not pd_funcs:
+        return {}
+
+    ps_funcs = dict([m for m in getmembers(ps_module, isfunction) if not m[0].startswith("_")])
+
+    return organize_by_implementation_status(
+        module_name, pd_funcs, ps_funcs, pd_module_group, ps_module_group
+    )
+
+
+def organize_by_implementation_status(
+    module_name: str,
+    pd_funcs: Dict[str, Callable],
+    ps_funcs: Dict[str, Callable],
+    pd_module_group: Any,
+    ps_module_group: Any,
+) -> Dict[str, SupportedStatus]:
+    """
+    Check the implementation status and parameters of both modules.
+
+    Parameters
+    ----------
+    module_name : str
+        Class name that exists in the path of the module.
+    pd_funcs: Dict[str, Callable]
+        function name and function object mapping of pandas module.
+    ps_funcs: Dict[str, Callable]
+        function name and function object mapping of pyspark.pandas module.
+    pd_module_group : Any
+        Specific path of importable pandas module.
+    ps_module_group: Any
+        Specific path of importable pyspark.pandas module.
+    """
+    pd_dict = {}
+    for pd_func_name, pd_func in pd_funcs.items():
+        ps_func = ps_funcs.get(pd_func_name)
+        if ps_func:
+            missing_set = (
+                set(signature(pd_func).parameters)
+                - set(signature(ps_func).parameters)
+                - COMMON_PARAMETER_SET
+            )
+            if missing_set:
+                # partially implemented
+                pd_dict[pd_func_name] = SupportedStatus(
+                    Implemented.PARTIALLY_IMPLEMENTED.value,
+                    transform_missing(
+                        module_name,
+                        pd_func_name,
+                        missing_set,
+                        pd_module_group.__name__,
+                        ps_module_group.__name__,
+                    ),
+                )
+            else:
+                # implemented, including all of its parameters
+                pd_dict[pd_func_name] = SupportedStatus(Implemented.IMPLEMENTED.value)
+        else:
+            # not implemented yet
+            pd_dict[pd_func_name] = SupportedStatus(Implemented.NOT_IMPLEMENTED.value)
+    return pd_dict
+
+
+def transform_missing(
+    module_name: str,
+    pd_func_name: str,
+    missing_set: Set[str],
+    pd_module_path: str,
+    ps_module_path: str,
+) -> str:
+    """
+    Transform missing parameters into table information string.
+
+    Parameters
+    ----------
+    module_name : str
+        Class name that exists in the path of the module.
+    pd_func_name : str
+        Name of pandas API.
+    missing_set : Set[str]
+        A set of parameters not yet implemented.
+    pd_module_path : str
+        Path string of pandas module.
+    ps_module_path : str
+        Path string of pyspark.pandas module.
+
+    Examples
+    --------
+    >>> transform_missing("DataFrame", "add", {"axis", "fill_value", "level"},
+    ...                     "pandas.DataFrame", "pyspark.pandas.DataFrame")
+    '``axis`` , ``fill_value`` , ``level``'
+    """
+    missing_str = " , ".join(f"``{x}``" for x in sorted(missing_set)[:MAX_MISSING_PARAMS_SIZE])
+    if len(missing_set) > MAX_MISSING_PARAMS_SIZE:
+        module_dot_func = f"{module_name}.{pd_func_name}" if module_name else pd_func_name
+        additional_str = (
+            " and more. See the "
+            f"`{pd_module_path}.{module_dot_func} "
+            "<https://pandas.pydata.org/docs/reference/api/"
+            f"{pd_module_path}.{module_dot_func}.html>`__ and "
+            f"`{ps_module_path}.{module_dot_func} "
+            "<https://spark.apache.org/docs/latest/api/python/reference/pyspark.pandas/api/"
+            f"{ps_module_path}.{module_dot_func}.html>`__ for detail."
+        )
+        missing_str += additional_str
+    return missing_str
+
+
+def get_pd_modules(pd_module_group: Any) -> List[str]:
+    """
+    Returns a sorted list of pandas members from the given pandas module path.
+
+    Parameters
+    ----------
+    pd_module_group : Any
+        Specific path of importable pandas module.
+    """
+    return sorted([m[0] for m in getmembers(pd_module_group, isclass) if not m[0].startswith("_")])
+
+
+def update_all_supported_status(

Review Comment:
   Just thinking aloud here: do we want to add a public interface that helps developers see which methods/parameters are missing, with pretty printing, for example `get_supported_status("DataFrame", pd, ps)`?
   
   If there is no need, just ignore this comment.
    
   cc @HyukjinKwon
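   
   As a rough sketch only (nothing below is in the PR; the helper's name, signature, and output format are assumptions, and the import path assumes the module lands at `pyspark.pandas.supported_api_gen` as proposed here), such an interface could reuse `create_supported_by_module` to pretty-print the not-yet-implemented APIs and their missing parameters for a given class:
   
   ```python
   import pandas as pd
   
   import pyspark.pandas as ps
   from pyspark.pandas.supported_api_gen import Implemented, create_supported_by_module
   
   
   def get_supported_status(module_name: str, pd_module_group=pd, ps_module_group=ps) -> None:
       """Pretty-print the APIs of a pandas class (e.g. "DataFrame") that are not fully supported."""
       statuses = create_supported_by_module(module_name, pd_module_group, ps_module_group)
       for func_name, status in sorted(statuses.items()):
           if status.implemented == Implemented.IMPLEMENTED.value:
               continue  # fully implemented, nothing to report
           detail = status.missing or "not implemented yet"
           print(f"{module_name}.{func_name}: {status.implemented} ({detail})")
   
   
   # Example: list the DataFrame methods that are missing or only partially implemented.
   get_supported_status("DataFrame")
   ```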



##########
python/pyspark/pandas/supported_api_gen.py:
##########
@@ -0,0 +1,356 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Generate 'Supported pandas APIs' documentation file
+"""
+import os
+from enum import Enum, unique
+from inspect import getmembers, isclass, isfunction, signature
+from typing import Any, Callable, Dict, List, Set, TextIO, Tuple
+
+import pyspark.pandas as ps
+import pyspark.pandas.groupby as psg
+import pyspark.pandas.window as psw
+from pyspark.find_spark_home import _find_spark_home
+
+import pandas as pd
+import pandas.core.groupby as pdg
+import pandas.core.window as pdw
+
+MAX_MISSING_PARAMS_SIZE = 5
+COMMON_PARAMETER_SET = {"kwargs", "args", "cls"}
+MODULE_GROUP_MATCH = [(pd, ps), (pdw, psw), (pdg, psg)]
+
+SPARK_HOME = _find_spark_home()
+TARGET_RST_FILE = os.path.join(
+    SPARK_HOME, "python/docs/source/user_guide/pandas_on_spark/supported_pandas_api.rst"
+)
+RST_HEADER = """
+=====================
+Supported pandas APIs
+=====================
+
+.. currentmodule:: pyspark.pandas
+
+The following table shows which pandas APIs are implemented in pandas API on Spark and which are
+not.
+
+Some pandas APIs do not support all of their parameters yet, so the third column lists the missing
+parameters for each API.
+
+'Y' in the second column means the API is implemented, including all of its parameters.
+'N' means the API is not implemented yet.
+'P' means the API is partially implemented, with some parameters still missing.
+
+If there is a pandas API or parameter you need that is not implemented yet, you can create an
+`Apache Spark JIRA <https://issues.apache.org/jira/projects/SPARK/summary>`__ to request it or
+contribute it yourself.
+
+The API list is updated based on the `latest pandas official API
+reference <https://pandas.pydata.org/docs/reference/index.html#>`__.
+
+All implemented APIs listed here run in a distributed manner, except the ones that require local
+computation by design. For example, `DataFrame.to_numpy() <https://spark.apache.org
+/docs/latest/api/python/reference/pyspark.pandas/api/pyspark.pandas.DataFrame.
+to_numpy.html>`__ requires collecting the data to the driver side.
+
+"""
+
+
+@unique
+class Implemented(Enum):
+    IMPLEMENTED = "Y"
+    NOT_IMPLEMENTED = "N"
+    PARTIALLY_IMPLEMENTED = "P"
+
+
+class SupportedStatus:
+    """
+    SupportedStatus class that defines a supported status for a specific pandas API
+    """
+
+    def __init__(self, implemented: str, missing: str = ""):
+        self.implemented = implemented
+        self.missing = missing
+
+
+def generate_supported_api() -> None:
+    """
+    Generate supported APIs status dictionary.
+
+    Write supported APIs documentation.
+    """
+    all_supported_status: Dict[Tuple[str, str], Dict[str, SupportedStatus]] = {}
+    for pd_module_group, ps_module_group in MODULE_GROUP_MATCH:
+        pd_modules = get_pd_modules(pd_module_group)
+        update_all_supported_status(
+            all_supported_status, pd_modules, pd_module_group, ps_module_group
+        )
+    write_rst(all_supported_status)
+
+
+def create_supported_by_module(
+    module_name: str, pd_module_group: Any, ps_module_group: Any
+) -> Dict[str, SupportedStatus]:
+    """
+    Retrieves the supported status of a pandas module.
+
+    Parameters
+    ----------
+    module_name : str
+        Class name that exists in the path of the module.
+    pd_module_group : Any
+        Specific path of importable pandas module.
+    ps_module_group: Any
+        Specific path of importable pyspark.pandas module.
+    """
+    pd_module = getattr(pd_module_group, module_name) if module_name else pd_module_group
+    try:
+        ps_module = getattr(ps_module_group, module_name) if module_name else ps_module_group
+    except AttributeError:
+        # module not implemented
+        return {}
+
+    pd_funcs = dict([m for m in getmembers(pd_module, isfunction) if not m[0].startswith("_")])
+    if not pd_funcs:
+        return {}
+
+    ps_funcs = dict([m for m in getmembers(ps_module, isfunction) if not m[0].startswith("_")])
+
+    return organize_by_implementation_status(
+        module_name, pd_funcs, ps_funcs, pd_module_group, ps_module_group
+    )
+
+
+def organize_by_implementation_status(
+    module_name: str,
+    pd_funcs: Dict[str, Callable],
+    ps_funcs: Dict[str, Callable],
+    pd_module_group: Any,
+    ps_module_group: Any,
+) -> Dict[str, SupportedStatus]:
+    """
+    Check the implementation status and parameters of both modules.
+
+    Parameters
+    ----------
+    module_name : str
+        Class name that exists in the path of the module.
+    pd_funcs: Dict[str, Callable]
+        function name and function object mapping of pandas module.
+    ps_funcs: Dict[str, Callable]
+        function name and function object mapping of pyspark.pandas module.
+    pd_module_group : Any
+        Specific path of importable pandas module.
+    ps_module_group: Any
+        Specific path of importable pyspark.pandas module.
+    """
+    pd_dict = {}
+    for pd_func_name, pd_func in pd_funcs.items():
+        ps_func = ps_funcs.get(pd_func_name)
+        if ps_func:
+            missing_set = (
+                set(signature(pd_func).parameters)
+                - set(signature(ps_func).parameters)
+                - COMMON_PARAMETER_SET
+            )
+            if missing_set:
+                # partially implemented
+                pd_dict[pd_func_name] = SupportedStatus(
+                    Implemented.PARTIALLY_IMPLEMENTED.value,
+                    transform_missing(
+                        module_name,
+                        pd_func_name,
+                        missing_set,
+                        pd_module_group.__name__,
+                        ps_module_group.__name__,
+                    ),
+                )
+            else:
+                # implemented, including all of its parameters
+                pd_dict[pd_func_name] = SupportedStatus(Implemented.IMPLEMENTED.value)
+        else:
+            # not implemented yet
+            pd_dict[pd_func_name] = SupportedStatus(Implemented.NOT_IMPLEMENTED.value)
+    return pd_dict
+
+
+def transform_missing(
+    module_name: str,
+    pd_func_name: str,
+    missing_set: Set[str],
+    pd_module_path: str,
+    ps_module_path: str,
+) -> str:
+    """
+    Transform missing parameters into table information string.
+
+    Parameters
+    ----------
+    module_name : str
+        Class name that exists in the path of the module.
+    pd_func_name : str
+        Name of pandas API.
+    missing_set : Set[str]
+        A set of parameters not yet implemented.
+    pd_module_path : str
+        Path string of pandas module.
+    ps_module_path : str
+        Path string of pyspark.pandas module.
+
+    Examples
+    --------
+    >>> transform_missing("DataFrame", "add", {"axis", "fill_value", "level"},
+    ...                     "pandas.DataFrame", "pyspark.pandas.DataFrame")
+    '``axis`` , ``fill_value`` , ``level``'
+    """
+    missing_str = " , ".join(f"``{x}``" for x in sorted(missing_set)[:MAX_MISSING_PARAMS_SIZE])
+    if len(missing_set) > MAX_MISSING_PARAMS_SIZE:
+        module_dot_func = f"{module_name}.{pd_func_name}" if module_name else pd_func_name
+        additional_str = (
+            " and more. See the "
+            f"`{pd_module_path}.{module_dot_func} "
+            "<https://pandas.pydata.org/docs/reference/api/"
+            f"{pd_module_path}.{module_dot_func}.html>`__ and "
+            f"`{ps_module_path}.{module_dot_func} "
+            "<https://spark.apache.org/docs/latest/api/python/reference/pyspark.pandas/api/"
+            f"{ps_module_path}.{module_dot_func}.html>`__ for detail."
+        )
+        missing_str += additional_str
+    return missing_str
+
+
+def get_pd_modules(pd_module_group: Any) -> List[str]:
+    """
+    Returns a sorted list of pandas members from the given pandas module path.
+
+    Parameters
+    ----------
+    pd_module_group : Any
+        Specific path of importable pandas module.
+    """
+    return sorted([m[0] for m in getmembers(pd_module_group, isclass) if not m[0].startswith("_")])
+
+
+def update_all_supported_status(
+    all_supported_status: Dict[Tuple[str, str], Dict[str, SupportedStatus]],
+    pd_modules: List[str],
+    pd_module_group: Any,
+    ps_module_group: Any,
+) -> None:
+    """
+    Updates supported status across multiple module paths.
+
+    Parameters
+    ----------
+    all_supported_status: Dict[Tuple[str, str], Dict[str, SupportedStatus]]
+        Data that stores the supported status across multiple module paths.
+    pd_modules: List[str]
+        Name list of pandas modules.
+    pd_module_group : Any
+        Specific path of importable pandas module.
+    ps_module_group: Any
+        Specific path of importable pyspark.pandas module.
+    """
+    pd_modules += [""]  # for General Function APIs
+    for module_name in pd_modules:
+        supported_status = create_supported_by_module(module_name, pd_module_group, ps_module_group)
+        if supported_status:
+            all_supported_status[(module_name, ps_module_group.__name__)] = supported_status
+
+
+def write_table(
+    module_name: str,
+    module_path: str,
+    supported_status: Dict[str, SupportedStatus],
+    w_fd: TextIO,
+) -> None:
+    """
+    Write table by using Sphinx list-table directive.
+    """
+    lines = []
+    lines.append("Supported ")
+    if module_name:
+        lines.append(module_name)
+    else:
+        lines.append("General Function")
+    lines.append(" APIs\n")
+    lines.append("-" * 100)
+    lines.append("\n")
+    lines.append(f".. currentmodule:: {module_path}")
+    if module_name:
+        lines.append(f".{module_name}\n")
+    else:
+        lines.append("\n")
+    lines.append("\n")
+    lines.append(".. list-table::\n")
+    lines.append("    :header-rows: 1\n")
+    lines.append("\n")
+    lines.append("    * - API\n")
+    lines.append("      - Implemented\n")
+    lines.append("      - Missing parameters\n")
+    for func_str, status in supported_status.items():
+        func_str = escape_func_str(func_str)
+        if status.implemented == Implemented.NOT_IMPLEMENTED.value:
+            lines.append(f"    * - {func_str}\n")
+        else:
+            lines.append(f"    * - :func:`{func_str}`\n")
+        lines.append(f"      - {status.implemented}\n")
+        lines.append("      - \n") if not status.missing else lines.append(
+            f"      - {status.missing}\n"
+        )
+    w_fd.writelines(lines)
+
+
+def escape_func_str(func_str: str) -> str:
+    """
+    Escape characters in a function name that would otherwise affect the RST formatting.
+    """
+    if func_str.endswith("_"):
+        return func_str[:-1] + "\_"  # noqa: W605
+    else:
+        return func_str
+
+
+def write_rst(all_supported_status: Dict[Tuple[str, str], Dict[str, SupportedStatus]]) -> None:

Review Comment:
   ```suggestion
   def _write_rst(all_supported_status: Dict[Tuple[str, str], Dict[str, SupportedStatus]]) -> None:
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org