Posted to issues@spark.apache.org by "Hyukjin Kwon (Jira)" <ji...@apache.org> on 2023/03/08 10:40:00 UTC

[jira] [Resolved] (SPARK-42266) Local mode should work with IPython

     [ https://issues.apache.org/jira/browse/SPARK-42266?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Hyukjin Kwon resolved SPARK-42266.
----------------------------------
    Fix Version/s: 3.4.0
       Resolution: Fixed

Issue resolved by pull request 40327
[https://github.com/apache/spark/pull/40327]
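
With the fix in place, `bin/pyspark --remote "local[*]"` should start cleanly under IPython. For reference, a hedged verification sketch of the programmatic equivalent (assuming pyspark 3.4.0 with the Spark Connect dependencies grpcio, pandas, and pyarrow installed; illustrative only, not taken from the PR):

{code:python}
# Hedged sketch: programmatic equivalent of `bin/pyspark --remote "local[*]"`.
# Assumes pyspark>=3.4.0 with grpcio, pandas, and pyarrow installed.
from pyspark.sql import SparkSession

# builder.remote("local[*]") starts a local Spark Connect server and
# returns a Connect-backed session rather than a classic one.
spark = SparkSession.builder.remote("local[*]").getOrCreate()

print(type(spark))     # expected: pyspark.sql.connect.session.SparkSession
spark.range(3).show()  # a trivial query to confirm the session works
spark.stop()
{code}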

> Local mode should work with IPython
> -----------------------------------
>
>                 Key: SPARK-42266
>                 URL: https://issues.apache.org/jira/browse/SPARK-42266
>             Project: Spark
>          Issue Type: Sub-task
>          Components: Connect, PySpark
>    Affects Versions: 3.4.0
>            Reporter: Ruifeng Zheng
>            Assignee: Hyukjin Kwon
>            Priority: Major
>             Fix For: 3.4.0
>
>
> {code:python}
> (spark_dev) ➜  spark git:(master) bin/pyspark --remote "local[*]"
> Python 3.9.15 (main, Nov 24 2022, 08:28:41) 
> Type 'copyright', 'credits' or 'license' for more information
> IPython 8.9.0 -- An enhanced Interactive Python. Type '?' for help.
> /Users/ruifeng.zheng/Dev/spark/python/pyspark/shell.py:45: UserWarning: Failed to initialize Spark session.
>   warnings.warn("Failed to initialize Spark session.")
> Traceback (most recent call last):
>   File "/Users/ruifeng.zheng/Dev/spark/python/pyspark/shell.py", line 40, in <module>
>     spark = SparkSession.builder.getOrCreate()
>   File "/Users/ruifeng.zheng/Dev/spark/python/pyspark/sql/session.py", line 429, in getOrCreate
>     from pyspark.sql.connect.session import SparkSession as RemoteSparkSession
>   File "/Users/ruifeng.zheng/Dev/spark/python/pyspark/sql/connect/__init__.py", line 21, in <module>
>     from pyspark.sql.connect.dataframe import DataFrame  # noqa: F401
>   File "/Users/ruifeng.zheng/Dev/spark/python/pyspark/sql/connect/dataframe.py", line 35, in <module>
>     import pandas
>   File "/Users/ruifeng.zheng/Dev/spark/python/pyspark/pandas/__init__.py", line 29, in <module>
>     from pyspark.pandas.missing.general_functions import MissingPandasLikeGeneralFunctions
>   File "/Users/ruifeng.zheng/Dev/spark/python/pyspark/pandas/__init__.py", line 34, in <module>
>     require_minimum_pandas_version()
>   File "/Users/ruifeng.zheng/Dev/spark/python/pyspark/sql/pandas/utils.py", line 37, in require_minimum_pandas_version
>     if LooseVersion(pandas.__version__) < LooseVersion(minimum_pandas_version):
> AttributeError: partially initialized module 'pandas' has no attribute '__version__' (most likely due to a circular import)
> [TerminalIPythonApp] WARNING | Unknown error in handling PYTHONSTARTUP file /Users/ruifeng.zheng/Dev/spark//python/pyspark/shell.py:
> ---------------------------------------------------------------------------
> AttributeError                            Traceback (most recent call last)
> File ~/Dev/spark/python/pyspark/shell.py:40
>      38 try:
>      39     # Creates pyspark.sql.connect.SparkSession.
> ---> 40     spark = SparkSession.builder.getOrCreate()
>      41 except Exception:
> File ~/Dev/spark/python/pyspark/sql/session.py:429, in SparkSession.Builder.getOrCreate(self)
>     428 with SparkContext._lock:
> --> 429     from pyspark.sql.connect.session import SparkSession as RemoteSparkSession
>     431     if (
>     432         SparkContext._active_spark_context is None
>     433         and SparkSession._instantiatedSession is None
>     434     ):
> File ~/Dev/spark/python/pyspark/sql/connect/__init__.py:21
>      18 """Currently Spark Connect is very experimental and the APIs to interact with
>      19 Spark through this API are can be changed at any time without warning."""
> ---> 21 from pyspark.sql.connect.dataframe import DataFrame  # noqa: F401
>      22 from pyspark.sql.pandas.utils import (
>      23     require_minimum_pandas_version,
>      24     require_minimum_pyarrow_version,
>      25     require_minimum_grpc_version,
>      26 )
> File ~/Dev/spark/python/pyspark/sql/connect/dataframe.py:35
>      34 import random
> ---> 35 import pandas
>      36 import json
> File ~/Dev/spark/python/pyspark/pandas/__init__.py:29
>      27 from typing import Any
> ---> 29 from pyspark.pandas.missing.general_functions import MissingPandasLikeGeneralFunctions
>      30 from pyspark.pandas.missing.scalars import MissingPandasLikeScalars
> File ~/Dev/spark/python/pyspark/pandas/__init__.py:34
>      33 try:
> ---> 34     require_minimum_pandas_version()
>      35     require_minimum_pyarrow_version()
> File ~/Dev/spark/python/pyspark/sql/pandas/utils.py:37, in require_minimum_pandas_version()
>      34     raise ImportError(
>      35         "Pandas >= %s must be installed; however, " "it was not found." % minimum_pandas_version
>      36     ) from raised_error
> ---> 37 if LooseVersion(pandas.__version__) < LooseVersion(minimum_pandas_version):
>      38     raise ImportError(
>      39         "Pandas >= %s must be installed; however, "
>      40         "your version was %s." % (minimum_pandas_version, pandas.__version__)
>      41     )
> AttributeError: partially initialized module 'pandas' has no attribute '__version__' (most likely due to a circular import)
> During handling of the above exception, another exception occurred:
> SystemExit                                Traceback (most recent call last)
> File ~/.dev/miniconda3/envs/spark_dev/lib/python3.9/site-packages/IPython/core/shellapp.py:359, in InteractiveShellApp._exec_file(self, fname, shell_futures)
>     355                 self.shell.safe_execfile_ipy(full_filename,
>     356                                              shell_futures=shell_futures)
>     357             else:
>     358                 # default to python, even without extension
> --> 359                 self.shell.safe_execfile(full_filename,
>     360                                          self.shell.user_ns,
>     361                                          shell_futures=shell_futures,
>     362                                          raise_exceptions=True)
>     363 finally:
>     364     sys.argv = save_argv
> File ~/.dev/miniconda3/envs/spark_dev/lib/python3.9/site-packages/IPython/core/interactiveshell.py:2802, in InteractiveShell.safe_execfile(self, fname, exit_ignore, raise_exceptions, shell_futures, *where)
>    2800 try:
>    2801     glob, loc = (where + (None, ))[:2]
> -> 2802     py3compat.execfile(
>    2803         fname, glob, loc,
>    2804         self.compile if shell_futures else None)
>    2805 except SystemExit as status:
>    2806     # If the call was made with 0 or None exit status (sys.exit(0)
>    2807     # or sys.exit() ), don't bother showing a traceback, as both of
>    (...)
>    2813     # For other exit status, we show the exception unless
>    2814     # explicitly silenced, but only in short form.
>    2815     if status.code:
> File ~/.dev/miniconda3/envs/spark_dev/lib/python3.9/site-packages/IPython/utils/py3compat.py:55, in execfile(fname, glob, loc, compiler)
>      53 with open(fname, "rb") as f:
>      54     compiler = compiler or compile
> ---> 55     exec(compiler(f.read(), fname, "exec"), glob, loc)
> File ~/Dev/spark/python/pyspark/shell.py:47
>      45     warnings.warn("Failed to initialize Spark session.")
>      46     traceback.print_exc(file=sys.stderr)
> ---> 47     sys.exit(1)
>      48 version = pyspark.__version__
>      49 sc = None
> SystemExit: 1
> {code}
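>
> The traceback above points at the root cause: "import pandas" inside pyspark/sql/connect/dataframe.py resolves to pyspark/pandas/__init__.py (the pandas API on Spark) instead of the standalone pandas library, most likely because the startup file's directory takes precedence on sys.path when IPython runs shell.py. That package then calls require_minimum_pandas_version() while the module registered as "pandas" is still mid-import, hence the "partially initialized module" error. A minimal, self-contained sketch of the same shadowing mechanism (standalone illustration, not Spark code):
> {code:python}
> # Standalone illustration: a same-named local package shadows an
> # installed library when its parent directory comes first on sys.path.
> import os
> import sys
> import tempfile
>
> workdir = tempfile.mkdtemp()
> os.makedirs(os.path.join(workdir, "pandas"))
> # An empty package: like a partially initialized module, it has no
> # __version__ attribute.
> open(os.path.join(workdir, "pandas", "__init__.py"), "w").close()
>
> sys.path.insert(0, workdir)  # the shadowing directory wins the lookup
> import pandas                # resolves to workdir/pandas, not the real library
>
> print(pandas.__file__)                 # .../pandas/__init__.py under workdir
> print(hasattr(pandas, "__version__"))  # False, so version checks raise AttributeError
> {code}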


