You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@spark.apache.org by grjohnson35 <gj...@artemishealth.com> on 2017/04/03 02:30:52 UTC

org.apache.spark.sql.AnalysisException: resolved attribute(s) code#906 missing from code#1992,

The exception org.apache.spark.sql.AnalysisException: resolved attribute(s)
code#906 missing from code#1992, is being thrown on a dataframe.  When I
print the schema the dataframe contains the field.   Any help is much
appreciated.


    val spark = SparkSession.builder()
      .master("spark://localhost:7077")
      .enableHiveSupport()
      .appName("Refresh Medical Claims")
      .config("fs.s3.awsAccessKeyId", S3_ACCESS)
      .config("fs.s3.awsSecretAccessKey", S3_SECRET)
      .config("fs.s3a.awsAccessKeyId", S3_ACCESS)
      .config("fs.s3a.awsSecretAccessKey", S3_SECRET)
      .getOrCreate()

    val startTm: Long = getTimeMS()

  def updateMinRtos8Thru27(spark: SparkSession, url: String, prop:
Properties, baseDF: DataFrame,
    revCdDF: DataFrame, mcdDF: DataFrame, mccDF: DataFrame): DataFrame = {

    printDFSchema(mccDF, "mccDF")
    printDFSchema(baseDF, "baseDF")
    printDFSchema(revCdDF, "revCdDF")

    baseDF.join(mccDF, mccDF("medical_claim_id") <=>
baseDF("medical_claim_id") &&
      mccDF("medical_claim_detail_id") <=>
baseDF("medical_claim_detail_id"), "left")
      .join(revCdDF, revCdDF("revenue_code_padded_str") <=> mccDF("code"),
"left").where(revCdDF("code_type").equalTo("Revenue_Center"))
      .where(revCdDF("rtos_2_code").isNotNull)
      .where(revCdDF("rtos_2_code").between(8, 27))
      .groupBy(baseDF("medical_claim_id"),
baseDF("medical_claim_detail_id"))
      .agg(min(revCdDF("rtos_2_code").alias("min_rtos_2_8_thru_27")))
     
.agg(min(revCdDF("rtos_2_hierarchy").alias("min_rtos_2_8_thru_27_hier")))
      .select(baseDF("medical_claim_id"), baseDF("medical_claim_detail_id"),
mccDF("code"), baseDF("revenue_code"), baseDF("rev_code_distinct_count"),
        baseDF("rtos_1_1_count"), baseDF("rtos_1_0_count"),
baseDF("er_visit_flag"), baseDF("observation_stay_flag"))
}


mccDF displaying Schema
root
 |-- medical_claim_id: long (nullable = true)
 |-- medical_claim_detail_id: long (nullable = true)
 |-- from_date: date (nullable = true)
 |-- member_id: long (nullable = true)
 |-- member_history_id: long (nullable = true)
 |-- code: string (nullable = true)
 |-- code_type: string (nullable = true)

baseDF displaying Schema
root
 |-- medical_claim_id: long (nullable = true)
 |-- medical_claim_detail_id: long (nullable = true)
 |-- revenue_code: string (nullable = true)
 |-- rev_code_distinct_count: long (nullable = false)
 |-- rtos_1_1_count: long (nullable = false)
 |-- rtos_1_0_count: long (nullable = false)
 |-- er_visit_flag: integer (nullable = true)
 |-- observation_stay_flag: long (nullable = false)

revCdDF displaying Schema
root
 |-- revenue_code_int: integer (nullable = false)
 |-- revenue_code_padded_str: string (nullable = false)
 |-- revenue_code_desc: string (nullable = true)
 |-- rtos_1_code: integer (nullable = true)
 |-- rtos_2_code: integer (nullable = true)
 |-- rtos_2_desc: string (nullable = true)
 |-- rtos_2_hierarchy: integer (nullable = true)
 |-- rtos_3_code: integer (nullable = true)
 |-- rtos_3_desc: string (nullable = true)

Exception in thread "main" org.apache.spark.sql.AnalysisException: resolved
attribute(s) code#906 missing from
code#1992,revenue_code#1353,medical_claim_id#901L,rtos_2_desc#5,from_date#1989,rtos_1_1_count#1367L,medical_claim_detail_id#902L,medical_claim_detail_id#1988L,rtos_2_hierarchy#6,revenue_code_desc#2,observation_stay_flag#1374L,medical_claim_id#1987L,revenue_code_padded_str#1,member_history_id#1991L,er_visit_flag#1372,member_id#1990L,code_type#1993,rtos_1_code#3,rtos_2_code#4,rtos_3_code#7,rtos_3_desc#8,rev_code_distinct_count#1365L,rtos_1_0_count#1369L,revenue_code_int#0
in operator !Join LeftOuter, (revenue_code_padded_str#1 <=> code#906);;
!Join LeftOuter, (revenue_code_padded_str#1 <=> code#906)
:- Join LeftOuter, ((medical_claim_id#901L <=> medical_claim_id#901L) &&
(medical_claim_detail_id#902L <=> medical_claim_detail_id#902L))
:  :- Sort [medical_claim_id#901L ASC NULLS FIRST,
medical_claim_detail_id#902L ASC NULLS FIRST], true
:  :  +- Aggregate [medical_claim_id#901L, medical_claim_detail_id#902L,
code#906], [medical_claim_id#901L, medical_claim_detail_id#902L, code#906 AS
revenue_code#1353, count(distinct code#906) AS
rev_code_distinct_count#1365L, count(CASE WHEN (rtos_1_code#3 = 1) THEN
rtos_1_code#3 ELSE cast(null as int) END) AS rtos_1_1_count#1367L,
count(CASE WHEN (rtos_1_code#3 = 0) THEN rtos_1_code#3 ELSE cast(null as
int) END) AS rtos_1_0_count#1369L, max(CASE WHEN lpad(code#906, 4, 0) IN
(0450,0452,0456,0459) THEN 1 ELSE 0 END) AS er_visit_flag#1372,
count(distinct CASE WHEN (rtos_2_code#4 = 9) THEN 1 ELSE cast(null as
string) END) AS observation_stay_flag#1374L]
:  :     +- Project [medical_claim_id#901L, medical_claim_detail_id#902L,
code#906, code_type#907, rtos_1_code#3, rtos_2_code#4, rtos_2_hierarchy#6,
line_er_visit_flag#1332, CASE WHEN (rtos_2_code#4 = 9) THEN 1 ELSE 0 END AS
line_observation_stay_flag#1342]
:  :        +- Project [medical_claim_id#901L, medical_claim_detail_id#902L,
code#906, code_type#907, rtos_1_code#3, rtos_2_code#4, rtos_2_hierarchy#6,
CASE WHEN lpad(code#906, 4, 0) IN (0450,0452,0456,0459) THEN 1 ELSE 0 END AS
line_er_visit_flag#1332]
:  :           +- Project [medical_claim_id#901L,
medical_claim_detail_id#902L, code#906, code_type#907, rtos_1_code#3,
rtos_2_code#4, rtos_2_hierarchy#6]
:  :              +- Filter (code_type#907 = Revenue_Center)
:  :                 +- Join LeftOuter, (medical_claim_detail_id#902L <=>
medical_claim_detail_id#605L)
:  :                    :- Filter (code_type#907 = Revenue_Center)
:  :                    :  +- Join LeftOuter, (code#906 <=>
revenue_code_padded_str#1)
:  :                    :     :- Project [medical_claim_id#901L,
medical_claim_detail_id#902L, from_date#903, member_id#904L,
member_history_id#905L, code#906, code_type#907]
:  :                    :     :  +- SubqueryAlias tempmedclaimcode
:  :                    :     :     +-
Relation[medical_claim_id#901L,medical_claim_detail_id#902L,from_date#903,member_id#904L,member_history_id#905L,code#906,code_type#907]
parquet
:  :                    :     +-
Relation[revenue_code_int#0,revenue_code_padded_str#1,revenue_code_desc#2,rtos_1_code#3,rtos_2_code#4,rtos_2_desc#5,rtos_2_hierarchy#6,rtos_3_code#7,rtos_3_desc#8]
JDBCRelation(proto_reference_tables.reference_revenue_center_codes)
[numPartitions=1]
:  :                    +-
Relation[medical_claim_detail_id#605L,data_source_id#606L,record_import_date#607L,medical_claim_id#608L,medical_claim_line_num#609,member_id#610L,source_member_id#611,member_history_id#612L,employer_id#613L,provider_id#614L,source_provider_specialty_code#615,medical_carrier_id#616L,medical_plan_id#617L,location_id#618L,revenue_center_code#619,from_date#620,thru_date#621,paid_date#622,place_of_service_code#623,emergency_indicator#624,procedure_code#625,procedure_code_modifier#626,procedure_code_type#627,type_of_service_code#628,...
74 more fields] JDBCRelation(medical_claim_detail) [numPartitions=1]
:  +- Project [medical_claim_id#1987L, medical_claim_detail_id#1988L,
from_date#1989, member_id#1990L, member_history_id#1991L, code#1992,
code_type#1993]
:     +- SubqueryAlias tempmedclaimcode
:        +-
Relation[medical_claim_id#1987L,medical_claim_detail_id#1988L,from_date#1989,member_id#1990L,member_history_id#1991L,code#1992,code_type#1993]
parquet
+-
Relation[revenue_code_int#0,revenue_code_padded_str#1,revenue_code_desc#2,rtos_1_code#3,rtos_2_code#4,rtos_2_desc#5,rtos_2_hierarchy#6,rtos_3_code#7,rtos_3_desc#8]
JDBCRelation(proto_reference_tables.reference_revenue_center_codes)
[numPartitions=1]




--
View this message in context: http://apache-spark-user-list.1001560.n3.nabble.com/org-apache-spark-sql-AnalysisException-resolved-attribute-s-code-906-missing-from-code-1992-tp28555.html
Sent from the Apache Spark User List mailing list archive at Nabble.com.

---------------------------------------------------------------------
To unsubscribe e-mail: user-unsubscribe@spark.apache.org