You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@spark.apache.org by grjohnson35 <gj...@artemishealth.com> on 2017/04/03 02:30:52 UTC
org.apache.spark.sql.AnalysisException: resolved attribute(s)
code#906 missing from code#1992,
The exception org.apache.spark.sql.AnalysisException: resolved attribute(s)
code#906 missing from code#1992, is being thrown on a dataframe. When I
print the schema the dataframe contains the field. Any help is much
appreciated.
val spark = SparkSession.builder()
.master("spark://localhost:7077")
.enableHiveSupport()
.appName("Refresh Medical Claims")
.config("fs.s3.awsAccessKeyId", S3_ACCESS)
.config("fs.s3.awsSecretAccessKey", S3_SECRET)
.config("fs.s3a.awsAccessKeyId", S3_ACCESS)
.config("fs.s3a.awsSecretAccessKey", S3_SECRET)
.getOrCreate()
val startTm: Long = getTimeMS()
def updateMinRtos8Thru27(spark: SparkSession, url: String, prop:
Properties, baseDF: DataFrame,
revCdDF: DataFrame, mcdDF: DataFrame, mccDF: DataFrame): DataFrame = {
printDFSchema(mccDF, "mccDF")
printDFSchema(baseDF, "baseDF")
printDFSchema(revCdDF, "revCdDF")
baseDF.join(mccDF, mccDF("medical_claim_id") <=>
baseDF("medical_claim_id") &&
mccDF("medical_claim_detail_id") <=>
baseDF("medical_claim_detail_id"), "left")
.join(revCdDF, revCdDF("revenue_code_padded_str") <=> mccDF("code"),
"left").where(revCdDF("code_type").equalTo("Revenue_Center"))
.where(revCdDF("rtos_2_code").isNotNull)
.where(revCdDF("rtos_2_code").between(8, 27))
.groupBy(baseDF("medical_claim_id"),
baseDF("medical_claim_detail_id"))
.agg(min(revCdDF("rtos_2_code").alias("min_rtos_2_8_thru_27")))
.agg(min(revCdDF("rtos_2_hierarchy").alias("min_rtos_2_8_thru_27_hier")))
.select(baseDF("medical_claim_id"), baseDF("medical_claim_detail_id"),
mccDF("code"), baseDF("revenue_code"), baseDF("rev_code_distinct_count"),
baseDF("rtos_1_1_count"), baseDF("rtos_1_0_count"),
baseDF("er_visit_flag"), baseDF("observation_stay_flag"))
}
mccDF displaying Schema
root
|-- medical_claim_id: long (nullable = true)
|-- medical_claim_detail_id: long (nullable = true)
|-- from_date: date (nullable = true)
|-- member_id: long (nullable = true)
|-- member_history_id: long (nullable = true)
|-- code: string (nullable = true)
|-- code_type: string (nullable = true)
baseDF displaying Schema
root
|-- medical_claim_id: long (nullable = true)
|-- medical_claim_detail_id: long (nullable = true)
|-- revenue_code: string (nullable = true)
|-- rev_code_distinct_count: long (nullable = false)
|-- rtos_1_1_count: long (nullable = false)
|-- rtos_1_0_count: long (nullable = false)
|-- er_visit_flag: integer (nullable = true)
|-- observation_stay_flag: long (nullable = false)
revCdDF displaying Schema
root
|-- revenue_code_int: integer (nullable = false)
|-- revenue_code_padded_str: string (nullable = false)
|-- revenue_code_desc: string (nullable = true)
|-- rtos_1_code: integer (nullable = true)
|-- rtos_2_code: integer (nullable = true)
|-- rtos_2_desc: string (nullable = true)
|-- rtos_2_hierarchy: integer (nullable = true)
|-- rtos_3_code: integer (nullable = true)
|-- rtos_3_desc: string (nullable = true)
Exception in thread "main" org.apache.spark.sql.AnalysisException: resolved
attribute(s) code#906 missing from
code#1992,revenue_code#1353,medical_claim_id#901L,rtos_2_desc#5,from_date#1989,rtos_1_1_count#1367L,medical_claim_detail_id#902L,medical_claim_detail_id#1988L,rtos_2_hierarchy#6,revenue_code_desc#2,observation_stay_flag#1374L,medical_claim_id#1987L,revenue_code_padded_str#1,member_history_id#1991L,er_visit_flag#1372,member_id#1990L,code_type#1993,rtos_1_code#3,rtos_2_code#4,rtos_3_code#7,rtos_3_desc#8,rev_code_distinct_count#1365L,rtos_1_0_count#1369L,revenue_code_int#0
in operator !Join LeftOuter, (revenue_code_padded_str#1 <=> code#906);;
!Join LeftOuter, (revenue_code_padded_str#1 <=> code#906)
:- Join LeftOuter, ((medical_claim_id#901L <=> medical_claim_id#901L) &&
(medical_claim_detail_id#902L <=> medical_claim_detail_id#902L))
: :- Sort [medical_claim_id#901L ASC NULLS FIRST,
medical_claim_detail_id#902L ASC NULLS FIRST], true
: : +- Aggregate [medical_claim_id#901L, medical_claim_detail_id#902L,
code#906], [medical_claim_id#901L, medical_claim_detail_id#902L, code#906 AS
revenue_code#1353, count(distinct code#906) AS
rev_code_distinct_count#1365L, count(CASE WHEN (rtos_1_code#3 = 1) THEN
rtos_1_code#3 ELSE cast(null as int) END) AS rtos_1_1_count#1367L,
count(CASE WHEN (rtos_1_code#3 = 0) THEN rtos_1_code#3 ELSE cast(null as
int) END) AS rtos_1_0_count#1369L, max(CASE WHEN lpad(code#906, 4, 0) IN
(0450,0452,0456,0459) THEN 1 ELSE 0 END) AS er_visit_flag#1372,
count(distinct CASE WHEN (rtos_2_code#4 = 9) THEN 1 ELSE cast(null as
string) END) AS observation_stay_flag#1374L]
: : +- Project [medical_claim_id#901L, medical_claim_detail_id#902L,
code#906, code_type#907, rtos_1_code#3, rtos_2_code#4, rtos_2_hierarchy#6,
line_er_visit_flag#1332, CASE WHEN (rtos_2_code#4 = 9) THEN 1 ELSE 0 END AS
line_observation_stay_flag#1342]
: : +- Project [medical_claim_id#901L, medical_claim_detail_id#902L,
code#906, code_type#907, rtos_1_code#3, rtos_2_code#4, rtos_2_hierarchy#6,
CASE WHEN lpad(code#906, 4, 0) IN (0450,0452,0456,0459) THEN 1 ELSE 0 END AS
line_er_visit_flag#1332]
: : +- Project [medical_claim_id#901L,
medical_claim_detail_id#902L, code#906, code_type#907, rtos_1_code#3,
rtos_2_code#4, rtos_2_hierarchy#6]
: : +- Filter (code_type#907 = Revenue_Center)
: : +- Join LeftOuter, (medical_claim_detail_id#902L <=>
medical_claim_detail_id#605L)
: : :- Filter (code_type#907 = Revenue_Center)
: : : +- Join LeftOuter, (code#906 <=>
revenue_code_padded_str#1)
: : : :- Project [medical_claim_id#901L,
medical_claim_detail_id#902L, from_date#903, member_id#904L,
member_history_id#905L, code#906, code_type#907]
: : : : +- SubqueryAlias tempmedclaimcode
: : : : +-
Relation[medical_claim_id#901L,medical_claim_detail_id#902L,from_date#903,member_id#904L,member_history_id#905L,code#906,code_type#907]
parquet
: : : +-
Relation[revenue_code_int#0,revenue_code_padded_str#1,revenue_code_desc#2,rtos_1_code#3,rtos_2_code#4,rtos_2_desc#5,rtos_2_hierarchy#6,rtos_3_code#7,rtos_3_desc#8]
JDBCRelation(proto_reference_tables.reference_revenue_center_codes)
[numPartitions=1]
: : +-
Relation[medical_claim_detail_id#605L,data_source_id#606L,record_import_date#607L,medical_claim_id#608L,medical_claim_line_num#609,member_id#610L,source_member_id#611,member_history_id#612L,employer_id#613L,provider_id#614L,source_provider_specialty_code#615,medical_carrier_id#616L,medical_plan_id#617L,location_id#618L,revenue_center_code#619,from_date#620,thru_date#621,paid_date#622,place_of_service_code#623,emergency_indicator#624,procedure_code#625,procedure_code_modifier#626,procedure_code_type#627,type_of_service_code#628,...
74 more fields] JDBCRelation(medical_claim_detail) [numPartitions=1]
: +- Project [medical_claim_id#1987L, medical_claim_detail_id#1988L,
from_date#1989, member_id#1990L, member_history_id#1991L, code#1992,
code_type#1993]
: +- SubqueryAlias tempmedclaimcode
: +-
Relation[medical_claim_id#1987L,medical_claim_detail_id#1988L,from_date#1989,member_id#1990L,member_history_id#1991L,code#1992,code_type#1993]
parquet
+-
Relation[revenue_code_int#0,revenue_code_padded_str#1,revenue_code_desc#2,rtos_1_code#3,rtos_2_code#4,rtos_2_desc#5,rtos_2_hierarchy#6,rtos_3_code#7,rtos_3_desc#8]
JDBCRelation(proto_reference_tables.reference_revenue_center_codes)
[numPartitions=1]
--
View this message in context: http://apache-spark-user-list.1001560.n3.nabble.com/org-apache-spark-sql-AnalysisException-resolved-attribute-s-code-906-missing-from-code-1992-tp28555.html
Sent from the Apache Spark User List mailing list archive at Nabble.com.
---------------------------------------------------------------------
To unsubscribe e-mail: user-unsubscribe@spark.apache.org