You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@ambari.apache.org by "Jonathan Hurley (JIRA)" <ji...@apache.org> on 2015/09/18 16:38:04 UTC
[jira] [Created] (AMBARI-13145) RU - Skipping failed task caused remaining pending tasks to be ABORTED
Jonathan Hurley created AMBARI-13145:
----------------------------------------
Summary: RU - Skipping failed task caused remaining pending tasks to be ABORTED
Key: AMBARI-13145
URL: https://issues.apache.org/jira/browse/AMBARI-13145
Project: Ambari
Issue Type: Bug
Components: ambari-server
Affects Versions: 2.1.0
Reporter: Jonathan Hurley
Assignee: Jonathan Hurley
Priority: Blocker
Fix For: 2.1.2
Skipping a failed task during an upgrade causes the remaining pending tasks, and with them the entire upgrade request, to become ABORTED.
{code:title=Failed & Skipped command}
{
"href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/11/upgrade_items/414?fields=UpgradeItem/*,tasks/Tasks/*",
"UpgradeItem": {
"cluster_host_info": "{\"hs_host\":[\"9\"],\"namenode_host\":[\"17\",\"14\"],\"hive_metastore_host\":[\"19\",\"8\",\"11\"],\"hbase_rs_hosts\":[\"0-19\"],\"zookeeper_hosts\":[\"3\",\"14\",\"11\"],\"metrics_monitor_hosts\":[\"0-19\"],\"rm_host\":[\"16\",\"14\"],\"slave_hosts\":[\"0-19\"],\"app_timeline_server_hosts\":[\"6\"],\"ambari_server_host\":[\"os-s11-3-ectjrs-c102dalsechanr-21.novalocal\"],\"metrics_collector_hosts\":[\"3\"],\"all_ping_ports\":[\"8670:0-19\"],\"all_racks\":[\"/default-rack:0-19\"],\"knox_gateway_hosts\":[\"7\"],\"flume_hosts\":[\"0-19\"],\"falcon_server_hosts\":[\"15\"],\"hbase_master_hosts\":[\"19\",\"8\",\"14\"],\"journalnode_hosts\":[\"14,15\",\"11\"],\"nm_hosts\":[\"0-19\"],\"hive_server_host\":[\"19\",\"8\",\"11\"],\"zkfc_hosts\":[\"17\",\"14\"],\"all_hosts\":[\"os-s11-3-ectjrs-c102dalsechanr-8.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-2.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-5.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-1.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-3.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-16.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-19.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-17.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-21.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-14.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-18.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-11.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-7.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-9.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-10.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-12.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-15.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-13.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-6.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-20.novalocal\"],\"all_ipv4_ips\":[\"172.22.65.185:4\",\"172.22.65.222:12\",\"172.22.65.95:7\",\"172.22.65.225:2\",\"172.22.65.10:11\",\"172.22.65.46:17\",\"172.22.65.109:10\",\"172.22.65.17:14\",\"172.22.65.181:3\",\"172.22.65.33:15\",\"172.22.65.218:18\",\"172.22.65.21:13\",\"172.22.65.119:19\",\"172.22.65.58:9\",\"172.22.65.182:1\",\"172.22.65.100:6\",\"172.22.65.66:16\",\"172.22.65.68:5\",\"172.22.65.239:0\",\"172.22.65.131:8\"],\"oozie_server\":[\"14,15\",\"11\"],\"webhcat_server_host\":[\"19\",\"8\",\"11\"]}",
"cluster_name": "cl1",
"command_params": "{\"original_stack\":\"HDP-2.2\",\"upgrade_direction\":\"upgrade\",\"target_stack\":\"HDP-2.3\",\"forceRefreshConfigTagsBeforeExecution\":\"*\",\"version\":\"2.3.2.0-2844\"}",
"context": "Service Check Hive",
"end_time": -1,
"group_id": 11,
"host_params": "{\"ambari_db_rca_driver\":\"org.postgresql.Driver\",\"ambari_db_rca_password\":\"bigdatacustom\",\"ambari_db_rca_url\":\"jdbc:postgresql://172.22.65.184:5432/ambaricustom\",\"ambari_db_rca_username\":\"ambaricustomuser\",\"current_version\":\"2.2.6.0-2800\",\"db_driver_filename\":\"mysql-connector-java.jar\",\"db_name\":\"ambaricustom\",\"host_sys_prepped\":\"false\",\"java_home\":\"/usr/jdk64/jdk1.7.0_67\",\"java_version\":\"7\",\"jdk_location\":\"http://os-s11-3-ectjrs-c102dalsechanr-21.novalocal:8080/resources/\",\"mysql_jdbc_url\":\"http://os-s11-3-ectjrs-c102dalsechanr-21.novalocal:8080/resources//mysql-connector-java.jar\",\"oracle_jdbc_url\":\"http://os-s11-3-ectjrs-c102dalsechanr-21.novalocal:8080/resources//ojdbc6.jar\",\"stack_name\":\"HDP\",\"stack_version\":\"2.3\"}",
"log_info": null,
"progress_percent": 100,
"request_id": 64,
"skippable": true,
"stage_id": 414,
"start_time": 1442469303768,
"status": "COMPLETED",
"text": "Service Check Hive"
},
"tasks": [
{
"href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/11/upgrade_items/414/tasks/3459",
"Tasks": {
"attempt_cnt": 5,
"cluster_name": "cl1",
"command": "SERVICE_CHECK",
"command_detail": "SERVICE_CHECK HIVE",
"end_time": -1,
"error_log": "/var/lib/ambari-agent/data/errors-3459.txt",
"exit_code": 999,
"host_name": "os-s11-3-ectjrs-c102dalsechanr-8.novalocal",
"id": 3459,
"output_log": "/var/lib/ambari-agent/data/output-3459.txt",
"request_id": 64,
"role": "HIVE_SERVICE_CHECK",
"stage_id": 414,
"start_time": 1442469303768,
"status": "FAILED",
"stderr": "Python script has been killed due to timeout after waiting 300 secs",
"stdout": "2015-09-17 05:55:08,128 - hadoop-client is currently at version 2.3.2.0-2844\n2015-09-17 05:55:08,168 - checked_call['conf-select set-conf-dir --package hadoop --stack-version 2.3.2.0-2844 --conf-version 0'] {'logoutput': False, 'sudo': True, 'quiet': False}\n2015-09-17 05:55:08,209 - checked_call returned (0, '/usr/hdp/2.3.2.0-2844/hadoop/conf -> /etc/hadoop/2.3.2.0-2844/0')\n2015-09-17 05:55:08,247 - hadoop-client is currently at version 2.3.2.0-2844\nTest connectivity to hive server\nWaiting for the Hive server to start...\n2015-09-17 05:55:08,346 - Execute['/usr/bin/kinit -kt /etc/security/keytabs/smokeuser.headless.keytab ambari-qa@EXAMPLE.COM; '] {'user': 'ambari-qa'}\n2015-09-17 05:55:08,415 - Execute['! beeline -u 'jdbc:hive2://os-s11-3-ectjrs-c102dalsechanr-20.novalocal:10010/;transportMode=binary;principal=hive/_HOST@EXAMPLE.COM' -e '' 2>&1| awk '{print}'|grep -i -e 'Connection refused' -e 'Invalid URL''] {'path': ['/bin/', '/usr/bin/', '/usr/lib/hive/bin/', '/usr/sbin/'], 'user': 'ambari-qa', 'timeout': 30}\nSuccessfully connected to os-s11-3-ectjrs-c102dalsechanr-20.novalocal on port 10010\nSuccessfully connected to Hive at os-s11-3-ectjrs-c102dalsechanr-8.novalocal on port 10010 after 8 seconds\n2015-09-17 05:55:16,613 - File['/var/lib/ambari-agent/tmp/hcatSmoke.sh'] {'content': StaticFile('hcatSmoke.sh'), 'mode': 0755}\n2015-09-17 05:55:16,614 - Execute['/usr/bin/kinit -kt /etc/security/keytabs/smokeuser.headless.keytab ambari-qa@EXAMPLE.COM; env JAVA_HOME=/usr/jdk64/jdk1.7.0_67 /var/lib/ambari-agent/tmp/hcatSmoke.sh hcatsmokeid16acef41_date551715 prepare true'] {'logoutput': True, 'path': ['/usr/sbin', '/usr/local/bin', '/bin', '/usr/bin', '/usr/sbin:/sbin:/usr/lib/ambari-server/*:/sbin:/usr/sbin:/usr/local/sbin:/usr/local/bin:/bin:/usr/bin:/usr/X11R6/bin:/var/lib/ambari-agent:/usr/hdp/2.3.2.0-2844/hadoop/bin:/usr/hdp/2.3.2.0-2844/hive/bin'], 'tries': 3, 'user': 'ambari-qa', 'try_sleep': 5}\nWARNING: Use \"yarn jar\" to launch YARN applications.\nivysettings.xml file not found in HIVE_HOME or HIVE_CONF_DIR,file:/grid/0/hdp/2.3.2.0-2844/hadoop/lib/hadoop-lzo-0.6.0.2.3.2.0-2844-sources.jar!/ivysettings.xml will be used",
"structured_out": {}
}
}
]
}
{code}
{code:title=ABORTED upgrade request}
{
"href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64?fields=upgrade_groups/UpgradeGroup/*",
"Upgrade": {
"cluster_name": "cl1",
"request_id": 64
},
"upgrade_groups": [
{
"href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/1",
"UpgradeGroup": {
"completed_task_count": 5,
"group_id": 1,
"in_progress_task_count": 0,
"name": "PRE_CLUSTER",
"progress_percent": 100,
"request_id": 64,
"status": "COMPLETED",
"title": "Prepare Upgrade",
"total_task_count": 5
}
},
{
"href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/2",
"UpgradeGroup": {
"completed_task_count": 4,
"group_id": 2,
"in_progress_task_count": 0,
"name": "ZOOKEEPER",
"progress_percent": 100,
"request_id": 64,
"status": "COMPLETED",
"title": "ZooKeeper",
"total_task_count": 4
}
},
{
"href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/3",
"UpgradeGroup": {
"completed_task_count": 36,
"group_id": 3,
"in_progress_task_count": 0,
"name": "CORE_MASTER",
"progress_percent": 100,
"request_id": 64,
"status": "COMPLETED",
"title": "Core Masters",
"total_task_count": 36
}
},
{
"href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/4",
"UpgradeGroup": {
"completed_task_count": 15,
"group_id": 4,
"in_progress_task_count": 0,
"name": "SERVICE_CHECK",
"progress_percent": 100,
"request_id": 64,
"status": "COMPLETED",
"title": "All Service Checks",
"total_task_count": 15
}
},
{
"href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/5",
"UpgradeGroup": {
"completed_task_count": 61,
"group_id": 5,
"in_progress_task_count": 0,
"name": "CORE_SLAVES",
"progress_percent": 100,
"request_id": 64,
"status": "COMPLETED",
"title": "Core Slaves",
"total_task_count": 61
}
},
{
"href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/6",
"UpgradeGroup": {
"completed_task_count": 15,
"group_id": 6,
"in_progress_task_count": 0,
"name": "SERVICE_CHECK",
"progress_percent": 100,
"request_id": 64,
"status": "COMPLETED",
"title": "All Service Checks",
"total_task_count": 15
}
},
{
"href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/7",
"UpgradeGroup": {
"completed_task_count": 21,
"group_id": 7,
"in_progress_task_count": 0,
"name": "HIVE",
"progress_percent": 100,
"request_id": 64,
"status": "COMPLETED",
"title": "Hive",
"total_task_count": 21
}
},
{
"href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/8",
"UpgradeGroup": {
"completed_task_count": 28,
"group_id": 8,
"in_progress_task_count": 0,
"name": "OOZIE",
"progress_percent": 100,
"request_id": 64,
"status": "COMPLETED",
"title": "Oozie",
"total_task_count": 28
}
},
{
"href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/9",
"UpgradeGroup": {
"completed_task_count": 22,
"group_id": 9,
"in_progress_task_count": 0,
"name": "FALCON",
"progress_percent": 100,
"request_id": 64,
"status": "COMPLETED",
"title": "Falcon",
"total_task_count": 22
}
},
{
"href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/10",
"UpgradeGroup": {
"completed_task_count": 201,
"group_id": 10,
"in_progress_task_count": 0,
"name": "CLIENTS",
"progress_percent": 100,
"request_id": 64,
"status": "COMPLETED",
"title": "Client Components",
"total_task_count": 201
}
},
{
"href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/11",
"UpgradeGroup": {
"completed_task_count": 22,
"group_id": 11,
"in_progress_task_count": 0,
"name": "SERVICE_CHECK",
"progress_percent": 100,
"request_id": 64,
"status": "ABORTED",
"title": "All Service Checks",
"total_task_count": 22
}
},
{
"href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/12",
"UpgradeGroup": {
"completed_task_count": 10,
"group_id": 12,
"in_progress_task_count": 0,
"name": "KNOX",
"progress_percent": 100,
"request_id": 64,
"status": "ABORTED",
"title": "Knox",
"total_task_count": 10
}
},
{
"href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/13",
"UpgradeGroup": {
"completed_task_count": 40,
"group_id": 13,
"in_progress_task_count": 0,
"name": "SLIDER",
"progress_percent": 100,
"request_id": 64,
"status": "ABORTED",
"title": "Slider",
"total_task_count": 40
}
},
{
"href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/14",
"UpgradeGroup": {
"completed_task_count": 42,
"group_id": 14,
"in_progress_task_count": 0,
"name": "FLUME",
"progress_percent": 100,
"request_id": 64,
"status": "ABORTED",
"title": "Flume",
"total_task_count": 42
}
},
{
"href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/15",
"UpgradeGroup": {
"completed_task_count": 40,
"group_id": 15,
"in_progress_task_count": 0,
"name": "ALL_HOST_OPS",
"progress_percent": 100,
"request_id": 64,
"status": "ABORTED",
"title": "Finalize Hosts",
"total_task_count": 40
}
},
{
"href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/16",
"UpgradeGroup": {
"completed_task_count": 6,
"group_id": 16,
"in_progress_task_count": 0,
"name": "POST_CLUSTER",
"progress_percent": 100,
"request_id": 64,
"status": "ABORTED",
"title": "Finalize Upgrade",
"total_task_count": 6
}
}
]
}
{code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)