You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@ambari.apache.org by "Jonathan Hurley (JIRA)" <ji...@apache.org> on 2015/09/18 19:04:04 UTC
[jira] [Updated] (AMBARI-13145) RU - Skipping failed task caused remaining pending tasks to be ABORTED
[ https://issues.apache.org/jira/browse/AMBARI-13145?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Jonathan Hurley updated AMBARI-13145:
-------------------------------------
Attachment: AMBARI-13145.patch
> RU - Skipping failed task caused remaining pending tasks to be ABORTED
> ----------------------------------------------------------------------
>
> Key: AMBARI-13145
> URL: https://issues.apache.org/jira/browse/AMBARI-13145
> Project: Ambari
> Issue Type: Bug
> Components: ambari-server
> Affects Versions: 2.1.0
> Reporter: Jonathan Hurley
> Assignee: Jonathan Hurley
> Priority: Blocker
> Fix For: 2.1.2
>
> Attachments: AMBARI-13145.patch
>
>
> Skipping a failed task during an upgrade causes the remaining pending tasks to be ABORTED, which leaves the entire upgrade request in the ABORTED state.
> {code:title=Failed & Skipped command}
> {
> "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/11/upgrade_items/414?fields=UpgradeItem/*,tasks/Tasks/*",
> "UpgradeItem": {
> "cluster_host_info": "{\"hs_host\":[\"9\"],\"namenode_host\":[\"17\",\"14\"],\"hive_metastore_host\":[\"19\",\"8\",\"11\"],\"hbase_rs_hosts\":[\"0-19\"],\"zookeeper_hosts\":[\"3\",\"14\",\"11\"],\"metrics_monitor_hosts\":[\"0-19\"],\"rm_host\":[\"16\",\"14\"],\"slave_hosts\":[\"0-19\"],\"app_timeline_server_hosts\":[\"6\"],\"ambari_server_host\":[\"os-s11-3-ectjrs-c102dalsechanr-21.novalocal\"],\"metrics_collector_hosts\":[\"3\"],\"all_ping_ports\":[\"8670:0-19\"],\"all_racks\":[\"/default-rack:0-19\"],\"knox_gateway_hosts\":[\"7\"],\"flume_hosts\":[\"0-19\"],\"falcon_server_hosts\":[\"15\"],\"hbase_master_hosts\":[\"19\",\"8\",\"14\"],\"journalnode_hosts\":[\"14,15\",\"11\"],\"nm_hosts\":[\"0-19\"],\"hive_server_host\":[\"19\",\"8\",\"11\"],\"zkfc_hosts\":[\"17\",\"14\"],\"all_hosts\":[\"os-s11-3-ectjrs-c102dalsechanr-8.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-2.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-5.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-1.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-3.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-16.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-19.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-17.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-21.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-14.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-18.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-11.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-7.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-9.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-10.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-12.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-15.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-13.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-6.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-20.novalocal\"],\"all_ipv4_ips\":[\"172.22.65.185:4\",\"172.22.65.222:12\",\"172.22.65.95:7\",\"172.22.65.225:2\",\"172.22.65.10:11\",\"172.22.65.46:17\",\"172.22.65.109:10\",\"172.22.65.17:14\",\"172.22.65.181:3\",\"172.22.65.33:15\",\"172.22.65.218:18\",\"172.22.65.21:13\",\"172.22.65.119:19\",\"172.22.65.58:9\",\"172.22.65.182:1\",\"172.22.65.100:6\",\"172.22.65.66:16\",\"172.22.65.68:5\",\"172.22.65.239:0\",\"172.22.65.131:8\"],\"oozie_server\":[\"14,15\",\"11\"],\"webhcat_server_host\":[\"19\",\"8\",\"11\"]}",
> "cluster_name": "cl1",
> "command_params": "{\"original_stack\":\"HDP-2.2\",\"upgrade_direction\":\"upgrade\",\"target_stack\":\"HDP-2.3\",\"forceRefreshConfigTagsBeforeExecution\":\"*\",\"version\":\"2.3.2.0-2844\"}",
> "context": "Service Check Hive",
> "end_time": -1,
> "group_id": 11,
> "host_params": "{\"ambari_db_rca_driver\":\"org.postgresql.Driver\",\"ambari_db_rca_password\":\"bigdatacustom\",\"ambari_db_rca_url\":\"jdbc:postgresql://172.22.65.184:5432/ambaricustom\",\"ambari_db_rca_username\":\"ambaricustomuser\",\"current_version\":\"2.2.6.0-2800\",\"db_driver_filename\":\"mysql-connector-java.jar\",\"db_name\":\"ambaricustom\",\"host_sys_prepped\":\"false\",\"java_home\":\"/usr/jdk64/jdk1.7.0_67\",\"java_version\":\"7\",\"jdk_location\":\"http://os-s11-3-ectjrs-c102dalsechanr-21.novalocal:8080/resources/\",\"mysql_jdbc_url\":\"http://os-s11-3-ectjrs-c102dalsechanr-21.novalocal:8080/resources//mysql-connector-java.jar\",\"oracle_jdbc_url\":\"http://os-s11-3-ectjrs-c102dalsechanr-21.novalocal:8080/resources//ojdbc6.jar\",\"stack_name\":\"HDP\",\"stack_version\":\"2.3\"}",
> "log_info": null,
> "progress_percent": 100,
> "request_id": 64,
> "skippable": true,
> "stage_id": 414,
> "start_time": 1442469303768,
> "status": "COMPLETED",
> "text": "Service Check Hive"
> },
> "tasks": [
> {
> "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/11/upgrade_items/414/tasks/3459",
> "Tasks": {
> "attempt_cnt": 5,
> "cluster_name": "cl1",
> "command": "SERVICE_CHECK",
> "command_detail": "SERVICE_CHECK HIVE",
> "end_time": -1,
> "error_log": "/var/lib/ambari-agent/data/errors-3459.txt",
> "exit_code": 999,
> "host_name": "os-s11-3-ectjrs-c102dalsechanr-8.novalocal",
> "id": 3459,
> "output_log": "/var/lib/ambari-agent/data/output-3459.txt",
> "request_id": 64,
> "role": "HIVE_SERVICE_CHECK",
> "stage_id": 414,
> "start_time": 1442469303768,
> "status": "FAILED",
> "stderr": "Python script has been killed due to timeout after waiting 300 secs",
> "stdout": "2015-09-17 05:55:08,128 - hadoop-client is currently at version 2.3.2.0-2844\n2015-09-17 05:55:08,168 - checked_call['conf-select set-conf-dir --package hadoop --stack-version 2.3.2.0-2844 --conf-version 0'] {'logoutput': False, 'sudo': True, 'quiet': False}\n2015-09-17 05:55:08,209 - checked_call returned (0, '/usr/hdp/2.3.2.0-2844/hadoop/conf -> /etc/hadoop/2.3.2.0-2844/0')\n2015-09-17 05:55:08,247 - hadoop-client is currently at version 2.3.2.0-2844\nTest connectivity to hive server\nWaiting for the Hive server to start...\n2015-09-17 05:55:08,346 - Execute['/usr/bin/kinit -kt /etc/security/keytabs/smokeuser.headless.keytab ambari-qa@EXAMPLE.COM; '] {'user': 'ambari-qa'}\n2015-09-17 05:55:08,415 - Execute['! beeline -u 'jdbc:hive2://os-s11-3-ectjrs-c102dalsechanr-20.novalocal:10010/;transportMode=binary;principal=hive/_HOST@EXAMPLE.COM' -e '' 2>&1| awk '{print}'|grep -i -e 'Connection refused' -e 'Invalid URL''] {'path': ['/bin/', '/usr/bin/', '/usr/lib/hive/bin/', '/usr/sbin/'], 'user': 'ambari-qa', 'timeout': 30}\nSuccessfully connected to os-s11-3-ectjrs-c102dalsechanr-20.novalocal on port 10010\nSuccessfully connected to Hive at os-s11-3-ectjrs-c102dalsechanr-8.novalocal on port 10010 after 8 seconds\n2015-09-17 05:55:16,613 - File['/var/lib/ambari-agent/tmp/hcatSmoke.sh'] {'content': StaticFile('hcatSmoke.sh'), 'mode': 0755}\n2015-09-17 05:55:16,614 - Execute['/usr/bin/kinit -kt /etc/security/keytabs/smokeuser.headless.keytab ambari-qa@EXAMPLE.COM; env JAVA_HOME=/usr/jdk64/jdk1.7.0_67 /var/lib/ambari-agent/tmp/hcatSmoke.sh hcatsmokeid16acef41_date551715 prepare true'] {'logoutput': True, 'path': ['/usr/sbin', '/usr/local/bin', '/bin', '/usr/bin', '/usr/sbin:/sbin:/usr/lib/ambari-server/*:/sbin:/usr/sbin:/usr/local/sbin:/usr/local/bin:/bin:/usr/bin:/usr/X11R6/bin:/var/lib/ambari-agent:/usr/hdp/2.3.2.0-2844/hadoop/bin:/usr/hdp/2.3.2.0-2844/hive/bin'], 'tries': 3, 'user': 'ambari-qa', 'try_sleep': 5}\nWARNING: Use \"yarn jar\" to launch YARN applications.\nivysettings.xml file not found in HIVE_HOME or HIVE_CONF_DIR,file:/grid/0/hdp/2.3.2.0-2844/hadoop/lib/hadoop-lzo-0.6.0.2.3.2.0-2844-sources.jar!/ivysettings.xml will be used",
> "structured_out": {}
> }
> }
> ]
> }
> {code}
> {code:title=ABORTED upgrade request}
> {
> "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64?fields=upgrade_groups/UpgradeGroup/*",
> "Upgrade": {
> "cluster_name": "cl1",
> "request_id": 64
> },
> "upgrade_groups": [
> {
> "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/1",
> "UpgradeGroup": {
> "completed_task_count": 5,
> "group_id": 1,
> "in_progress_task_count": 0,
> "name": "PRE_CLUSTER",
> "progress_percent": 100,
> "request_id": 64,
> "status": "COMPLETED",
> "title": "Prepare Upgrade",
> "total_task_count": 5
> }
> },
> {
> "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/2",
> "UpgradeGroup": {
> "completed_task_count": 4,
> "group_id": 2,
> "in_progress_task_count": 0,
> "name": "ZOOKEEPER",
> "progress_percent": 100,
> "request_id": 64,
> "status": "COMPLETED",
> "title": "ZooKeeper",
> "total_task_count": 4
> }
> },
> {
> "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/3",
> "UpgradeGroup": {
> "completed_task_count": 36,
> "group_id": 3,
> "in_progress_task_count": 0,
> "name": "CORE_MASTER",
> "progress_percent": 100,
> "request_id": 64,
> "status": "COMPLETED",
> "title": "Core Masters",
> "total_task_count": 36
> }
> },
> {
> "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/4",
> "UpgradeGroup": {
> "completed_task_count": 15,
> "group_id": 4,
> "in_progress_task_count": 0,
> "name": "SERVICE_CHECK",
> "progress_percent": 100,
> "request_id": 64,
> "status": "COMPLETED",
> "title": "All Service Checks",
> "total_task_count": 15
> }
> },
> {
> "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/5",
> "UpgradeGroup": {
> "completed_task_count": 61,
> "group_id": 5,
> "in_progress_task_count": 0,
> "name": "CORE_SLAVES",
> "progress_percent": 100,
> "request_id": 64,
> "status": "COMPLETED",
> "title": "Core Slaves",
> "total_task_count": 61
> }
> },
> {
> "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/6",
> "UpgradeGroup": {
> "completed_task_count": 15,
> "group_id": 6,
> "in_progress_task_count": 0,
> "name": "SERVICE_CHECK",
> "progress_percent": 100,
> "request_id": 64,
> "status": "COMPLETED",
> "title": "All Service Checks",
> "total_task_count": 15
> }
> },
> {
> "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/7",
> "UpgradeGroup": {
> "completed_task_count": 21,
> "group_id": 7,
> "in_progress_task_count": 0,
> "name": "HIVE",
> "progress_percent": 100,
> "request_id": 64,
> "status": "COMPLETED",
> "title": "Hive",
> "total_task_count": 21
> }
> },
> {
> "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/8",
> "UpgradeGroup": {
> "completed_task_count": 28,
> "group_id": 8,
> "in_progress_task_count": 0,
> "name": "OOZIE",
> "progress_percent": 100,
> "request_id": 64,
> "status": "COMPLETED",
> "title": "Oozie",
> "total_task_count": 28
> }
> },
> {
> "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/9",
> "UpgradeGroup": {
> "completed_task_count": 22,
> "group_id": 9,
> "in_progress_task_count": 0,
> "name": "FALCON",
> "progress_percent": 100,
> "request_id": 64,
> "status": "COMPLETED",
> "title": "Falcon",
> "total_task_count": 22
> }
> },
> {
> "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/10",
> "UpgradeGroup": {
> "completed_task_count": 201,
> "group_id": 10,
> "in_progress_task_count": 0,
> "name": "CLIENTS",
> "progress_percent": 100,
> "request_id": 64,
> "status": "COMPLETED",
> "title": "Client Components",
> "total_task_count": 201
> }
> },
> {
> "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/11",
> "UpgradeGroup": {
> "completed_task_count": 22,
> "group_id": 11,
> "in_progress_task_count": 0,
> "name": "SERVICE_CHECK",
> "progress_percent": 100,
> "request_id": 64,
> "status": "ABORTED",
> "title": "All Service Checks",
> "total_task_count": 22
> }
> },
> {
> "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/12",
> "UpgradeGroup": {
> "completed_task_count": 10,
> "group_id": 12,
> "in_progress_task_count": 0,
> "name": "KNOX",
> "progress_percent": 100,
> "request_id": 64,
> "status": "ABORTED",
> "title": "Knox",
> "total_task_count": 10
> }
> },
> {
> "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/13",
> "UpgradeGroup": {
> "completed_task_count": 40,
> "group_id": 13,
> "in_progress_task_count": 0,
> "name": "SLIDER",
> "progress_percent": 100,
> "request_id": 64,
> "status": "ABORTED",
> "title": "Slider",
> "total_task_count": 40
> }
> },
> {
> "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/14",
> "UpgradeGroup": {
> "completed_task_count": 42,
> "group_id": 14,
> "in_progress_task_count": 0,
> "name": "FLUME",
> "progress_percent": 100,
> "request_id": 64,
> "status": "ABORTED",
> "title": "Flume",
> "total_task_count": 42
> }
> },
> {
> "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/15",
> "UpgradeGroup": {
> "completed_task_count": 40,
> "group_id": 15,
> "in_progress_task_count": 0,
> "name": "ALL_HOST_OPS",
> "progress_percent": 100,
> "request_id": 64,
> "status": "ABORTED",
> "title": "Finalize Hosts",
> "total_task_count": 40
> }
> },
> {
> "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/16",
> "UpgradeGroup": {
> "completed_task_count": 6,
> "group_id": 16,
> "in_progress_task_count": 0,
> "name": "POST_CLUSTER",
> "progress_percent": 100,
> "request_id": 64,
> "status": "ABORTED",
> "title": "Finalize Upgrade",
> "total_task_count": 6
> }
> }
> ]
> }
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)