You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@ambari.apache.org by "Hudson (JIRA)" <ji...@apache.org> on 2015/09/19 00:51:04 UTC

[jira] [Commented] (AMBARI-13145) RU - Skipping failed task caused remaining pending tasks to be ABORTED

    [ https://issues.apache.org/jira/browse/AMBARI-13145?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14876630#comment-14876630 ] 

Hudson commented on AMBARI-13145:
---------------------------------

FAILURE: Integrated in Ambari-trunk-Commit #3467 (See [https://builds.apache.org/job/Ambari-trunk-Commit/3467/])
AMBARI-13145 - RU - Skipping failed task caused remaining pending tasks to be ABORTED (jonathanhurley) (jhurley: http://git-wip-us.apache.org/repos/asf?p=ambari.git&a=commit&h=9dd623abb78e094bbf6ab5fcd4763cf2efa96c4b)
* ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionScheduler.java
* ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java


> RU - Skipping failed task caused remaining pending tasks to be ABORTED
> ----------------------------------------------------------------------
>
>                 Key: AMBARI-13145
>                 URL: https://issues.apache.org/jira/browse/AMBARI-13145
>             Project: Ambari
>          Issue Type: Bug
>          Components: ambari-server
>    Affects Versions: 2.1.0
>            Reporter: Jonathan Hurley
>            Assignee: Jonathan Hurley
>            Priority: Blocker
>             Fix For: 2.1.2
>
>         Attachments: AMBARI-13145.patch
>
>
> Skipping a failed task during an upgrade causes the remaining pending tasks, and therefore the entire upgrade request, to become ABORTED.
> {code:title=Failed & Skipped command}
> {
>   "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/11/upgrade_items/414?fields=UpgradeItem/*,tasks/Tasks/*",
>   "UpgradeItem": {
>     "cluster_host_info": "{\"hs_host\":[\"9\"],\"namenode_host\":[\"17\",\"14\"],\"hive_metastore_host\":[\"19\",\"8\",\"11\"],\"hbase_rs_hosts\":[\"0-19\"],\"zookeeper_hosts\":[\"3\",\"14\",\"11\"],\"metrics_monitor_hosts\":[\"0-19\"],\"rm_host\":[\"16\",\"14\"],\"slave_hosts\":[\"0-19\"],\"app_timeline_server_hosts\":[\"6\"],\"ambari_server_host\":[\"os-s11-3-ectjrs-c102dalsechanr-21.novalocal\"],\"metrics_collector_hosts\":[\"3\"],\"all_ping_ports\":[\"8670:0-19\"],\"all_racks\":[\"/default-rack:0-19\"],\"knox_gateway_hosts\":[\"7\"],\"flume_hosts\":[\"0-19\"],\"falcon_server_hosts\":[\"15\"],\"hbase_master_hosts\":[\"19\",\"8\",\"14\"],\"journalnode_hosts\":[\"14,15\",\"11\"],\"nm_hosts\":[\"0-19\"],\"hive_server_host\":[\"19\",\"8\",\"11\"],\"zkfc_hosts\":[\"17\",\"14\"],\"all_hosts\":[\"os-s11-3-ectjrs-c102dalsechanr-8.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-2.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-5.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-1.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-3.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-16.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-19.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-17.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-21.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-14.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-18.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-11.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-7.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-9.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-10.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-12.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-15.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-13.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-6.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-20.novalocal\"],\"all_ipv4_ips\":[\"172.22.65.185:4\",\"172.22.65.222:12\",\"172.22.65.95:7\",\"172.22.65.225:2\",\"172.22.65.10:11\",\"172.22.65.46:17\",\"172.22.65.109:10\",\"172.22.65.17:14\",\"172.22.65.181:3\",\"172.22.65.33:15\",\"172.22.65.218:18\",\"172.22.65.21:13\",\"172.22.65.119:19\",\"172.22.65.58:9\",\"172.22.65.182:1\",\"172.22.65.100:6\",\"172.22.65.66:16\",\"172.22.65.68:5\",\"172.22.65.239:0\",\"172.22.65.131:8\"],\"oozie_server\":[\"14,15\",\"11\"],\"webhcat_server_host\":[\"19\",\"8\",\"11\"]}",
>     "cluster_name": "cl1",
>     "command_params": "{\"original_stack\":\"HDP-2.2\",\"upgrade_direction\":\"upgrade\",\"target_stack\":\"HDP-2.3\",\"forceRefreshConfigTagsBeforeExecution\":\"*\",\"version\":\"2.3.2.0-2844\"}",
>     "context": "Service Check Hive",
>     "end_time": -1,
>     "group_id": 11,
>     "host_params": "{\"ambari_db_rca_driver\":\"org.postgresql.Driver\",\"ambari_db_rca_password\":\"bigdatacustom\",\"ambari_db_rca_url\":\"jdbc:postgresql://172.22.65.184:5432/ambaricustom\",\"ambari_db_rca_username\":\"ambaricustomuser\",\"current_version\":\"2.2.6.0-2800\",\"db_driver_filename\":\"mysql-connector-java.jar\",\"db_name\":\"ambaricustom\",\"host_sys_prepped\":\"false\",\"java_home\":\"/usr/jdk64/jdk1.7.0_67\",\"java_version\":\"7\",\"jdk_location\":\"http://os-s11-3-ectjrs-c102dalsechanr-21.novalocal:8080/resources/\",\"mysql_jdbc_url\":\"http://os-s11-3-ectjrs-c102dalsechanr-21.novalocal:8080/resources//mysql-connector-java.jar\",\"oracle_jdbc_url\":\"http://os-s11-3-ectjrs-c102dalsechanr-21.novalocal:8080/resources//ojdbc6.jar\",\"stack_name\":\"HDP\",\"stack_version\":\"2.3\"}",
>     "log_info": null,
>     "progress_percent": 100,
>     "request_id": 64,
>     "skippable": true,
>     "stage_id": 414,
>     "start_time": 1442469303768,
>     "status": "COMPLETED",
>     "text": "Service Check Hive"
>   },
>   "tasks": [
>     {
>       "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/11/upgrade_items/414/tasks/3459",
>       "Tasks": {
>         "attempt_cnt": 5,
>         "cluster_name": "cl1",
>         "command": "SERVICE_CHECK",
>         "command_detail": "SERVICE_CHECK HIVE",
>         "end_time": -1,
>         "error_log": "/var/lib/ambari-agent/data/errors-3459.txt",
>         "exit_code": 999,
>         "host_name": "os-s11-3-ectjrs-c102dalsechanr-8.novalocal",
>         "id": 3459,
>         "output_log": "/var/lib/ambari-agent/data/output-3459.txt",
>         "request_id": 64,
>         "role": "HIVE_SERVICE_CHECK",
>         "stage_id": 414,
>         "start_time": 1442469303768,
>         "status": "FAILED",
>         "stderr": "Python script has been killed due to timeout after waiting 300 secs",
>         "stdout": "2015-09-17 05:55:08,128 - hadoop-client is currently at version 2.3.2.0-2844\n2015-09-17 05:55:08,168 - checked_call['conf-select set-conf-dir --package hadoop --stack-version 2.3.2.0-2844 --conf-version 0'] {'logoutput': False, 'sudo': True, 'quiet': False}\n2015-09-17 05:55:08,209 - checked_call returned (0, '/usr/hdp/2.3.2.0-2844/hadoop/conf -> /etc/hadoop/2.3.2.0-2844/0')\n2015-09-17 05:55:08,247 - hadoop-client is currently at version 2.3.2.0-2844\nTest connectivity to hive server\nWaiting for the Hive server to start...\n2015-09-17 05:55:08,346 - Execute['/usr/bin/kinit -kt /etc/security/keytabs/smokeuser.headless.keytab ambari-qa@EXAMPLE.COM; '] {'user': 'ambari-qa'}\n2015-09-17 05:55:08,415 - Execute['! beeline -u 'jdbc:hive2://os-s11-3-ectjrs-c102dalsechanr-20.novalocal:10010/;transportMode=binary;principal=hive/_HOST@EXAMPLE.COM' -e '' 2>&1| awk '{print}'|grep -i -e 'Connection refused' -e 'Invalid URL''] {'path': ['/bin/', '/usr/bin/', '/usr/lib/hive/bin/', '/usr/sbin/'], 'user': 'ambari-qa', 'timeout': 30}\nSuccessfully connected to os-s11-3-ectjrs-c102dalsechanr-20.novalocal on port 10010\nSuccessfully connected to Hive at os-s11-3-ectjrs-c102dalsechanr-8.novalocal on port 10010 after 8 seconds\n2015-09-17 05:55:16,613 - File['/var/lib/ambari-agent/tmp/hcatSmoke.sh'] {'content': StaticFile('hcatSmoke.sh'), 'mode': 0755}\n2015-09-17 05:55:16,614 - Execute['/usr/bin/kinit -kt /etc/security/keytabs/smokeuser.headless.keytab ambari-qa@EXAMPLE.COM; env JAVA_HOME=/usr/jdk64/jdk1.7.0_67 /var/lib/ambari-agent/tmp/hcatSmoke.sh hcatsmokeid16acef41_date551715 prepare true'] {'logoutput': True, 'path': ['/usr/sbin', '/usr/local/bin', '/bin', '/usr/bin', '/usr/sbin:/sbin:/usr/lib/ambari-server/*:/sbin:/usr/sbin:/usr/local/sbin:/usr/local/bin:/bin:/usr/bin:/usr/X11R6/bin:/var/lib/ambari-agent:/usr/hdp/2.3.2.0-2844/hadoop/bin:/usr/hdp/2.3.2.0-2844/hive/bin'], 'tries': 3, 'user': 'ambari-qa', 'try_sleep': 5}\nWARNING: Use \"yarn jar\" to launch YARN applications.\nivysettings.xml file not found in HIVE_HOME or HIVE_CONF_DIR,file:/grid/0/hdp/2.3.2.0-2844/hadoop/lib/hadoop-lzo-0.6.0.2.3.2.0-2844-sources.jar!/ivysettings.xml will be used",
>         "structured_out": {}
>       }
>     }
>   ]
> }
> {code}
> {code:title=ABORTED upgrade request}
> {
>   "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64?fields=upgrade_groups/UpgradeGroup/*",
>   "Upgrade": {
>     "cluster_name": "cl1",
>     "request_id": 64
>   },
>   "upgrade_groups": [
>     {
>       "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/1",
>       "UpgradeGroup": {
>         "completed_task_count": 5,
>         "group_id": 1,
>         "in_progress_task_count": 0,
>         "name": "PRE_CLUSTER",
>         "progress_percent": 100,
>         "request_id": 64,
>         "status": "COMPLETED",
>         "title": "Prepare Upgrade",
>         "total_task_count": 5
>       }
>     },
>     {
>       "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/2",
>       "UpgradeGroup": {
>         "completed_task_count": 4,
>         "group_id": 2,
>         "in_progress_task_count": 0,
>         "name": "ZOOKEEPER",
>         "progress_percent": 100,
>         "request_id": 64,
>         "status": "COMPLETED",
>         "title": "ZooKeeper",
>         "total_task_count": 4
>       }
>     },
>     {
>       "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/3",
>       "UpgradeGroup": {
>         "completed_task_count": 36,
>         "group_id": 3,
>         "in_progress_task_count": 0,
>         "name": "CORE_MASTER",
>         "progress_percent": 100,
>         "request_id": 64,
>         "status": "COMPLETED",
>         "title": "Core Masters",
>         "total_task_count": 36
>       }
>     },
>     {
>       "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/4",
>       "UpgradeGroup": {
>         "completed_task_count": 15,
>         "group_id": 4,
>         "in_progress_task_count": 0,
>         "name": "SERVICE_CHECK",
>         "progress_percent": 100,
>         "request_id": 64,
>         "status": "COMPLETED",
>         "title": "All Service Checks",
>         "total_task_count": 15
>       }
>     },
>     {
>       "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/5",
>       "UpgradeGroup": {
>         "completed_task_count": 61,
>         "group_id": 5,
>         "in_progress_task_count": 0,
>         "name": "CORE_SLAVES",
>         "progress_percent": 100,
>         "request_id": 64,
>         "status": "COMPLETED",
>         "title": "Core Slaves",
>         "total_task_count": 61
>       }
>     },
>     {
>       "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/6",
>       "UpgradeGroup": {
>         "completed_task_count": 15,
>         "group_id": 6,
>         "in_progress_task_count": 0,
>         "name": "SERVICE_CHECK",
>         "progress_percent": 100,
>         "request_id": 64,
>         "status": "COMPLETED",
>         "title": "All Service Checks",
>         "total_task_count": 15
>       }
>     },
>     {
>       "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/7",
>       "UpgradeGroup": {
>         "completed_task_count": 21,
>         "group_id": 7,
>         "in_progress_task_count": 0,
>         "name": "HIVE",
>         "progress_percent": 100,
>         "request_id": 64,
>         "status": "COMPLETED",
>         "title": "Hive",
>         "total_task_count": 21
>       }
>     },
>     {
>       "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/8",
>       "UpgradeGroup": {
>         "completed_task_count": 28,
>         "group_id": 8,
>         "in_progress_task_count": 0,
>         "name": "OOZIE",
>         "progress_percent": 100,
>         "request_id": 64,
>         "status": "COMPLETED",
>         "title": "Oozie",
>         "total_task_count": 28
>       }
>     },
>     {
>       "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/9",
>       "UpgradeGroup": {
>         "completed_task_count": 22,
>         "group_id": 9,
>         "in_progress_task_count": 0,
>         "name": "FALCON",
>         "progress_percent": 100,
>         "request_id": 64,
>         "status": "COMPLETED",
>         "title": "Falcon",
>         "total_task_count": 22
>       }
>     },
>     {
>       "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/10",
>       "UpgradeGroup": {
>         "completed_task_count": 201,
>         "group_id": 10,
>         "in_progress_task_count": 0,
>         "name": "CLIENTS",
>         "progress_percent": 100,
>         "request_id": 64,
>         "status": "COMPLETED",
>         "title": "Client Components",
>         "total_task_count": 201
>       }
>     },
>     {
>       "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/11",
>       "UpgradeGroup": {
>         "completed_task_count": 22,
>         "group_id": 11,
>         "in_progress_task_count": 0,
>         "name": "SERVICE_CHECK",
>         "progress_percent": 100,
>         "request_id": 64,
>         "status": "ABORTED",
>         "title": "All Service Checks",
>         "total_task_count": 22
>       }
>     },
>     {
>       "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/12",
>       "UpgradeGroup": {
>         "completed_task_count": 10,
>         "group_id": 12,
>         "in_progress_task_count": 0,
>         "name": "KNOX",
>         "progress_percent": 100,
>         "request_id": 64,
>         "status": "ABORTED",
>         "title": "Knox",
>         "total_task_count": 10
>       }
>     },
>     {
>       "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/13",
>       "UpgradeGroup": {
>         "completed_task_count": 40,
>         "group_id": 13,
>         "in_progress_task_count": 0,
>         "name": "SLIDER",
>         "progress_percent": 100,
>         "request_id": 64,
>         "status": "ABORTED",
>         "title": "Slider",
>         "total_task_count": 40
>       }
>     },
>     {
>       "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/14",
>       "UpgradeGroup": {
>         "completed_task_count": 42,
>         "group_id": 14,
>         "in_progress_task_count": 0,
>         "name": "FLUME",
>         "progress_percent": 100,
>         "request_id": 64,
>         "status": "ABORTED",
>         "title": "Flume",
>         "total_task_count": 42
>       }
>     },
>     {
>       "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/15",
>       "UpgradeGroup": {
>         "completed_task_count": 40,
>         "group_id": 15,
>         "in_progress_task_count": 0,
>         "name": "ALL_HOST_OPS",
>         "progress_percent": 100,
>         "request_id": 64,
>         "status": "ABORTED",
>         "title": "Finalize Hosts",
>         "total_task_count": 40
>       }
>     },
>     {
>       "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/16",
>       "UpgradeGroup": {
>         "completed_task_count": 6,
>         "group_id": 16,
>         "in_progress_task_count": 0,
>         "name": "POST_CLUSTER",
>         "progress_percent": 100,
>         "request_id": 64,
>         "status": "ABORTED",
>         "title": "Finalize Upgrade",
>         "total_task_count": 6
>       }
>     }
>   ]
> }
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)