You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@ambari.apache.org by "Jonathan Hurley (JIRA)" <ji...@apache.org> on 2015/09/18 16:38:04 UTC

[jira] [Created] (AMBARI-13145) RU - Skipping failed task caused remaining pending tasks to be ABORTED

Jonathan Hurley created AMBARI-13145:
----------------------------------------

             Summary: RU - Skipping failed task caused remaining pending tasks to be ABORTED
                 Key: AMBARI-13145
                 URL: https://issues.apache.org/jira/browse/AMBARI-13145
             Project: Ambari
          Issue Type: Bug
          Components: ambari-server
    Affects Versions: 2.1.0
            Reporter: Jonathan Hurley
            Assignee: Jonathan Hurley
            Priority: Blocker
             Fix For: 2.1.2


Skipping a failed task during an upgrade causes the remaining pending tasks, and with them the entire upgrade request, to become ABORTED.

{code:title=Failed & Skipped command}
{
  "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/11/upgrade_items/414?fields=UpgradeItem/*,tasks/Tasks/*",
  "UpgradeItem": {
    "cluster_host_info": "{\"hs_host\":[\"9\"],\"namenode_host\":[\"17\",\"14\"],\"hive_metastore_host\":[\"19\",\"8\",\"11\"],\"hbase_rs_hosts\":[\"0-19\"],\"zookeeper_hosts\":[\"3\",\"14\",\"11\"],\"metrics_monitor_hosts\":[\"0-19\"],\"rm_host\":[\"16\",\"14\"],\"slave_hosts\":[\"0-19\"],\"app_timeline_server_hosts\":[\"6\"],\"ambari_server_host\":[\"os-s11-3-ectjrs-c102dalsechanr-21.novalocal\"],\"metrics_collector_hosts\":[\"3\"],\"all_ping_ports\":[\"8670:0-19\"],\"all_racks\":[\"/default-rack:0-19\"],\"knox_gateway_hosts\":[\"7\"],\"flume_hosts\":[\"0-19\"],\"falcon_server_hosts\":[\"15\"],\"hbase_master_hosts\":[\"19\",\"8\",\"14\"],\"journalnode_hosts\":[\"14,15\",\"11\"],\"nm_hosts\":[\"0-19\"],\"hive_server_host\":[\"19\",\"8\",\"11\"],\"zkfc_hosts\":[\"17\",\"14\"],\"all_hosts\":[\"os-s11-3-ectjrs-c102dalsechanr-8.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-2.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-5.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-1.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-3.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-16.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-19.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-17.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-21.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-14.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-18.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-11.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-7.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-9.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-10.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-12.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-15.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-13.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-6.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-20.novalocal\"],\"all_ipv4_ips\":[\"172.22.65.185:4\",\"172.22.65.222:12\",\"172.22.65.95:7\",\"172.22.65.225:2\",\"172.22.65.10:11\",\"172.22.65.46:17\",\"172.22.65.109:10\",\"172.22.65.17:14\",\"172.22.65.181:3\",\"172.22.65.33:15\",\"172.22.65.218:18\",\"172.22.65.21:13\",\"172.22.65.119:19\",\"172.22.65.58:9\",\"172.22.65.182:1\",\"172.22.65.100:6\",\"172.22.65.66:16\",\"172.22.65.68:5\",\"172.22.65.239:0\",\"172.22.65.131:8\"],\"oozie_server\":[\"14,15\",\"11\"],\"webhcat_server_host\":[\"19\",\"8\",\"11\"]}",
    "cluster_name": "cl1",
    "command_params": "{\"original_stack\":\"HDP-2.2\",\"upgrade_direction\":\"upgrade\",\"target_stack\":\"HDP-2.3\",\"forceRefreshConfigTagsBeforeExecution\":\"*\",\"version\":\"2.3.2.0-2844\"}",
    "context": "Service Check Hive",
    "end_time": -1,
    "group_id": 11,
    "host_params": "{\"ambari_db_rca_driver\":\"org.postgresql.Driver\",\"ambari_db_rca_password\":\"bigdatacustom\",\"ambari_db_rca_url\":\"jdbc:postgresql://172.22.65.184:5432/ambaricustom\",\"ambari_db_rca_username\":\"ambaricustomuser\",\"current_version\":\"2.2.6.0-2800\",\"db_driver_filename\":\"mysql-connector-java.jar\",\"db_name\":\"ambaricustom\",\"host_sys_prepped\":\"false\",\"java_home\":\"/usr/jdk64/jdk1.7.0_67\",\"java_version\":\"7\",\"jdk_location\":\"http://os-s11-3-ectjrs-c102dalsechanr-21.novalocal:8080/resources/\",\"mysql_jdbc_url\":\"http://os-s11-3-ectjrs-c102dalsechanr-21.novalocal:8080/resources//mysql-connector-java.jar\",\"oracle_jdbc_url\":\"http://os-s11-3-ectjrs-c102dalsechanr-21.novalocal:8080/resources//ojdbc6.jar\",\"stack_name\":\"HDP\",\"stack_version\":\"2.3\"}",
    "log_info": null,
    "progress_percent": 100,
    "request_id": 64,
    "skippable": true,
    "stage_id": 414,
    "start_time": 1442469303768,
    "status": "COMPLETED",
    "text": "Service Check Hive"
  },
  "tasks": [
    {
      "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/11/upgrade_items/414/tasks/3459",
      "Tasks": {
        "attempt_cnt": 5,
        "cluster_name": "cl1",
        "command": "SERVICE_CHECK",
        "command_detail": "SERVICE_CHECK HIVE",
        "end_time": -1,
        "error_log": "/var/lib/ambari-agent/data/errors-3459.txt",
        "exit_code": 999,
        "host_name": "os-s11-3-ectjrs-c102dalsechanr-8.novalocal",
        "id": 3459,
        "output_log": "/var/lib/ambari-agent/data/output-3459.txt",
        "request_id": 64,
        "role": "HIVE_SERVICE_CHECK",
        "stage_id": 414,
        "start_time": 1442469303768,
        "status": "FAILED",
        "stderr": "Python script has been killed due to timeout after waiting 300 secs",
        "stdout": "2015-09-17 05:55:08,128 - hadoop-client is currently at version 2.3.2.0-2844\n2015-09-17 05:55:08,168 - checked_call['conf-select set-conf-dir --package hadoop --stack-version 2.3.2.0-2844 --conf-version 0'] {'logoutput': False, 'sudo': True, 'quiet': False}\n2015-09-17 05:55:08,209 - checked_call returned (0, '/usr/hdp/2.3.2.0-2844/hadoop/conf -> /etc/hadoop/2.3.2.0-2844/0')\n2015-09-17 05:55:08,247 - hadoop-client is currently at version 2.3.2.0-2844\nTest connectivity to hive server\nWaiting for the Hive server to start...\n2015-09-17 05:55:08,346 - Execute['/usr/bin/kinit -kt /etc/security/keytabs/smokeuser.headless.keytab ambari-qa@EXAMPLE.COM; '] {'user': 'ambari-qa'}\n2015-09-17 05:55:08,415 - Execute['! beeline -u 'jdbc:hive2://os-s11-3-ectjrs-c102dalsechanr-20.novalocal:10010/;transportMode=binary;principal=hive/_HOST@EXAMPLE.COM' -e '' 2>&1| awk '{print}'|grep -i -e 'Connection refused' -e 'Invalid URL''] {'path': ['/bin/', '/usr/bin/', '/usr/lib/hive/bin/', '/usr/sbin/'], 'user': 'ambari-qa', 'timeout': 30}\nSuccessfully connected to os-s11-3-ectjrs-c102dalsechanr-20.novalocal on port 10010\nSuccessfully connected to Hive at os-s11-3-ectjrs-c102dalsechanr-8.novalocal on port 10010 after 8 seconds\n2015-09-17 05:55:16,613 - File['/var/lib/ambari-agent/tmp/hcatSmoke.sh'] {'content': StaticFile('hcatSmoke.sh'), 'mode': 0755}\n2015-09-17 05:55:16,614 - Execute['/usr/bin/kinit -kt /etc/security/keytabs/smokeuser.headless.keytab ambari-qa@EXAMPLE.COM; env JAVA_HOME=/usr/jdk64/jdk1.7.0_67 /var/lib/ambari-agent/tmp/hcatSmoke.sh hcatsmokeid16acef41_date551715 prepare true'] {'logoutput': True, 'path': ['/usr/sbin', '/usr/local/bin', '/bin', '/usr/bin', '/usr/sbin:/sbin:/usr/lib/ambari-server/*:/sbin:/usr/sbin:/usr/local/sbin:/usr/local/bin:/bin:/usr/bin:/usr/X11R6/bin:/var/lib/ambari-agent:/usr/hdp/2.3.2.0-2844/hadoop/bin:/usr/hdp/2.3.2.0-2844/hive/bin'], 'tries': 3, 'user': 'ambari-qa', 'try_sleep': 5}\nWARNING: Use \"yarn jar\" to launch YARN applications.\nivysettings.xml file not found in HIVE_HOME or HIVE_CONF_DIR,file:/grid/0/hdp/2.3.2.0-2844/hadoop/lib/hadoop-lzo-0.6.0.2.3.2.0-2844-sources.jar!/ivysettings.xml will be used",
        "structured_out": {}
      }
    }
  ]
}
{code}

{code:title=ABORTED upgrade request}
{
  "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64?fields=upgrade_groups/UpgradeGroup/*",
  "Upgrade": {
    "cluster_name": "cl1",
    "request_id": 64
  },
  "upgrade_groups": [
    {
      "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/1",
      "UpgradeGroup": {
        "completed_task_count": 5,
        "group_id": 1,
        "in_progress_task_count": 0,
        "name": "PRE_CLUSTER",
        "progress_percent": 100,
        "request_id": 64,
        "status": "COMPLETED",
        "title": "Prepare Upgrade",
        "total_task_count": 5
      }
    },
    {
      "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/2",
      "UpgradeGroup": {
        "completed_task_count": 4,
        "group_id": 2,
        "in_progress_task_count": 0,
        "name": "ZOOKEEPER",
        "progress_percent": 100,
        "request_id": 64,
        "status": "COMPLETED",
        "title": "ZooKeeper",
        "total_task_count": 4
      }
    },
    {
      "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/3",
      "UpgradeGroup": {
        "completed_task_count": 36,
        "group_id": 3,
        "in_progress_task_count": 0,
        "name": "CORE_MASTER",
        "progress_percent": 100,
        "request_id": 64,
        "status": "COMPLETED",
        "title": "Core Masters",
        "total_task_count": 36
      }
    },
    {
      "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/4",
      "UpgradeGroup": {
        "completed_task_count": 15,
        "group_id": 4,
        "in_progress_task_count": 0,
        "name": "SERVICE_CHECK",
        "progress_percent": 100,
        "request_id": 64,
        "status": "COMPLETED",
        "title": "All Service Checks",
        "total_task_count": 15
      }
    },
    {
      "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/5",
      "UpgradeGroup": {
        "completed_task_count": 61,
        "group_id": 5,
        "in_progress_task_count": 0,
        "name": "CORE_SLAVES",
        "progress_percent": 100,
        "request_id": 64,
        "status": "COMPLETED",
        "title": "Core Slaves",
        "total_task_count": 61
      }
    },
    {
      "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/6",
      "UpgradeGroup": {
        "completed_task_count": 15,
        "group_id": 6,
        "in_progress_task_count": 0,
        "name": "SERVICE_CHECK",
        "progress_percent": 100,
        "request_id": 64,
        "status": "COMPLETED",
        "title": "All Service Checks",
        "total_task_count": 15
      }
    },
    {
      "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/7",
      "UpgradeGroup": {
        "completed_task_count": 21,
        "group_id": 7,
        "in_progress_task_count": 0,
        "name": "HIVE",
        "progress_percent": 100,
        "request_id": 64,
        "status": "COMPLETED",
        "title": "Hive",
        "total_task_count": 21
      }
    },
    {
      "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/8",
      "UpgradeGroup": {
        "completed_task_count": 28,
        "group_id": 8,
        "in_progress_task_count": 0,
        "name": "OOZIE",
        "progress_percent": 100,
        "request_id": 64,
        "status": "COMPLETED",
        "title": "Oozie",
        "total_task_count": 28
      }
    },
    {
      "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/9",
      "UpgradeGroup": {
        "completed_task_count": 22,
        "group_id": 9,
        "in_progress_task_count": 0,
        "name": "FALCON",
        "progress_percent": 100,
        "request_id": 64,
        "status": "COMPLETED",
        "title": "Falcon",
        "total_task_count": 22
      }
    },
    {
      "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/10",
      "UpgradeGroup": {
        "completed_task_count": 201,
        "group_id": 10,
        "in_progress_task_count": 0,
        "name": "CLIENTS",
        "progress_percent": 100,
        "request_id": 64,
        "status": "COMPLETED",
        "title": "Client Components",
        "total_task_count": 201
      }
    },
    {
      "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/11",
      "UpgradeGroup": {
        "completed_task_count": 22,
        "group_id": 11,
        "in_progress_task_count": 0,
        "name": "SERVICE_CHECK",
        "progress_percent": 100,
        "request_id": 64,
        "status": "ABORTED",
        "title": "All Service Checks",
        "total_task_count": 22
      }
    },
    {
      "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/12",
      "UpgradeGroup": {
        "completed_task_count": 10,
        "group_id": 12,
        "in_progress_task_count": 0,
        "name": "KNOX",
        "progress_percent": 100,
        "request_id": 64,
        "status": "ABORTED",
        "title": "Knox",
        "total_task_count": 10
      }
    },
    {
      "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/13",
      "UpgradeGroup": {
        "completed_task_count": 40,
        "group_id": 13,
        "in_progress_task_count": 0,
        "name": "SLIDER",
        "progress_percent": 100,
        "request_id": 64,
        "status": "ABORTED",
        "title": "Slider",
        "total_task_count": 40
      }
    },
    {
      "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/14",
      "UpgradeGroup": {
        "completed_task_count": 42,
        "group_id": 14,
        "in_progress_task_count": 0,
        "name": "FLUME",
        "progress_percent": 100,
        "request_id": 64,
        "status": "ABORTED",
        "title": "Flume",
        "total_task_count": 42
      }
    },
    {
      "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/15",
      "UpgradeGroup": {
        "completed_task_count": 40,
        "group_id": 15,
        "in_progress_task_count": 0,
        "name": "ALL_HOST_OPS",
        "progress_percent": 100,
        "request_id": 64,
        "status": "ABORTED",
        "title": "Finalize Hosts",
        "total_task_count": 40
      }
    },
    {
      "href": "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/16",
      "UpgradeGroup": {
        "completed_task_count": 6,
        "group_id": 16,
        "in_progress_task_count": 0,
        "name": "POST_CLUSTER",
        "progress_percent": 100,
        "request_id": 64,
        "status": "ABORTED",
        "title": "Finalize Upgrade",
        "total_task_count": 6
      }
    }
  ]
}
{code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)