You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/11/04 16:10:32 UTC
svn commit: r1636599 - in
/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark: ./
Statistic/ status/ status/impl/
Author: xuefu
Date: Tue Nov 4 15:10:32 2014
New Revision: 1636599
URL: http://svn.apache.org/r1636599
Log:
HIVE-8670: Combine Hive operator statistics and Spark metrics into a uniform query statistic. [Spark Branch] (Chengxiang via Xuefu)
Added:
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/Statistic/
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/Statistic/SparkStatistic.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/Statistic/SparkStatisticGroup.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/Statistic/SparkStatistics.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/Statistic/SparkStatisticsBuilder.java
Modified:
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/SparkJobStatus.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/SimpleSparkJobStatus.java
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java?rev=1636599&r1=1636598&r2=1636599&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java Tue Nov 4 15:10:32 2014
@@ -22,6 +22,7 @@ import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
@@ -36,6 +37,9 @@ import org.apache.hadoop.hive.ql.exec.Op
import org.apache.hadoop.hive.ql.exec.StatsTask;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatistic;
+import org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatisticGroup;
+import org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatistics;
import org.apache.hadoop.hive.ql.exec.spark.counter.SparkCounters;
import org.apache.hadoop.hive.ql.exec.spark.session.SparkSession;
import org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionManager;
@@ -103,7 +107,10 @@ public class SparkTask extends Task<Spar
sparkCounters = jobRef.getSparkJobStatus().getCounter();
SparkJobMonitor monitor = new SparkJobMonitor(jobRef.getSparkJobStatus());
monitor.startMonitor();
- console.printInfo(sparkCounters.toString());
+ SparkStatistics sparkStatistics = jobRef.getSparkJobStatus().getSparkStatistics();
+ if (LOG.isInfoEnabled() && sparkStatistics != null) {
+ logSparkStatistic(sparkStatistics);
+ }
rc = 0;
} catch (Exception e) {
LOG.error("Failed to execute spark task.", e);
@@ -121,6 +128,19 @@ public class SparkTask extends Task<Spar
return rc;
}
+  /** Logs each group name, then its statistics as tab-indented "name: value" lines, at INFO. */
+  private void logSparkStatistic(SparkStatistics sparkStatistic) {
+    for (Iterator<SparkStatisticGroup> groups = sparkStatistic.getStatisticGroups();
+        groups.hasNext();) {
+      SparkStatisticGroup currentGroup = groups.next();
+      LOG.info(currentGroup.getGroupName());
+      for (Iterator<SparkStatistic> stats = currentGroup.getStatistics(); stats.hasNext();) {
+        SparkStatistic entry = stats.next();
+        LOG.info("\t" + entry.getName() + ": " + entry.getValue());
+      }
+    }
+  }
+
/**
* close will move the temp files into the right place for the fetch
* task. If the job has failed it will clean up the files.
Added: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/Statistic/SparkStatistic.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/Statistic/SparkStatistic.java?rev=1636599&view=auto
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/Statistic/SparkStatistic.java (added)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/Statistic/SparkStatistic.java Tue Nov 4 15:10:32 2014
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.spark.Statistic;
+
+/**
+ * A single named statistic whose value is carried as a string.
+ * Both fields are assigned once in the constructor and never change.
+ */
+public class SparkStatistic {
+  private final String name;
+  private final String value;
+
+  /**
+   * Package-private constructor.
+   *
+   * @param name  the statistic's name, returned as-is by {@link #getName()}
+   * @param value the statistic's value, returned as-is by {@link #getValue()}
+   */
+  SparkStatistic(String name, String value) {
+    this.name = name;
+    this.value = value;
+  }
+
+  /** @return the name given at construction */
+  public String getName() {
+    return name;
+  }
+
+  /** @return the value given at construction */
+  public String getValue() {
+    return value;
+  }
+}
Added: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/Statistic/SparkStatisticGroup.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/Statistic/SparkStatisticGroup.java?rev=1636599&view=auto
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/Statistic/SparkStatisticGroup.java (added)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/Statistic/SparkStatisticGroup.java Tue Nov 4 15:10:32 2014
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.spark.Statistic;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * A named, read-only collection of {@link SparkStatistic} entries.
+ */
+public class SparkStatisticGroup {
+  private final String groupName;
+  private final List<SparkStatistic> statisticList;
+
+  /**
+   * Package-private constructor.
+   *
+   * @param groupName     name returned by {@link #getGroupName()}
+   * @param statisticList statistics of this group; the list is copied, so changes
+   *                      the caller makes to it afterwards are not reflected here
+   */
+  SparkStatisticGroup(String groupName, List<SparkStatistic> statisticList) {
+    this.groupName = groupName;
+    // unmodifiableList() alone is only a view over the caller's list; copy
+    // first so the content of this group is fixed once it is constructed.
+    this.statisticList =
+        Collections.unmodifiableList(new ArrayList<SparkStatistic>(statisticList));
+  }
+
+  /** @return the name of this group */
+  public String getGroupName() {
+    return groupName;
+  }
+
+  /**
+   * @return an iterator over the statistics of this group; being backed by an
+   *         unmodifiable list, it does not support {@link Iterator#remove()}
+   */
+  public Iterator<SparkStatistic> getStatistics() {
+    return statisticList.iterator();
+  }
+}
Added: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/Statistic/SparkStatistics.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/Statistic/SparkStatistics.java?rev=1636599&view=auto
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/Statistic/SparkStatistics.java (added)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/Statistic/SparkStatistics.java Tue Nov 4 15:10:32 2014
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.spark.Statistic;
+
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * A read-only sequence of {@link SparkStatisticGroup}s.
+ */
+public class SparkStatistics {
+  private final List<SparkStatisticGroup> statisticGroups;
+
+  /**
+   * Package-private constructor.
+   *
+   * @param statisticGroups the groups to expose; the list is copied, so changes
+   *                        the caller makes to it afterwards are not reflected here
+   */
+  SparkStatistics(List<SparkStatisticGroup> statisticGroups) {
+    // unmodifiableList() alone is only a view over the caller's list; copy
+    // first so the set of groups is fixed once this object is constructed.
+    this.statisticGroups =
+        Collections.unmodifiableList(new ArrayList<SparkStatisticGroup>(statisticGroups));
+  }
+
+  /**
+   * @return an iterator over the statistic groups; being backed by an
+   *         unmodifiable list, it does not support {@link Iterator#remove()}
+   */
+  public Iterator<SparkStatisticGroup> getStatisticGroups() {
+    return statisticGroups.iterator();
+  }
+}
\ No newline at end of file
Added: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/Statistic/SparkStatisticsBuilder.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/Statistic/SparkStatisticsBuilder.java?rev=1636599&view=auto
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/Statistic/SparkStatisticsBuilder.java (added)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/Statistic/SparkStatisticsBuilder.java Tue Nov 4 15:10:32 2014
@@ -0,0 +1,111 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.spark.Statistic;
+
+import org.apache.hadoop.hive.ql.exec.spark.counter.SparkCounter;
+import org.apache.hadoop.hive.ql.exec.spark.counter.SparkCounterGroup;
+import org.apache.hadoop.hive.ql.exec.spark.counter.SparkCounters;
+
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Collects statistics by group name and assembles them into a
+ * {@link SparkStatistics}.
+ * <p/>
+ * Statistics added under the same group name end up in the same
+ * {@link SparkStatisticGroup}. The groups are kept in a {@link HashMap}, so
+ * the order in which they appear in the built result is unspecified.
+ */
+public class SparkStatisticsBuilder {
+
+  private final Map<String, List<SparkStatistic>> statisticMap;
+
+  /** Creates a builder that holds no statistics. */
+  public SparkStatisticsBuilder() {
+    statisticMap = new HashMap<String, List<SparkStatistic>>();
+  }
+
+  /**
+   * Assembles everything added so far into a {@link SparkStatistics}.
+   * <p/>
+   * Each group receives its own copy of the collected statistics, so calling
+   * {@code add} on this builder afterwards does not alter a result that was
+   * already built.
+   *
+   * @return the statistics collected so far, one group per distinct group name
+   */
+  public SparkStatistics build() {
+    List<SparkStatisticGroup> statisticGroups = new LinkedList<SparkStatisticGroup>();
+    for (Map.Entry<String, List<SparkStatistic>> entry : statisticMap.entrySet()) {
+      // Hand out a copy so that later add() calls on this builder cannot
+      // change a result that was already built.
+      List<SparkStatistic> snapshot = new LinkedList<SparkStatistic>(entry.getValue());
+      statisticGroups.add(new SparkStatisticGroup(entry.getKey(), snapshot));
+    }
+    return new SparkStatistics(statisticGroups);
+  }
+
+  /**
+   * Adds every counter of every group in {@code sparkCounters} as a statistic.
+   * The group's display name is used as the statistic group name, the
+   * counter's display name as the statistic name, and the counter value is
+   * rendered with {@link Long#toString(long)}.
+   *
+   * @param sparkCounters the counters to add; {@code null} adds nothing
+   * @return this builder
+   */
+  public SparkStatisticsBuilder add(SparkCounters sparkCounters) {
+    if (sparkCounters == null) {
+      return this;
+    }
+    for (SparkCounterGroup counterGroup : sparkCounters.getSparkCounterGroups().values()) {
+      List<SparkStatistic> statisticList = statisticsFor(counterGroup.getGroupDisplayName());
+      for (SparkCounter counter : counterGroup.getSparkCounters().values()) {
+        statisticList.add(
+            new SparkStatistic(counter.getDisplayName(), Long.toString(counter.getValue())));
+      }
+    }
+    return this;
+  }
+
+  /**
+   * Adds a single statistic.
+   *
+   * @param groupName name of the group the statistic belongs to
+   * @param name      name of the statistic
+   * @param value     value of the statistic
+   * @return this builder
+   */
+  public SparkStatisticsBuilder add(String groupName, String name, String value) {
+    statisticsFor(groupName).add(new SparkStatistic(name, value));
+    return this;
+  }
+
+  /** Returns the list collecting statistics for groupName, registering an empty one on first use. */
+  private List<SparkStatistic> statisticsFor(String groupName) {
+    List<SparkStatistic> statisticList = statisticMap.get(groupName);
+    if (statisticList == null) {
+      statisticList = new LinkedList<SparkStatistic>();
+      statisticMap.put(groupName, statisticList);
+    }
+    return statisticList;
+  }
+}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/SparkJobStatus.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/SparkJobStatus.java?rev=1636599&r1=1636598&r2=1636599&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/SparkJobStatus.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/SparkJobStatus.java Tue Nov 4 15:10:32 2014
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hive.ql.exec.spark.status;
+import org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatistics;
import org.apache.hadoop.hive.ql.exec.spark.counter.SparkCounters;
import java.util.Map;
@@ -36,4 +37,6 @@ public interface SparkJobStatus {
public SparkCounters getCounter();
+ public SparkStatistics getSparkStatistics(); // NOTE(review): SparkTask null-checks the result - confirm whether null is a valid return
+
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/SimpleSparkJobStatus.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/SimpleSparkJobStatus.java?rev=1636599&r1=1636598&r2=1636599&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/SimpleSparkJobStatus.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/SimpleSparkJobStatus.java Tue Nov 4 15:10:32 2014
@@ -22,6 +22,8 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Map;
+import org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatistics;
+import org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatisticsBuilder;
import org.apache.hadoop.hive.ql.exec.spark.counter.SparkCounters;
import org.apache.hadoop.hive.ql.exec.spark.status.SparkJobState;
import org.apache.hadoop.hive.ql.exec.spark.status.SparkJobStatus;
@@ -120,6 +122,11 @@ public class SimpleSparkJobStatus implem
return sparkCounters;
}
+ @Override
+ public SparkStatistics getSparkStatistics() {
+ return new SparkStatisticsBuilder().add(sparkCounters).build(); // built anew from sparkCounters on each call
+ }
+
private List<StageInfo> getStageInfo(int stageId) {
List<StageInfo> stageInfos = new LinkedList<StageInfo>();