You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ey...@apache.org on 2009/02/27 19:34:42 UTC
svn commit: r748632 - in /hadoop/core/trunk/src/contrib/chukwa: conf/aggregator.sql src/java/org/apache/hadoop/chukwa/database/Aggregator.java

Author: eyang
Date: Fri Feb 27 18:34:41 2009
New Revision: 748632

URL: http://svn.apache.org/viewvc?rev=748632&view=rev
Log:
HADOOP-5051.  

- Added macro token subsitution for sum(table_name) 
- Added correct hdfs throughput aggregation SQL macros.

Modified:
    hadoop/core/trunk/src/contrib/chukwa/conf/aggregator.sql
    hadoop/core/trunk/src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/database/Aggregator.java

Modified: hadoop/core/trunk/src/contrib/chukwa/conf/aggregator.sql
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/chukwa/conf/aggregator.sql?rev=748632&r1=748631&r2=748632&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/chukwa/conf/aggregator.sql (original)
+++ hadoop/core/trunk/src/contrib/chukwa/conf/aggregator.sql Fri Feb 27 18:34:41 2009
@@ -5,11 +5,11 @@
 #insert into [c] select if(AvgCPUBusy is null,0,AvgCPUBusy) as m, CASE WHEN MRJobName like 'PigLatin%' THEN 'Pig' WHEN MRJobName like 'streamjob%' THEN 'Streaming' WHEN MRJobName like '%abacus%' THEN 'Abacus' ELSE 'Other' END as interface, count(*)*j.NumOfMachines/60 as nodehours,count(distinct(MRJobID)) as jobs from HodJobDigest d join HodJob j on (d.HodID = j.HodID) join MRJob m on (m.HodID = j.HodID) where d.Timestamp >= '[past_10_minutes]' and d.Timestamp <= '[now]' and d.Timestamp >= m.LAUNCH_TIME and d.Timestamp <= m.FINISH_TIME group by AvgCPUBusy,CASE WHEN MRJobName like 'PigLatin%' THEN 'Pig' WHEN MRJobName like 'streamjob%' THEN 'Streaming' WHEN MRJobName like '%abacus%' THEN 'Abacus' ELSE 'Other' END order by if(AvgCPUBusy is null,0,AvgCPUBusy)
 #insert into [cluster_hadoop_mapred] (select timestamp,[avg(hadoop_mapred_job)] from [hadoop_mapred_job] where timestamp between '[past_10_minutes]' and '[now]' group by timestamp);
 replace into [cluster_system_metrics] (select timestamp,[avg(system_metrics)] from [system_metrics] where timestamp between '[past_10_minutes]' and '[past_5_minutes]' group by timestamp);
-replace into [dfs_throughput] (select timestamp,[avg(dfs_datanode)] from [dfs_datanode] where timestamp between '[past_10_minutes]' and '[past_5_minutes]' group by timestamp);
+replace into [dfs_throughput] (select timestamp,count(host),avg(block_reports_avg_time),sum(block_reports_num_ops),sum(block_verification_failures),sum(blocks_read),sum(blocks_removed),sum(blocks_replicated),sum(blocks_verified),sum(blocks_written),sum(bytes_read),sum(bytes_written),avg(copy_block_op_avg_time),sum(copy_block_op_num_ops),avg(heart_beats_avg_time),sum(heart_beats_num_ops),avg(read_block_op_avg_time),sum(read_block_op_num_ops),avg(read_metadata_op_avg_time),sum(read_metadata_op_num_ops),sum(reads_from_local_client),sum(reads_from_remote_client),avg(replace_block_op_avg_time),sum(replace_block_op_num_ops),count(session_id),avg(write_block_op_avg_time),sum(write_block_op_num_ops),sum(writes_from_local_client),sum(writes_from_remote_client) from (select timestamp,host,block_reports_avg_time,case host when @ph then block_reports_num_ops-@p_block_reports_num_ops else 0 end as block_reports_num_ops, case host when @ph then block_verification_failures-@p_block_verific
 ation_failures else 0 end as block_verification_failures,case host when @ph then blocks_read-@p_blocks_read else 0 end as blocks_read,case host when @ph then blocks_removed-@p_blocks_removed else 0 end as blocks_removed,case host when @ph then blocks_replicated-@p_blocks_replicated else 0 end as blocks_replicated,case host when @ph then blocks_verified-@p_blocks_verified else 0 end as blocks_verified,case host when @ph then blocks_written-@p_blocks_written else 0 end as blocks_written,case host when @ph then bytes_read-@p_bytes_read else 0 end as bytes_read,case host when @ph then bytes_written-@p_bytes_written else 0 end as bytes_written,copy_block_op_avg_time,case host when @ph then copy_block_op_num_ops-@p_copy_block_op_num_ops else 0 end as copy_block_op_num_ops,heart_beats_avg_time,case host when @ph then heart_beats_num_ops-@p_heart_beats_num_ops else 0 end as heart_beats_num_ops,read_block_op_avg_time,case host when @ph then read_block_op_num_ops-@p_read_block_op_num_
 ops else 0 end as read_block_op_num_ops,read_metadata_op_avg_time,case host when @ph then read_metadata_op_num_ops-@p_read_metadata_op_num_ops else 0 end as read_metadata_op_num_ops,case host when @ph then reads_from_local_client-@p_reads_from_local_client else 0 end as reads_from_local_client,case host when @ph then reads_from_remote_client-@p_reads_from_remote_client else 0 end as reads_from_remote_client,replace_block_op_avg_time,case host when @ph then replace_block_op_num_ops-@p_replace_block_op_num_ops else 0 end as replace_block_op_num_ops,session_id,write_block_op_avg_time,case host when @ph then write_block_op_num_ops-@p_write_block_op_num_ops else 0 end as write_block_op_num_ops,case host when @ph then writes_from_local_client-@p_writes_from_local_client else 0 end as writes_from_local_client,case host when @ph then writes_from_remote_client-@p_writes_from_remote_client else 0 end as writes_from_remote_client,@ph:=host,@p_block_reports_num_ops:=block_reports_num_op
 s,@p_block_verification_failures:=block_verification_failures,@p_blocks_read:=blocks_read,@p_blocks_removed:=blocks_removed,@p_blocks_replicated:=blocks_replicated,@p_blocks_verified:=blocks_verified,@p_blocks_written:=blocks_written,@p_bytes_read:=bytes_read,@p_bytes_written:=bytes_written,@p_copy_block_op_num_ops:=copy_block_op_num_ops,@p_heart_beats_num_ops:=heart_beats_num_ops,@p_read_block_op_num_ops:=read_block_op_num_ops,@p_read_metadata_op_num_ops:=read_metadata_op_num_ops,@p_reads_from_local_client:=reads_from_local_client,@p_reads_from_remote_client:=reads_from_remote_client,@p_replace_block_op_num_ops:=replace_block_op_num_ops,@p_write_block_op_num_ops:=write_block_op_num_ops,@p_writes_from_local_client:=writes_from_local_client,@p_writes_from_remote_client:=writes_from_remote_client from [dfs_datanode] where timestamp between '[past_15_minutes]' and '[past_5_minutes]' group by host,timestamp) as a where timestamp!='[past_15_minutes]' group by timestamp);
 replace into [cluster_disk] (select a.timestamp,a.mount,a.used,a.available,a.used_percent from (select from_unixtime(unix_timestamp(timestamp)-unix_timestamp(timestamp)%60)as timestamp,mount,avg(used) as used,avg(available) as available,avg(used_percent) as used_percent from [disk] where timestamp between '[past_10_minutes]' and '[past_5_minutes]' group by timestamp,mount) as a group by a.timestamp, a.mount);
-replace delayed into [hod_job_digest] (select timestamp,d.hodid,d.userid,[avg(system_metrics)] from (select a.HodID,b.host as machine,a.userid,a.starttime,a.endtime from [HodJob] a join [hod_machine] b on (a.HodID = b.HodID) where endtime between '[past_10_minutes]' and '[past_5_minutes]') as d,[system_metrics] where timestamp between d.starttime and d.endtime and host=d.machine group by hodid,timestamp);
-replace into [cluster_hadoop_rpc] (select timestamp,[avg(hadoop_rpc)] from [hadoop_rpc] where timestamp between '[past_10_minutes]' and '[past_5_minutes]' group by timestamp);
-replace into [user_util] (select timestamp, j.UserID as user, sum(j.NumOfMachines) as node_total, sum(cpu_idle_pcnt*j.NumOfMachines) as cpu_unused, sum((cpu_user_pcnt+cpu_system_pcnt)*j.NumOfMachines) as cpu_used, avg(cpu_user_pcnt+cpu_system_pcnt) as cpu_used_pcnt, sum((100-(sda_busy_pcnt+sdb_busy_pcnt+sdc_busy_pcnt+sdd_busy_pcnt)/4)*j.NumOfMachines) as disk_unused, sum(((sda_busy_pcnt+sdb_busy_pcnt+sdc_busy_pcnt+sdd_busy_pcnt)/4)*j.NumOfMachines) as disk_used, avg(((sda_busy_pcnt+sdb_busy_pcnt+sdc_busy_pcnt+sdd_busy_pcnt)/4)) as disk_used_pcnt, sum((100-eth0_busy_pcnt)*j.NumOfMachines) as network_unused, sum(eth0_busy_pcnt*j.NumOfMachines) as network_used, avg(eth0_busy_pcnt) as network_used_pcnt, sum((100-mem_used_pcnt)*j.NumOfMachines) as memory_unused, sum(mem_used_pcnt*j.NumOfMachines) as memory_used, avg(mem_used_pcnt) as memory_used_pcnt from [hod_job_digest] d,[HodJob] j where (d.HodID = j.HodID) and Timestamp between '[past_10_minutes]' and '[past_5_minutes]' group
  by j.UserID);
+#replace delayed into [hod_job_digest] (select timestamp,d.hodid,d.userid,[avg(system_metrics)] from (select a.HodID,b.host as machine,a.userid,a.starttime,a.endtime from [HodJob] a join [hod_machine] b on (a.HodID = b.HodID) where endtime between '[past_10_minutes]' and '[past_5_minutes]') as d,[system_metrics] where timestamp between d.starttime and d.endtime and host=d.machine group by hodid,timestamp);
+replace into [cluster_hadoop_rpc] (select timestamp, count(host), avg(rpc_processing_time_avg_time), sum(rpc_processing_time_num_ops), avg(rpc_queue_time_avg_time), sum(rpc_queue_time_num_ops), avg(get_build_version_avg_time), sum(get_build_version_num_ops), avg(get_job_counters_avg_time), sum(get_job_counters_num_ops), avg(get_job_profile_avg_time), sum(get_job_profile_num_ops), avg(get_job_status_avg_time), sum(get_job_status_num_ops), avg(get_new_job_id_avg_time), sum(get_new_job_id_num_ops), avg(get_protocol_version_avg_time), sum(get_protocol_version_num_ops), avg(get_system_dir_avg_time), sum(get_system_dir_num_ops), avg(get_task_completion_events_avg_time), sum(get_task_completion_events_num_ops), avg(get_task_diagnostics_avg_time), sum(get_task_diagnostics_num_ops), avg(heartbeat_avg_time), sum(heartbeat_num_ops), avg(killJob_avg_time), sum(killJob_num_ops), avg(submit_job_avg_time), sum(submit_job_num_ops) from [hadoop_rpc] where timestamp between '[past_10_minutes]
 ' and '[past_5_minutes]' group by timestamp);
+#replace into [user_util] (select timestamp, j.UserID as user, sum(j.NumOfMachines) as node_total, sum(cpu_idle_pcnt*j.NumOfMachines) as cpu_unused, sum((cpu_user_pcnt+cpu_system_pcnt)*j.NumOfMachines) as cpu_used, avg(cpu_user_pcnt+cpu_system_pcnt) as cpu_used_pcnt, sum((100-(sda_busy_pcnt+sdb_busy_pcnt+sdc_busy_pcnt+sdd_busy_pcnt)/4)*j.NumOfMachines) as disk_unused, sum(((sda_busy_pcnt+sdb_busy_pcnt+sdc_busy_pcnt+sdd_busy_pcnt)/4)*j.NumOfMachines) as disk_used, avg(((sda_busy_pcnt+sdb_busy_pcnt+sdc_busy_pcnt+sdd_busy_pcnt)/4)) as disk_used_pcnt, sum((100-eth0_busy_pcnt)*j.NumOfMachines) as network_unused, sum(eth0_busy_pcnt*j.NumOfMachines) as network_used, avg(eth0_busy_pcnt) as network_used_pcnt, sum((100-mem_used_pcnt)*j.NumOfMachines) as memory_unused, sum(mem_used_pcnt*j.NumOfMachines) as memory_used, avg(mem_used_pcnt) as memory_used_pcnt from [hod_job_digest] d,[HodJob] j where (d.HodID = j.HodID) and Timestamp between '[past_10_minutes]' and '[past_5_minutes]' grou
 p by j.UserID);
 #
 # Down sample metrics for charts
 replace into [system_metrics_month] (select timestamp,[group_avg(system_metrics)] from [system_metrics_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300),host);
@@ -17,40 +17,40 @@
 replace into [system_metrics_year] (select timestamp,[group_avg(system_metrics)] from [system_metrics_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800),host);
 replace into [system_metrics_decade] (select timestamp,[group_avg(system_metrics)] from [system_metrics_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200),host);
 #
-replace into [dfs_namenode_month] (select timestamp,[group_avg(dfs_namenode)] from [dfs_namenode_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300),host);
-replace into [dfs_namenode_quarter] (select timestamp,[group_avg(dfs_namenode)] from [dfs_namenode_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800),host);
-replace into [dfs_namenode_year] (select timestamp,[group_avg(dfs_namenode)] from [dfs_namenode_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800),host);
-replace into [dfs_namenode_decade] (select timestamp,[group_avg(dfs_namenode)] from [dfs_namenode_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200),host);
-#
-replace into [dfs_datanode_month] (select timestamp,[group_avg(dfs_datanode)] from [dfs_datanode_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300),host);
-replace into [dfs_datanode_quarter] (select timestamp,[group_avg(dfs_datanode)] from [dfs_datanode_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800),host);
-replace into [dfs_datanode_year] (select timestamp,[group_avg(dfs_datanode)] from [dfs_datanode_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800),host);
-replace into [dfs_datanode_decade] (select timestamp,[group_avg(dfs_datanode)] from [dfs_datanode_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200),host);
-#
-replace into [hadoop_rpc_month] (select timestamp,[group_avg(hadoop_rpc)] from [hadoop_rpc_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300),host);
-replace into [hadoop_rpc_quarter] (select timestamp,[group_avg(hadoop_rpc)] from [hadoop_rpc_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800),host);
-replace into [hadoop_rpc_year] (select timestamp,[group_avg(hadoop_rpc)] from [hadoop_rpc_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800),host);
-replace into [hadoop_rpc_decade] (select timestamp,[group_avg(hadoop_rpc)] from [hadoop_rpc_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200),host);
-#
-replace into [cluster_hadoop_rpc_month] (select timestamp,[avg(cluster_hadoop_rpc)] from [cluster_hadoop_rpc_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300));
-replace into [cluster_hadoop_rpc_quarter] (select timestamp,[avg(cluster_hadoop_rpc)] from [cluster_hadoop_rpc_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800));
-replace into [cluster_hadoop_rpc_year] (select timestamp,[avg(cluster_hadoop_rpc)] from [cluster_hadoop_rpc_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800));
-replace into [cluster_hadoop_rpc_decade] (select timestamp,[avg(cluster_hadoop_rpc)] from [cluster_hadoop_rpc_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200));
-#
-replace into [hadoop_mapred_month] (select timestamp,[group_avg(hadoop_mapred)] from [hadoop_mapred_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300),host);
-replace into [hadoop_mapred_quarter] (select timestamp,[group_avg(hadoop_mapred)] from [hadoop_mapred_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800),host);
-replace into [hadoop_mapred_year] (select timestamp,[group_avg(hadoop_mapred)] from [hadoop_mapred_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800),host);
-replace into [hadoop_mapred_decade] (select timestamp,[group_avg(hadoop_mapred)] from [hadoop_mapred_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200),host);
-#
-replace into [hadoop_jvm_month] (select timestamp,[group_avg(hadoop_jvm)] from [hadoop_jvm_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300),host,process_name);
-replace into [hadoop_jvm_quarter] (select timestamp,[group_avg(hadoop_jvm)] from [hadoop_jvm_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800),host,process_name);
-replace into [hadoop_jvm_year] (select timestamp,[group_avg(hadoop_jvm)] from [hadoop_jvm_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800),host,process_name);
-replace into [hadoop_jvm_decade] (select timestamp,[group_avg(hadoop_jvm)] from [hadoop_jvm_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200),host,process_name);
-#
-replace into [dfs_throughput_month] (select timestamp,[avg(dfs_throughput)] from [dfs_throughput_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300));
-replace into [dfs_throughput_quarter] (select timestamp,[avg(dfs_throughput)] from [dfs_throughput_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800));
-replace into [dfs_throughput_year] (select timestamp,[avg(dfs_throughput)] from [dfs_throughput_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800));
-replace into [dfs_throughput_decade] (select timestamp,[avg(dfs_throughput)] from [dfs_throughput_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200));
+replace into [dfs_namenode_month] (select timestamp, [group_avg(dfs_namenode)] from [dfs_namenode_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300),host);
+replace into [dfs_namenode_quarter] (select timestamp, [group_avg(dfs_namenode)] from [dfs_namenode_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800),host);
+replace into [dfs_namenode_year] (select timestamp, [group_avg(dfs_namenode)] from [dfs_namenode_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800),host);
+replace into [dfs_namenode_decade] (select timestamp, [group_avg(dfs_namenode)] from [dfs_namenode_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200),host);
+#
+replace into [dfs_datanode_month] (select timestamp, [group_avg(dfs_datanode)] from [dfs_datanode_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300),host);
+replace into [dfs_datanode_quarter] (select timestamp, [group_avg(dfs_datanode)] from [dfs_datanode_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800),host);
+replace into [dfs_datanode_year] (select timestamp, [group_avg(dfs_datanode)] from [dfs_datanode_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800),host);
+replace into [dfs_datanode_decade] (select timestamp, [group_avg(dfs_datanode)] from [dfs_datanode_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200),host);
+#
+replace into [hadoop_rpc_month] (select timestamp, [group_avg(hadoop_rpc)] from [hadoop_rpc_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300),host);
+replace into [hadoop_rpc_quarter] (select timestamp, [group_avg(hadoop_rpc)] from [hadoop_rpc_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800),host);
+replace into [hadoop_rpc_year] (select timestamp, [group_avg(hadoop_rpc)] from [hadoop_rpc_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800),host);
+replace into [hadoop_rpc_decade] (select timestamp, [group_avg(hadoop_rpc)]	from [hadoop_rpc_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200),host);
+#
+replace into [cluster_hadoop_rpc_month] (select timestamp, [avg(cluster_hadoop_rpc)] from [cluster_hadoop_rpc_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300));
+replace into [cluster_hadoop_rpc_quarter] (select timestamp, [avg(cluster_hadoop_rpc)] from [cluster_hadoop_rpc_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800));
+replace into [cluster_hadoop_rpc_year] (select timestamp, [avg(cluster_hadoop_rpc)] from [cluster_hadoop_rpc_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800));
+replace into [cluster_hadoop_rpc_decade] (select timestamp, [avg(cluster_hadoop_rpc)] from [cluster_hadoop_rpc_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200));
+#
+replace into [hadoop_mapred_month] (select timestamp, [group_avg(hadoop_mapred)] from [hadoop_mapred_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300),host);
+replace into [hadoop_mapred_quarter] (select timestamp, [group_avg(hadoop_mapred)] from [hadoop_mapred_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800),host);
+replace into [hadoop_mapred_year] (select timestamp, [group_avg(hadoop_mapred)] from [hadoop_mapred_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800),host);
+replace into [hadoop_mapred_decade] (select timestamp, [group_avg(hadoop_mapred)] from [hadoop_mapred_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200),host);
+#
+replace into [hadoop_jvm_month] (select timestamp, [group_avg(hadoop_jvm)] from [hadoop_jvm_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300),host,process_name);
+replace into [hadoop_jvm_quarter] (select timestamp, [group_avg(hadoop_jvm)] from [hadoop_jvm_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800),host,process_name);
+replace into [hadoop_jvm_year] (select timestamp, [group_avg(hadoop_jvm)] from [hadoop_jvm_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800),host,process_name);
+replace into [hadoop_jvm_decade] (select timestamp, [group_avg(hadoop_jvm)] from [hadoop_jvm_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200),host,process_name);
+#
+replace into [dfs_throughput_month] (select timestamp, [avg(dfs_throughput)] from [dfs_throughput_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300));
+replace into [dfs_throughput_quarter] (select timestamp, [avg(dfs_throughput)] from [dfs_throughput_month] where timestamp between '[past_180_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800));
+replace into [dfs_throughput_year] (select timestamp, [avg(dfs_throughput)]	from [dfs_throughput_quarter] where timestamp between '[past_1080_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800));
+replace into [dfs_throughput_decade] (select timestamp, [avg(dfs_throughput)] from [dfs_throughput_year] where timestamp between '[past_4320_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200));
 #
 replace into [node_activity_month] (select timestamp,[avg(node_activity)] from [node_activity_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300));
 replace into [node_activity_quarter] (select timestamp,[avg(node_activity)] from [node_activity_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800));
@@ -77,12 +77,12 @@
 replace into [cluster_system_metrics_year] (select timestamp,[avg(cluster_system_metrics)] from [cluster_system_metrics_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800));
 replace into [cluster_system_metrics_decade] (select timestamp,[avg(cluster_system_metrics)] from [cluster_system_metrics_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200));
 #
-replace into [hod_job_digest_month] (select timestamp,[group_avg(hod_job_digest)] from [hod_job_digest_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300),HodID);
-replace into [hod_job_digest_quarter] (select timestamp,[group_avg(hod_job_digest)] from [hod_job_digest_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800),HodID);
-replace into [hod_job_digest_year] (select timestamp,[group_avg(hod_job_digest)] from [hod_job_digest_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800),HodID);
-replace into [hod_job_digest_decade] (select timestamp,[group_avg(hod_job_digest)] from [hod_job_digest_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200),HodID);
-#
-replace into [user_util_month] (select timestamp,[group_avg(user_util)] from [user_util_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300),user);
-replace into [user_util_quarter] (select timestamp,[group_avg(user_util)] from [user_util_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800),user);
-replace into [user_util_year] (select timestamp,[group_avg(user_util)] from [user_util_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800),user);
-replace into [user_util_decade] (select timestamp,[group_avg(user_util)] from [user_util_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200),user);
+#replace into [hod_job_digest_month] (select timestamp,[group_avg(hod_job_digest)] from [hod_job_digest_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300),HodID);
+#replace into [hod_job_digest_quarter] (select timestamp,[group_avg(hod_job_digest)] from [hod_job_digest_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800),HodID);
+#replace into [hod_job_digest_year] (select timestamp,[group_avg(hod_job_digest)] from [hod_job_digest_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800),HodID);
+#replace into [hod_job_digest_decade] (select timestamp,[group_avg(hod_job_digest)] from [hod_job_digest_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200),HodID);
+#
+#replace into [user_util_month] (select timestamp,[group_avg(user_util)] from [user_util_week] where timestamp between '[past_15_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/300),user);
+#replace into [user_util_quarter] (select timestamp,[group_avg(user_util)] from [user_util_month] where timestamp between '[past_90_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/1800),user);
+#replace into [user_util_year] (select timestamp,[group_avg(user_util)] from [user_util_quarter] where timestamp between '[past_540_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/10800),user);
+#replace into [user_util_decade] (select timestamp,[group_avg(user_util)] from [user_util_year] where timestamp between '[past_2160_minutes]' and '[now]' group by FLOOR(UNIX_TIMESTAMP(Timestamp)/43200),user);

Modified: hadoop/core/trunk/src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/database/Aggregator.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/database/Aggregator.java?rev=748632&r1=748631&r2=748632&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/database/Aggregator.java (original)
+++ hadoop/core/trunk/src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/database/Aggregator.java Fri Feb 27 18:34:41 2009
@@ -81,7 +81,7 @@
 	public String computeMacro(String macro) throws SQLException {
 		Pattern p = Pattern.compile("past_(.*)_minutes");
 		Matcher matcher = p.matcher(macro);
-		if(macro.indexOf("avg(")==0 || macro.indexOf("group_avg(")==0) {
+		if(macro.indexOf("avg(")==0 || macro.indexOf("group_avg(")==0 || macro.indexOf("sum(")==0) {
 			String meta="";
 			String[] table = dbc.findTableName(macro.substring(macro.indexOf("(")+1,macro.indexOf(")")), current, current);
 			try {
@@ -108,12 +108,20 @@
 	            	} else if(type==java.sql.Types.DOUBLE ||
 	            			  type==java.sql.Types.FLOAT ||
 	            			  type==java.sql.Types.INTEGER) {
-	            		meta=meta+"avg("+name+")";
+	            		if(macro.indexOf("sum(")==0) {
+	            		    meta=meta+"sum("+name+")";	            			
+	            		} else {
+	            		    meta=meta+"avg("+name+")";
+	            		}
 		            	first=false;
 	            	} else if(type==java.sql.Types.TIMESTAMP) {
 	            		// Skip the column
 	            	} else {
-	            		meta=meta+"AVG("+name+")";
+	            		if(macro.indexOf("sum(")==0) {
+	            		    meta=meta+"SUM("+name+")";
+	            		} else {
+		            		meta=meta+"AVG("+name+")";	            			
+	            		}
 		            	first=false;
 	            	}
 	            }