You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@vxquery.apache.org by pr...@apache.org on 2014/04/02 06:12:12 UTC

[32/50] [abbrv] git commit: Added the option to create an inventory csv file.

Added the option to create an inventory csv file.

The inventory holds the number of sensors and readings for each station.


Project: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/commit/f60f8858
Tree: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/tree/f60f8858
Diff: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/diff/f60f8858

Branch: refs/heads/prestonc/hash_join
Commit: f60f885885409dd2781cbfda1baaf18c89911b53
Parents: a50cf44
Author: Preston Carman <pr...@apache.org>
Authored: Tue Mar 25 16:38:28 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Tue Apr 1 20:56:25 2014 -0700

----------------------------------------------------------------------
 .../noaa-ghcn-daily/scripts/weather_cli.py      |  8 +++--
 .../scripts/weather_config_ghcnd.py             | 14 ++++----
 .../scripts/weather_convert_to_xml.py           | 35 ++++++++++++++++++--
 3 files changed, 46 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/f60f8858/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
index 52945e5..5bfa698 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
@@ -53,7 +53,7 @@ def main(argv):
             print '    -a        Append the results to the progress file.'
             print '    -f (str)  The file name of a specific station to process.'
             print '              * Helpful when testing a single stations XML file output.'
-            print '    -l (str)  Select the locality of the scripts execution (download, progress_file, sensor_build, station_build, partition, partition_scheme, statistics).'
+            print '    -l (str)  Select the locality of the scripts execution (download, progress_file, sensor_build, station_build, partition, partition_scheme, inventory, statistics).'
             print '    -m (int)  Limits the number of files created for each station.'
             print '              * Helpful when testing to make sure all elements are supported for each station.'
             print '              Alternate form: --max_station_files=(int)'
@@ -73,7 +73,7 @@ def main(argv):
                 print 'Error: Argument must be a file name for --file (-f).'
                 sys.exit()
         elif opt in ('-l', "--locality"):
-            if arg in ("download", "progress_file", "sensor_build", "station_build", "partition", "partition_scheme", "test_links", "queries", "statistics"):
+            if arg in ("download", "progress_file", "sensor_build", "station_build", "partition", "partition_scheme", "test_links", "queries", "inventory", "statistics"):
                 section = arg
             else:
                 print 'Error: Argument must be a string for --locality (-l) and a valid locality.'
@@ -221,6 +221,10 @@ def main(argv):
             print 'Processing the queries section (' + dataset.get_name() + ').'
             benchmark.copy_query_files(reset)
     
+    if section in ("inventory"):
+        print 'Processing the inventory section.'
+        convert.process_inventory_file()
+                  
 #     if section in ("statistics"):
 #         print 'Processing the statistics section.'
 #         data.print_progress_file_stats(convert)

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/f60f8858/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
index 6d3bd9c..04fff52 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
@@ -86,10 +86,10 @@ STATES_FIELDS['CODE'] = ['CODE', 1, 2, 'Character']
 STATES_FIELDS['NAME'] = ['NAME', 4, 50, 'Character']
 
 # Details about the row.
-INVENTORY_FIELDS = []
-INVENTORY_FIELDS.append(['ID', 1, 11, 'Character'])
-INVENTORY_FIELDS.append(['LATITUDE', 13, 20, 'Real'])
-INVENTORY_FIELDS.append(['LONGITUDE', 22, 30, 'Real'])
-INVENTORY_FIELDS.append(['ELEMENT', 32, 35, 'Character'])
-INVENTORY_FIELDS.append(['FIRSTYEAR', 37, 40, 'Integer'])
-INVENTORY_FIELDS.append(['LASTYEAR', 42, 45, 'Integer'])
+INVENTORY_FIELDS = {}
+INVENTORY_FIELDS['ID'] = ['ID', 1, 11, 'Character']
+INVENTORY_FIELDS['LATITUDE'] = ['LATITUDE', 13, 20, 'Real']
+INVENTORY_FIELDS['LONGITUDE'] = ['LONGITUDE', 22, 30, 'Real']
+INVENTORY_FIELDS['ELEMENT'] = ['ELEMENT', 32, 35, 'Character']
+INVENTORY_FIELDS['FIRSTYEAR'] = ['FIRSTYEAR', 37, 40, 'Integer']
+INVENTORY_FIELDS['LASTYEAR'] = ['LASTYEAR', 42, 45, 'Integer']

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/f60f8858/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
index c115efa..a4f33a1 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
@@ -109,9 +109,10 @@ class WeatherConvertToXML:
 
         # Extra support files.
         self.ghcnd_countries = base_path + '/ghcnd-countries.txt'
+        self.ghcnd_inventory = base_path + '/ghcnd-inventory.txt'
         self.ghcnd_states = base_path + '/ghcnd-states.txt'
         self.ghcnd_stations = base_path + '/ghcnd-stations.txt'
-
+        
         # MSHR support files.
         self.mshr_stations = base_path + '/mshr_enhanced_201402.txt'
         
@@ -160,6 +161,36 @@ class WeatherConvertToXML:
     def get_base_folder(self, station_id, data_type="sensors"):
         return build_base_save_folder(self.save_path, station_id, data_type) 
     
+    def process_inventory_file(self):
+        print "Processing inventory file"
+        file_stream = open(self.ghcnd_inventory, 'r')
+        
+        csv_header = ['ID', 'SENSORS', 'SENSORS_COUNT',  'MAX_YEARS', 'TOTAL_YEARS_FOR_ALL_SENSORS']
+        row = file_stream.readline()
+        csv_inventory = {}
+        for row in file_stream:
+            id = self.get_field_from_definition(row, INVENTORY_FIELDS['ID'])
+            sensor_id = self.get_field_from_definition(row, INVENTORY_FIELDS['ELEMENT'])
+            start = int(self.get_field_from_definition(row, INVENTORY_FIELDS['FIRSTYEAR']))
+            end = int(self.get_field_from_definition(row, INVENTORY_FIELDS['LASTYEAR']))
+            if id in csv_inventory:
+                new_count = str(int(csv_inventory[id][2]) + 1)
+                new_max = str(max(int(csv_inventory[id][3]), (end - start)))
+                new_total = str(int(csv_inventory[id][3]) + end - start)
+                csv_inventory[id] = [id, (csv_inventory[id][1] + "," + sensor_id), new_count, new_max, new_total]
+            else:
+                csv_inventory[id] = [id, sensor_id, str(1), str(end - start), str(end - start)]
+                
+        path = self.save_path + "/inventory.csv"
+        self.save_csv_file(path, csv_inventory, csv_header)
+    
+    def save_csv_file(self, path, csv_inventory, header):
+        csv_content = "|".join(header) + "\n"
+        for row_id in csv_inventory:
+            csv_content += "|".join(csv_inventory[row_id]) + "\n"
+        self.save_file(path, csv_content)
+        
+
     def process_station_file(self, file_name):
         print "Processing station file: " + file_name
         file_stream = open(file_name, 'r')
@@ -333,7 +364,7 @@ class WeatherConvertToXML:
         country_code = self.get_field_from_definition(station_mshr_row, MSHR_FIELDS['FIPS_COUNTRY_CODE']).strip()
         country_name = self.get_field_from_definition(station_mshr_row, MSHR_FIELDS['FIPS_COUNTRY_NAME']).strip()
         if country_code != "" and country_name != "":
-            additional_xml += self.default_xml_location_labels("CNTRY", "FIPS:"+country_code, country_name)
+            additional_xml += self.default_xml_location_labels("CNTRY", "FIPS:" + country_code, country_name)
         
         return additional_xml