You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@vxquery.apache.org by pr...@apache.org on 2014/10/21 21:36:02 UTC

[01/14] Remove benchmark files to allow easy copy from other branch.

Repository: vxquery
Updated Branches:
  refs/heads/master 72fd5c645 -> e97888ed8


http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
deleted file mode 100644
index 5db090a..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
+++ /dev/null
@@ -1,554 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import textwrap
-from datetime import date
-import os
-from collections import OrderedDict
-
-# Custom modules.
-from weather_config_ghcnd import *
-from weather_config_mshr import *
-from weather_download_files import *
-
-class WeatherConvertToXML:
-    
-    STATES = OrderedDict({
-        'AK': 'Alaska',
-        'AL': 'Alabama',
-        'AR': 'Arkansas',
-        'AS': 'American Samoa',
-        'AZ': 'Arizona',
-        'CA': 'California',
-        'CO': 'Colorado',
-        'CT': 'Connecticut',
-        'DC': 'District of Columbia',
-        'DE': 'Delaware',
-        'FL': 'Florida',
-        'GA': 'Georgia',
-        'GU': 'Guam',
-        'HI': 'Hawaii',
-        'IA': 'Iowa',
-        'ID': 'Idaho',
-        'IL': 'Illinois',
-        'IN': 'Indiana',
-        'KS': 'Kansas',
-        'KY': 'Kentucky',
-        'LA': 'Louisiana',
-        'MA': 'Massachusetts',
-        'MD': 'Maryland',
-        'ME': 'Maine',
-        'MI': 'Michigan',
-        'MN': 'Minnesota',
-        'MO': 'Missouri',
-        'MP': 'Northern Mariana Islands',
-        'MS': 'Mississippi',
-        'MT': 'Montana',
-        'NA': 'National',
-        'NC': 'North Carolina',
-        'ND': 'North Dakota',
-        'NE': 'Nebraska',
-        'NH': 'New Hampshire',
-        'NJ': 'New Jersey',
-        'NM': 'New Mexico',
-        'NV': 'Nevada',
-        'NY': 'New York',
-        'OH': 'Ohio',
-        'OK': 'Oklahoma',
-        'OR': 'Oregon',
-        'PA': 'Pennsylvania',
-        'PR': 'Puerto Rico',
-        'RI': 'Rhode Island',
-        'SC': 'South Carolina',
-        'SD': 'South Dakota',
-        'TN': 'Tennessee',
-        'TX': 'Texas',
-        'UT': 'Utah',
-        'VA': 'Virginia',
-        'VI': 'Virgin Islands',
-        'VT': 'Vermont',
-        'WA': 'Washington',
-        'WI': 'Wisconsin',
-        'WV': 'West Virginia',
-        'WY': 'Wyoming'
-    })
-    
-    MONTHS = [
-        "January",
-        "February",
-        "March",
-        "April",
-        "May",
-        "June",
-        "July",
-        "August",
-        "September",
-        "October",
-        "November",
-        "December"
-    ]
-    
-    token = ""
-    
-    def __init__(self, base_path, save_path, debug_output):
-        self.save_path = save_path
-        self.debug_output = debug_output
-
-        # Extra support files.
-        self.ghcnd_countries = base_path + '/ghcnd-countries.txt'
-        self.ghcnd_inventory = base_path + '/ghcnd-inventory.txt'
-        self.ghcnd_states = base_path + '/ghcnd-states.txt'
-        self.ghcnd_stations = base_path + '/ghcnd-stations.txt'
-        
-        # MSHR support files.
-        self.mshr_stations = base_path + '/mshr_enhanced_201402.txt'
-        
-    def set_token(self, token):
-        self.token = token
-        
-    def get_field_from_definition(self, row, field_definition):
-        return row[(field_definition[FIELD_INDEX_START] - 1):field_definition[FIELD_INDEX_END]]
-    
-    def get_field(self, fields_array, row, index):
-        return row[(fields_array[index][FIELD_INDEX_START] - 1):fields_array[index][FIELD_INDEX_END]]
-    
-    def get_dly_field(self, row, index):
-        return self.get_field(DLY_FIELDS, row, index)
-    
-    def print_row_files(self, row):
-        for field in DLY_FIELDS:
-            print str(field[FIELD_INDEX_NAME]) + " = '" + row[(field[FIELD_INDEX_START] - 1):field[FIELD_INDEX_END]] + "'"
-    
-    def save_file(self, filename, contents):
-        file = open(filename, 'w')
-        file.write(contents)
-        file.close()
-        return filename
-    
-    def get_folder_size(self, folder_name):
-        total_size = 0
-        for dirpath, dirnames, filenames in os.walk(folder_name):
-            for f in filenames:
-                fp = os.path.join(dirpath, f)
-                total_size += os.path.getsize(fp)
-        return total_size
-
-    def process_one_month_sensor_set(self, records, page):
-        # Default
-        return 0
-    
-    def process_station_data(self, row):
-        # Default
-        return 0
-    
-    def get_base_folder(self, station_id, data_type="sensors"):
-        return build_base_save_folder(self.save_path, station_id, data_type) 
-    
-    def process_inventory_file(self):
-        print "Processing inventory file"
-        file_stream = open(self.ghcnd_inventory, 'r')
-        
-        csv_header = ['ID', 'SENSORS', 'SENSORS_COUNT', 'MAX_YEARS', 'TOTAL_YEARS_FOR_ALL_SENSORS']
-        row = file_stream.readline()
-        csv_inventory = {}
-        for row in file_stream:
-            id = self.get_field_from_definition(row, INVENTORY_FIELDS['ID'])
-            sensor_id = self.get_field_from_definition(row, INVENTORY_FIELDS['ELEMENT'])
-            start = int(self.get_field_from_definition(row, INVENTORY_FIELDS['FIRSTYEAR']))
-            end = int(self.get_field_from_definition(row, INVENTORY_FIELDS['LASTYEAR']))
-            if id in csv_inventory:
-                new_count = str(int(csv_inventory[id][2]) + 1)
-                new_max = str(max(int(csv_inventory[id][3]), (end - start)))
-                new_total = str(int(csv_inventory[id][3]) + end - start)
-                csv_inventory[id] = [id, (csv_inventory[id][1] + "," + sensor_id), new_count, new_max, new_total]
-            else:
-                csv_inventory[id] = [id, sensor_id, str(1), str(end - start), str(end - start)]
-                
-        path = self.save_path + "/inventory.csv"
-        self.save_csv_file(path, csv_inventory, csv_header)
-    
-    def save_csv_file(self, path, csv_inventory, header):
-        csv_content = "|".join(header) + "\n"
-        for row_id in csv_inventory:
-            csv_content += "|".join(csv_inventory[row_id]) + "\n"
-        self.save_file(path, csv_content)
-        
-
-    def process_station_file(self, file_name):
-        print "Processing station file: " + file_name
-        file_stream = open(file_name, 'r')
-        
-        row = file_stream.readline()
-        return self.process_station_data(row)
-
-    def process_sensor_file(self, file_name, max_files, sensor_max=99):
-        print "Processing sensor file: " + file_name
-        file_stream = open(file_name, 'r')
-    
-        month_last = 0
-        year_last = 0
-        records = []
-        page = 0
-        sensor_count = 0
-    
-        file_count = 0
-        for row in file_stream:
-            month = self.get_dly_field(row, DLY_FIELD_MONTH)
-            year = self.get_dly_field(row, DLY_FIELD_YEAR)
-            
-            if (month_last != 0 and year_last != 0) and (sensor_count >= sensor_max or month != month_last or year != year_last):
-                # process set
-                file_count += self.process_one_month_sensor_set(records, page)
-                records = []
-                if sensor_count >= sensor_max and month == month_last and year == year_last:
-                    # start a new page.
-                    page += 1
-                else:
-                    # start over.
-                    page = 0
-                sensor_count = 0
-            
-            records.append(row)
-            sensor_count += 1
-            if max_files != 0 and file_count >= max_files:
-                # Stop creating more files after the max is reached.
-                break
-
-            month_last = month
-            year_last = year
-        
-        station_id = self.get_dly_field(records[0], DLY_FIELD_ID)
-        data_size = self.get_folder_size(self.get_base_folder(station_id) + "/" + station_id)
-        print "Created " + str(file_count) + " XML files for a data size of " + str(data_size) + "."
-        
-        return (file_count, data_size)
-    
-    def convert_c2f(self, c):
-        return (9 / 5 * c) + 32
-    
-    def default_xml_web_service_start(self):
-        field_xml = ""
-        field_xml += "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n"
-        return field_xml
-    
-    def default_xml_data_start(self, total_records):
-        field_xml = ""
-        field_xml += "<dataCollection pageCount=\"1\" totalCount=\"" + str(total_records) + "\">\n"
-        return field_xml
-    
-    def default_xml_station_start(self):
-        field_xml = ""
-        field_xml = "<stationCollection pageSize=\"100\" pageCount=\"1\" totalCount=\"1\">\n"
-        return field_xml
-    
-    def default_xml_field_date(self, report_date, indent=2):
-        field_xml = ""
-        field_xml += self.get_indent_space(indent) + "<date>" + str(report_date.year) + "-" + str(report_date.month).zfill(2) + "-" + str(report_date.day).zfill(2) + "T00:00:00.000</date>\n"
-        return field_xml
-    
-    def default_xml_mshr_station_additional(self, station_id):
-        """The web service station data is generate from the MSHR data supplemented with GHCN-Daily."""
-        station_mshr_row = ""
-        stations_mshr_file = open(self.mshr_stations, 'r')
-        for line in stations_mshr_file:
-            if station_id == self.get_field_from_definition(line, MSHR_FIELDS['GHCND_ID']).strip():
-                station_mshr_row = line
-                break
-        
-        if station_mshr_row == "":
-            return ""
-
-        additional_xml = ""
-
-        county = self.get_field_from_definition(station_mshr_row, MSHR_FIELDS['COUNTY']).strip()
-        if county != "":
-            additional_xml += self.default_xml_location_labels("CNTY", "FIPS:-9999", county)
-            
-        country_code = self.get_field_from_definition(station_mshr_row, MSHR_FIELDS['FIPS_COUNTRY_CODE']).strip()
-        country_name = self.get_field_from_definition(station_mshr_row, MSHR_FIELDS['FIPS_COUNTRY_NAME']).strip()
-        if country_code != "" and country_name != "":
-            additional_xml += self.default_xml_location_labels("CNTRY", "FIPS:" + country_code, country_name)
-        
-        return additional_xml
-
-    def default_xml_location_labels(self, type, id, display_name):
-        label_xml = ""
-        label_xml += self.default_xml_start_tag("locationLabels", 2)
-        label_xml += self.default_xml_element("type", type, 3)
-        label_xml += self.default_xml_element("id", id, 3)
-        label_xml += self.default_xml_element("displayName", display_name, 3)
-        label_xml += self.default_xml_end_tag("locationLabels", 2)
-        return label_xml
-        
-
-    def default_xml_web_service_station(self, station_id):
-        """The web service station data is generate from available historical sources."""
-        station_ghcnd_row = ""
-        stations_ghcnd_file = open(self.ghcnd_stations, 'r')
-        for line in stations_ghcnd_file:
-            if station_id == self.get_field_from_definition(line, STATIONS_FIELDS['ID']):
-                station_ghcnd_row = line
-                break
-    
-        xml_station = ""
-        xml_station += self.default_xml_start_tag("station", 1)
-        
-        xml_station += self.default_xml_element("id", "GHCND:" + station_id, 2)
-        xml_station += self.default_xml_element("displayName", self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['NAME']).strip(), 2)
-        xml_station += self.default_xml_element("latitude", self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['LATITUDE']).strip(), 2)
-        xml_station += self.default_xml_element("longitude", self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['LONGITUDE']).strip(), 2)
-        
-        elevation = self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['ELEVATION']).strip()
-        if elevation != "-999.9":
-            xml_station += self.default_xml_element("elevation", elevation, 2)
-        
-        state_code = self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['STATE']).strip()
-        if state_code != "" and state_code in self.STATES:
-            xml_station += self.default_xml_location_labels("ST", "FIPS:" + str(self.STATES.keys().index(state_code)), self.STATES[state_code])
-        
-        # Add the MSHR data to the station generated information.
-        xml_station += self.default_xml_mshr_station_additional(station_id)
-            
-        xml_station += self.default_xml_end_tag("station", 1)
-        return xml_station
-        
-    def default_xml_day_reading_as_field(self, row, day):
-        day_index = DLY_FIELD_DAY_OFFSET + ((day - 1) * DLY_FIELD_DAY_FIELDS)
-        value = self.get_dly_field(row, day_index);
-        if value == "-9999":
-            return ""
-    
-        field_xml = ""
-        field_id = self.get_dly_field(row, DLY_FIELD_ELEMENT)
-        if field_id in ("MDTN", "MDTX", "MNPN", "MXPN", "TMAX", "TMIN", "TOBS",):
-            # Add both the celcius and fahrenheit temperatures.
-            celcius = float(value) / 10
-            field_xml += "            <" + field_id + "_c>" + str(celcius) + "</" + field_id + "_c>\n"
-            fahrenheit = self.convert_c2f(celcius)
-            field_xml += "            <" + field_id + "_f>" + str(fahrenheit) + "</" + field_id + "_f>\n"
-        elif field_id in ("AWND", "EVAP", "PRCP", "THIC", "WESD", "WESF", "WSF1", "WSF2", "WSF5", "WSFG", "WSFI", "WSFM",):
-            # Field values that are in tenths.
-            converted_value = float(value) / 10
-            field_xml += "            <" + field_id + ">" + str(converted_value) + "</" + field_id + ">\n"
-        elif field_id in ("ACMC", "ACMH", "ACSC", "ACSH", "PSUN",):
-            # Fields is a percentage.
-            field_xml += "            <" + field_id + ">" + value.strip() + "</" + field_id + ">\n"
-        elif field_id in ("FMTM", "PGTM",):
-            # Fields is a time value HHMM.
-            field_xml += "            <" + field_id + ">" + value.strip() + "</" + field_id + ">\n"
-        elif field_id in ("DAEV", "DAPR", "DASF", "DATN", "DATX", "DAWM", "DWPR", "FRGB", "FRGT", "FRTH", "GAHT", "MDSF", "MDWM", "MDEV", "MDPR", "SNOW", "SNWD", "TSUN", "WDF1", "WDF2", "WDF5", "WDFG", "WDFI", "WDFM", "WDMV",):
-            # Fields with no alternation needed.
-            field_xml += "            <" + field_id + ">" + value.strip() + "</" + field_id + ">\n"
-        else:
-            field_xml += "            <unknown>" + field_id + "</unknown>\n"
-            
-        # print field_xml
-        return field_xml
-    
-    def default_xml_day_reading(self, row, day, indent=2):
-        day_index = DLY_FIELD_DAY_OFFSET + ((day - 1) * DLY_FIELD_DAY_FIELDS)
-        value = self.get_dly_field(row, day_index);
-        mflag = self.get_dly_field(row, day_index + 1);
-        qflag = self.get_dly_field(row, day_index + 2);
-        sflag = self.get_dly_field(row, day_index + 3);
-
-        if value == "-9999":
-            return ""
-
-        indent_space = self.get_indent_space(indent)
-        field_id = self.get_dly_field(row, DLY_FIELD_ELEMENT)
-        station_id = "GHCND:" + self.get_dly_field(row, DLY_FIELD_ID)
-    
-        field_xml = ""
-        field_xml += indent_space + "<dataType>" + field_id + "</dataType>\n"
-        field_xml += indent_space + "<station>" + station_id + "</station>\n"
-        field_xml += indent_space + "<value>" + value.strip() + "</value>\n"
-        field_xml += indent_space + "<attributes>\n"
-        field_xml += indent_space + indent_space + "<attribute>" + mflag.strip() + "</attribute>\n"
-        field_xml += indent_space + indent_space + "<attribute>" + qflag.strip() + "</attribute>\n"
-        field_xml += indent_space + indent_space + "<attribute>" + sflag.strip() + "</attribute>\n"
-        field_xml += indent_space + indent_space + "<attribute></attribute>\n"
-        field_xml += indent_space + "</attributes>\n"
-
-        # print field_xml
-        return field_xml
-    
-    def default_xml_end(self):
-        return textwrap.dedent("""\
-            </ghcnd_observation>""")
-
-    def default_xml_data_end(self):
-        return self.default_xml_end_tag("dataCollection", 0)
-
-    def default_xml_station_end(self):
-        return self.default_xml_end_tag("stationCollection", 0)
-
-    def default_xml_element(self, tag, data, indent=1):
-        return self.get_indent_space(indent) + "<" + tag + ">" + data + "</" + tag + ">\n"
-
-    def default_xml_start_tag(self, tag, indent=1):
-        return self.get_indent_space(indent) + "<" + tag + ">\n"
-
-    def default_xml_end_tag(self, tag, indent=1):
-        return self.get_indent_space(indent) + "</" + tag + ">\n"
-
-    def get_indent_space(self, indent):
-        return (" " * (4 * indent))
-    
-
-class WeatherWebServiceMonthlyXMLFile(WeatherConvertToXML):
-    """The web service class details how to create files similar to the NOAA web service."""
-    skip_downloading = False
-    # Station data
-    def process_station_data(self, row):
-        """Adds a single station record file either from downloading the data or generating a similar record."""
-        station_id = self.get_dly_field(row, DLY_FIELD_ID)
-        download = 0
-        if self.token is not "" and not self.skip_downloading:
-            download = self.download_station_data(station_id, self.token, True)
-            if download == 0:
-                self.skip_downloading = True
-        
-        # If not downloaded, generate.
-        if download != 0:
-            return download
-        else:
-            # Information for each daily file.
-            station_xml_file = self.default_xml_web_service_start()
-            station_xml_file += self.default_xml_station_start()
-            station_xml_file += self.default_xml_web_service_station(station_id)
-            station_xml_file += self.default_xml_station_end()
-            
-            # Remove white space.
-            station_xml_file = station_xml_file.replace("\n", "");
-            station_xml_file = station_xml_file.replace(self.get_indent_space(1), "");
-
-            # Make sure the station folder is available.
-            ghcnd_xml_station_path = self.get_base_folder(station_id, "stations")
-            if not os.path.isdir(ghcnd_xml_station_path):
-                os.makedirs(ghcnd_xml_station_path)
-                    
-            # Save XML string to disk.
-            save_file_name = ghcnd_xml_station_path + station_id + ".xml"
-            save_file_name = self.save_file(save_file_name, station_xml_file)
-    
-            if save_file_name is not "":
-                if self.debug_output:
-                    print "Wrote file: " + save_file_name
-                return 1
-            else:
-                return 0
-
-    # Station data
-    def download_station_data(self, station_id, token, reset=False):
-        """Downloads the station data from the web service."""
-        import time
-        time.sleep(2)
-        # Make sure the station folder is available.
-        ghcnd_xml_station_path = self.get_base_folder(station_id, "stations")
-        if not os.path.isdir(ghcnd_xml_station_path):
-            os.makedirs(ghcnd_xml_station_path)
-                
-        # Build download URL.
-        url = "http://www.ncdc.noaa.gov/cdo-services/services/datasets/GHCND/stations/GHCND:" + station_id + ".xml?token=" + token
-        url_file = urllib.urlopen(url)
-        station_xml_file = ""
-        while (True):
-            line = url_file.readline()
-            if not line:
-                break
-            station_xml_file += line
-        
-        if station_xml_file.find("<cdoError>") != -1:
-            if self.debug_output:
-                print "Error in station download"
-            return 0
-        
-        # Save XML string to disk.
-        save_file_name = ghcnd_xml_station_path + station_id + ".xml"
-        save_file_name = self.save_file(save_file_name, station_xml_file)
-    
-        if save_file_name is not "":
-            if self.debug_output:
-                print "Wrote file: " + save_file_name
-            return 2
-        else:
-            return 0
-
-    # Sensor data
-    def process_one_month_sensor_set(self, records, page):
-        """Generates records for a station using the web service xml layout."""
-        found_data = False        
-        year = int(self.get_dly_field(records[0], DLY_FIELD_YEAR))
-        month = int(self.get_dly_field(records[0], DLY_FIELD_MONTH))
-    
-        station_id = self.get_dly_field(records[0], DLY_FIELD_ID)
-
-        # Information for each daily file.
-        count = 0
-        daily_xml_file = ""
-        
-        for day in range(1, 32):
-            try:
-                # TODO find out what is a valid python date range? 1889?
-                # Attempt to see if this is valid date.
-                report_date = date(year, month, day)
-
-                for record in records:
-                    record_xml_snip = self.default_xml_day_reading(record, report_date.day)
-                    if record_xml_snip is not "":
-                        daily_xml_file += self.default_xml_start_tag("data")
-                        daily_xml_file += self.default_xml_field_date(report_date)
-                        daily_xml_file += record_xml_snip
-                        daily_xml_file += self.default_xml_end_tag("data")
-                        found_data = True
-                        count += 1
-
-            except ValueError:
-                pass
-
-        daily_xml_file = self.default_xml_web_service_start() + self.default_xml_data_start(count) + daily_xml_file + self.default_xml_data_end()
-        daily_xml_file = daily_xml_file.replace("\n", "");
-        daily_xml_file = daily_xml_file.replace(self.get_indent_space(1), "");
-
-        if not found_data:
-            return 0
-
-        # Make sure the station folder is available.
-        ghcnd_xml_station_path = self.get_base_folder(station_id) + "/" + station_id + "/" + str(report_date.year) + "/"
-        if not os.path.isdir(ghcnd_xml_station_path):
-            os.makedirs(ghcnd_xml_station_path)
-                
-        # Save XML string to disk.
-        save_file_name = ghcnd_xml_station_path + build_sensor_save_filename(station_id, report_date, page)
-        save_file_name = self.save_file(save_file_name, daily_xml_file)
-
-        if save_file_name is not "":
-            if self.debug_output:
-                print "Wrote file: " + save_file_name
-            return 1
-        else:
-            return 0
-
-def build_base_save_folder(save_path, station_id, data_type="sensors"):
-    # Default
-    station_prefix = station_id[:3]
-    return save_path + data_type + "/" + station_prefix + "/"
-
-def build_sensor_save_filename(station_id, report_date, page):
-    # Default
-    return station_id + "_" + str(report_date.year).zfill(4) + str(report_date.month).zfill(2) + "_" + str(page) + ".xml"
-

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
deleted file mode 100644
index 4877120..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
+++ /dev/null
@@ -1,416 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import glob
-import os.path
-import linecache
-import distutils.core
-
-from weather_convert_to_xml import *
-from collections import OrderedDict
-
-# Weather data files created to manage the conversion process.
-# Allows partition and picking up where you left off.
-class WeatherDataFiles:
-
-    LARGE_FILE_ROOT_TAG = "root"
-
-    INDEX_DATA_FILE_NAME = 0
-    INDEX_DATA_SENSORS_STATUS = 1
-    INDEX_DATA_STATION_STATUS = 2
-    INDEX_DATA_FILE_COUNT = 3
-    INDEX_DATA_FOLDER_DATA = 4
-
-    DATA_FILE_START_INDEX = 0
-    DATA_FILE_EXTENSION = ".dly"
-    DATA_FILE_MISSING = "missing"
-    DATA_FILE_INITIAL = "initialized"
-    DATA_FILE_DOWNLOADED = "downloaded"
-    DATA_FILE_GENERATED = "generated"
-    SEPERATOR = ","
-    
-    type = "sensor"
-    data_reset = False
-    
-    def __init__(self, base_path, progress_file_name="/tmp/_weather_data.csv"):
-        self.base_path = base_path
-
-        self.progress_file_name = progress_file_name
-        
-        self.current = self.DATA_FILE_START_INDEX
-        self.progress_data = []
-
-    def get_file_list_iterator(self):
-        """Return the list of files one at a time."""
-        return glob.iglob(self.base_path + "/*" + self.DATA_FILE_EXTENSION)
-
-    # Save Functions
-    def build_progress_file(self, options, convert):
-        if not os.path.isfile(self.progress_file_name) or 'reset' in options:
-            # Build a new file.
-            file = open(self.progress_file_name, 'w')
-            contents = self.get_default_progress_file_csv()
-            file.write(contents)
-            file.close()
-        elif 'append' in options or 'recalculate' in options:
-            self.open_progress_data()
-            row_count = len(self.progress_data)
-            for row in range(0, row_count):
-                row_contents = self.progress_data[row].rsplit(self.SEPERATOR)
-                file_name = row_contents[self.INDEX_DATA_FILE_NAME]
-                if self.get_file_row(file_name) < 0 and 'append' in options: 
-                    self.progress_data.append(self.get_progress_csv_row(file_name, self.DATA_FILE_INITIAL, self.DATA_FILE_INITIAL))
-                elif 'recalculate' in options:
-                    # The folder is hard coded
-                    station_id = os.path.basename(file_name).split('.')[0]
-                    folder_name = convert.get_base_folder(station_id)
-                    if os.path.exists(folder_name):
-                        row_contents = self.progress_data[row].rsplit(self.SEPERATOR)
-                        sensor_status = row_contents[self.INDEX_DATA_SENSORS_STATUS]
-                        station_status = row_contents[self.INDEX_DATA_STATION_STATUS]
-                        file_count = self.get_file_count(folder_name)
-                        data_size = self.get_folder_size(folder_name)
-                        self.progress_data[row] = self.get_progress_csv_row(file_name, sensor_status, station_status, file_count, data_size)
-                    else:
-                        self.progress_data[row] = self.get_progress_csv_row(file_name, self.DATA_FILE_INITIAL, self.DATA_FILE_INITIAL)
-            # Save file
-            self.close_progress_data(True)
-        self.reset()
-        
-    def copy_to_n_partitions(self, save_path, partitions, base_paths, reset):
-        """Once the initial data has been generated, the data can be copied into a set number of partitions. """
-        if (len(base_paths) == 0):
-            return
-        
-        # Initialize the partition paths.
-        partition_paths = get_partition_paths(0, partitions, base_paths)
-        for path in partition_paths:
-            # Make sure the xml folder is available.
-            prepare_path(path, reset)
-
-        import fnmatch
-        import os
-        
-        # copy stations and sensors into each partition
-        current_sensor_partition = 0
-        current_station_partition = 0
-        self.open_progress_data()
-        row_count = len(self.progress_data)
-        for row in range(0, row_count):
-            row_contents = self.progress_data[row].rsplit(self.SEPERATOR)
-            file_name = row_contents[self.INDEX_DATA_FILE_NAME]
-            station_id = os.path.basename(file_name).split('.')[0]
-               
-            # Copy sensor files
-            type = "sensors"
-            file_path = build_base_save_folder(save_path, station_id, type) + station_id
-            for root, dirnames, filenames in os.walk(file_path):
-                for filename in fnmatch.filter(filenames, '*.xml'):
-                    xml_path = os.path.join(root, filename)
-                    new_file_base = build_base_save_folder(partition_paths[current_sensor_partition], station_id, type) + station_id
-                    if not os.path.isdir(new_file_base):
-                        os.makedirs(new_file_base)
-                    shutil.copyfile(xml_path, new_file_base + "/" + filename)
-                    current_sensor_partition += 1
-                    if current_sensor_partition >= len(partition_paths):
-                        current_sensor_partition = 0
-            
-            # Copy station files
-            type = "stations"
-            file_path = build_base_save_folder(save_path, station_id, type) + station_id + ".xml"
-            new_file_base = build_base_save_folder(partition_paths[current_station_partition], station_id, type)
-            new_file_path = new_file_base + station_id + ".xml"
-            if os.path.isfile(file_path):
-                if not os.path.isdir(new_file_base):
-                    os.makedirs(new_file_base)
-                shutil.copyfile(file_path, new_file_path)
-            current_station_partition += 1
-            if current_station_partition >= len(partition_paths):
-                current_station_partition = 0
-
-    def build_to_n_partition_files(self, save_path, partitions, base_paths, reset):
-        """Once the initial data has been generated, the data can be divided into partitions 
-        and stored in single files.
-        """
-        if (len(base_paths) == 0):
-            return
-        
-        XML_START = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>"
-        
-        partition_paths = get_partition_paths(0, partitions, base_paths)
-
-        import fnmatch
-        import os
-        
-        for path in partition_paths:
-            prepare_path(path, reset)
-
-        # Initialize the partition paths.
-        types = ["sensors", "stations"]
-        for type in types:
-            partition_files = []
-            for path in partition_paths:
-                # Make sure the xml folder is available.
-                prepare_path(path + type + "/", False)
-                partition_files.append(open(path + type + "/partition.xml", 'w'))
-                partition_files[-1].write(XML_START + "<" + self.LARGE_FILE_ROOT_TAG + ">\n")
-
-            # copy into each partition
-            current_partition = 0
-            self.open_progress_data()
-            row_count = len(self.progress_data)
-            for row in range(0, row_count):
-                row_contents = self.progress_data[row].rsplit(self.SEPERATOR)
-                file_name = row_contents[self.INDEX_DATA_FILE_NAME]
-                station_id = os.path.basename(file_name).split('.')[0]
-                
-                # Copy files
-                if type == "sensors":
-                    file_path = build_base_save_folder(save_path, station_id, type) + station_id
-                    for root, dirnames, filenames in os.walk(file_path):
-                        for filename in fnmatch.filter(filenames, '*.xml'):
-                            xml_path = os.path.join(root, filename)
-                            xml_data = file_get_contents(xml_path).replace(XML_START, "") + "\n"
-                            partition_files[current_partition].write(xml_data)
-                            current_partition += 1
-                            if current_partition >= len(partition_files):
-                                current_partition = 0
-                elif type == "stations":
-                    file_path = build_base_save_folder(save_path, station_id, type) + station_id + ".xml"
-                    xml_path = os.path.join(root, file_path)
-                    xml_data = file_get_contents(xml_path).replace(XML_START, "") + "\n"
-                    partition_files[current_partition].write(xml_data)
-                    current_partition += 1
-                    if current_partition >= len(partition_paths):
-                        current_partition = 0
-
-            for row in range(0, len(partition_paths)):
-                partition_files[row].write("</" + self.LARGE_FILE_ROOT_TAG + ">\n")
-                partition_files[row].close()
-
-    def get_file_row(self, file_name):
-        for i in range(0, len(self.progress_data)):
-            if self.progress_data[i].startswith(file_name):
-                return i
-        return -1
-        
-    def get_default_progress_file_csv(self):
-        contents = ""
-        for path in self.get_file_list_iterator():
-            file_name = os.path.basename(path)
-            contents += self.get_progress_csv_row(file_name, self.DATA_FILE_INITIAL, self.DATA_FILE_INITIAL)
-        return contents
-    
-    def print_progress_file_stats(self, convert):
-        sensor_count_missing = 0
-        sensor_count = 0
-        file_count = 0
-        data_size = 0
-        
-        sensor_count_actual = 0
-        file_count_actual = 0
-        data_size_actual = 0
-        
-        station_count_missing = 0
-        station_count_generated = 0
-        station_count_downloaded = 0
-        
-        self.open_progress_data()
-        row_count = len(self.progress_data)
-        for row in range(0, row_count):
-            row_contents = self.progress_data[row].rsplit(self.SEPERATOR)
-            if int(row_contents[self.INDEX_DATA_FILE_COUNT]) != -1 and  int(row_contents[self.INDEX_DATA_FOLDER_DATA]) != -1:
-                sensor_count += 1
-                file_count += int(row_contents[self.INDEX_DATA_FILE_COUNT])
-                data_size += int(row_contents[self.INDEX_DATA_FOLDER_DATA])
-            else:
-                sensor_count_missing += 1
-                
-            if row_contents[self.INDEX_DATA_STATION_STATUS] == "generated":
-                station_count_generated += 1
-            if row_contents[self.INDEX_DATA_STATION_STATUS] == "downloaded":
-                station_count_downloaded += 1
-            else:
-                station_count_missing += 1
-
-            file_name = row_contents[self.INDEX_DATA_FILE_NAME]
-            station_id = os.path.basename(file_name).split('.')[0]
-            folder_name = convert.get_base_folder(station_id)
-            if os.path.exists(folder_name):
-                sensor_count_actual += 1
-                file_count_actual += self.get_file_count(folder_name)
-                data_size_actual += self.get_folder_size(folder_name)
-
-
-        print "Progress File:\t" + self.progress_file_name + "\n"
-        
-        print "CSV DETAILS OF PROCESSED SENSORS"
-        print "Number of stations:\t" + "{:,}".format(sensor_count)
-        print "Number of files:\t" + "{:,}".format(file_count)
-        print "Data size:\t\t" + "{:,}".format(data_size) + " Bytes\n"
-
-        print "CSV DETAILS OF unPROCESSED SENSORS"
-        print "Number of stations:\t" + "{:,}".format(sensor_count_missing) + "\n"
-
-        print "CSV DETAILS OF PROCESSED STATIONS"
-        print "Generated:\t\t" + "{:,}".format(station_count_generated)
-        print "Downloaded:\t\t" + "{:,}".format(station_count_downloaded)
-        print "Missing:\t\t" + "{:,}".format(station_count_missing) + "\n"
-
-        print "FOLDER DETAILS"
-        print "Number of stations:\t" + "{:,}".format(sensor_count_actual)
-        print "Number of files:\t" + "{:,}".format(file_count_actual)
-        print "Data size:\t\t" + "{:,}".format(data_size_actual) + " Bytes\n"
-
-    
-    def get_progress_csv_row(self, file_name, sensors_status, station_status, file_count=-1, data_size=-1):
-        return file_name + self.SEPERATOR + sensors_status + self.SEPERATOR + station_status + self.SEPERATOR + str(file_count) + self.SEPERATOR + str(data_size) + "\n"
-    
-    def update_file_sensor_status(self, file_name, sensors_status, file_count=-1, data_size=-1):
-        for row in range(0, len(self.progress_data)):
-            if self.progress_data[row].startswith(file_name):
-                station_status = self.progress_data[row].rsplit(self.SEPERATOR)[self.INDEX_DATA_STATION_STATUS]
-                self.progress_data[row] = self.get_progress_csv_row(file_name, sensors_status, station_status, file_count, data_size)
-                break
-
-        # Save the file            
-        self.close_progress_data(True)
-
-    def update_file_station_status(self, file_name, station_status):
-        for row in range(0, len(self.progress_data)):
-            if self.progress_data[row].startswith(file_name):
-                row_contents = self.progress_data[row].rsplit(self.SEPERATOR)
-                sensors_status = row_contents[self.INDEX_DATA_SENSORS_STATUS]
-                file_count = int(row_contents[self.INDEX_DATA_FILE_COUNT])
-                data_size = int(row_contents[self.INDEX_DATA_FOLDER_DATA])
-                self.progress_data[row] = self.get_progress_csv_row(file_name, sensors_status, station_status, file_count, data_size)
-                break
-
-        # Save the file            
-        self.close_progress_data(True)
-
-    def get_file_count(self, folder_name):
-        count = 0
-        for dirpath, dirnames, filenames in os.walk(folder_name):
-            for f in filenames:
-                count += 1
-        return count
-
-    def get_folder_size(self, folder_name):
-        total_size = 0
-        for dirpath, dirnames, filenames in os.walk(folder_name):
-            for f in filenames:
-                fp = os.path.join(dirpath, f)
-                total_size += os.path.getsize(fp)
-        return total_size
-
-    def get_station_status(self, return_value):
-        if return_value == 2:
-            return self.DATA_FILE_DOWNLOADED
-        elif return_value == 1:
-            return self.DATA_FILE_GENERATED
-        return self.DATA_FILE_MISSING
-        
-    
-    def open_progress_data(self):
-        with open(self.progress_file_name, 'r') as file:
-            self.progress_data = file.readlines()
-
-    def close_progress_data(self, force=False):
-        if len(self.progress_data) > 0 or force:
-            with open(self.progress_file_name, 'w') as file:
-                file.writelines(self.progress_data)
-
-    
-    def reset(self):
-        self.close_progress_data()
-
-        self.current = self.DATA_FILE_START_INDEX
-        self.open_progress_data()
-
-    def set_type(self, type):
-        self.type = type
-
-    def set_data_reset(self, data_reset):
-        self.data_reset = data_reset
-
-
-    # Iterator Functions
-    def __iter__(self):
-        return self
-
-    def next(self):
-        columns = []
-        while True:
-            # find a row that has not been created.
-            if self.current >= len(self.progress_data):
-                raise StopIteration
-            row = self.progress_data[self.current]
-            self.current += 1
-            columns = row.rsplit(self.SEPERATOR)
-            if self.type == "sensor" and (columns[self.INDEX_DATA_SENSORS_STATUS].strip() != self.DATA_FILE_GENERATED or self.data_reset):
-                break
-            elif self.type == "station" and (columns[self.INDEX_DATA_STATION_STATUS].strip() != self.DATA_FILE_DOWNLOADED or self.data_reset):
-                break
-        return columns[self.INDEX_DATA_FILE_NAME]
-    
-    
-# Index values of each field details.
-PARTITION_INDEX_NODE = 0
-PARTITION_INDEX_DISK = 1
-PARTITION_INDEX_VIRTUAL = 2
-PARTITION_INDEX = 3
-PARTITION_INDEX_PATH = 4
-PARTITION_HEADER = ("Node", "Disk", "Virtual", "Index", "Path")
-            
-def get_partition_paths(node_id, partitions, base_paths, key="partitions"):
-    partition_paths = []
-    for scheme in get_partition_scheme(node_id, partitions, base_paths, key):
-        partition_paths.append(scheme[PARTITION_INDEX_PATH])
-    return partition_paths
-
-def get_partition_scheme(node_id, virtual_partitions, base_paths, key="partitions"):
-    partitions_per_disk = virtual_partitions / len(base_paths)
-    return get_disk_partition_scheme(node_id, partitions_per_disk, base_paths, key)
-
-def get_disk_partition_paths(node_id, partitions, base_paths, key="partitions"):
-    partition_paths = []
-    for scheme in get_disk_partition_scheme(node_id, partitions, base_paths, key):
-        partition_paths.append(scheme[PARTITION_INDEX_PATH])
-    return partition_paths
-
-def get_disk_partition_scheme(node_id, virtual_disk_partitions, base_paths, key="partitions"):
-    partition_scheme = []
-    for i in range(0, virtual_disk_partitions):
-        for j in range(0, len(base_paths)):
-            new_partition_path = base_paths[j] + key + "/" + get_partition_folder(j, virtual_disk_partitions, i) + "/"
-            partition_scheme.append((node_id, j, virtual_disk_partitions, i, new_partition_path))
-    return partition_scheme
-
-def get_partition_folder(disks, partitions, index):        
-    return "d" + str(disks) + "_p" + str(partitions) + "_i" + str(index)
-
-def prepare_path(path, reset):
-    """Ensures the directory is available. If reset, then its a brand new directory."""
-    if os.path.isdir(path) and reset:
-        shutil.rmtree(path)
-                
-    if not os.path.isdir(path):
-        os.makedirs(path)
-
-def file_get_contents(filename):
-    with open(filename) as f:
-        return f.read()

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
deleted file mode 100644
index fb59b50..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
+++ /dev/null
@@ -1,102 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import sys
-import os.path
-import shutil
-import tarfile
-import urllib
-import zipfile
-
-# Custom modules.
-from weather_config_ghcnd import *
-from weather_config_mshr import *
-
-class WeatherDownloadFiles:
-
-    def __init__(self, save_path):
-        self.save_path = save_path
-        
-        if not os.path.isdir(save_path):
-            os.makedirs(save_path)
-
-
-    def download_ghcnd_files(self, reset=False):
-        """Download the complete list."""
-        for file_name in FILE_NAMES:
-            url = BASE_DOWNLOAD_URL + file_name
-            self.download_file(url, reset)
-
-    def download_mshr_files(self, reset=False):
-        for url in MSHR_URLS:
-            self.download_file(url, reset)
-
-    def download_file(self, url, reset=False):
-        """Download the file, unless it exists."""
-        file_name = self.save_path + "/" + url.split('/')[-1]
-
-        if not os.path.isfile(file_name) or reset:
-            print "Downloading: " + url
-            urllib.urlretrieve(url, file_name, report_download_status)
-            print
-
-    def unzip_ghcnd_package(self, package, reset=False):
-        """Unzip the package file, unless it exists."""
-        file_name = self.save_path + "/" + package + ".tar.gz"
-        unzipped_path = self.save_path + "/" + package
-        
-        if os.path.isdir(unzipped_path) and reset:
-            shutil.rmtree(unzipped_path)
-            
-        if not os.path.isdir(unzipped_path):
-            print "Unzipping: " + file_name
-            tar_file = tarfile.open(file_name, 'r:gz')
-            tar_file.extractall(unzipped_path)
- 
-    def unzip_mshr_files(self, reset=False):
-        """Unzip the package file, unless it exists."""
-        for url in MSHR_URLS:
-            if url.endswith('.zip'):
-                file_name = self.save_path + "/" + url.split('/')[-1]
-                print "Unzipping: " + file_name
-                with zipfile.ZipFile(file_name, 'r') as myzip:
-                    myzip.extractall(self.save_path)
- 
-def report_download_status(count, block, size):
-    """Report download status."""
-    line_size = 50
-    erase = "\b" * line_size
-    sys.stdout.write(erase)
-    report = get_report_line((float(count) * block / size), line_size)
-    sys.stdout.write(report)
-
-def get_report_line(percentage, line_size):
-    """Creates a string to be used in reporting the percentage done."""
-    report = ""
-    for i in range(0, line_size):
-        if (float(i) / line_size < percentage):
-            report += "="
-        else:
-            report += "-"
-    return report
-            
-def download_file_save_as(url, new_file_name, reset=False):
-    """Download the file, unless it exists."""
-    if not os.path.isfile(new_file_name) or reset:
-        print "Downloading: " + url
-        urllib.urlretrieve(url, new_file_name, report_download_status)
-        print
-

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/util/README.md
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/README.md b/vxquery-benchmark/src/main/resources/util/README.md
deleted file mode 100644
index 8e2a204..0000000
--- a/vxquery-benchmark/src/main/resources/util/README.md
+++ /dev/null
@@ -1,28 +0,0 @@
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one or more
-  contributor license agreements.  See the NOTICE file distributed with
-  this work for additional information regarding copyright ownership.
-  The ASF licenses this file to You under the Apache License, Version 2.0
-  (the "License"); you may not use this file except in compliance with
-  the License.  You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
--->
-
-Utilities for Benchmark Operations
-=====================
-
-# Introduction
-
-Helpful scripts or configuration document to work with the benchmarks.
-
-## Saxon Collection
-
-To test the data with other XQuery processors, the saxon script helps with 
-creating a collection.xml file.
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/util/build_saxon_collection_xml.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/build_saxon_collection_xml.py b/vxquery-benchmark/src/main/resources/util/build_saxon_collection_xml.py
deleted file mode 100644
index 02f39ee..0000000
--- a/vxquery-benchmark/src/main/resources/util/build_saxon_collection_xml.py
+++ /dev/null
@@ -1,63 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import getopt, glob, os, sys
- 
-def main(argv):
-    xml_folder = ""
-     
-    # Get the base folder
-    try:
-        opts, args = getopt.getopt(argv, "f:h", ["folder="])
-    except getopt.GetoptError:
-        print 'The file options for build_saxon_collection_xml.py were not correctly specified.'
-        print 'To see a full list of options try:'
-        print '  $ python build_saxon_collection_xml.py -h'
-        sys.exit(2)
-    for opt, arg in opts:
-        if opt == '-h':
-            print 'Options:'
-            print '    -f        The base folder to create collection XML file.'
-            sys.exit()
-        elif opt in ('-f', "--folder"):
-            # check if file exists.
-            if os.path.exists(arg):
-                xml_folder = arg
-            else:
-                print 'Error: Argument must be a folder name for --folder (-f).'
-                sys.exit()
-  
-    # Required fields to run the script.
-    if xml_folder == "" or not os.path.exists(xml_folder):
-        print 'Error: The folder path option must be supplied:  --folder (-f).'
-        sys.exit()
-      
-    # find all XML files in folder
-    collection_xml = "<collection>"
-    for i in range(1, 5):
-        # Search the ith directory level.
-        search_pattern = xml_folder + ('/*' * i) + '.xml'
-        for file_path in glob.iglob(search_pattern):
-            collection_xml += '<doc href="' + str.replace(file_path, xml_folder, '') + '"/>'
-    collection_xml += "</collection>"
-          
-    # create collection XML
-    file = open('collection.xml', 'w')
-    file.write(collection_xml)
-    file.close()
-
-if __name__ == "__main__":
-    main(sys.argv[1:])

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/util/find_averages_in_logs.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/find_averages_in_logs.py b/vxquery-benchmark/src/main/resources/util/find_averages_in_logs.py
deleted file mode 100644
index 1cd7939..0000000
--- a/vxquery-benchmark/src/main/resources/util/find_averages_in_logs.py
+++ /dev/null
@@ -1,97 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import fnmatch
-import getopt
-import glob
-import os
-import sys
-import csv
-
-SEARCH_STRING = 'Average execution time:'
-
-def find_files(directory, pattern):
-    for root, dirs, files in os.walk(directory):
-        for basename in files:
-            if fnmatch.fnmatch(basename, pattern):
-                yield (root, basename)
-    
-    
-def main(argv):
-    ''' Same as bash: find $FOLDER -type f -name "*.xml" -exec basename {} \; > list_xml.csv
-    '''
-    log_folder = ""
-    save_file = ""
-    data_type = ""
-    
-    # Get the base folder
-    try:
-        opts, args = getopt.getopt(argv, "f:hs:t:", ["folder=", "save_file=", "data_type="])
-    except getopt.GetoptError:
-        print 'The file options for list_xml_files.py were not correctly specified.'
-        print 'To see a full list of options try:'
-        print '  $ python list_xml_files.py -h'
-        sys.exit(2)
-    for opt, arg in opts:
-        if opt == '-h':
-            print 'Options:'
-            print '    -f        The base folder to build XML file list.'
-            print '    -s        The save file.'
-            sys.exit()
-        elif opt in ('-f', "--folder"):
-            # check if file exists.
-            if os.path.exists(arg):
-                log_folder = arg
-            else:
-                print 'Error: Argument must be a folder name for --folder (-f).'
-                sys.exit()
-        elif opt in ('-s', "--save_file"):
-            save_file = arg
-        elif opt in ('-t', "--data_type"):
-            data_type = arg
-  
-    # Required fields to run the script.
-    if log_folder == "" or not os.path.exists(log_folder):
-        print 'Error: The folder path option must be supplied:  --folder (-f).'
-        sys.exit()
-    if save_file == "":
-        print 'Error: The folder path option must be supplied:  --save_file (-s).'
-        sys.exit()
-      
-    list_xml_csv = ''
-    with open(save_file, 'w') as outfile:
-        csvfile = csv.writer(outfile)
-        for path, filename in find_files(log_folder, '*.log'):
-            # Only write out a specific type of data xml documents found in a specific path.
-            with open(path + "/" + filename) as infile:
-                folders = path.replace(log_folder, "")
-                for line in infile:
-                    # Skip the root tags.
-                    if line.startswith(SEARCH_STRING):
-                        time_split = line.split(" ")
-                        name_split = filename.split(".")
-                        folder_split = folders.split("/")
-
-                        # Build data row
-                        row = folder_split
-                        row.append(name_split[0])
-                        row.append(time_split[3])
-                        row.append(name_split[2])
-                        csvfile.writerow(row)
-        
-          
-if __name__ == "__main__":
-    main(sys.argv[1:])

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/util/log_top.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/log_top.sh b/vxquery-benchmark/src/main/resources/util/log_top.sh
deleted file mode 100755
index 4a2f7e1..0000000
--- a/vxquery-benchmark/src/main/resources/util/log_top.sh
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-LOG_FILE=logs/top.log
-
-# Reset counters
-iostat >> /dev/null
-sar -n DEV 1 1  >> /dev/null
-
-# Save IO, CPU and Network snapshot to a log file.
-while (sleep 7)
-do
-	echo "---------------------------------------------" >> ${LOG_FILE}
-	date >> ${LOG_FILE}
-	echo >> ${LOG_FILE}
-	iostat -y 1 1 >> ${LOG_FILE}
-	top -n 1 -b | head -11 | tail -6 >> ${LOG_FILE}
-	sar -n DEV 1 1 >> ${LOG_FILE}
-done;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/util/merge_xml_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/merge_xml_files.py b/vxquery-benchmark/src/main/resources/util/merge_xml_files.py
deleted file mode 100644
index 9238a19..0000000
--- a/vxquery-benchmark/src/main/resources/util/merge_xml_files.py
+++ /dev/null
@@ -1,88 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import fnmatch
-import getopt
-import glob
-import os
-import sys
-
-XML_PREFIX = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?><root>' + "\n"
-XML_SUFFIX = '</root>' + "\n"
-
-def find_files(directory, pattern):
-    for root, dirs, files in os.walk(directory):
-        for basename in files:
-            if fnmatch.fnmatch(basename, pattern):
-                yield (root, basename)
-    
-    
-def main(argv):
-    ''' Same as bash: find $FOLDER -type f -name "*.xml" -exec basename {} \; > list_xml.csv
-    '''
-    xml_folder = ""
-    save_file = ""
-    data_type = ""
-     
-    # Get the base folder
-    try:
-        opts, args = getopt.getopt(argv, "f:hs:t:", ["folder=", "save_file=", "data_type="])
-    except getopt.GetoptError:
-        print 'The file options for list_xml_files.py were not correctly specified.'
-        print 'To see a full list of options try:'
-        print '  $ python merge_xml_files.py -f /path/to/folder -s new.xml -t sensors'
-        sys.exit(2)
-    for opt, arg in opts:
-        if opt == '-h':
-            print 'Options:'
-            print '    -f        The base folder to build XML file list.'
-            print '    -s        The save file.'
-            sys.exit()
-        elif opt in ('-f', "--folder"):
-            # check if file exists.
-            if os.path.exists(arg):
-                xml_folder = arg
-            else:
-                print 'Error: Argument must be a folder name for --folder (-f).'
-                sys.exit()
-        elif opt in ('-s', "--save_file"):
-            save_file = arg
-        elif opt in ('-t', "--data_type"):
-            data_type = arg
-  
-    # Required fields to run the script.
-    if xml_folder == "" or not os.path.exists(xml_folder):
-        print 'Error: The folder path option must be supplied:  --folder (-f).'
-        sys.exit()
-    if save_file == "":
-        print 'Error: The folder path option must be supplied:  --save_file (-s).'
-        sys.exit()
-      
-    list_xml_csv = ''
-    with open(save_file, 'w') as outfile:
-        outfile.write(XML_PREFIX)
-        for path, filename in find_files(xml_folder, '*.xml'):
-            # Only write out a specific type of data xml documents found in a specific path.
-            if data_type in path:
-                with open(path + "/" + filename) as infile:
-                    for line in infile:
-                        # Skip the root tags.
-                        if line != XML_PREFIX and line != XML_SUFFIX:
-                            outfile.write(line)
-        outfile.write(XML_SUFFIX)
-          
-if __name__ == "__main__":
-    main(sys.argv[1:])

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/util/vxquery_functions.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/vxquery_functions.xq b/vxquery-benchmark/src/main/resources/util/vxquery_functions.xq
deleted file mode 100644
index d0621eb..0000000
--- a/vxquery-benchmark/src/main/resources/util/vxquery_functions.xq
+++ /dev/null
@@ -1,27 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Function List :)
-(: VXQuery function list in csv with arguments and return types :)
-let $list := "../../../../../vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml"
-let $r :=
-    for $f in fn:doc($list)/functions/function
-        let $pl := 
-            for $p in $f/param
-            return $p/@type
-        return fn:string-join(($f/@name, fn:string-join($pl, ' '), $f/return/@type), ',')
-return fn:string-join($r , '|')
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/util/vxquery_operators.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/vxquery_operators.xq b/vxquery-benchmark/src/main/resources/util/vxquery_operators.xq
deleted file mode 100644
index f485807..0000000
--- a/vxquery-benchmark/src/main/resources/util/vxquery_operators.xq
+++ /dev/null
@@ -1,27 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Function List :)
-(: VXQuery function list in csv with arguments and return types :)
-let $list := "../../../../../vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-operators.xml"
-let $r :=
-    for $f in fn:doc($list)/operators/operator
-        let $pl := 
-            for $p in $f/param
-            return $p/@type
-        return fn:string-join(($f/@name, fn:string-join($pl, ' '), $f/return/@type), ',')
-return fn:string-join($r , '|')
\ No newline at end of file


[07/14] git commit: Found missing file.

Posted by pr...@apache.org.
Found missing file.


Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/44d07d98
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/44d07d98
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/44d07d98

Branch: refs/heads/master
Commit: 44d07d98e404bb8786b62c14a9284660e36fc228
Parents: c182925
Author: Preston Carman <pr...@apache.org>
Authored: Mon Oct 6 15:18:00 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Mon Oct 6 15:18:00 2014 -0700

----------------------------------------------------------------------
 .../vxquery/cli/VXQueryClusterShutdown.java     | 76 ++++++++++++++++++++
 1 file changed, 76 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/vxquery/blob/44d07d98/vxquery-server/src/main/java/org/apache/vxquery/cli/VXQueryClusterShutdown.java
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/java/org/apache/vxquery/cli/VXQueryClusterShutdown.java b/vxquery-server/src/main/java/org/apache/vxquery/cli/VXQueryClusterShutdown.java
new file mode 100644
index 0000000..37fe4f6
--- /dev/null
+++ b/vxquery-server/src/main/java/org/apache/vxquery/cli/VXQueryClusterShutdown.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.vxquery.cli;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.kohsuke.args4j.Argument;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+
+import edu.uci.ics.hyracks.api.client.HyracksConnection;
+import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;
+
+public class VXQueryClusterShutdown {
+    /**
+     * Main method to get command line options and execute query process.
+     * 
+     * @param args
+     * @throws Exception
+     */
+    public static void main(String[] args) throws Exception {
+        final CmdLineOptions opts = new CmdLineOptions();
+        CmdLineParser parser = new CmdLineParser(opts);
+
+        // parse command line options
+        try {
+            parser.parseArgument(args);
+        } catch (Exception e) {
+            parser.printUsage(System.err);
+            return;
+        }
+        
+        // give error message if missing arguments
+        if (opts.clientNetIpAddress == null) {
+            parser.printUsage(System.err);
+            return;
+        }
+        
+        try {
+            IHyracksClientConnection hcc = new HyracksConnection(opts.clientNetIpAddress, opts.clientNetPort);
+            hcc.stopCluster();
+        } catch (Exception e) {
+            System.err.println("Unable to connect and shutdown the Hyracks cluster.");
+            System.err.println(e);
+            return;
+        }
+    }
+
+    /**
+     * Helper class with fields and methods to handle all command line options
+     */
+    private static class CmdLineOptions {
+        @Option(name = "-client-net-ip-address", usage = "IP Address of the ClusterController", required = true)
+        private String clientNetIpAddress;
+
+        @Option(name = "-client-net-port", usage = "Port of the ClusterController")
+        private int clientNetPort = 1098;
+
+        @Argument
+        private List<String> arguments = new ArrayList<String>();
+    }
+
+}


[13/14] git commit: Continuing to tweak the MRQL scripts.

Posted by pr...@apache.org.
Continuing to tweak the MRQL scripts.


Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/9e0133ad
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/9e0133ad
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/9e0133ad

Branch: refs/heads/master
Commit: 9e0133adc899f580d89a47765da92b53c6d3ee17
Parents: 7f06298
Author: Preston Carman <pr...@apache.org>
Authored: Tue Oct 21 11:08:28 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Tue Oct 21 11:08:28 2014 -0700

----------------------------------------------------------------------
 .../mrql_scripts/load_node_file.sh              | 29 +++++---
 .../mrql_scripts/run_group_test.sh              | 25 ++++---
 .../mrql_scripts/run_mrql_tests.sh              |  2 +-
 .../noaa-ghcn-daily/scripts/run_benchmark.sh    |  2 +-
 .../RemoveUnusedSortDistinctNodesRule.java      | 19 +-----
 .../rules/util/CardinalityRuleToolbox.java      | 13 ----
 .../rewriter/rules/util/OperatorToolbox.java    | 72 --------------------
 .../vxquery/functions/builtin-functions.xml     |  1 +
 .../xmlquery/query/XMLQueryCompiler.java        |  2 +-
 .../src/main/resources/conf/cluster_example.xml | 12 ++--
 .../src/main/resources/conf/local.xml           | 18 ++---
 11 files changed, 57 insertions(+), 138 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
index ead0902..206c38b 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
@@ -19,20 +19,29 @@
 
 if [ -z "${1}" ]
 then
-    echo "Please enter the node number."
+    echo "Please enter the data set as the first argument."
     exit
 fi
 
-echo "Loading node ${1} data file in to cluster."
+if [ -z "${2}" ]
+then
+    echo "Please enter the node number as the second argument."
+    exit
+fi
+
+DATASET=${1}
+NODES=${2}
+
+echo "Loading ${NODES} node ${DATASET} data file in to cluster."
 
 # Add each sensor block
-cp saved/backups/mr/all_sensors_${1}.xml.gz disk1/hadoop/
-gunzip disk1/hadoop/all_sensors_${1}.xml.gz
-hadoop fs -copyFromLocal disk1/hadoop/all_sensors_${1}.xml all/sensors
-rm -f disk1/hadoop/all_sensors_${1}.xml
+cp saved/backups/mr/${DATASET}_sensors_${NODES}.xml.gz disk1/hadoop/
+gunzip disk1/hadoop/${DATASET}_sensors_${NODES}.xml.gz
+hadoop fs -copyFromLocal disk1/hadoop/${DATASET}_sensors_${NODES}.xml ${DATASET}/sensors
+rm -f disk1/hadoop/${DATASET}_sensors_${NODES}.xml
 
 # Add each station block
-cp saved/backups/mr/all_stations_${1}.xml.gz disk1/hadoop/
-gunzip disk1/hadoop/all_stations_${1}.xml.gz
-hadoop fs -copyFromLocal disk1/hadoop/all_stations_${1}.xml all/stations
-rm -f disk1/hadoop/all_stations_${1}.xml
+cp saved/backups/mr/${DATASET}_stations_${NODES}.xml.gz disk1/hadoop/
+gunzip disk1/hadoop/${DATASET}_stations_${NODES}.xml.gz
+hadoop fs -copyFromLocal disk1/hadoop/${DATASET}_stations_${NODES}.xml ${DATASET}/stations
+rm -f disk1/hadoop/${DATASET}_stations_${NODES}.xml

http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
index f42a451..0208beb 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
@@ -19,11 +19,18 @@
 
 if [ -z "${1}" ]
 then
-    echo "Please enter the number of nodes."
+    echo "Please enter the data set as the first argument."
     exit
 fi
 
-NODES=${1}
+if [ -z "${2}" ]
+then
+    echo "Please enter the node number as the second argument."
+    exit
+fi
+
+DATASET=${1}
+NODES=${2}
 REPEAT=1
 
 # Start Hadoop
@@ -32,24 +39,26 @@ sh saved/hadoop/hadoop-1.2.1/bin/start-all.sh
 sleep 10
 
 # Prepare hadoop file system
-hadoop fs -mkdir all
+hadoop fs -mkdir ${DATASET}
 hadoop fs -ls 
-hadoop fs -mkdir all/sensors
-hadoop fs -mkdir all/stations
-hadoop fs -ls all
+hadoop fs -mkdir ${DATASET}/sensors
+hadoop fs -mkdir ${DATASET}/stations
+hadoop fs -ls ${DATASET}
+
+hadoop balancer
 
 
 # Upload test data
 COUNTER=0
 while [ ${COUNTER} -lt ${NODES} ];
 do
-    sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh ${COUNTER}
+    sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh ${DATASET} ${COUNTER}
     let COUNTER=COUNTER+1 
 done
 
 
 # Start test
-sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/ ${NODES} ${REPEAT}
+sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/ ${NODES} ${REPEAT} ${DATASET}
 
 
 # Stop Hadoop

http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
index 1e512e1..d6bc9ab 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
@@ -22,7 +22,7 @@
 
 NODES=${2}
 REPEAT=${3}
-DATASET="all"
+DATASET=${4}
 
 
 # Make log folder

http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
index 88339bd..5146586 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
@@ -52,7 +52,7 @@ do
         echo "\nBuffer Size: ${BUFFER_SIZE}" >> ${log_base_path}/${log_file}
         echo "\nFrame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file}
         echo "\nJoin Hash Size: ${JOIN_HASH_SIZE}" >> ${log_base_path}/${log_file}
-        fi;
+    fi;
 done
 
 if which programname >/dev/null;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveUnusedSortDistinctNodesRule.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveUnusedSortDistinctNodesRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveUnusedSortDistinctNodesRule.java
index 43d636b..43e2603 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveUnusedSortDistinctNodesRule.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveUnusedSortDistinctNodesRule.java
@@ -384,8 +384,8 @@ public class RemoveUnusedSortDistinctNodesRule implements IAlgebraicRewriteRule
                 // Find the last operator to set a variable and call this function again.
                 SubplanOperator subplan = (SubplanOperator) op;
                 for (int index = 0; index < subplan.getNestedPlans().size(); index++) {
-                    AbstractLogicalOperator lastOperator = (AbstractLogicalOperator) subplan.getNestedPlans().get(index)
-                            .getRoots().get(0).getValue();
+                    AbstractLogicalOperator lastOperator = (AbstractLogicalOperator) subplan.getNestedPlans()
+                            .get(index).getRoots().get(0).getValue();
                     updateVariableMap(lastOperator, cardinalityVariable, documentOrderVariables, uniqueNodesVariables,
                             vxqueryContext);
                 }
@@ -437,21 +437,6 @@ public class RemoveUnusedSortDistinctNodesRule implements IAlgebraicRewriteRule
                 break;
 
             // The following operators' analysis has not yet been implemented.
-            case CLUSTER:
-            case DISTINCT:
-            case EXTENSION_OPERATOR:
-            case GROUP:
-            case INDEX_INSERT_DELETE:
-            case INSERT_DELETE:
-            case LIMIT:
-            case PARTITIONINGSPLIT:
-            case REPLICATE:
-            case RUNNINGAGGREGATE:
-            case SCRIPT:
-            case SINK:
-            case UNIONALL:
-            case UNNEST_MAP:
-            case UPDATE:
             default:
                 throw new RuntimeException("Operator (" + op.getOperatorTag()
                         + ") has not been implemented in rewrite rule.");

http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/CardinalityRuleToolbox.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/CardinalityRuleToolbox.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/CardinalityRuleToolbox.java
index a586c06..5b4594e 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/CardinalityRuleToolbox.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/CardinalityRuleToolbox.java
@@ -81,19 +81,6 @@ public class CardinalityRuleToolbox {
                 break;
 
             // The following operators' analysis has not yet been implemented.
-            case CLUSTER:
-            case DISTINCT:
-            case EXTENSION_OPERATOR:
-            case INDEX_INSERT_DELETE:
-            case INSERT_DELETE:
-            case PARTITIONINGSPLIT:
-            case REPLICATE:
-            case RUNNINGAGGREGATE:
-            case SCRIPT:
-            case SINK:
-            case UNIONALL:
-            case UNNEST_MAP:
-            case UPDATE:
             default:
                 throw new RuntimeException("Operator (" + op.getOperatorTag()
                         + ") has not been implemented in rewrite rule.");

http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/OperatorToolbox.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/OperatorToolbox.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/OperatorToolbox.java
index da85f2d..725a082 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/OperatorToolbox.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/OperatorToolbox.java
@@ -84,29 +84,6 @@ public class OperatorToolbox {
                 AbstractUnnestOperator auo = (AbstractUnnestOperator) op;
                 result.add(auo.getExpressionRef());
                 break;
-            case CLUSTER:
-            case DATASOURCESCAN:
-            case DISTINCT:
-            case DISTRIBUTE_RESULT:
-            case EMPTYTUPLESOURCE:
-            case EXCHANGE:
-            case EXTENSION_OPERATOR:
-            case GROUP:
-            case INDEX_INSERT_DELETE:
-            case INSERT_DELETE:
-            case LIMIT:
-            case NESTEDTUPLESOURCE:
-            case ORDER:
-            case PARTITIONINGSPLIT:
-            case PROJECT:
-            case REPLICATE:
-            case SCRIPT:
-            case SINK:
-            case SUBPLAN:
-            case UNIONALL:
-            case UPDATE:
-            case WRITE:
-            case WRITE_RESULT:
             default:
                 // TODO Not yet implemented.
                 break;
@@ -129,32 +106,6 @@ public class OperatorToolbox {
             case UNNEST_MAP:
                 AbstractUnnestOperator ano = (AbstractUnnestOperator) op;
                 return ano.getExpressionRef();
-            case CLUSTER:
-            case DATASOURCESCAN:
-            case DISTINCT:
-            case DISTRIBUTE_RESULT:
-            case EMPTYTUPLESOURCE:
-            case EXCHANGE:
-            case EXTENSION_OPERATOR:
-            case GROUP:
-            case INDEX_INSERT_DELETE:
-            case INNERJOIN:
-            case INSERT_DELETE:
-            case LEFTOUTERJOIN:
-            case LIMIT:
-            case NESTEDTUPLESOURCE:
-            case ORDER:
-            case PARTITIONINGSPLIT:
-            case PROJECT:
-            case REPLICATE:
-            case SCRIPT:
-            case SELECT:
-            case SINK:
-            case SUBPLAN:
-            case UNIONALL:
-            case UPDATE:
-            case WRITE:
-            case WRITE_RESULT:
             default:
                 // TODO Not yet implemented.
                 break;
@@ -196,29 +147,6 @@ public class OperatorToolbox {
             case EMPTYTUPLESOURCE:
             case NESTEDTUPLESOURCE:
                 return null;
-            case CLUSTER:
-            case DISTINCT:
-            case DISTRIBUTE_RESULT:
-            case EXCHANGE:
-            case EXTENSION_OPERATOR:
-            case GROUP:
-            case INDEX_INSERT_DELETE:
-            case INNERJOIN:
-            case INSERT_DELETE:
-            case LEFTOUTERJOIN:
-            case LIMIT:
-            case ORDER:
-            case PARTITIONINGSPLIT:
-            case PROJECT:
-            case REPLICATE:
-            case SCRIPT:
-            case SELECT:
-            case SINK:
-            case SUBPLAN:
-            case UNIONALL:
-            case UPDATE:
-            case WRITE:
-            case WRITE_RESULT:
             default:
                 // Skip operators and go look at input.
                 for (Mutable<ILogicalOperator> input : op.getInputs()) {

http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml b/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml
index b439a83..38f03a4 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml
+++ b/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml
@@ -125,6 +125,7 @@
     <function name="fn:collection">
         <param name="arg" type="xs:string?"/>
         <return type="node()*"/>
+        <!-- Collection operator is added during the rewrite rules phase.  -->
     </function>
     
     <!-- fn:compare($comparand1  as xs:string?, $comparand2 as xs:string?)  as xs:integer?  -->

http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java b/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java
index 966bd87..3cdc492 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java
@@ -113,7 +113,7 @@ public class XMLQueryCompiler {
                 });
         builder.getPhysicalOptimizationConfig().setFrameSize(this.frameSize);
         if (joinHashSize > 0) {
-            builder.getPhysicalOptimizationConfig().setInMemHashJoinTableSize(joinHashSize);
+            builder.getPhysicalOptimizationConfig().setMaxFramesHybridHash(joinHashSize);
         }
         builder.setLogicalRewrites(buildDefaultLogicalRewrites());
         builder.setPhysicalRewrites(buildDefaultPhysicalRewrites());

http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-server/src/main/resources/conf/cluster_example.xml
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/conf/cluster_example.xml b/vxquery-server/src/main/resources/conf/cluster_example.xml
index 41d642d..18d9173 100644
--- a/vxquery-server/src/main/resources/conf/cluster_example.xml
+++ b/vxquery-server/src/main/resources/conf/cluster_example.xml
@@ -15,13 +15,13 @@
   limitations under the License.
 -->
 <cluster xmlns="cluster">
-	<name>local</name>
+    <name>local</name>
     <username>joe</username>
-	<master_node>
-		<id>master</id>
-		<client_ip>128.195.52.177</client_ip>
-    	<cluster_ip>192.168.100.0</cluster_ip>
-	</master_node>
+    <master_node>
+        <id>master</id>
+        <client_ip>128.195.52.177</client_ip>
+        <cluster_ip>192.168.100.0</cluster_ip>
+    </master_node>
     <node>
         <id>nodeA</id>
         <cluster_ip>192.168.100.1</cluster_ip>

http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-server/src/main/resources/conf/local.xml
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/conf/local.xml b/vxquery-server/src/main/resources/conf/local.xml
index 5c27e11..4a48620 100644
--- a/vxquery-server/src/main/resources/conf/local.xml
+++ b/vxquery-server/src/main/resources/conf/local.xml
@@ -15,14 +15,14 @@
   limitations under the License.
 -->
 <cluster xmlns="cluster">
-	<name>local</name>
-	<master_node>
-		<id>master</id>
-		<client_ip>127.0.0.1</client_ip>
-    	<cluster_ip>127.0.0.1</cluster_ip>
-	</master_node>
-	<node>
-		<id>node1</id>
-		<cluster_ip>127.0.0.1</cluster_ip>
+    <name>local</name>
+    <master_node>
+        <id>master</id>
+        <client_ip>127.0.0.1</client_ip>
+        <cluster_ip>127.0.0.1</cluster_ip>
+    </master_node>
+    <node>
+        <id>node1</id>
+        <cluster_ip>127.0.0.1</cluster_ip>
     </node>
 </cluster>


[06/14] git commit: copy of all changes in exrt benchmark queries that is copyright free.

Posted by pr...@apache.org.
copy of all changes in exrt benchmark queries that is copyright free.


Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/c182925c
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/c182925c
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/c182925c

Branch: refs/heads/master
Commit: c182925cc23f8662077150da7f6c9a3d67a6fbc8
Parents: 3167366
Author: Preston Carman <pr...@apache.org>
Authored: Mon Oct 6 15:15:03 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Mon Oct 6 15:15:03 2014 -0700

----------------------------------------------------------------------
 .../main/resources/noaa-ghcn-daily/README.md    |  40 ++
 .../noaa-ghcn-daily/conf/weather_example.xml    |  35 ++
 .../conf/weather_example_cluster.xml            |  58 ++
 .../noaa-ghcn-daily/other_systems/mrql/q00.mrql |  23 +
 .../noaa-ghcn-daily/other_systems/mrql/q01.mrql |  21 +
 .../noaa-ghcn-daily/other_systems/mrql/q02.mrql |  24 +
 .../noaa-ghcn-daily/other_systems/mrql/q03.mrql |  22 +
 .../noaa-ghcn-daily/other_systems/mrql/q04.mrql |  24 +
 .../other_systems/mrql/q04_count_sensor.mrql    |  21 +
 .../other_systems/mrql/q04_count_station.mrql   |  23 +
 .../noaa-ghcn-daily/other_systems/mrql/q05.mrql |  27 +
 .../other_systems/mrql/q05_count_sensor.mrql    |  23 +
 .../other_systems/mrql/q05_count_station.mrql   |  23 +
 .../noaa-ghcn-daily/other_systems/mrql/q06.mrql |  26 +
 .../other_systems/mrql/q06_count_sensor.mrql    |  23 +
 .../other_systems/mrql/q06_count_station.mrql   |  23 +
 .../noaa-ghcn-daily/other_systems/mrql/q07.mrql |  26 +
 .../other_systems/mrql/q07_count_join.mrql      |  26 +
 .../other_systems/mrql/q07_count_tmax.mrql      |  22 +
 .../other_systems/mrql/q07_count_tmin.mrql      |  22 +
 .../other_systems/mrql_scripts/clear.sh         |  19 +
 .../mrql_scripts/run_mrql_tests.sh              |  46 ++
 .../other_systems/mrql_scripts/start.sh         |  20 +
 .../other_systems/mrql_scripts/stop.sh          |  20 +
 .../other_systems/saxon/count_sensor.xq         |   7 +
 .../other_systems/saxon/count_station.xq        |   7 +
 .../noaa-ghcn-daily/other_systems/saxon/q00.xq  |  15 +
 .../noaa-ghcn-daily/other_systems/saxon/q01.xq  |   8 +
 .../noaa-ghcn-daily/other_systems/saxon/q02.xq  |  14 +
 .../noaa-ghcn-daily/other_systems/saxon/q03.xq  |   8 +
 .../noaa-ghcn-daily/other_systems/saxon/q04.xq  |  30 +
 .../other_systems/saxon/q04_count_sensor.xq     |  10 +
 .../other_systems/saxon/q04_count_station.xq    |   8 +
 .../noaa-ghcn-daily/other_systems/saxon/q05.xq  |  33 ++
 .../other_systems/saxon/q05_count_sensor.xq     |  11 +
 .../other_systems/saxon/q05_count_station.xq    |   8 +
 .../noaa-ghcn-daily/other_systems/saxon/q06.xq  |  30 +
 .../other_systems/saxon/q06_count_sensor.xq     |   8 +
 .../other_systems/saxon/q06_count_station.xq    |   5 +
 .../noaa-ghcn-daily/other_systems/saxon/q07.xq  |  15 +
 .../other_systems/saxon/q07_count_tmax.xq       |   9 +
 .../other_systems/saxon/q07_count_tmin.xq       |   9 +
 .../saxon_scripts/run_saxon_tests.sh            |  44 ++
 .../noaa-ghcn-daily/queries/count_sensor.xq     |  24 +
 .../noaa-ghcn-daily/queries/count_station.xq    |  24 +
 .../noaa-ghcn-daily/queries/no_result.xq        |  24 +
 .../resources/noaa-ghcn-daily/queries/q00.xq    |  31 ++
 .../resources/noaa-ghcn-daily/queries/q01.xq    |  25 +
 .../resources/noaa-ghcn-daily/queries/q02.xq    |  30 +
 .../resources/noaa-ghcn-daily/queries/q03.xq    |  25 +
 .../resources/noaa-ghcn-daily/queries/q04.xq    |  32 ++
 .../noaa-ghcn-daily/queries/q04_count_join.xq   |  34 ++
 .../noaa-ghcn-daily/queries/q04_count_sensor.xq |  29 +
 .../queries/q04_count_station.xq                |  28 +
 .../resources/noaa-ghcn-daily/queries/q05.xq    |  33 ++
 .../noaa-ghcn-daily/queries/q05_count_join.xq   |  35 ++
 .../noaa-ghcn-daily/queries/q05_count_sensor.xq |  31 ++
 .../queries/q05_count_station.xq                |  28 +
 .../resources/noaa-ghcn-daily/queries/q06.xq    |  30 +
 .../noaa-ghcn-daily/queries/q06_count_join.xq   |  34 ++
 .../noaa-ghcn-daily/queries/q06_count_sensor.xq |  29 +
 .../queries/q06_count_station.xq                |  27 +
 .../resources/noaa-ghcn-daily/queries/q07.xq    |  33 ++
 .../noaa-ghcn-daily/queries/q07_count_join.xq   |  35 ++
 .../noaa-ghcn-daily/queries/q07_count_tmax.xq   |  28 +
 .../noaa-ghcn-daily/queries/q07_count_tmin.xq   |  28 +
 .../resources/noaa-ghcn-daily/scripts/README.md |  51 ++
 .../scripts/benchmark_logging.properties        |   1 +
 .../noaa-ghcn-daily/scripts/run_benchmark.sh    |  68 +++
 .../scripts/run_benchmark_cluster.sh            |  90 +++
 .../noaa-ghcn-daily/scripts/run_group_test.sh   |  51 ++
 .../noaa-ghcn-daily/scripts/run_mrql_tests.sh   |  42 ++
 .../scripts/weather_benchmark.py                | 377 +++++++++++++
 .../noaa-ghcn-daily/scripts/weather_cli.py      | 236 ++++++++
 .../noaa-ghcn-daily/scripts/weather_config.py   | 134 +++++
 .../scripts/weather_config_ghcnd.py             |  95 ++++
 .../scripts/weather_config_mshr.py              |  78 +++
 .../scripts/weather_convert_to_xml.py           | 554 +++++++++++++++++++
 .../scripts/weather_data_files.py               | 406 ++++++++++++++
 .../scripts/weather_download_files.py           | 102 ++++
 .../src/main/resources/util/README.md           |  28 +
 .../util/build_saxon_collection_xml.py          |  63 +++
 .../src/main/resources/util/diff_xml_files.py   |  97 ++++
 .../resources/util/find_averages_in_logs.py     |  97 ++++
 .../src/main/resources/util/list_xml_files.py   |  72 +++
 .../src/main/resources/util/merge_xml_files.py  |  88 +++
 .../main/resources/util/vxquery_functions.xq    |  27 +
 .../main/resources/util/vxquery_operators.xq    |  27 +
 .../java/org/apache/vxquery/cli/VXQuery.java    |   1 +
 vxquery-server/pom.xml                          |   4 +
 .../main/resources/scripts/cluster_actions.py   |  16 +-
 .../src/main/resources/scripts/cluster_cli.py   |   6 +-
 .../resources/scripts/cluster_information.py    |  27 +-
 .../src/main/resources/scripts/startcc.sh       |   6 +-
 .../src/main/resources/scripts/startnc.sh       |   4 +-
 .../src/main/resources/scripts/stopcc.sh        |   3 +-
 .../src/main/resources/scripts/stopcluster.sh   |  49 ++
 .../src/main/resources/scripts/stopnc.sh        |   2 +-
 98 files changed, 4473 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/README.md
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/README.md b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/README.md
new file mode 100644
index 0000000..9b512dd
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/README.md
@@ -0,0 +1,40 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+NOAA GHCN-Daily Benchmark
+=====================
+
+# Introduction
+
+The NOAA has hosted DAILY GLOBAL HISTORICAL CLIMATOLOGY NETWORK (GHCN-DAILY) 
+.dat files. Weather.gov has an RSS/XML feed that gives current weather sensor 
+readings. Using the RSS feed as a template, the GHCN-DAILY historical 
+information is used to generate past RSS feed XML documents. The process allows 
+testing on a large set of information without having to continually monitor 
+the weather.gov site for all the weather details for years.
+
+# Detailed Description
+
+Detailed GHCN-DAILY information: 
+<http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt>
+
+# Folders
+
+ * conf
+ * other_systems
+ * queries
+ * scripts
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example.xml
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example.xml b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example.xml
new file mode 100644
index 0000000..2c15a33
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example.xml
@@ -0,0 +1,35 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<data xmlns="data">
+    <name>Local Example</name>
+    <save_path>/data</save_path>
+    <package>ghcnd_all</package>
+    <node>
+        <id>localhost</id>
+        <cluster_ip>127.0.0.1</cluster_ip>
+    </node>
+    <dataset>
+        <name>tiny-example</name>
+        <test>local_speed_up</test>
+        <save_path>/data</save_path>
+        <partition_type>small_files</partition_type>
+        <partitions_per_path>1</partitions_per_path>
+        <partitions_per_path>2</partitions_per_path>
+        <partitions_per_path>4</partitions_per_path>
+        <partitions_per_path>8</partitions_per_path>
+    </dataset>
+</data>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example_cluster.xml
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example_cluster.xml b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example_cluster.xml
new file mode 100644
index 0000000..7d05ac0
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example_cluster.xml
@@ -0,0 +1,58 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<data xmlns="data">
+    <name>Cluster Example</name>
+    <save_path>/data</save_path>
+    <package>ghcnd_all</package>
+    <node>
+        <id>machine1</id>
+        <cluster_ip>127.0.0.1</cluster_ip>
+    </node>
+    <node>
+        <id>machine2</id>
+        <cluster_ip>127.0.0.2</cluster_ip>
+    </node>
+    <node>
+        <id>machine3</id>
+        <cluster_ip>127.0.0.3</cluster_ip>
+    </node>
+    <node>
+        <id>machine4</id>
+        <cluster_ip>127.0.0.4</cluster_ip>
+    </node>
+    <node>
+        <id>machine5</id>
+        <cluster_ip>127.0.0.5</cluster_ip>
+    </node>
+    <dataset>
+        <name>tiny-1drive</name>
+        <test>speed_up</test>
+        <test>batch_scale_out</test>
+        <save_path>/data</save_path>
+        <partition_type>small_files</partition_type>
+        <partitions_per_path>1</partitions_per_path>
+    </dataset>
+    <dataset>
+        <name>small-2drives</name>
+        <test>speed_up</test>
+        <test>batch_scale_out</test>
+        <save_path>/data</save_path>
+        <save_path>/data2</save_path>
+        <partition_type>large_files</partition_type>
+        <partitions_per_path>1</partitions_per_path>
+    </dataset>
+</data>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q00.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q00.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q00.mrql
new file mode 100644
index 0000000..49d005e
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q00.mrql
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+select (r)
+from r in source(xml, args[0], {"data"})
+where text(r.station) = "GHCND:USW00012836"
+    and toInt(substring(text(r.date), 0, 4)) >= 2003
+    and toInt(substring(text(r.date), 5, 7)) = 12
+    and toInt(substring(text(r.date), 8, 10)) = 25
+;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q01.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q01.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q01.mrql
new file mode 100644
index 0000000..f4cbd45
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q01.mrql
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+select (r)
+from r in source(xml, args[0], {"data"})
+where text(r.dataType) = "AWND"
+    and toFloat(text(r.value)) > 491.744
+;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q02.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q02.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q02.mrql
new file mode 100644
index 0000000..5b7b507
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q02.mrql
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+sum(
+    select (toInt(text(r.value)))
+    from r in source(xml, args[0], {"data"})
+    where text(r.station) = "GHCND:USW00014771"
+        and toInt(substring(text(r.date), 0, 4)) = 1999
+        and text(r.dataType) = "PRCP"
+) / 10
+;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q03.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q03.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q03.mrql
new file mode 100644
index 0000000..b444e55
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q03.mrql
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+max(
+    select (toInt(text(r.value)))
+    from r in source(xml, args[0], {"data"})
+    where text(r.dataType) = "TMAX"
+) / 10
+;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q04.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q04.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q04.mrql
new file mode 100644
index 0000000..1b36852
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q04.mrql
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+select (sensors)
+from sensors in source(xml, args[0], {"data"}),
+    stations in source(xml, args[1], {"station"}),
+    l in stations.locationLabels
+where text(stations.id) = text(sensors.station) 
+    and text(sensors.date) = "1976-07-04T00:00:00.000"
+    and text(l.displayName) = "Washington"
+;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q04_count_sensor.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q04_count_sensor.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q04_count_sensor.mrql
new file mode 100644
index 0000000..da0eae3
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q04_count_sensor.mrql
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+select (r)
+from r in source(xml, args[0], {"data"})
+where text(r.date) = "1976-07-04T00:00:00.000"
+  and text(r.dataType) = "TMAX"
+;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q04_count_station.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q04_count_station.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q04_count_station.mrql
new file mode 100644
index 0000000..16bbe5a
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q04_count_station.mrql
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+count(
+    select (t) /* t is the station binding declared in the from clause */
+    from t in source(xml, args[1], {"station"}),
+        l in t.locationLabels
+    where text(l.displayName) = "WASHINGTON"
+)
+;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q05.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q05.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q05.mrql
new file mode 100644
index 0000000..230f6e7
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q05.mrql
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+min(
+    select (toInt(text(sensors.value)))
+    from sensors in source(xml, args[0], {"data"}),
+        stations in source(xml, args[1], {"station"}),
+        l in stations.locationLabels
+    where text(stations.id) = text(sensors.station) 
+        and toInt(substring(text(sensors.date), 0, 4)) = 2001
+        and text(sensors.dataType) = "TMIN"
+        and text(l.id) = "FIPS:US"
+) / 10
+;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q05_count_sensor.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q05_count_sensor.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q05_count_sensor.mrql
new file mode 100644
index 0000000..dd801ed
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q05_count_sensor.mrql
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+count(
+    select (toInt(text(r.value)))
+    from r in source(xml, args[0], {"data"})
+    where toInt(substring(text(r.date), 0, 4)) = 2001
+        and text(r.dataType) = "TMIN"
+)
+;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q05_count_station.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q05_count_station.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q05_count_station.mrql
new file mode 100644
index 0000000..d8fcacc
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q05_count_station.mrql
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+count(
+    select (t)
+    from t in source(xml, args[1], {"station"}),
+        l in t.locationLabels
+    where text(l.id) = "FIPS:US"
+)
+;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06.mrql
new file mode 100644
index 0000000..583a5b9
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06.mrql
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+select (n, d, v)
+from sensors in source(xml, args[0], {"data"}),
+    d in sensors.date,
+    v in sensors.value,
+    stations in source(xml, args[1], {"station"}),
+    n in stations.displayName /* last binding: no comma before where */
+where text(stations.id) = text(sensors.station) 
+    and toInt(substring(text(d), 0, 4)) = 2000
+    and text(sensors.dataType) = "TMAX"
+;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06_count_sensor.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06_count_sensor.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06_count_sensor.mrql
new file mode 100644
index 0000000..22e5918
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06_count_sensor.mrql
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+count(
+    select (r.date, r.value)
+    from r in source(xml, args[0], {"data"})
+    where toInt(substring(text(r.date), 0, 4)) = 2000
+        and text(r.dataType) = "TMAX"
+)
+;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06_count_station.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06_count_station.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06_count_station.mrql
new file mode 100644
index 0000000..9fb9e84
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06_count_station.mrql
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+count(
+    select (t.displayName)
+    from t in source(xml, args[1], {"station"}),
+        l in t.locationLabels
+    where text(l.displayName) = "WASHINGTON"
+)
+;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07.mrql
new file mode 100644
index 0000000..cdb0b0c
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07.mrql
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+avg(
+    select (toInt(text(rtmax.value))-toInt(text(rtmin.value)))
+    from rtmax in source(xml, args[0], {"data"}),
+        rtmin in source(xml, args[0], {"data"})
+    where text(rtmax.date) = text(rtmin.date)
+        and text(rtmax.station) = text(rtmin.station)
+        and text(rtmax.dataType) = "TMAX"
+        and text(rtmin.dataType) = "TMIN"
+) / 10
+;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_join.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_join.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_join.mrql
new file mode 100644
index 0000000..8dec470
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_join.mrql
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+count(
+    select (toInt(text(rtmax.value))-toInt(text(rtmin.value)))
+    from rtmax in source(xml, args[0], {"data"}),
+        rtmin in source(xml, args[0], {"data"})
+    where text(rtmax.date) = text(rtmin.date)
+        and text(rtmax.station) = text(rtmin.station)
+        and text(rtmax.dataType) = "TMAX" /* filter each side of the self-join via its own binding */
+        and text(rtmin.dataType) = "TMIN"
+)
+;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmax.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmax.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmax.mrql
new file mode 100644
index 0000000..ca8ab4c
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmax.mrql
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+count(
+    select (rtmax)
+    from rtmax in source(xml, args[0], {"data"})
+    where text(rtmax.dataType) = "TMAX"
+)
+;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmin.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmin.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmin.mrql
new file mode 100644
index 0000000..fe17ebe
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmin.mrql
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+count(
+    select (rtmin)
+    from rtmin in source(xml, args[0], {"data"})
+    where text(rtmin.dataType) = "TMIN"
+)
+;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/clear.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/clear.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/clear.sh
new file mode 100755
index 0000000..da7cabe
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/clear.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#      http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+hadoop namenode -format

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
new file mode 100755
index 0000000..10ab4d9
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#      http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Examples
+# run_mrql_tests.sh mrql_all/ 1 2
+
+NODES=${2}
+REPEAT=${3}
+DATASET="all"
+
+
+for j in $(find ${1} -name '*q??.mrql')
+do
+    date
+    echo "Running MRQL query: ${j}"
+    time for i in {1..${REPEAT}}; do ~/mrql/incubator-mrql/bin/mrql -dist -nodes ${NODES} ${j} ${DATASET}/sensors.xml ${DATASET}/stations.xml >> ~/disk1/weather_data/mrql/query_logs/$(basename "${j}").log 2>&1; done; 
+done
+
+
+if which programname >/dev/null;
+then
+    echo "Sending out e-mail notification."
+    SUBJECT="MRQL Tests Finished (${DATASET})"
+    EMAIL="ecarm002@ucr.edu"
+    /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
+    Completed all MRQL tests on ${DATASET}.
+    EOM
+else
+    echo "No mail command to use."
+fi;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/start.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/start.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/start.sh
new file mode 100755
index 0000000..a1766c9
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/start.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#      http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+bin/start-all.sh

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/stop.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/stop.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/stop.sh
new file mode 100755
index 0000000..e49d818
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/stop.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#      http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+bin/stop-all.sh

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/count_sensor.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/count_sensor.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/count_sensor.xq
new file mode 100644
index 0000000..1e8e312
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/count_sensor.xq
@@ -0,0 +1,7 @@
+(: XQuery Join Query :)
+(: Count all the weather sensor readings available.                           :)
+count(
+    let $sensor_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+    for $r in collection($sensor_collection)/root/dataCollection/data
+    return $r
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/count_station.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/count_station.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/count_station.xq
new file mode 100644
index 0000000..fe6ec8f
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/count_station.xq
@@ -0,0 +1,7 @@
+(: XQuery Join Query :)
+(: Count all the weather stations available.                                  :)
+count(
+    let $station_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/stations/?select=*.xml;recurse=yes"
+    for $s in collection($station_collection)/root/stationCollection/station
+    return $s
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q00.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q00.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q00.xq
new file mode 100644
index 0000000..09c5b79
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q00.xq
@@ -0,0 +1,15 @@
+(: 
+XQuery Filter Query
+-------------------
+See historical data for Key West International Airport, FL (USW00012836)
+station by selecting  the weather readings for December 25 over the last 
+10 years. 
+:)
+let $collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+for $r in collection($collection)/root/dataCollection/data
+let $datetime := xs:dateTime(fn:data($r/date))
+where $r/station eq "GHCND:USW00012836" 
+    and fn:year-from-dateTime($datetime) ge 2003
+    and fn:month-from-dateTime($datetime) eq 12 
+    and fn:day-from-dateTime($datetime) eq 25
+return $r
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q01.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q01.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q01.xq
new file mode 100644
index 0000000..39d7e20
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q01.xq
@@ -0,0 +1,8 @@
+(: XQuery Filter Query :)
+(: Find all readings with a hurricane force or extreme wind warning.          :)
+(: The warnings occur when the wind speed (AWND) exceeds 110 mph (49.1744     :)
+(: meters per second). (Wind value is in tenths of a meter per second)        :)
+let $collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+for $r in fn:collection($collection)/root/dataCollection/data
+where $r/dataType eq "AWND" and xs:decimal($r/value) gt 491.744
+return $r

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q02.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q02.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q02.xq
new file mode 100644
index 0000000..3fb0975
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q02.xq
@@ -0,0 +1,14 @@
+(:
+XQuery Aggregate Query
+----------------------
+Find the annual precipitation (PRCP) for Syracuse, NY using the airport
+weather station (USW00014771) report for 1999.                                     
+:)
+fn:sum(
+    let $collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+    for $r in collection($collection)/root/dataCollection/data
+    where $r/station eq "GHCND:USW00014771" 
+        and $r/dataType eq "PRCP" 
+        and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 1999
+    return $r/value
+) div 10

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q03.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q03.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q03.xq
new file mode 100644
index 0000000..cda344a
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q03.xq
@@ -0,0 +1,8 @@
+(: XQuery Aggregate Query :)
+(: Find the highest recorded temperature (TMAX) in Celsius.                   :)
+fn:max(
+    let $collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+    for $r in collection($collection)/root/dataCollection/data
+    where $r/dataType eq "TMAX"
+    return $r/value
+) div 10

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04.xq
new file mode 100644
index 0000000..8f513ce
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04.xq
@@ -0,0 +1,30 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: XQuery Join Query :)
+(: Find all the weather readings for Washington state for a specific day      :)
+(: 1976/7/4.                                                                  :)
+let $sensor_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+for $r in collection($sensor_collection)/root/dataCollection/data
+
+let $station_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/stations/?select=*.xml;recurse=yes"
+for $s in collection($station_collection)/root/stationCollection/station
+
+where $s/id eq $r/station 
+    and (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "WASHINGTON"))
+    and xs:dateTime(fn:data($r/date)) eq xs:dateTime("1976-07-04T00:00:00.000")
+return $r

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04_count_sensor.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04_count_sensor.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04_count_sensor.xq
new file mode 100644
index 0000000..6c927a4
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04_count_sensor.xq
@@ -0,0 +1,10 @@
+(: XQuery Join Query :)
+(: Count all the weather sensor readings on 1976-07-04.                       :)
+count(
+    let $sensor_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+    for $r in collection($sensor_collection)/root/dataCollection/data
+    (: fn:substring is 1-based: take the first 10 characters (presumably YYYY-MM-DD). :)
+    let $date := xs:date(fn:substring(xs:string(fn:data($r/date)), 1, 10))
+    where $date eq xs:date("1976-07-04")
+    return $r
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04_count_station.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04_count_station.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04_count_station.xq
new file mode 100644
index 0000000..957aec0
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04_count_station.xq
@@ -0,0 +1,8 @@
+(: XQuery Join Query :)
+(: Count all the weather stations for King county.                            :)
+count(
+    let $station_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/stations/?select=*.xml;recurse=yes"
+    for $s in collection($station_collection)/root/stationCollection/station
+    where (some $x in $s/locationLabels satisfies ($x/type eq "CNTY" and fn:contains(fn:upper-case(fn:data($x/displayName)), "KING")))
+    return $s
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05.xq
new file mode 100644
index 0000000..5f452c0
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05.xq
@@ -0,0 +1,33 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: XQuery Join Aggregate Query :)
+(: Find the lowest recorded temperature (TMIN) in the United States for       :)
+(: 2001.                                                                      :)
+fn:min(
+    let $sensor_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+    for $r in collection($sensor_collection)/root/dataCollection/data
+        
+    let $station_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/stations/?select=*.xml;recurse=yes"
+    for $s in collection($station_collection)/root/stationCollection/station
+    
+    where $s/id eq $r/station
+        and (some $x in $s/locationLabels satisfies ($x/type eq "CNTRY" and $x/id eq "FIPS:US"))
+        and $r/dataType eq "TMIN" 
+        and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2001
+    return $r/value
+) div 10

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05_count_sensor.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05_count_sensor.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05_count_sensor.xq
new file mode 100644
index 0000000..4ac353b
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05_count_sensor.xq
@@ -0,0 +1,11 @@
+(: XQuery Join Aggregate Query :)
+(: Count all sensor readings for TMIN in 2001.                                :)
+count(
+    let $sensor_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+    for $r in collection($sensor_collection)/root/dataCollection/data
+    (: fn:substring is 1-based: take the first 10 characters (presumably YYYY-MM-DD). :)
+    let $date := xs:date(fn:substring(xs:string(fn:data($r/date)), 1, 10))
+    where $r/dataType eq "TMIN" 
+        and fn:year-from-date($date) eq 2001
+    return $r/value
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05_count_station.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05_count_station.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05_count_station.xq
new file mode 100644
index 0000000..4349805
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05_count_station.xq
@@ -0,0 +1,8 @@
+(: XQuery Join Aggregate Query :)
+(: Count all stations in the state of Oregon.                                 :)
+count(
+    let $station_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/stations/?select=*.xml;recurse=yes"
+    for $s in collection($station_collection)/root/stationCollection/station
+    where (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "OREGON"))
+    return $s
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06.xq
new file mode 100644
index 0000000..2c02bc7
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06.xq
@@ -0,0 +1,30 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: XQuery Join Query :)
+(: Find the highest recorded temperature (TMAX) for each station for each     :)
+(: day over the year 2000.                                                    :)
+let $sensor_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+for $r in collection($sensor_collection)/root/dataCollection/data
+
+let $station_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/stations/?select=*.xml;recurse=yes"
+for $s in collection($station_collection)/root/stationCollection/station
+
+where $s/id eq $r/station
+    and $r/dataType eq "TMAX" 
+    and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2000
+return ($s/displayName, $r/date, $r/value)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06_count_sensor.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06_count_sensor.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06_count_sensor.xq
new file mode 100644
index 0000000..5ca3329
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06_count_sensor.xq
@@ -0,0 +1,8 @@
+count(
+    let $sensor_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+    for $r in collection($sensor_collection)/root/dataCollection/data
+    
+    where $r/dataType eq "TMAX" 
+        and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2000
+    return $r
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06_count_station.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06_count_station.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06_count_station.xq
new file mode 100644
index 0000000..ef1e732
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06_count_station.xq
@@ -0,0 +1,5 @@
+count(
+    let $station_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/stations/?select=*.xml;recurse=yes"
+    for $s in collection($station_collection)/root/stationCollection/station
+    return $s
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q07.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q07.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q07.xq
new file mode 100644
index 0000000..35e5ea2
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q07.xq
@@ -0,0 +1,15 @@
+(: XQuery Join Aggregate Query :)
+(: Self join of the sensor readings: average daily TMAX - TMIN difference.    :)
+fn:avg(
+let $sensor_collection_min := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+for $r_min in collection($sensor_collection_min)/root/dataCollection/data
+
+let $sensor_collection_max := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+for $r_max in collection($sensor_collection_max)/root/dataCollection/data
+
+where $r_min/station eq $r_max/station
+    and $r_min/date eq $r_max/date
+    and $r_min/dataType eq "TMIN"
+    and $r_max/dataType eq "TMAX"
+return ($r_max/value - $r_min/value)
+) div 10
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q07_count_tmax.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q07_count_tmax.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q07_count_tmax.xq
new file mode 100644
index 0000000..3245746
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q07_count_tmax.xq
@@ -0,0 +1,9 @@
+(: XQuery Join Query :)
+(: Count all the records for TMAX.                                            :)
+count(
+    let $sensor_collection_max := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+    for $r_max in collection($sensor_collection_max)/root/dataCollection/data
+    
+    where $r_max/dataType eq "TMAX"
+    return $r_max
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q07_count_tmin.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q07_count_tmin.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q07_count_tmin.xq
new file mode 100644
index 0000000..6fcd276
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q07_count_tmin.xq
@@ -0,0 +1,9 @@
+(: XQuery Join Query :)
+(: Count all the records for TMIN.                                            :)
+count(
+    let $sensor_collection_min := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+    for $r_min in collection($sensor_collection_min)/root/dataCollection/data
+    
+    where $r_min/dataType eq "TMIN"
+    return $r_min
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon_scripts/run_saxon_tests.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon_scripts/run_saxon_tests.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon_scripts/run_saxon_tests.sh
new file mode 100755
index 0000000..c1c2132
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon_scripts/run_saxon_tests.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#      http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Arguments: $1 is the directory searched for queries, $3 is the Saxon -repeat count, default 1.
+REPEAT=${3:-1}
+DATASET="hcn"
+
+mkdir -p ~/logs/saxon/
+for j in $(find "${1}" -name '*q??.xq')
+do
+    date
+    echo "Running Saxon query: ${j}"
+    # One run per query: -repeat does the reruns, and -Xmx8g is a java flag because the launcher ignores JAVA_OPTS.
+    time java -Xmx8g -cp saxon9he.jar net.sf.saxon.Query -t -repeat:"${REPEAT}" -q:"${j}" >> ~/logs/saxon/"$(basename "${j}")".log 2>&1
+done
+
+# Check for the mail binary that is invoked below. The EOM terminator must begin its line.
+if [ -x /bin/mail ]
+then
+    echo "Sending out e-mail notification."
+    SUBJECT="Saxon Tests Finished (${DATASET})"
+    EMAIL="ecarm002@ucr.edu"
+    /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
+    Completed all Saxon tests on ${DATASET}.
+EOM
+else
+    echo "No mail command to use."
+fi

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_sensor.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_sensor.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_sensor.xq
new file mode 100644
index 0000000..6fa981b
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_sensor.xq
@@ -0,0 +1,24 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: XQuery Join Query :)
+(: Count all the weather sensor readings available.                           :)
+count(
+    let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r in collection($sensor_collection)/dataCollection/data
+    return $r
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_station.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_station.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_station.xq
new file mode 100644
index 0000000..1958ec6
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_station.xq
@@ -0,0 +1,24 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: XQuery Join Query :)
+(: Count all the weather stations available.                                  :)
+count(
+    let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
+    for $s in collection($station_collection)/stationCollection/station
+    return $s
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/no_result.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/no_result.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/no_result.xq
new file mode 100644
index 0000000..c1363e3
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/no_result.xq
@@ -0,0 +1,24 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: 
+VXQuery is used to only parse all files without producing results.
+:)
+let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+for $r in collection($sensor_collection)/dataCollection/data
+where fn:false()
+return $r

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q00.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q00.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q00.xq
new file mode 100644
index 0000000..5006a21
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q00.xq
@@ -0,0 +1,31 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+(: 
+XQuery Filter Query
+-------------------
+See historical data for Key West International Airport, FL (USW00012836)
+station by selecting the weather readings for December 25 over the last
+10 years (2003 onward).
+:)
+let $collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+for $r in collection($collection)/dataCollection/data
+let $datetime := xs:dateTime(fn:data($r/date))
+where $r/station eq "GHCND:USW00012836" 
+    and fn:year-from-dateTime($datetime) ge 2003
+    and fn:month-from-dateTime($datetime) eq 12 
+    and fn:day-from-dateTime($datetime) eq 25
+return $r
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q01.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q01.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q01.xq
new file mode 100644
index 0000000..0827c45
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q01.xq
@@ -0,0 +1,25 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: XQuery Filter Query :)
+(: Find all readings for a hurricane force or extreme wind warning.           :)
+(: The warnings occur when the wind speed (AWND) exceeds 110 mph (49.1744     :)
+(: meters per second). (Wind value is in tenths of a meter per second)       :)
+let $collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+for $r in collection($collection)/dataCollection/data
+where $r/dataType eq "AWND" and xs:decimal(fn:data($r/value)) gt 491.744
+return $r
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q02.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q02.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q02.xq
new file mode 100644
index 0000000..0635618
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q02.xq
@@ -0,0 +1,30 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+(:
+XQuery Aggregate Query
+----------------------
+Find the annual precipitation (PRCP) for Syracuse, NY using the airport
+weather station (USW00014771) report for 1999.                                     
+:)
+fn:sum(
+    let $collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r in collection($collection)/dataCollection/data
+    where $r/station eq "GHCND:USW00014771" 
+        and $r/dataType eq "PRCP" 
+        and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 1999
+    return $r/value
+) div 10
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q03.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q03.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q03.xq
new file mode 100644
index 0000000..c58b0a3
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q03.xq
@@ -0,0 +1,25 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: XQuery Aggregate Query :)
+(: Find the highest recorded temperature (TMAX) in Celsius.                   :)
+fn:max(
+    let $collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r in collection($collection)/dataCollection/data
+    where $r/dataType eq "TMAX"
+    return $r/value
+) div 10
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04.xq
new file mode 100644
index 0000000..7d5fd77
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04.xq
@@ -0,0 +1,32 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(:
+XQuery Join Query
+-------------------
+Find all the weather readings for Washington state for a specific day 1976/7/4. 
+:)
+let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
+for $s in collection($station_collection)/stationCollection/station
+
+let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+for $r in collection($sensor_collection)/dataCollection/data
+    
+where $s/id eq $r/station 
+    and (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "WASHINGTON"))
+    and xs:dateTime(fn:data($r/date)) eq xs:dateTime("1976-07-04T00:00:00.000")
+return $r
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_join.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_join.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_join.xq
new file mode 100644
index 0000000..b28312e
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_join.xq
@@ -0,0 +1,34 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(:
+XQuery Join Query
+-------------------
+Count all the weather readings for Washington state for a specific day 1976/7/4.
+:)
+fn:count(
+    let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
+    for $s in collection($station_collection)/stationCollection/station
+
+    let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r in collection($sensor_collection)/dataCollection/data
+
+    where $s/id eq $r/station 
+        and (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "WASHINGTON"))
+        and xs:dateTime(fn:data($r/date)) eq xs:dateTime("1976-07-04T00:00:00.000")
+    return $r
+)
\ No newline at end of file


[09/14] git commit: New comments on renamed files.

Posted by pr...@apache.org.
New comments on renamed files.


Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/17bedfa9
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/17bedfa9
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/17bedfa9

Branch: refs/heads/master
Commit: 17bedfa9ff55b5b17b480773818db938d6184923
Parents: 0e666fc
Author: Preston Carman <pr...@apache.org>
Authored: Mon Oct 6 15:42:29 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Mon Oct 6 15:42:29 2014 -0700

----------------------------------------------------------------------
 .../main/resources/noaa-ghcn-daily/queries/count_sensor.xq    | 7 +++++--
 .../main/resources/noaa-ghcn-daily/queries/count_station.xq   | 7 +++++--
 2 files changed, 10 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/vxquery/blob/17bedfa9/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_sensor.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_sensor.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_sensor.xq
index 6fa981b..a93ad66 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_sensor.xq
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_sensor.xq
@@ -15,8 +15,11 @@
    specific language governing permissions and limitations
    under the License. :)
 
-(: XQuery Join Query :)
-(: Count all the weather sensor readings available.                           :)
+(:
+XQuery Join Query
+-------------------
+Count all the weather sensor readings available.
+:)
 count(
     let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
     for $r in collection($sensor_collection)/dataCollection/data

http://git-wip-us.apache.org/repos/asf/vxquery/blob/17bedfa9/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_station.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_station.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_station.xq
index 1958ec6..ebce764 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_station.xq
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_station.xq
@@ -15,8 +15,11 @@
    specific language governing permissions and limitations
    under the License. :)
 
-(: XQuery Join Query :)
-(: Count all the weather stations available.                                  :)
+(: 
+XQuery Join Query
+-------------------
+Count all the weather stations available.
+:)
 count(
     let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
     for $s in collection($station_collection)/stationCollection/station


[02/14] Remove benchmark files to allow easy copy from other branch.

Posted by pr...@apache.org.
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q01.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q01.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q01.xq
deleted file mode 100644
index 0827c45..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q01.xq
+++ /dev/null
@@ -1,25 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Filter Query :)
-(: Find all reading for hurricane force wind warning or extreme wind warning. :)
-(: The warnings occur when the wind speed (AWND) exceeds 110 mph (49.1744     :)
-(: meters per second). (Wind value is in tenth of a meter per second)         :)
-let $collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-for $r in collection($collection)/dataCollection/data
-where $r/dataType eq "AWND" and xs:decimal(fn:data($r/value)) gt 491.744
-return $r
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q02.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q02.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q02.xq
deleted file mode 100644
index 0635618..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q02.xq
+++ /dev/null
@@ -1,30 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-(:
-XQuery Aggregate Query
-----------------------
-Find the annual precipitation (PRCP) for a Syracuse, NY using the airport
-weather station (USW00014771) report for 1999.                                     
-:)
-fn:sum(
-    let $collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-    for $r in collection($collection)/dataCollection/data
-    where $r/station eq "GHCND:USW00014771" 
-        and $r/dataType eq "PRCP" 
-        and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 1999
-    return $r/value
-) div 10
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q03.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q03.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q03.xq
deleted file mode 100644
index c58b0a3..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q03.xq
+++ /dev/null
@@ -1,25 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Aggregate Query :)
-(: Find the highest recorded temperature (TMAX) in Celsius.                   :)
-fn:max(
-    let $collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-    for $r in collection($collection)/dataCollection/data
-    where $r/dataType eq "TMAX"
-    return $r/value
-) div 10
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04.xq
deleted file mode 100644
index 5b7246d..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04.xq
+++ /dev/null
@@ -1,30 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Join Query :)
-(: Find all the weather readings for Washington state for a specific day    :)
-(: 1976/7/4.                                                                  :)
-let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
-for $s in collection($station_collection)/stationCollection/station
-
-let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-for $r in collection($sensor_collection)/dataCollection/data
-    
-where $s/id eq $r/station 
-    and (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "WASHINGTON"))
-    and xs:dateTime(fn:data($r/date)) eq xs:dateTime("1976-07-04T00:00:00.000")
-return $r
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_sensor.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_sensor.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_sensor.xq
deleted file mode 100644
index 6c7810a..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_sensor.xq
+++ /dev/null
@@ -1,27 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Join Query :)
-(: Count all the weather sensor readings on 1976-07-04.                       :)
-count(
-    let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-    for $r in collection($sensor_collection)/dataCollection/data
-        
-    let $date := xs:date(fn:substring(xs:string(fn:data($r/date)), 0, 11))
-    where $date eq xs:date("1976-07-04")
-    return $r
-)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_station.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_station.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_station.xq
deleted file mode 100644
index 18e627a..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_station.xq
+++ /dev/null
@@ -1,25 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Join Query :)
-(: Count all the weather stations for Washington state.                       :)
-count(
-    let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
-    for $s in collection($station_collection)/stationCollection/station
-    where (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "WASHINGTON"))
-    return $s
-)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq
deleted file mode 100644
index c95f3f5..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq
+++ /dev/null
@@ -1,33 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Join Aggregate Query :)
-(: Find the lowest recorded temperature (TMIN) in the United States for     :)
-(: 2001.                                                                      :)
-fn:min(
-    let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
-    for $s in collection($station_collection)/stationCollection/station
-    
-    let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-    for $r in collection($sensor_collection)/dataCollection/data
-    
-    where $s/id eq $r/station
-        and (some $x in $s/locationLabels satisfies ($x/type eq "CNTRY" and $x/id eq "FIPS:US"))
-        and $r/dataType eq "TMIN" 
-        and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2001
-    return $r/value
-) div 10
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_sensor.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_sensor.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_sensor.xq
deleted file mode 100644
index 8548742..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_sensor.xq
+++ /dev/null
@@ -1,28 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Join Aggregate Query :)
-(: Count all sensor readings for TMIN in 2001.                                :)
-count(
-    let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-    for $r in collection($sensor_collection)/dataCollection/data
-    
-    let $date := xs:date(fn:substring(xs:string(fn:data($r/date)), 0, 11))
-    where $r/dataType eq "TMIN" 
-        and fn:year-from-date($date) eq 2001
-    return $r/value
-)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_station.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_station.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_station.xq
deleted file mode 100644
index 6f3a6b8..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_station.xq
+++ /dev/null
@@ -1,25 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Join Aggregate Query :)
-(: Count all stations in the United States.                                 :)
-count(
-    let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
-    for $s in collection($station_collection)/stationCollection/station
-    where (some $x in $s/locationLabels satisfies ($x/type eq "CNTRY" and $x/id eq "FIPS:US"))
-    return $s
-)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06.xq
deleted file mode 100644
index 5c8ed54..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06.xq
+++ /dev/null
@@ -1,30 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Join Query :)
-(: Find the highest recorded temperature (TMAX) for each station for each     :)
-(: day over the year 2000.                                                    :)
-let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
-for $s in collection($station_collection)/stationCollection/station
-
-let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-for $r in collection($sensor_collection)/dataCollection/data
-
-where $s/id eq $r/station
-    and $r/dataType eq "TMAX" 
-    and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2000
-return ($s/displayName, $r/date, $r/value)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_sensor.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_sensor.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_sensor.xq
deleted file mode 100644
index 1938151..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_sensor.xq
+++ /dev/null
@@ -1,27 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Join Query :)
-(: Count max temperature (TMAX) readings for the year 2000.                   :)
-count(
-    let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-    for $r in collection($sensor_collection)/dataCollection/data
-    
-    where $r/dataType eq "TMAX" 
-    	and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2000
-    return $r
-)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_station.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_station.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_station.xq
deleted file mode 100644
index 3c1dc98..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_station.xq
+++ /dev/null
@@ -1,24 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Join Query :)
-(: Count all the stations.                                         :)
-count(
-    let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
-    for $s in collection($station_collection)/stationCollection/station
-    return $s
-)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07.xq
deleted file mode 100644
index 5b1f2ac..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07.xq
+++ /dev/null
@@ -1,33 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Self Join Query :)
-(: Self join with all stations finding the difference in min and max       :)
-(: temperature and get the average.                                        :)
-fn:avg(
-    let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-    for $r_min in collection($sensor_collection_min)/dataCollection/data
-    
-    let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-    for $r_max in collection($sensor_collection_max)/dataCollection/data
-    
-    where $r_min/station eq $r_max/station
-        and $r_min/date eq $r_max/date
-        and $r_min/dataType eq "TMIN"
-        and $r_max/dataType eq "TMAX"
-    return $r_max/value - $r_min/value
-) div 10
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmax.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmax.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmax.xq
deleted file mode 100644
index a48cad5..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmax.xq
+++ /dev/null
@@ -1,26 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Join Query :)
-(: Count all the records for TMAX.                                            :)
-count(
-    let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-    for $r_max in collection($sensor_collection_max)/dataCollection/data
-    
-    where $r_max/dataType eq "TMAX"
-    return $r_max
-)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmin.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmin.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmin.xq
deleted file mode 100644
index 4a72d0f..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmin.xq
+++ /dev/null
@@ -1,26 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Join Query :)
-(: Count all the records for TMIN.                                            :)
-count(
-    let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-    for $r_min in collection($sensor_collection_min)/dataCollection/data
-    
-    where $r_min/dataType eq "TMIN"
-    return $r_min
-)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/sensor_count.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/sensor_count.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/sensor_count.xq
deleted file mode 100644
index 6fa981b..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/sensor_count.xq
+++ /dev/null
@@ -1,24 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Join Query :)
-(: Count all the weather sensor readings available.                           :)
-count(
-    let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-    for $r in collection($sensor_collection)/dataCollection/data
-    return $r
-)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/station_count.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/station_count.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/station_count.xq
deleted file mode 100644
index 1958ec6..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/station_count.xq
+++ /dev/null
@@ -1,24 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Join Query :)
-(: Count all the weather stations available.                                  :)
-count(
-    let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
-    for $s in collection($station_collection)/stationCollection/station
-    return $s
-)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/README.md
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/README.md b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/README.md
deleted file mode 100644
index 58bea51..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/README.md
+++ /dev/null
@@ -1,51 +0,0 @@
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one or more
-  contributor license agreements.  See the NOTICE file distributed with
-  this work for additional information regarding copyright ownership.
-  The ASF licenses this file to You under the Apache License, Version 2.0
-  (the "License"); you may not use this file except in compliance with
-  the License.  You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
--->
-
-Weather Data Conversion To XML
-=====================
-
-# Introduction
-
-The NOAA has hosted DAILY GLOBAL HISTORICAL CLIMATOLOGY NETWORK (GHCN-DAILY) 
-.dat files. Weather.gov has an RSS/XML feed that gives current weather sensor 
-readings. Using the RSS feed as a template, the GHCN-DAILY historical 
-information is used to generate past RSS feed XML documents. The process allows 
-testing on a large set of information without having to continually monitor 
-the weather.gov site for all the weather details for years.
-
-# Detailed Description
-
-Detailed GHCN-DAILY information: 
-<http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt>
-
-The process takes a save folder for the data. The folder contains several 
-folders:
-
- - all_xml_files (The generated xml files for a given package)
- - downloads (All files taken from the NOAA HTTP site)
- - dataset-[name] (all files related to a single dataset)
-     
-     
-# Example commands
-
-Building
-
-
-Partitioning
-python weather_cli.py -x weather_example.xml
-
-Linking
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
deleted file mode 100755
index 632dbcb..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
+++ /dev/null
@@ -1,67 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#      http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# Run all the queries and save a log. 
-# First argument: Supply the folder which houses all the queries (recursive).
-# Second argument: adds options to the VXQuery CLI. Third argument (optional): only run queries whose path contains it.
-#
-# run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/
-# run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "-client-net-ip-address 169.235.27.138"
-# run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "" q03
-#
-REPEAT=5
-FRAME_SIZE=$((8*1024))
-BUFFER_SIZE=$((32*1024*1024))
-JOIN_HASH_SIZE=-1
-
-if [ -z "${1}" ]
-then
-    echo "Please supply a directory for query files to be found."
-    exit
-fi
-
-export JAVA_OPTS="$JAVA_OPTS -server -Xmx8G -XX:+HeapDumpOnOutOfMemoryError -Djava.util.logging.config.file=./vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties"
-
-for j in $(find ${1} -name '*q??.xq')
-do
-    if [ -z "${3}" ] || [[ "${j}" =~ "${3}" ]] 
-    then
-        date
-        echo "Running query: ${j}"
-        log_file="$(basename ${j}).$(date +%Y%m%d%H%M).log"
-        log_base_path=$(dirname ${j/queries/query_logs})
-        mkdir -p ${log_base_path}
-        time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${2} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -join-hash-size ${JOIN_HASH_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
-        echo "\nBuffer Size: ${BUFFER_SIZE}" >> ${log_base_path}/${log_file}
-        echo "\nFrame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file}
-        echo "\nJoin Hash Size: ${JOIN_HASH_SIZE}" >> ${log_base_path}/${log_file}
-        fi;
-done
-
-if which programname >/dev/null;
-then
-    echo "Sending out e-mail notification."
-    SUBJECT="Benchmark Tests Finished"
-    EMAIL="ecarm002@ucr.edu"
-    /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
-    Completed all tests in folder ${1}.
-    EOM
-else
-    echo "No mail command to use."
-fi;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
deleted file mode 100755
index 98ab04b..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
+++ /dev/null
@@ -1,90 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#      http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# Run all the queries and save a log. 
-# First argument: Supply the folder which houses all the queries (recursive).
-# Second argument: the number of cluster nodes. Third argument: adds options to the VXQuery CLI. Fourth argument (optional): only run queries whose path contains it.
-#
-# run_benchmark_cluster.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ 2
-# run_benchmark_cluster.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ 2 "-client-net-ip-address 169.235.27.138"
-# run_benchmark_cluster.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ 2 "" q03
-#
-CLUSTER="uci"
-REPEAT=5
-FRAME_SIZE=$((8*1024))
-BUFFER_SIZE=$((32*1024*1024))
-#JOIN_HASH_SIZE=$((256*1024*1024))
-JOIN_HASH_SIZE=-1
-
-if [ -z "${1}" ]
-then
-    echo "Please supply a directory for query files to be found."
-    exit
-fi
-
-if [ -z "${2}" ]
-then
-    echo "Please the number of nodes (start at 0)."
-    exit
-fi
-
-# Run queries for the specified number of nodes.
-echo "Starting ${2} cluster nodes"
-python vxquery-server/src/main/resources/scripts/cluster_cli.py -c vxquery-server/src/main/resources/conf/${CLUSTER}/${2}nodes.xml -a start
-
-# wait for cluster to finish setting up  
-sleep 5
-
-export JAVA_OPTS="$JAVA_OPTS -server -Xmx8G -XX:+HeapDumpOnOutOfMemoryError -Djava.util.logging.config.file=./vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties"
-
-for j in $(find ${1} -name '*q??.xq')
-do
-    # Only work with i nodes.
-    if [[ "${j}" =~ "${2}nodes" ]]
-    then
-        # Only run for specified queries.
-        if [ -z "${4}" ] || [[ "${j}" =~ "${4}" ]]
-        then
-            date
-            echo "Running query: ${j}"
-            log_file="$(basename ${j}).$(date +%Y%m%d%H%M).log"
-            log_base_path=$(dirname ${j/queries/query_logs})
-            mkdir -p ${log_base_path}
-            time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${3} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -join-hash-size ${JOIN_HASH_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
-            echo "\nBuffer Size: ${BUFFER_SIZE}" >> ${log_base_path}/${log_file}
-            echo "\nFrame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file}
-            echo "\nJoin Hash Size: ${JOIN_HASH_SIZE}" >> ${log_base_path}/${log_file}
-        fi;
-    fi;
-done
-    
-# Stop cluster.
-python vxquery-server/src/main/resources/scripts/cluster_cli.py -c vxquery-server/src/main/resources/conf/${CLUSTER}/${2}nodes.xml -a stop
-
-if which programname >/dev/null;
-then
-    echo "Sending out e-mail notification."
-    SUBJECT="Benchmark Cluster Tests Finished"
-    EMAIL="ecarm002@ucr.edu"
-    /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
-    Completed all tests in folder ${1} for a ${2} node cluster using ${HOSTNAME}.
-    EOM
-else
-    echo "No mail command to use."
-fi;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh
deleted file mode 100755
index 58976b7..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#      http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-DATASET="dataset-hcn-d2"
-cluster_ip=${1}
-base_weather_folder=${2}
-
-for n in 7 6 5 3 4 2 1 0
-do
-    #for t in "batch_scale_out" "speed_up"
-    for t in "batch_scale_out"
-    #for t in "speed_up"
-    do 
-        for p in 2 
-        do 
-            for c in 4
-            do 
-                echo " ==== node ${n} test ${t} partition ${p} cores ${c} ===="
-                sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh ${base_weather_folder}/${DATASET}/queries/${t}/${n}nodes/d2_p${p}/ ${n} "-client-net-ip-address ${cluster_ip} -available-processors ${c}"
-            done
-        done
-    done
-done
-
-if which programname >/dev/null;
-then
-    echo "Sending out e-mail notification."
-    SUBJECT="Benchmark Group Tests Finished"
-    EMAIL="ecarm002@ucr.edu"
-    /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
-    Completed all tests in the predefined group for ${DATASET}.
-    EOM
-else
-    echo "No mail command to use."
-fi;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
deleted file mode 100755
index a6788be..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#      http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-export JAVA_HOME=/home/ecarm002/java/jdk1.6.0_45
-REPEAT=${1}
-DATASET="hcn"
-
-for n in `seq 0 7`
-#for n in 0
-do
-    date
-    echo "Running q0${n} on ${DATASET} for MRQL."
-    time for i in {1..${REPEAT}}; do ~/mrql/incubator-mrql/bin/mrql -dist -nodes 5 ~/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_${DATASET}/q0${n}.mrql >> weather_data/mrql/query_logs/${DATASET}/q0${n}.mrql.log 2>&1; done; 
-done
-
-if which programname >/dev/null;
-then
-    echo "Sending out e-mail notification."
-    SUBJECT="MRQL Tests Finished (${DATASET})"
-    EMAIL="ecarm002@ucr.edu"
-    /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
-    Completed all MRQL tests on ${DATASET}.
-    EOM
-else
-    echo "No mail command to use."
-fi;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
deleted file mode 100644
index 8021b2c..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
+++ /dev/null
@@ -1,377 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import os.path
-import linecache
-import distutils.core
-import fileinput
-import socket
-
-from weather_config import *
-from weather_data_files import *
-
-# Weather data files created to manage the conversion process.
-# Allows partition and picking up where you left off.
-#
-# benchmark_name/
-#   data/
-#   queries/
-#   logs/
-class WeatherBenchmark:
-
-    DATA_LINKS_FOLDER = "data_links/"
-    LARGE_FILE_ROOT_TAG = WeatherDataFiles.LARGE_FILE_ROOT_TAG
-    QUERY_REPLACEMENT_KEY = "/tmp/1.0_partition_ghcnd_all_xml/"
-    QUERY_MASTER_FOLDER = "../queries/"
-    QUERY_FILE_LIST = [
-                       "q00.xq",
-                       "q01.xq",
-                       "q02.xq",
-                       "q03.xq",
-                       "q04.xq",
-                       "q05.xq",
-                       "q06.xq",
-                       "q07.xq"
-                       ] 
-    QUERY_UTILITY_LIST = [
-                          "no_result.xq",
-                          "sensor_count.xq",
-                          "station_count.xq",
-                          "q04_sensor.xq",
-                          "q04_station.xq",
-                          "q05_sensor.xq",
-                          "q05_station.xq",
-                          "q06_sensor.xq",
-                          "q06_station.xq",
-                          "q07_tmin.xq",
-                          "q07_tmax.xq",
-                          ] 
-    BENCHMARK_LOCAL_TESTS = ["local_speed_up", "local_batch_scale_out"] 
-    BENCHMARK_CLUSTER_TESTS = ["speed_up", "batch_scale_out"] 
-    QUERY_COLLECTIONS = ["sensors", "stations"]
-
-    SEPERATOR = "|"
-    
-    def __init__(self, base_paths, partitions, dataset, nodes):
-        self.base_paths = base_paths
-        self.partitions = partitions
-        self.dataset = dataset
-        self.nodes = nodes
-        
-    def print_partition_scheme(self):
-        if (len(self.base_paths) == 0):
-            return
-        for test in self.dataset.get_tests():
-            if test in self.BENCHMARK_LOCAL_TESTS:
-                self.print_local_partition_schemes(test)
-            elif test in self.BENCHMARK_CLUSTER_TESTS:
-                self.print_cluster_partition_schemes(test)
-            else:
-                print "Unknown test."
-                exit()
-            
-    def print_local_partition_schemes(self, test):
-        node_index = 0
-        virtual_disk_partitions = get_local_virtual_disk_partitions(self.partitions)
-        for p in self.partitions:
-            scheme = self.get_local_partition_scheme(test, p)
-            self.print_partition_schemes(virtual_disk_partitions, scheme, test, p, node_index)
-        
-    def print_cluster_partition_schemes(self, test):
-        node_index = self.get_current_node_index()
-        virtual_disk_partitions = get_cluster_virtual_disk_partitions(self.nodes, self.partitions)
-        for p in self.partitions:
-            scheme = self.get_cluster_partition_scheme(test, p)
-            self.print_partition_schemes(virtual_disk_partitions, scheme, test, p, node_index)
-        
-    def print_partition_schemes(self, virtual_partitions, scheme, test, partitions, node_id):
-        print
-        print "---------------- Partition Scheme --------------------"
-        print "    Test: " + test
-        print "    Virtual Partitions: " + str(virtual_partitions)
-        print "    Disks: " + str(len(self.base_paths))
-        print "    Partitions: " + str(partitions)
-        print "    Node Id: " + str(node_id)
-        
-        if isinstance(scheme, (tuple, list, dict, set)) and len(scheme) > 0:
-            folder_length = len(scheme[0][3]) + 5
-            row_format = "{:>5} {:>5} {:>5} {:<" + str(folder_length) + "} {:<" + str(folder_length) + "}"
-            HEADER = ("Disk", "Index", "Link", "Data Path", "Link Path")
-            print row_format.format(*HEADER)
-            for row in scheme:
-                print row_format.format(*row)
-            print
-        else:
-            print "    Scheme is EMPTY."
-
-    def get_local_partition_scheme(self, test, partition):
-        scheme = []
-        virtual_partitions = get_local_virtual_disk_partitions(self.partitions)
-        data_schemes = get_partition_scheme(0, virtual_partitions, self.base_paths)
-        link_base_schemes = get_partition_scheme(0, virtual_partitions, self.base_paths, self.DATA_LINKS_FOLDER + test)
-
-        # Match link paths to real data paths.
-        group_size = len(data_schemes) / len(link_base_schemes)
-        for d in range(len(self.base_paths)):
-            offset = 0
-            for link_node, link_disk, link_virtual, link_index, link_path in link_base_schemes:
-                if d == link_disk:
-                    # Only consider a single disk at a time.
-                    for data_node, data_disk, data_virtual, data_index, data_path in data_schemes:
-                        if test == "local_speed_up" and data_disk == link_disk \
-                                and offset <= data_index and data_index < offset + group_size:
-                            scheme.append([data_disk, data_index, link_index, data_path, link_path])
-                        elif test == "local_batch_scale_out" and data_disk == link_disk \
-                                and data_index == link_index:
-                            scheme.append([data_disk, data_index, link_index, data_path, link_path])
-                    offset += group_size
-        return scheme
-    
-    def get_cluster_partition_scheme(self, test, partition):
-        node_index = self.get_current_node_index()
-        if node_index == -1:
-            print "Unknown host."
-            return 
-        
-        scheme = []
-        virtual_disk_partitions = get_cluster_virtual_disk_partitions(self.nodes, self.partitions)
-        data_schemes = get_disk_partition_scheme(node_index, virtual_disk_partitions, self.base_paths)
-        link_base_schemes = get_cluster_link_scheme(len(self.nodes), partition, self.base_paths, self.DATA_LINKS_FOLDER + test)
-
-        # Match link paths to real data paths.
-        for link_node, link_disk, link_virtual, link_index, link_path in link_base_schemes:
-            # Prep
-            if test == "speed_up":
-                group_size = virtual_disk_partitions / (link_node + 1) / partition
-            elif test == "batch_scale_out":
-                group_size = virtual_disk_partitions / len(self.nodes) / partition
-            else:
-                print "Unknown test."
-                return
-            
-            node_offset = group_size * node_index * partition
-            node_offset += group_size * link_index
-            has_data = True
-            if link_node < node_index:
-                has_data = False
-    
-            # Make links
-            for date_node, data_disk, data_virtual, data_index, data_path in data_schemes:
-                if has_data and data_disk == link_disk \
-                        and node_offset <= data_index and data_index < node_offset + group_size:
-                    scheme.append([link_disk, data_index, link_index, data_path, link_path])
-            scheme.append([link_disk, -1, link_index, "", link_path])
-        return scheme
-    
-    def build_data_links(self, reset):
-        if (len(self.base_paths) == 0):
-            return
-        if reset:
-            shutil.rmtree(self.base_paths[0] + self.DATA_LINKS_FOLDER)
-        for test in self.dataset.get_tests():
-            if test in self.BENCHMARK_LOCAL_TESTS:
-                for i in self.partitions:
-                    scheme = self.get_local_partition_scheme(test, i)
-                    self.build_data_links_scheme(scheme)
-                if 1 in self.partitions and len(self.base_paths) > 1:
-                    scheme = self.build_data_links_local_zero_partition(test)
-                    self.build_data_links_scheme(scheme)
-            elif test in self.BENCHMARK_CLUSTER_TESTS:
-                for i in self.partitions:
-                    scheme = self.get_cluster_partition_scheme(test, i)
-                    self.build_data_links_scheme(scheme)
-                if 1 in self.partitions and len(self.base_paths) > 1:
-                    scheme = self.build_data_links_cluster_zero_partition(test)
-                    self.build_data_links_scheme(scheme)
-            else:
-                print "Unknown test."
-                exit()
-    
-    def build_data_links_scheme(self, scheme):
-        '''Build all the data links based on the scheme information.'''
-        for (data_disk, data_index, partition, data_path, link_path) in scheme:
-            self.add_collection_links_for(data_path, link_path, data_index)
-    
-    def build_data_links_cluster_zero_partition(self, test):
-        '''Build a scheme for all data in one symbolically linked folder. (0 partition)'''
-        scheme = []
-        link_base_schemes = get_cluster_link_scheme(len(self.nodes), 1, self.base_paths, self.DATA_LINKS_FOLDER + test)
-        for link_node, link_disk, link_virtual, link_index, link_path in link_base_schemes:
-            new_link_path = self.get_zero_partition_path(link_node, self.DATA_LINKS_FOLDER + test + "/" + str(link_node) + "nodes")
-            scheme.append([0, link_disk, 0, link_path, new_link_path])
-        return scheme
-
-    def build_data_links_local_zero_partition(self, test):
-        '''Build a scheme for all data in one symbolically linked folder. (0 partition)'''
-        scheme = []
-        index = 0
-        link_base_schemes = get_partition_scheme(0, 1, self.base_paths, self.DATA_LINKS_FOLDER + test)
-        for link_node, link_disk, link_virtual, link_index, link_path in link_base_schemes:
-            if test == "local_batch_scale_out" and index > 0:
-                continue
-            new_link_path = self.get_zero_partition_path(link_node, self.DATA_LINKS_FOLDER + test)
-            scheme.append([0, index, 0, link_path, new_link_path])
-            index += 1
-        return scheme
-
-    def get_zero_partition_path(self, node, key):
-        '''Return a partition path for the zero partition.'''
-        base_path = self.base_paths[0]
-        new_link_path = get_partition_scheme(node, 1, [base_path], key)[0][PARTITION_INDEX_PATH]
-        return new_link_path.replace("p1", "p0")
-        
-    def get_current_node_index(self):
-        found = False
-        node_index = 0
-        for machine in self.nodes:
-            if socket.gethostname().startswith(machine.get_node_name()):
-                found = True
-                break
-            node_index += 1
-    
-        if found:
-            return node_index
-        else:
-            return -1
-    
-    def add_collection_links_for(self, real_path, link_path, index):
-        for collection in self.QUERY_COLLECTIONS:
-            collection_path = link_path + collection + "/"
-            collection_index = collection_path + "index" + str(index)
-            if not os.path.isdir(collection_path):
-                os.makedirs(collection_path)
-            if index >= 0:
-                if os.path.islink(collection_index):
-                    os.unlink(collection_index)
-                os.symlink(real_path + collection + "/", collection_index)
-            
-    def copy_query_files(self, reset):
-        for test in self.dataset.get_tests():
-            if test in self.BENCHMARK_LOCAL_TESTS:
-                self.copy_local_query_files(test, reset)
-            elif test in self.BENCHMARK_CLUSTER_TESTS:
-                self.copy_cluster_query_files(test, reset)
-            else:
-                print "Unknown test."
-                exit()
-            
-    def copy_cluster_query_files(self, test, reset):
-        '''Determine the data_link path for cluster query files and copy with
-        new location for collection.'''
-        if 1 in self.partitions and len(self.base_paths) > 1:
-            for n in range(len(self.nodes)):
-                query_path = get_cluster_query_path(self.base_paths, test, 0, n)
-                prepare_path(query_path, reset)
-            
-                # Copy query files.
-                new_link_path = self.get_zero_partition_path(n, self.DATA_LINKS_FOLDER + test + "/" + str(n) + "nodes")
-                self.copy_and_replace_query(query_path, [new_link_path])
-        for n in range(len(self.nodes)):
-            for p in self.partitions:
-                query_path = get_cluster_query_path(self.base_paths, test, p, n)
-                prepare_path(query_path, reset)
-            
-                # Copy query files.
-                partition_paths = get_disk_partition_paths(n, p, self.base_paths, self.DATA_LINKS_FOLDER + test + "/" + str(n) + "nodes")
-                self.copy_and_replace_query(query_path, partition_paths)
-
-    def copy_local_query_files(self, test, reset):
-        '''Determine the data_link path for local query files and copy with
-        new location for collection.'''
-        if 1 in self.partitions and len(self.base_paths) > 1:
-            query_path = get_local_query_path(self.base_paths, test, 0)
-            prepare_path(query_path, reset)
-    
-            # Copy query files.
-            new_link_path = self.get_zero_partition_path(0, self.DATA_LINKS_FOLDER + test)
-            self.copy_and_replace_query(query_path, [new_link_path])
-        for p in self.partitions:
-            query_path = get_local_query_path(self.base_paths, test, p)
-            prepare_path(query_path, reset)
-    
-            # Copy query files.
-            partition_paths = get_disk_partition_paths(0, p, self.base_paths, self.DATA_LINKS_FOLDER + test)
-            self.copy_and_replace_query(query_path, partition_paths)
-
-    def copy_and_replace_query(self, query_path, replacement_list):
-        '''Copy the query files over to the query_path and replace the path
-        for the where the collection data is located.'''
-        for query_file in self.QUERY_FILE_LIST + self.QUERY_UTILITY_LIST:
-            shutil.copyfile(self.QUERY_MASTER_FOLDER + query_file, query_path + query_file)
-        
-            # Make a search replace for each collection.
-            for collection in self.QUERY_COLLECTIONS:
-                replacement_list_with_type = []
-                for replace in replacement_list:
-                    replacement_list_with_type.append(replace + collection)
-
-                replace_string = self.SEPERATOR.join(replacement_list_with_type)
-                for line in fileinput.input(query_path + query_file, True):
-                    sys.stdout.write(line.replace(self.QUERY_REPLACEMENT_KEY + collection, replace_string))
-                    
-            # Make a search replace for partition type.
-            if self.dataset.get_partition_type() == "large_files":
-                for line in fileinput.input(query_path + query_file, True):
-                    sys.stdout.write(line.replace("/stationCollection", "/" + self.LARGE_FILE_ROOT_TAG + "/stationCollection"))
-                for line in fileinput.input(query_path + query_file, True):
-                    sys.stdout.write(line.replace("/dataCollection", "/" + self.LARGE_FILE_ROOT_TAG + "/dataCollection"))
-                    
-    def get_number_of_slices_per_disk(self):
-        if len(self.dataset.get_tests()) == 0:
-            print "No test has been defined in config file."
-        else:
-            for test in self.dataset.get_tests():
-                if test in self.BENCHMARK_LOCAL_TESTS:
-                    return get_local_virtual_disk_partitions(self.partitions)
-                elif test in self.BENCHMARK_CLUSTER_TESTS:
-                    return get_cluster_virtual_disk_partitions(self.nodes, self.partitions)
-                else:
-                    print "Unknown test."
-                    exit()
-
-def get_cluster_link_scheme(nodes, partition, base_paths, key="partitions"):        
-    link_paths = []
-    for n in range(0, nodes):
-        new_link_path = get_disk_partition_scheme(n, partition, base_paths, key + "/" + str(n) + "nodes")
-        link_paths.extend(new_link_path)
-    return link_paths
-
-def get_local_query_path(base_paths, test, partition):        
-    return base_paths[0] + "queries/" + test + "/" + get_local_query_folder(len(base_paths), partition) + "/"
-
-def get_local_query_folder(disks, partitions):        
-    return "d" + str(disks) + "_p" + str(partitions)
-
-def get_cluster_query_path(base_paths, test, partition, nodes):        
-    return base_paths[0] + "queries/" + test + "/" + str(nodes) + "nodes/" + get_local_query_folder(len(base_paths), partition) + "/"
-
-def get_cluster_virtual_disk_partitions(nodes, partitions):
-    vp = get_local_virtual_disk_partitions(partitions)
-    vn = calculate_partitions(range(1, len(nodes)+1, 1))
-    return vp * vn
-
-def get_local_virtual_disk_partitions(partitions):
-    return calculate_partitions(partitions)
-
-def calculate_partitions(list):
-    x = 1
-    for i in list:
-        if x % i != 0:
-            if i % x == 0:
-                x = i
-            else:
-                x *= i
-    return x

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
deleted file mode 100644
index eeae25c..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
+++ /dev/null
@@ -1,236 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import sys, getopt
-
-# Custom modules.
-from weather_data_files import *
-from weather_download_files import *
-from weather_convert_to_xml import *
-from weather_config import *
-from weather_benchmark import *
-
-# Set to True by the -v option; passed on to the XML conversion object.
-DEBUG_OUTPUT = False
-
-#
-# Weather conversion for GHCN-DAILY files to xml.
-#
-# http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt
-#
-def main(argv):
-    append = False
-    max_records = 0
-    process_file_name = ""
-    reset = False
-    section = "all"
-    token = ""
-    update = False
-    xml_config_path = ""
-    
-    try:
-        opts, args = getopt.getopt(argv, "af:hl:m:ruvw:x:", ["file=", "locality=", "max_station_files=", "web_service=", "xml_config="])
-    except getopt.GetoptError:
-        print 'The file options for weather_cli.py were not correctly specified.'
-        print 'To see a full list of options try:'
-        print '  $ python weather_cli.py -h'
-        sys.exit(2)
-    for opt, arg in opts:
-        if opt == '-h':
-            print 'Converting weather daily files to xml options:'
-            print '    -a        Append the results to the progress file.'
-            print '    -f (str)  The file name of a specific station to process.'
-            print '              * Helpful when testing a single stations XML file output.'
-            print '    -l (str)  Select the locality of the scripts execution (download, progress_file, sensor_build, station_build, partition, partition_scheme, test_links, queries, inventory, statistics).'
-            print '    -m (int)  Limits the number of files created for each station.'
-            print '              * Helpful when testing to make sure all elements are supported for each station.'
-            print '              Alternate form: --max_station_files=(int)'
-            print '    -r        Reset the build process. (For one section or all sections depending on other parameters.)'
-            print '    -u        Recalculate the file count and data size for each data source file.'
-            print '    -v        Extra debug information.'
-            print '    -w (str)  Downloads the station XML file form the web service.'
-            print '    -x (str)  XML config file for weather data.'
-            sys.exit()
-        elif opt in ('-a', "--append"):
-            append = True
-        elif opt in ('-f', "--file"):
-            # check if file exists.
-            if os.path.exists(arg):
-                process_file_name = arg
-            else:
-                print 'Error: Argument must be a file name for --file (-f).'
-                sys.exit()
-        elif opt in ('-l', "--locality"):
-            if arg in ("download", "progress_file", "sensor_build", "station_build", "partition", "partition_scheme", "test_links", "queries", "inventory", "statistics"):
-                section = arg
-            else:
-                print 'Error: Argument must be a string for --locality (-l) and a valid locality.'
-                sys.exit()
-        elif opt in ('-m', "--max_station_files"):
-            if arg.isdigit():
-                max_records = int(arg)
-            else:
-                print 'Error: Argument must be an integer for --max_station_files (-m).'
-                sys.exit()
-        elif opt == '-r':
-            reset = True
-        elif opt == '-u':
-            update = True
-        elif opt == '-v':
-            global DEBUG_OUTPUT
-            DEBUG_OUTPUT = True
-        elif opt == '-w':
-            # check if file exists.
-            if arg is not "":
-                token = arg
-            else:
-                print 'Error: Argument must be a string --web_service (-w).'
-                sys.exit()
-        elif opt in ('-x', "--xml_config"):
-            # check if file exists.
-            if os.path.exists(arg):
-                xml_config_path = arg
-            else:
-                print 'Error: Argument must be a xml file for --xml_config (-x).'
-                sys.exit()
-
-    # Required fields to run the script.
-    if xml_config_path == "" or not os.path.exists(xml_config_path):
-        print 'Error: The xml config option must be supplied: --xml_config (-x).'
-        sys.exit()
-    config = WeatherConfig(xml_config_path)
-    
-    # Required fields to run the script.
-    if config.get_save_path() == "" or not os.path.exists(config.get_save_path()):
-        print 'Error: The save directory option must be supplied in the config file.'
-        sys.exit()
-
-    # Set up downloads folder.
-    download_path = config.get_save_path() + "/downloads"
-    if section in ("all", "download"):
-        print 'Processing the download section.'
-        download = WeatherDownloadFiles(download_path)
-        download.download_ghcnd_files(reset)
-        download.download_mshr_files(reset)
-
-        # Unzip the required file.
-        download.unzip_ghcnd_package(config.get_package(), reset)
-        download.unzip_mshr_files(reset)
-
-
-    # Create some basic paths for save files and references.
-    ghcnd_data_dly_path = download_path + '/' + config.get_package() + '/' + config.get_package()
-    xml_data_save_path = config.get_save_path() + '/all_xml_files/'
-
-    # Make sure the xml folder is available.
-    if not os.path.isdir(xml_data_save_path):
-        os.makedirs(xml_data_save_path)
-
-    # Set up the XML build objects.
-    convert = WeatherWebServiceMonthlyXMLFile(download_path, xml_data_save_path, DEBUG_OUTPUT)
-    progress_file = xml_data_save_path + "_data_progress.csv"
-    data = WeatherDataFiles(ghcnd_data_dly_path, progress_file)
-    if section in ("all", "progress_file"):
-        print 'Processing the progress_file section.'
-        options = list()
-        if append:
-            options.append('append')
-        if update:
-            options.append('recalculate')
-        if reset:
-            options.append('reset')
-        data.build_progress_file(options, convert)
-    
-    if section in ("all", "sensor_build"):
-        print 'Processing the sensor_build section.'
-        if process_file_name is not "":
-            # process a single file
-            if os.path.exists(process_file_name):
-                (file_count, data_size) = convert.process_sensor_file(process_file_name, max_records, 4)
-                data.update_file_sensor_status(process_file_name, WeatherDataFiles.DATA_FILE_GENERATED, file_count, data_size)
-            else:
-                data.update_file_sensor_status(process_file_name, WeatherDataFiles.DATA_FILE_MISSING)
-        else:
-            # process directory
-            data.reset()
-            data.set_type("sensor")
-            data.set_data_reset(reset)
-            for file_name in data:
-                file_path = ghcnd_data_dly_path + '/' + file_name
-                if os.path.exists(file_path):
-                    (file_count, data_size) = convert.process_sensor_file(file_path, max_records, 4)
-                    data.update_file_sensor_status(file_name, WeatherDataFiles.DATA_FILE_GENERATED, file_count, data_size)
-                else:
-                    data.update_file_sensor_status(file_name, WeatherDataFiles.DATA_FILE_MISSING)
-                
-    if section in ("all", "station_build"):
-        print 'Processing the station_build section.'
-        data.reset()
-        data.set_type("station")
-        data.set_data_reset(reset)
-        if token is not "":
-            convert.set_token(token)
-        for file_name in data: 
-            file_path = ghcnd_data_dly_path + '/' + file_name
-            if os.path.exists(file_path):
-                return_status = convert.process_station_file(file_path)
-                status = data.get_station_status(return_status)
-                data.update_file_station_status(file_name, status)
-            else:
-                data.update_file_station_status(file_name, WeatherDataFiles.DATA_FILE_MISSING)
-                    
-    for dataset in config.get_dataset_list():
-        # Set up the setting for each dataset.
-        dataset_folder = "/dataset-" + dataset.get_name()
-        progress_file = config.get_save_path() + dataset_folder + "/_data_progress.csv"
-        data = WeatherDataFiles(ghcnd_data_dly_path, progress_file)
-
-        base_paths = []
-        for paths in dataset.get_save_paths():
-            base_paths.append(paths + dataset_folder + "/")
-        benchmark = WeatherBenchmark(base_paths, dataset.get_partitions(), dataset, config.get_node_machine_list())
-        
-        if section in ("all", "partition", "partition_scheme"):
-            slices = benchmark.get_number_of_slices_per_disk()
-            print 'Processing the partition section (' + dataset.get_name() + ':d' + str(len(base_paths)) + ':s' + str(slices) + ').'
-            data.reset()
-            if section == "partition_scheme":
-                benchmark.print_partition_scheme()
-            else:
-                if dataset.get_partition_type() == "large_files":
-                    data.build_to_n_partition_files(xml_data_save_path, slices, base_paths, reset)
-                else:
-                    data.copy_to_n_partitions(xml_data_save_path, slices, base_paths, reset)
-    
-        if section in ("all", "test_links"):
-            # TODO determine current node 
-            print 'Processing the test links section (' + dataset.get_name() + ').'
-            benchmark.print_partition_scheme()
-            benchmark.build_data_links(reset)
-
-        if section in ("all", "queries"):
-            print 'Processing the queries section (' + dataset.get_name() + ').'
-            benchmark.copy_query_files(reset)
-    
-    if section in ("inventory"):
-        print 'Processing the inventory section.'
-        convert.process_inventory_file()
-                  
-#     if section in ("statistics"):
-#         print 'Processing the statistics section.'
-#         data.print_progress_file_stats(convert)
-                  
-# Only run the command line interface when the file is executed as a script.
-if __name__ == "__main__":
-    main(sys.argv[1:])

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py
deleted file mode 100644
index 80607b8..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py
+++ /dev/null
@@ -1,134 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from xml.dom.minidom import parse
-
-class WeatherConfig:
-    def __init__(self, config_xml_file):
-        self.config_xml_file = config_xml_file
-        
-        self.config = parse(self.config_xml_file)
-
-    def get_save_path(self):
-        return self.get_text(self.config.getElementsByTagName("save_path")[0])
-
-    def get_package(self):
-        return self.get_text(self.config.getElementsByTagName("package")[0])
-
-    def get_node_machine_list(self):
-        nodes = []
-        for node in self.config.getElementsByTagName("node"):
-            id = self.get_node_name(node)
-            ip = self.get_node_ip(node)
-            nodes.append(Machine(id, ip))
-        return nodes
-
-    def get_dataset_list(self):
-        nodes = []
-        for node in self.config.getElementsByTagName("dataset"):
-            name = self.get_dataset_name(node)
-            save_paths = self.get_dataset_save_paths(node)
-            partition_type = self.get_dataset_partition_type(node)
-            partitions = self.get_dataset_partitions(node)
-            tests = self.get_dataset_tests(node)
-            nodes.append(Dataset(name, save_paths, partition_type, partitions, tests))
-        return nodes
-
-
-    # --------------------------------------------------------------------------
-    # Node Specific Functions
-    # --------------------------------------------------------------------------
-    def get_node_ip(self, node):
-        return self.get_text(node.getElementsByTagName("cluster_ip")[0])
-
-    def get_node_name(self, node):
-        return self.get_text(node.getElementsByTagName("id")[0])
-
-    
-    # --------------------------------------------------------------------------
-    # Dataset Specific Functions
-    # --------------------------------------------------------------------------
-    def get_dataset_name(self, node):
-        return self.get_text(node.getElementsByTagName("name")[0])
-
-    def get_dataset_save_paths(self, node):
-        paths = []
-        for item in node.getElementsByTagName("save_path"):
-            paths.append(self.get_text(item))
-        return paths
-
-    def get_dataset_partition_type(self, node):
-        return self.get_text(node.getElementsByTagName("partition_type")[0])
-
-    def get_dataset_partitions(self, node):
-        paths = []
-        for item in node.getElementsByTagName("partitions_per_path"):
-            paths.append(int(self.get_text(item)))
-        return paths
-
-    def get_dataset_tests(self, node):
-        tests = []
-        for item in node.getElementsByTagName("test"):
-            tests.append(self.get_text(item))
-        return tests
-
-    def get_text(self, xml_node):
-        rc = []
-        for node in xml_node.childNodes:
-            if node.nodeType == node.TEXT_NODE:
-                rc.append(node.data)
-        return ''.join(rc)
-
-class Machine:
-    def __init__(self, id, ip):
-        self.id = id
-        self.ip = ip
-    
-    def get_node_name(self):
-        return self.id
-    
-    def get_node_ip(self):
-        return self.ip
-    
-    def __repr__(self):
-        return self.id + "(" + self.ip + ")"
-    
-class Dataset:
-    def __init__(self, name, save_paths, partition_type, partitions, tests):
-        self.name = name
-        self.save_paths = save_paths
-        self.partitions = partitions
-        self.partition_type = partition_type
-        self.tests = tests
-    
-    def get_name(self):
-        return self.name
-    
-    def get_save_paths(self):
-        return self.save_paths
-    
-    def get_partitions(self):
-        return self.partitions
-    
-    def get_partition_type(self):
-        return self.partition_type
-    
-    def get_tests(self):
-        return self.tests
-    
-    def __repr__(self):
-        return self.name + ":" + str(self.save_paths) + ":" + str(self.partitions)
-    

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
deleted file mode 100644
index 04fff52..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
+++ /dev/null
@@ -1,95 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Base URL used to get all the required files.
-BASE_DOWNLOAD_URL = 'http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/'
-
-# List of required files for a build.
-FILE_NAMES = []
-FILE_NAMES.append('ghcnd-countries.txt')
-FILE_NAMES.append('ghcnd-inventory.txt')
-FILE_NAMES.append('ghcnd-states.txt')
-FILE_NAMES.append('ghcnd-stations.txt')
-FILE_NAMES.append('ghcnd-version.txt')
-FILE_NAMES.append('ghcnd_all.tar.gz')
-FILE_NAMES.append('ghcnd_gsn.tar.gz')
-FILE_NAMES.append('ghcnd_hcn.tar.gz')
-FILE_NAMES.append('readme.txt')
-FILE_NAMES.append('status.txt')
-
-# Store the row details here.
-
-# Index values of each field details.
-FIELD_INDEX_NAME = 0
-FIELD_INDEX_START = 1
-FIELD_INDEX_END = 2
-FIELD_INDEX_TYPE = 3
-
-DLY_FIELD_ID = 0
-DLY_FIELD_YEAR = 1
-DLY_FIELD_MONTH = 2
-DLY_FIELD_ELEMENT = 3
-
-DLY_FIELD_DAY_OFFSET = 4
-DLY_FIELD_DAY_FIELDS = 4
-
-DLY_FIELDS = []
-
-# Details about the row.
-DLY_FIELDS.append(['ID', 1, 11, 'Character'])
-DLY_FIELDS.append(['YEAR', 12, 15, 'Integer'])
-DLY_FIELDS.append(['MONTH', 16, 17, 'Integer'])
-DLY_FIELDS.append(['ELEMENT', 18, 21, 'Character'])
-
-# Days in each row.
-for i in range(1, 32):
-    start = 22 + ((i - 1) * 8)
-    DLY_FIELDS.append(['VALUE' + str(i), (start + 0), (start + 4), 'Integer'])
-    DLY_FIELDS.append(['MFLAG' + str(i), (start + 5), (start + 5), 'Character'])
-    DLY_FIELDS.append(['QFLAG' + str(i), (start + 6), (start + 6), 'Character'])
-    DLY_FIELDS.append(['SFLAG' + str(i), (start + 7), (start + 7), 'Character'])
-
-# Details about the row.
-STATIONS_FIELDS = {}
-STATIONS_FIELDS['ID'] = ['ID', 1, 11, 'Character']
-STATIONS_FIELDS['LATITUDE'] = ['LATITUDE', 13, 20, 'Real']
-STATIONS_FIELDS['LONGITUDE'] = ['LONGITUDE', 22, 30, 'Real']
-STATIONS_FIELDS['ELEVATION'] = ['ELEVATION', 32, 37, 'Real']
-STATIONS_FIELDS['STATE'] = ['STATE', 39, 40, 'Character']
-STATIONS_FIELDS['NAME'] = ['NAME', 42, 71, 'Character']
-STATIONS_FIELDS['GSNFLAG'] = ['GSNFLAG', 73, 75, 'Character']
-STATIONS_FIELDS['HCNFLAG'] = ['HCNFLAG', 77, 79, 'Character']
-STATIONS_FIELDS['WMOID'] = ['WMOID', 81, 85, 'Character']
-
-# Details about the row.
-COUNTRIES_FIELDS = {}
-COUNTRIES_FIELDS['CODE'] = ['CODE', 1, 2, 'Character']
-COUNTRIES_FIELDS['NAME'] = ['NAME', 4, 50, 'Character']
-
-# Details about the row.
-STATES_FIELDS = {}
-STATES_FIELDS['CODE'] = ['CODE', 1, 2, 'Character']
-STATES_FIELDS['NAME'] = ['NAME', 4, 50, 'Character']
-
-# Details about the row.
-INVENTORY_FIELDS = {}
-INVENTORY_FIELDS['ID'] = ['ID', 1, 11, 'Character']
-INVENTORY_FIELDS['LATITUDE'] = ['LATITUDE', 13, 20, 'Real']
-INVENTORY_FIELDS['LONGITUDE'] = ['LONGITUDE', 22, 30, 'Real']
-INVENTORY_FIELDS['ELEMENT'] = ['ELEMENT', 32, 35, 'Character']
-INVENTORY_FIELDS['FIRSTYEAR'] = ['FIRSTYEAR', 37, 40, 'Integer']
-INVENTORY_FIELDS['LASTYEAR'] = ['LASTYEAR', 42, 45, 'Integer']

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
deleted file mode 100644
index 7b1434f..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
+++ /dev/null
@@ -1,78 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# List of required files for a build.
-MSHR_URLS = []
-MSHR_URLS.append('ftp://ftp.ncdc.noaa.gov/pub/data/homr/docs/MSHR_Enhanced_Table.txt')
-MSHR_URLS.append('http://www.ncdc.noaa.gov/homr/file/mshr_enhanced.txt.zip')
-
-# Index values of each field details.
-MSHR_FIELD_INDEX_NAME = 0
-MSHR_FIELD_INDEX_START = 1
-MSHR_FIELD_INDEX_END = 2
-MSHR_FIELD_INDEX_TYPE = 3
-
-# Store the row details here.
-MSHR_FIELDS = {}
-
-# Details about the row.
-MSHR_FIELDS['SOURCE_ID'] = ['SOURCE_ID', 1, 20, 'X(20)']
-MSHR_FIELDS['SOURCE'] = ['SOURCE', 22, 31, 'X(10)']
-MSHR_FIELDS['BEGIN_DATE'] = ['BEGIN_DATE', 33, 40, 'YYYYMMDD']
-MSHR_FIELDS['END_DATE'] = ['END_DATE', 42, 49, 'YYYYMMDD']
-MSHR_FIELDS['STATION_STATUS'] = ['STATION_STATUS', 51, 70, 'X(20)']
-MSHR_FIELDS['NCDCSTN_ID'] = ['NCDCSTN_ID', 72, 91, 'X(20)']
-MSHR_FIELDS['ICAO_ID'] = ['ICAO_ID', 93, 112, 'X(20)']
-MSHR_FIELDS['WBAN_ID'] = ['WBAN_ID', 114, 133, 'X(20)']
-MSHR_FIELDS['FAA_ID'] = ['FAA_ID', 135, 154, 'X(20)']
-MSHR_FIELDS['NWSLI_ID'] = ['NWSLI_ID', 156, 175, 'X(20)']
-MSHR_FIELDS['WMO_ID'] = ['WMO_ID', 177, 196, 'X(20)']
-MSHR_FIELDS['COOP_ID'] = ['COOP_ID', 198, 217, 'X(20)']
-MSHR_FIELDS['TRANSMITTAL_ID'] = ['TRANSMITTAL_ID', 219, 238, 'X(20)']
-MSHR_FIELDS['GHCND_ID'] = ['GHCND_ID', 240, 259, 'X(20)']
-MSHR_FIELDS['NAME_PRINCIPAL'] = ['NAME_PRINCIPAL', 261, 360, 'X(100)']
-MSHR_FIELDS['NAME_PRINCIPAL_SHORT'] = ['NAME_PRINCIPAL_SHORT', 362, 391, 'X(30)']
-MSHR_FIELDS['NAME_COOP'] = ['NAME_COOP', 393, 492, 'X(100)']
-MSHR_FIELDS['NAME_COOP_SHORT'] = ['NAME_COOP_SHORT', 494, 523, 'X(30)']
-MSHR_FIELDS['NAME_PUBLICATION'] = ['NAME_PUBLICATION', 525, 624, 'X(100)']
-MSHR_FIELDS['NAME_ALIAS'] = ['NAME_ALIAS', 626, 725, 'X(100)']
-MSHR_FIELDS['NWS_CLIM_DIV'] = ['NWS_CLIM_DIV', 727, 736, 'X(10)']
-MSHR_FIELDS['NWS_CLIM_DIV_NAME'] = ['NWS_CLIM_DIV_NAME', 738, 777, 'X(40)']
-MSHR_FIELDS['STATE_PROV'] = ['STATE_PROV', 779, 788, 'X(10)']
-MSHR_FIELDS['COUNTY'] = ['COUNTY', 790, 839, 'X(50)']
-MSHR_FIELDS['NWS_ST_CODE'] = ['NWS_ST_CODE', 841, 842, 'X(2)']
-MSHR_FIELDS['FIPS_COUNTRY_CODE'] = ['FIPS_COUNTRY_CODE', 844, 845, 'X(2)']
-MSHR_FIELDS['FIPS_COUNTRY_NAME'] = ['FIPS_COUNTRY_NAME', 847, 946, 'X(100)']
-MSHR_FIELDS['NWS_REGION'] = ['NWS_REGION', 948, 977, 'X(30)']
-MSHR_FIELDS['NWS_WFO'] = ['NWS_WFO', 979, 988, 'X(10)']
-MSHR_FIELDS['ELEV_GROUND'] = ['ELEV_GROUND', 990, 1029, 'X(40)']
-MSHR_FIELDS['ELEV_GROUND_UNIT'] = ['ELEV_GROUND_UNIT', 1031, 1050, 'X(20)']
-MSHR_FIELDS['ELEV_BAROM'] = ['ELEV_BAROM', 1052, 1091, 'X(40)']
-MSHR_FIELDS['ELEV_BAROM_UNIT'] = ['ELEV_BAROM_UNIT', 1093, 1112, 'X(20)']
-MSHR_FIELDS['ELEV_AIR'] = ['ELEV_AIR', 1114, 1153, 'X(40)']
-MSHR_FIELDS['ELEV_AIR_UNIT'] = ['ELEV_AIR_UNIT', 1155, 1174, 'X(20)']
-MSHR_FIELDS['ELEV_ZERODAT'] = ['ELEV_ZERODAT', 1176, 1215, 'X(40)']
-MSHR_FIELDS['ELEV_ZERODAT_UNIT'] = ['ELEV_ZERODAT_UNIT', 1217, 1236, 'X(20)']
-MSHR_FIELDS['ELEV_UNK'] = ['ELEV_UNK', 1238, 1277, 'X(40)']
-MSHR_FIELDS['ELEV_UNK_UNIT'] = ['ELEV_UNK_UNIT', 1279, 1298, 'X(20)']
-MSHR_FIELDS['LAT_DEC'] = ['LAT_DEC', 1300, 1319, 'X(20)']
-MSHR_FIELDS['LON_DEC'] = ['LON_DEC', 1321, 1340, 'X(20)']
-MSHR_FIELDS['LAT_LON_PRECISION'] = ['LAT_LON_PRECISION', 1342, 1351, 'X(10)']
-MSHR_FIELDS['RELOCATION'] = ['RELOCATION', 1353, 1414, 'X(62)']
-MSHR_FIELDS['UTC_OFFSET'] = ['UTC_OFFSET', 1416, 1431, '9(16)']
-MSHR_FIELDS['OBS_ENV'] = ['OBS_ENV', 1433, 1472, 'X(40) ']
-MSHR_FIELDS['PLATFORM'] = ['PLATFORM', 1474, 1573, 'X(100)']


[11/14] git commit: Added a background processes to help with parallelizing the data loading.

Posted by pr...@apache.org.
Added a background processes to help with parallelizing the data loading.


Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/31b3f4d9
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/31b3f4d9
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/31b3f4d9

Branch: refs/heads/master
Commit: 31b3f4d985c6344d1ccc15ce57fb57a1cb2e3011
Parents: eefadb2
Author: Preston Carman <pr...@apache.org>
Authored: Thu Oct 9 12:19:27 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Thu Oct 9 12:19:27 2014 -0700

----------------------------------------------------------------------
 .../mrql_scripts/load_node_file.sh              | 37 ++++++++++++++++++++
 .../mrql_scripts/run_group_test.sh              | 15 +++-----
 2 files changed, 41 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/vxquery/blob/31b3f4d9/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
new file mode 100755
index 0000000..048274f
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#      http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+if [ -z "${1}" ]
+then
+    echo "Please enter the node number."
+    exit
+fi
+
+
+# Add each sensor block
+cp saved/backups/mr/all_sensors_${1}.xml.gz disk1/hadoop/upload/
+gunzip disk1/hadoop/upload/all_sensors_${1}.xml.gz
+hadoop fs -copyFromLocal disk1/hadoop/upload/all_sensors_${1}.xml all/sensors
+rm -f disk1/hadoop/upload/all_sensors_${1}.xml
+
+# Add each station block
+cp saved/backups/mr/all_stations_${1}.xml.gz disk1/hadoop/upload/
+gunzip disk1/hadoop/upload/all_stations_${1}.xml.gz
+hadoop fs -copyFromLocal disk1/hadoop/upload/all_stations_${1}.xml all/stations
+rm -f disk1/hadoop/upload/all_stations_${1}.xml

http://git-wip-us.apache.org/repos/asf/vxquery/blob/31b3f4d9/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
index c34ec95..60dc255 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
@@ -33,19 +33,12 @@ hadoop fs -mkdir all/stations
 n=0
 while [ ${n} -lt ${NODES} ];
 do
-    # Add each sensor block
-    cp saved/backups/mr/all_sensors_${n}.xml.gz disk1/hadoop/upload/
-    gunzip disk1/hadoop/upload/all_sensors_${n}.xml.gz
-    hadoop fs -copyFromLocal disk1/hadoop/upload/all_sensors_${n}.xml all/sensors
-    rm -f disk1/hadoop/upload/all_sensors_${n}.xml
-    
-    # Add each station block
-    cp saved/backups/mr/all_stations_${n}.xml.gz disk1/hadoop/upload/
-    gunzip disk1/hadoop/upload/all_stations_${n}.xml.gz
-    hadoop fs -copyFromLocal disk1/hadoop/upload/all_stations_${n}.xml all/stations
-    rm -f disk1/hadoop/upload/all_stations_${n}.xml
+    # Load this node's files in a background job so the nodes upload in parallel.
+    sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh ${n} &
+    # Move on to the next node; without this the condition never becomes
+    # false and the loop keeps spawning background jobs for node 0.
+    n=$((n + 1))
 done
 
+# After all files have been uploaded, continue.
+wait
+
 
 # Start test
 sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/ ${NODES} ${REPEAT}


[04/14] copy of all changes in exrt benchmark queries that is copyright free.

Posted by pr...@apache.org.
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
new file mode 100644
index 0000000..5db090a
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
@@ -0,0 +1,554 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import textwrap
+from datetime import date
+import os
+from collections import OrderedDict
+
+# Custom modules.
+from weather_config_ghcnd import *
+from weather_config_mshr import *
+from weather_download_files import *
+
+class WeatherConvertToXML:
+    
+    STATES = OrderedDict({
+        'AK': 'Alaska',
+        'AL': 'Alabama',
+        'AR': 'Arkansas',
+        'AS': 'American Samoa',
+        'AZ': 'Arizona',
+        'CA': 'California',
+        'CO': 'Colorado',
+        'CT': 'Connecticut',
+        'DC': 'District of Columbia',
+        'DE': 'Delaware',
+        'FL': 'Florida',
+        'GA': 'Georgia',
+        'GU': 'Guam',
+        'HI': 'Hawaii',
+        'IA': 'Iowa',
+        'ID': 'Idaho',
+        'IL': 'Illinois',
+        'IN': 'Indiana',
+        'KS': 'Kansas',
+        'KY': 'Kentucky',
+        'LA': 'Louisiana',
+        'MA': 'Massachusetts',
+        'MD': 'Maryland',
+        'ME': 'Maine',
+        'MI': 'Michigan',
+        'MN': 'Minnesota',
+        'MO': 'Missouri',
+        'MP': 'Northern Mariana Islands',
+        'MS': 'Mississippi',
+        'MT': 'Montana',
+        'NA': 'National',
+        'NC': 'North Carolina',
+        'ND': 'North Dakota',
+        'NE': 'Nebraska',
+        'NH': 'New Hampshire',
+        'NJ': 'New Jersey',
+        'NM': 'New Mexico',
+        'NV': 'Nevada',
+        'NY': 'New York',
+        'OH': 'Ohio',
+        'OK': 'Oklahoma',
+        'OR': 'Oregon',
+        'PA': 'Pennsylvania',
+        'PR': 'Puerto Rico',
+        'RI': 'Rhode Island',
+        'SC': 'South Carolina',
+        'SD': 'South Dakota',
+        'TN': 'Tennessee',
+        'TX': 'Texas',
+        'UT': 'Utah',
+        'VA': 'Virginia',
+        'VI': 'Virgin Islands',
+        'VT': 'Vermont',
+        'WA': 'Washington',
+        'WI': 'Wisconsin',
+        'WV': 'West Virginia',
+        'WY': 'Wyoming'
+    })
+    
+    MONTHS = [
+        "January",
+        "February",
+        "March",
+        "April",
+        "May",
+        "June",
+        "July",
+        "August",
+        "September",
+        "October",
+        "November",
+        "December"
+    ]
+    
+    token = ""
+    
+    def __init__(self, base_path, save_path, debug_output):
+        self.save_path = save_path
+        self.debug_output = debug_output
+
+        # Extra support files.
+        self.ghcnd_countries = base_path + '/ghcnd-countries.txt'
+        self.ghcnd_inventory = base_path + '/ghcnd-inventory.txt'
+        self.ghcnd_states = base_path + '/ghcnd-states.txt'
+        self.ghcnd_stations = base_path + '/ghcnd-stations.txt'
+        
+        # MSHR support files.
+        self.mshr_stations = base_path + '/mshr_enhanced_201402.txt'
+        
+    def set_token(self, token):
+        self.token = token
+        
+    def get_field_from_definition(self, row, field_definition):
+        return row[(field_definition[FIELD_INDEX_START] - 1):field_definition[FIELD_INDEX_END]]
+    
+    def get_field(self, fields_array, row, index):
+        return row[(fields_array[index][FIELD_INDEX_START] - 1):fields_array[index][FIELD_INDEX_END]]
+    
+    def get_dly_field(self, row, index):
+        return self.get_field(DLY_FIELDS, row, index)
+    
+    def print_row_files(self, row):
+        for field in DLY_FIELDS:
+            print str(field[FIELD_INDEX_NAME]) + " = '" + row[(field[FIELD_INDEX_START] - 1):field[FIELD_INDEX_END]] + "'"
+    
+    def save_file(self, filename, contents):
+        file = open(filename, 'w')
+        file.write(contents)
+        file.close()
+        return filename
+    
+    def get_folder_size(self, folder_name):
+        total_size = 0
+        for dirpath, dirnames, filenames in os.walk(folder_name):
+            for f in filenames:
+                fp = os.path.join(dirpath, f)
+                total_size += os.path.getsize(fp)
+        return total_size
+
+    def process_one_month_sensor_set(self, records, page):
+        # Default
+        return 0
+    
+    def process_station_data(self, row):
+        # Default
+        return 0
+    
+    def get_base_folder(self, station_id, data_type="sensors"):
+        return build_base_save_folder(self.save_path, station_id, data_type) 
+    
+    def process_inventory_file(self):
+        print "Processing inventory file"
+        file_stream = open(self.ghcnd_inventory, 'r')
+        
+        csv_header = ['ID', 'SENSORS', 'SENSORS_COUNT', 'MAX_YEARS', 'TOTAL_YEARS_FOR_ALL_SENSORS']
+        row = file_stream.readline()
+        csv_inventory = {}
+        for row in file_stream:
+            id = self.get_field_from_definition(row, INVENTORY_FIELDS['ID'])
+            sensor_id = self.get_field_from_definition(row, INVENTORY_FIELDS['ELEMENT'])
+            start = int(self.get_field_from_definition(row, INVENTORY_FIELDS['FIRSTYEAR']))
+            end = int(self.get_field_from_definition(row, INVENTORY_FIELDS['LASTYEAR']))
+            if id in csv_inventory:
+                new_count = str(int(csv_inventory[id][2]) + 1)
+                new_max = str(max(int(csv_inventory[id][3]), (end - start)))
+                new_total = str(int(csv_inventory[id][3]) + end - start)
+                csv_inventory[id] = [id, (csv_inventory[id][1] + "," + sensor_id), new_count, new_max, new_total]
+            else:
+                csv_inventory[id] = [id, sensor_id, str(1), str(end - start), str(end - start)]
+                
+        path = self.save_path + "/inventory.csv"
+        self.save_csv_file(path, csv_inventory, csv_header)
+    
+    def save_csv_file(self, path, csv_inventory, header):
+        csv_content = "|".join(header) + "\n"
+        for row_id in csv_inventory:
+            csv_content += "|".join(csv_inventory[row_id]) + "\n"
+        self.save_file(path, csv_content)
+        
+
+    def process_station_file(self, file_name):
+        print "Processing station file: " + file_name
+        file_stream = open(file_name, 'r')
+        
+        row = file_stream.readline()
+        return self.process_station_data(row)
+
+    def process_sensor_file(self, file_name, max_files, sensor_max=99):
+        print "Processing sensor file: " + file_name
+        file_stream = open(file_name, 'r')
+    
+        month_last = 0
+        year_last = 0
+        records = []
+        page = 0
+        sensor_count = 0
+    
+        file_count = 0
+        for row in file_stream:
+            month = self.get_dly_field(row, DLY_FIELD_MONTH)
+            year = self.get_dly_field(row, DLY_FIELD_YEAR)
+            
+            if (month_last != 0 and year_last != 0) and (sensor_count >= sensor_max or month != month_last or year != year_last):
+                # process set
+                file_count += self.process_one_month_sensor_set(records, page)
+                records = []
+                if sensor_count >= sensor_max and month == month_last and year == year_last:
+                    # start a new page.
+                    page += 1
+                else:
+                    # start over.
+                    page = 0
+                sensor_count = 0
+            
+            records.append(row)
+            sensor_count += 1
+            if max_files != 0 and file_count >= max_files:
+                # Stop creating more files after the max is reached.
+                break
+
+            month_last = month
+            year_last = year
+        
+        station_id = self.get_dly_field(records[0], DLY_FIELD_ID)
+        data_size = self.get_folder_size(self.get_base_folder(station_id) + "/" + station_id)
+        print "Created " + str(file_count) + " XML files for a data size of " + str(data_size) + "."
+        
+        return (file_count, data_size)
+    
+    def convert_c2f(self, c):
+        return (9 / 5 * c) + 32
+    
+    def default_xml_web_service_start(self):
+        field_xml = ""
+        field_xml += "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n"
+        return field_xml
+    
+    def default_xml_data_start(self, total_records):
+        field_xml = ""
+        field_xml += "<dataCollection pageCount=\"1\" totalCount=\"" + str(total_records) + "\">\n"
+        return field_xml
+    
+    def default_xml_station_start(self):
+        field_xml = ""
+        field_xml = "<stationCollection pageSize=\"100\" pageCount=\"1\" totalCount=\"1\">\n"
+        return field_xml
+    
+    def default_xml_field_date(self, report_date, indent=2):
+        field_xml = ""
+        field_xml += self.get_indent_space(indent) + "<date>" + str(report_date.year) + "-" + str(report_date.month).zfill(2) + "-" + str(report_date.day).zfill(2) + "T00:00:00.000</date>\n"
+        return field_xml
+    
+    def default_xml_mshr_station_additional(self, station_id):
+        """The web service station data is generate from the MSHR data supplemented with GHCN-Daily."""
+        station_mshr_row = ""
+        stations_mshr_file = open(self.mshr_stations, 'r')
+        for line in stations_mshr_file:
+            if station_id == self.get_field_from_definition(line, MSHR_FIELDS['GHCND_ID']).strip():
+                station_mshr_row = line
+                break
+        
+        if station_mshr_row == "":
+            return ""
+
+        additional_xml = ""
+
+        county = self.get_field_from_definition(station_mshr_row, MSHR_FIELDS['COUNTY']).strip()
+        if county != "":
+            additional_xml += self.default_xml_location_labels("CNTY", "FIPS:-9999", county)
+            
+        country_code = self.get_field_from_definition(station_mshr_row, MSHR_FIELDS['FIPS_COUNTRY_CODE']).strip()
+        country_name = self.get_field_from_definition(station_mshr_row, MSHR_FIELDS['FIPS_COUNTRY_NAME']).strip()
+        if country_code != "" and country_name != "":
+            additional_xml += self.default_xml_location_labels("CNTRY", "FIPS:" + country_code, country_name)
+        
+        return additional_xml
+
+    def default_xml_location_labels(self, type, id, display_name):
+        label_xml = ""
+        label_xml += self.default_xml_start_tag("locationLabels", 2)
+        label_xml += self.default_xml_element("type", type, 3)
+        label_xml += self.default_xml_element("id", id, 3)
+        label_xml += self.default_xml_element("displayName", display_name, 3)
+        label_xml += self.default_xml_end_tag("locationLabels", 2)
+        return label_xml
+        
+
+    def default_xml_web_service_station(self, station_id):
+        """The web service station data is generate from available historical sources."""
+        station_ghcnd_row = ""
+        stations_ghcnd_file = open(self.ghcnd_stations, 'r')
+        for line in stations_ghcnd_file:
+            if station_id == self.get_field_from_definition(line, STATIONS_FIELDS['ID']):
+                station_ghcnd_row = line
+                break
+    
+        xml_station = ""
+        xml_station += self.default_xml_start_tag("station", 1)
+        
+        xml_station += self.default_xml_element("id", "GHCND:" + station_id, 2)
+        xml_station += self.default_xml_element("displayName", self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['NAME']).strip(), 2)
+        xml_station += self.default_xml_element("latitude", self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['LATITUDE']).strip(), 2)
+        xml_station += self.default_xml_element("longitude", self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['LONGITUDE']).strip(), 2)
+        
+        elevation = self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['ELEVATION']).strip()
+        if elevation != "-999.9":
+            xml_station += self.default_xml_element("elevation", elevation, 2)
+        
+        state_code = self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['STATE']).strip()
+        if state_code != "" and state_code in self.STATES:
+            xml_station += self.default_xml_location_labels("ST", "FIPS:" + str(self.STATES.keys().index(state_code)), self.STATES[state_code])
+        
+        # Add the MSHR data to the station generated information.
+        xml_station += self.default_xml_mshr_station_additional(station_id)
+            
+        xml_station += self.default_xml_end_tag("station", 1)
+        return xml_station
+        
+    def default_xml_day_reading_as_field(self, row, day):
+        day_index = DLY_FIELD_DAY_OFFSET + ((day - 1) * DLY_FIELD_DAY_FIELDS)
+        value = self.get_dly_field(row, day_index);
+        if value == "-9999":
+            return ""
+    
+        field_xml = ""
+        field_id = self.get_dly_field(row, DLY_FIELD_ELEMENT)
+        if field_id in ("MDTN", "MDTX", "MNPN", "MXPN", "TMAX", "TMIN", "TOBS",):
+            # Add both the celcius and fahrenheit temperatures.
+            celcius = float(value) / 10
+            field_xml += "            <" + field_id + "_c>" + str(celcius) + "</" + field_id + "_c>\n"
+            fahrenheit = self.convert_c2f(celcius)
+            field_xml += "            <" + field_id + "_f>" + str(fahrenheit) + "</" + field_id + "_f>\n"
+        elif field_id in ("AWND", "EVAP", "PRCP", "THIC", "WESD", "WESF", "WSF1", "WSF2", "WSF5", "WSFG", "WSFI", "WSFM",):
+            # Field values that are in tenths.
+            converted_value = float(value) / 10
+            field_xml += "            <" + field_id + ">" + str(converted_value) + "</" + field_id + ">\n"
+        elif field_id in ("ACMC", "ACMH", "ACSC", "ACSH", "PSUN",):
+            # Fields is a percentage.
+            field_xml += "            <" + field_id + ">" + value.strip() + "</" + field_id + ">\n"
+        elif field_id in ("FMTM", "PGTM",):
+            # Fields is a time value HHMM.
+            field_xml += "            <" + field_id + ">" + value.strip() + "</" + field_id + ">\n"
+        elif field_id in ("DAEV", "DAPR", "DASF", "DATN", "DATX", "DAWM", "DWPR", "FRGB", "FRGT", "FRTH", "GAHT", "MDSF", "MDWM", "MDEV", "MDPR", "SNOW", "SNWD", "TSUN", "WDF1", "WDF2", "WDF5", "WDFG", "WDFI", "WDFM", "WDMV",):
+            # Fields with no alternation needed.
+            field_xml += "            <" + field_id + ">" + value.strip() + "</" + field_id + ">\n"
+        else:
+            field_xml += "            <unknown>" + field_id + "</unknown>\n"
+            
+        # print field_xml
+        return field_xml
+    
+    def default_xml_day_reading(self, row, day, indent=2):
+        day_index = DLY_FIELD_DAY_OFFSET + ((day - 1) * DLY_FIELD_DAY_FIELDS)
+        value = self.get_dly_field(row, day_index);
+        mflag = self.get_dly_field(row, day_index + 1);
+        qflag = self.get_dly_field(row, day_index + 2);
+        sflag = self.get_dly_field(row, day_index + 3);
+
+        if value == "-9999":
+            return ""
+
+        indent_space = self.get_indent_space(indent)
+        field_id = self.get_dly_field(row, DLY_FIELD_ELEMENT)
+        station_id = "GHCND:" + self.get_dly_field(row, DLY_FIELD_ID)
+    
+        field_xml = ""
+        field_xml += indent_space + "<dataType>" + field_id + "</dataType>\n"
+        field_xml += indent_space + "<station>" + station_id + "</station>\n"
+        field_xml += indent_space + "<value>" + value.strip() + "</value>\n"
+        field_xml += indent_space + "<attributes>\n"
+        field_xml += indent_space + indent_space + "<attribute>" + mflag.strip() + "</attribute>\n"
+        field_xml += indent_space + indent_space + "<attribute>" + qflag.strip() + "</attribute>\n"
+        field_xml += indent_space + indent_space + "<attribute>" + sflag.strip() + "</attribute>\n"
+        field_xml += indent_space + indent_space + "<attribute></attribute>\n"
+        field_xml += indent_space + "</attributes>\n"
+
+        # print field_xml
+        return field_xml
+    
+    def default_xml_end(self):
+        return textwrap.dedent("""\
+            </ghcnd_observation>""")
+
+    def default_xml_data_end(self):
+        return self.default_xml_end_tag("dataCollection", 0)
+
+    def default_xml_station_end(self):
+        return self.default_xml_end_tag("stationCollection", 0)
+
+    def default_xml_element(self, tag, data, indent=1):
+        return self.get_indent_space(indent) + "<" + tag + ">" + data + "</" + tag + ">\n"
+
+    def default_xml_start_tag(self, tag, indent=1):
+        return self.get_indent_space(indent) + "<" + tag + ">\n"
+
+    def default_xml_end_tag(self, tag, indent=1):
+        return self.get_indent_space(indent) + "</" + tag + ">\n"
+
+    def get_indent_space(self, indent):
+        return (" " * (4 * indent))
+    
+
+class WeatherWebServiceMonthlyXMLFile(WeatherConvertToXML):
+    """The web service class details how to create files similar to the NOAA web service."""
+    skip_downloading = False
+    # Station data
+    def process_station_data(self, row):
+        """Adds a single station record file either from downloading the data or generating a similar record."""
+        station_id = self.get_dly_field(row, DLY_FIELD_ID)
+        download = 0
+        if self.token is not "" and not self.skip_downloading:
+            download = self.download_station_data(station_id, self.token, True)
+            if download == 0:
+                self.skip_downloading = True
+        
+        # If not downloaded, generate.
+        if download != 0:
+            return download
+        else:
+            # Information for each daily file.
+            station_xml_file = self.default_xml_web_service_start()
+            station_xml_file += self.default_xml_station_start()
+            station_xml_file += self.default_xml_web_service_station(station_id)
+            station_xml_file += self.default_xml_station_end()
+            
+            # Remove white space.
+            station_xml_file = station_xml_file.replace("\n", "");
+            station_xml_file = station_xml_file.replace(self.get_indent_space(1), "");
+
+            # Make sure the station folder is available.
+            ghcnd_xml_station_path = self.get_base_folder(station_id, "stations")
+            if not os.path.isdir(ghcnd_xml_station_path):
+                os.makedirs(ghcnd_xml_station_path)
+                    
+            # Save XML string to disk.
+            save_file_name = ghcnd_xml_station_path + station_id + ".xml"
+            save_file_name = self.save_file(save_file_name, station_xml_file)
+    
+            if save_file_name is not "":
+                if self.debug_output:
+                    print "Wrote file: " + save_file_name
+                return 1
+            else:
+                return 0
+
+    # Station data
+    def download_station_data(self, station_id, token, reset=False):
+        """Downloads the station data from the web service."""
+        import time
+        time.sleep(2)
+        # Make sure the station folder is available.
+        ghcnd_xml_station_path = self.get_base_folder(station_id, "stations")
+        if not os.path.isdir(ghcnd_xml_station_path):
+            os.makedirs(ghcnd_xml_station_path)
+                
+        # Build download URL.
+        url = "http://www.ncdc.noaa.gov/cdo-services/services/datasets/GHCND/stations/GHCND:" + station_id + ".xml?token=" + token
+        url_file = urllib.urlopen(url)
+        station_xml_file = ""
+        while (True):
+            line = url_file.readline()
+            if not line:
+                break
+            station_xml_file += line
+        
+        if station_xml_file.find("<cdoError>") != -1:
+            if self.debug_output:
+                print "Error in station download"
+            return 0
+        
+        # Save XML string to disk.
+        save_file_name = ghcnd_xml_station_path + station_id + ".xml"
+        save_file_name = self.save_file(save_file_name, station_xml_file)
+    
+        if save_file_name is not "":
+            if self.debug_output:
+                print "Wrote file: " + save_file_name
+            return 2
+        else:
+            return 0
+
+    # Sensor data
+    def process_one_month_sensor_set(self, records, page):
+        """Generates records for a station using the web service xml layout."""
+        found_data = False        
+        year = int(self.get_dly_field(records[0], DLY_FIELD_YEAR))
+        month = int(self.get_dly_field(records[0], DLY_FIELD_MONTH))
+    
+        station_id = self.get_dly_field(records[0], DLY_FIELD_ID)
+
+        # Information for each daily file.
+        count = 0
+        daily_xml_file = ""
+        
+        for day in range(1, 32):
+            try:
+                # TODO find out what is a valid python date range? 1889?
+                # Attempt to see if this is valid date.
+                report_date = date(year, month, day)
+
+                for record in records:
+                    record_xml_snip = self.default_xml_day_reading(record, report_date.day)
+                    if record_xml_snip is not "":
+                        daily_xml_file += self.default_xml_start_tag("data")
+                        daily_xml_file += self.default_xml_field_date(report_date)
+                        daily_xml_file += record_xml_snip
+                        daily_xml_file += self.default_xml_end_tag("data")
+                        found_data = True
+                        count += 1
+
+            except ValueError:
+                pass
+
+        daily_xml_file = self.default_xml_web_service_start() + self.default_xml_data_start(count) + daily_xml_file + self.default_xml_data_end()
+        daily_xml_file = daily_xml_file.replace("\n", "");
+        daily_xml_file = daily_xml_file.replace(self.get_indent_space(1), "");
+
+        if not found_data:
+            return 0
+
+        # Make sure the station folder is available.
+        ghcnd_xml_station_path = self.get_base_folder(station_id) + "/" + station_id + "/" + str(report_date.year) + "/"
+        if not os.path.isdir(ghcnd_xml_station_path):
+            os.makedirs(ghcnd_xml_station_path)
+                
+        # Save XML string to disk.
+        save_file_name = ghcnd_xml_station_path + build_sensor_save_filename(station_id, report_date, page)
+        save_file_name = self.save_file(save_file_name, daily_xml_file)
+
+        if save_file_name is not "":
+            if self.debug_output:
+                print "Wrote file: " + save_file_name
+            return 1
+        else:
+            return 0
+
+def build_base_save_folder(save_path, station_id, data_type="sensors"):
+    # Default
+    station_prefix = station_id[:3]
+    return save_path + data_type + "/" + station_prefix + "/"
+
+def build_sensor_save_filename(station_id, report_date, page):
+    # Default
+    return station_id + "_" + str(report_date.year).zfill(4) + str(report_date.month).zfill(2) + "_" + str(page) + ".xml"
+

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
new file mode 100644
index 0000000..c8b0fa5
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
@@ -0,0 +1,406 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import glob
+import os.path
+import linecache
+import distutils.core
+
+from weather_convert_to_xml import *
+from collections import OrderedDict
+
+# Weather data files created to manage the conversion process.
+# Allows partition and picking up where you left off.
+class WeatherDataFiles:
+
+    LARGE_FILE_ROOT_TAG = "root"
+
+    INDEX_DATA_FILE_NAME = 0
+    INDEX_DATA_SENSORS_STATUS = 1
+    INDEX_DATA_STATION_STATUS = 2
+    INDEX_DATA_FILE_COUNT = 3
+    INDEX_DATA_FOLDER_DATA = 4
+
+    DATA_FILE_START_INDEX = 0
+    DATA_FILE_EXTENSION = ".dly"
+    DATA_FILE_MISSING = "missing"
+    DATA_FILE_INITIAL = "initialized"
+    DATA_FILE_DOWNLOADED = "downloaded"
+    DATA_FILE_GENERATED = "generated"
+    SEPERATOR = ","
+    
+    type = "sensor"
+    data_reset = False
+    
+    def __init__(self, base_path, progress_file_name="/tmp/_weather_data.csv"):
+        self.base_path = base_path
+
+        self.progress_file_name = progress_file_name
+        
+        self.current = self.DATA_FILE_START_INDEX
+        self.progress_data = []
+
+    def get_file_list_iterator(self):
+        """Return the list of files one at a time."""
+        return glob.iglob(self.base_path + "/*" + self.DATA_FILE_EXTENSION)
+
+    # Save Functions
+    def build_progress_file(self, options, convert):
+        if not os.path.isfile(self.progress_file_name) or 'reset' in options:
+            # Build a new file.
+            file = open(self.progress_file_name, 'w')
+            contents = self.get_default_progress_file_csv()
+            file.write(contents)
+            file.close()
+        elif 'append' in options or 'recalculate' in options:
+            self.open_progress_data()
+            row_count = len(self.progress_data)
+            for row in range(0, row_count):
+                row_contents = self.progress_data[row].rsplit(self.SEPERATOR)
+                file_name = row_contents[self.INDEX_DATA_FILE_NAME]
+                if self.get_file_row(file_name) < 0 and 'append' in options: 
+                    self.progress_data.append(self.get_progress_csv_row(file_name, self.DATA_FILE_INITIAL, self.DATA_FILE_INITIAL))
+                elif 'recalculate' in options:
+                    # The folder is hard coded
+                    station_id = os.path.basename(file_name).split('.')[0]
+                    folder_name = convert.get_base_folder(station_id)
+                    if os.path.exists(folder_name):
+                        row_contents = self.progress_data[row].rsplit(self.SEPERATOR)
+                        sensor_status = row_contents[self.INDEX_DATA_SENSORS_STATUS]
+                        station_status = row_contents[self.INDEX_DATA_STATION_STATUS]
+                        file_count = self.get_file_count(folder_name)
+                        data_size = self.get_folder_size(folder_name)
+                        self.progress_data[row] = self.get_progress_csv_row(file_name, sensor_status, station_status, file_count, data_size)
+                    else:
+                        self.progress_data[row] = self.get_progress_csv_row(file_name, self.DATA_FILE_INITIAL, self.DATA_FILE_INITIAL)
+            # Save file
+            self.close_progress_data(True)
+        self.reset()
+        
+    def copy_to_n_partitions(self, save_path, partitions, base_paths, reset):
+        """Once the initial data has been generated, the data can be copied into a set number of partitions. """
+        if (len(base_paths) == 0):
+            return
+        
+        # Initialize the partition paths.
+        partition_paths = get_disk_partition_paths(0, partitions, base_paths)
+        for path in partition_paths:
+            # Make sure the xml folder is available.
+            prepare_path(path, reset)
+
+        import fnmatch
+        import os
+        
+        # copy stations and sensors into each partition
+        current_sensor_partition = 0
+        current_station_partition = 0
+        self.open_progress_data()
+        row_count = len(self.progress_data)
+        for row in range(0, row_count):
+            row_contents = self.progress_data[row].rsplit(self.SEPERATOR)
+            file_name = row_contents[self.INDEX_DATA_FILE_NAME]
+            station_id = os.path.basename(file_name).split('.')[0]
+               
+            # Copy sensor files
+            type = "sensors"
+            file_path = build_base_save_folder(save_path, station_id, type) + station_id
+            for root, dirnames, filenames in os.walk(file_path):
+                for filename in fnmatch.filter(filenames, '*.xml'):
+                    xml_path = os.path.join(root, filename)
+                    new_file_base = build_base_save_folder(partition_paths[current_sensor_partition], station_id, type) + station_id
+                    if not os.path.isdir(new_file_base):
+                        os.makedirs(new_file_base)
+                    shutil.copyfile(xml_path, new_file_base + "/" + filename)
+                    current_sensor_partition += 1
+                    if current_sensor_partition >= len(partition_paths):
+                        current_sensor_partition = 0
+            
+            # Copy station files
+            type = "stations"
+            file_path = build_base_save_folder(save_path, station_id, type) + station_id + ".xml"
+            new_file_base = build_base_save_folder(partition_paths[current_station_partition], station_id, type)
+            new_file_path = new_file_base + station_id + ".xml"
+            if os.path.isfile(file_path):
+                if not os.path.isdir(new_file_base):
+                    os.makedirs(new_file_base)
+                shutil.copyfile(file_path, new_file_path)
+            current_station_partition += 1
+            if current_station_partition >= len(partition_paths):
+                current_station_partition = 0
+
+    def build_to_n_partition_files(self, save_path, partitions, base_paths, reset):
+        """Once the initial data has been generated, the data can be divided into partitions 
+        and stored in single files.
+        """
+        if (len(base_paths) == 0):
+            return
+        
+        XML_START = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>"
+        
+        partition_paths = get_disk_partition_paths(0, partitions, base_paths)
+
+        import fnmatch
+        import os
+        
+        for path in partition_paths:
+            prepare_path(path, reset)
+
+        # Initialize the partition paths.
+        types = ["sensors", "stations"]
+        for type in types:
+            partition_files = []
+            for path in partition_paths:
+                # Make sure the xml folder is available.
+                prepare_path(path + type + "/", False)
+                partition_files.append(open(path + type + "/partition.xml", 'w'))
+                partition_files[-1].write(XML_START + "<" + self.LARGE_FILE_ROOT_TAG + ">\n")
+
+            # copy into each partition
+            current_partition = 0
+            self.open_progress_data()
+            row_count = len(self.progress_data)
+            for row in range(0, row_count):
+                row_contents = self.progress_data[row].rsplit(self.SEPERATOR)
+                file_name = row_contents[self.INDEX_DATA_FILE_NAME]
+                station_id = os.path.basename(file_name).split('.')[0]
+                
+                # Copy files
+                if type == "sensors":
+                    file_path = build_base_save_folder(save_path, station_id, type) + station_id
+                    for root, dirnames, filenames in os.walk(file_path):
+                        for filename in fnmatch.filter(filenames, '*.xml'):
+                            xml_path = os.path.join(root, filename)
+                            xml_data = file_get_contents(xml_path).replace(XML_START, "") + "\n"
+                            partition_files[current_partition].write(xml_data)
+                            current_partition += 1
+                            if current_partition >= len(partition_files):
+                                current_partition = 0
+                elif type == "stations":
+                    file_path = build_base_save_folder(save_path, station_id, type) + station_id + ".xml"
+                    xml_path = os.path.join(root, file_path)
+                    xml_data = file_get_contents(xml_path).replace(XML_START, "") + "\n"
+                    partition_files[current_partition].write(xml_data)
+                    current_partition += 1
+                    if current_partition >= len(partition_paths):
+                        current_partition = 0
+
+            for row in range(0, len(partition_paths)):
+                partition_files[row].write("</" + self.LARGE_FILE_ROOT_TAG + ">\n")
+                partition_files[row].close()
+
+    def get_file_row(self, file_name):
+        for i in range(0, len(self.progress_data)):
+            if self.progress_data[i].startswith(file_name):
+                return i
+        return -1
+        
+    def get_default_progress_file_csv(self):
+        contents = ""
+        for path in self.get_file_list_iterator():
+            file_name = os.path.basename(path)
+            contents += self.get_progress_csv_row(file_name, self.DATA_FILE_INITIAL, self.DATA_FILE_INITIAL)
+        return contents
+    
+    def print_progress_file_stats(self, convert):
+        sensor_count_missing = 0
+        sensor_count = 0
+        file_count = 0
+        data_size = 0
+        
+        sensor_count_actual = 0
+        file_count_actual = 0
+        data_size_actual = 0
+        
+        station_count_missing = 0
+        station_count_generated = 0
+        station_count_downloaded = 0
+        
+        self.open_progress_data()
+        row_count = len(self.progress_data)
+        for row in range(0, row_count):
+            row_contents = self.progress_data[row].rsplit(self.SEPERATOR)
+            if int(row_contents[self.INDEX_DATA_FILE_COUNT]) != -1 and  int(row_contents[self.INDEX_DATA_FOLDER_DATA]) != -1:
+                sensor_count += 1
+                file_count += int(row_contents[self.INDEX_DATA_FILE_COUNT])
+                data_size += int(row_contents[self.INDEX_DATA_FOLDER_DATA])
+            else:
+                sensor_count_missing += 1
+                
+            if row_contents[self.INDEX_DATA_STATION_STATUS] == "generated":
+                station_count_generated += 1
+            if row_contents[self.INDEX_DATA_STATION_STATUS] == "downloaded":
+                station_count_downloaded += 1
+            else:
+                station_count_missing += 1
+
+            file_name = row_contents[self.INDEX_DATA_FILE_NAME]
+            station_id = os.path.basename(file_name).split('.')[0]
+            folder_name = convert.get_base_folder(station_id)
+            if os.path.exists(folder_name):
+                sensor_count_actual += 1
+                file_count_actual += self.get_file_count(folder_name)
+                data_size_actual += self.get_folder_size(folder_name)
+
+
+        print "Progress File:\t" + self.progress_file_name + "\n"
+        
+        print "CSV DETAILS OF PROCESSED SENSORS"
+        print "Number of stations:\t" + "{:,}".format(sensor_count)
+        print "Number of files:\t" + "{:,}".format(file_count)
+        print "Data size:\t\t" + "{:,}".format(data_size) + " Bytes\n"
+
+        print "CSV DETAILS OF unPROCESSED SENSORS"
+        print "Number of stations:\t" + "{:,}".format(sensor_count_missing) + "\n"
+
+        print "CSV DETAILS OF PROCESSED STATIONS"
+        print "Generated:\t\t" + "{:,}".format(station_count_generated)
+        print "Downloaded:\t\t" + "{:,}".format(station_count_downloaded)
+        print "Missing:\t\t" + "{:,}".format(station_count_missing) + "\n"
+
+        print "FOLDER DETAILS"
+        print "Number of stations:\t" + "{:,}".format(sensor_count_actual)
+        print "Number of files:\t" + "{:,}".format(file_count_actual)
+        print "Data size:\t\t" + "{:,}".format(data_size_actual) + " Bytes\n"
+
+    
+    def get_progress_csv_row(self, file_name, sensors_status, station_status, file_count=-1, data_size=-1):
+        return file_name + self.SEPERATOR + sensors_status + self.SEPERATOR + station_status + self.SEPERATOR + str(file_count) + self.SEPERATOR + str(data_size) + "\n"
+    
+    def update_file_sensor_status(self, file_name, sensors_status, file_count=-1, data_size=-1):
+        for row in range(0, len(self.progress_data)):
+            if self.progress_data[row].startswith(file_name):
+                station_status = self.progress_data[row].rsplit(self.SEPERATOR)[self.INDEX_DATA_STATION_STATUS]
+                self.progress_data[row] = self.get_progress_csv_row(file_name, sensors_status, station_status, file_count, data_size)
+                break
+
+        # Save the file            
+        self.close_progress_data(True)
+
+    def update_file_station_status(self, file_name, station_status):
+        for row in range(0, len(self.progress_data)):
+            if self.progress_data[row].startswith(file_name):
+                row_contents = self.progress_data[row].rsplit(self.SEPERATOR)
+                sensors_status = row_contents[self.INDEX_DATA_SENSORS_STATUS]
+                file_count = int(row_contents[self.INDEX_DATA_FILE_COUNT])
+                data_size = int(row_contents[self.INDEX_DATA_FOLDER_DATA])
+                self.progress_data[row] = self.get_progress_csv_row(file_name, sensors_status, station_status, file_count, data_size)
+                break
+
+        # Save the file            
+        self.close_progress_data(True)
+
+    def get_file_count(self, folder_name):
+        count = 0
+        for dirpath, dirnames, filenames in os.walk(folder_name):
+            for f in filenames:
+                count += 1
+        return count
+
+    def get_folder_size(self, folder_name):
+        total_size = 0
+        for dirpath, dirnames, filenames in os.walk(folder_name):
+            for f in filenames:
+                fp = os.path.join(dirpath, f)
+                total_size += os.path.getsize(fp)
+        return total_size
+
+    def get_station_status(self, return_value):
+        if return_value == 2:
+            return self.DATA_FILE_DOWNLOADED
+        elif return_value == 1:
+            return self.DATA_FILE_GENERATED
+        return self.DATA_FILE_MISSING
+        
+    
+    def open_progress_data(self):
+        with open(self.progress_file_name, 'r') as file:
+            self.progress_data = file.readlines()
+
+    def close_progress_data(self, force=False):
+        if len(self.progress_data) > 0 or force:
+            with open(self.progress_file_name, 'w') as file:
+                file.writelines(self.progress_data)
+
+    
+    def reset(self):
+        self.close_progress_data()
+
+        self.current = self.DATA_FILE_START_INDEX
+        self.open_progress_data()
+
+    def set_type(self, type):
+        self.type = type
+
+    def set_data_reset(self, data_reset):
+        self.data_reset = data_reset
+
+
+    # Iterator Functions
+    def __iter__(self):
+        return self
+
+    def next(self):
+        columns = []
+        while True:
+            # find a row that has not been created.
+            if self.current >= len(self.progress_data):
+                raise StopIteration
+            row = self.progress_data[self.current]
+            self.current += 1
+            columns = row.rsplit(self.SEPERATOR)
+            if self.type == "sensor" and (columns[self.INDEX_DATA_SENSORS_STATUS].strip() != self.DATA_FILE_GENERATED or self.data_reset):
+                break
+            elif self.type == "station" and (columns[self.INDEX_DATA_STATION_STATUS].strip() != self.DATA_FILE_DOWNLOADED or self.data_reset):
+                break
+        return columns[self.INDEX_DATA_FILE_NAME]
+    
+    
+# Index values of each field details.
+# They address the tuples built by get_disk_partition_scheme():
+# (node id, disk index, number of virtual partitions, partition index, path).
+PARTITION_INDEX_NODE = 0
+PARTITION_INDEX_DISK = 1
+PARTITION_INDEX_VIRTUAL = 2
+PARTITION_INDEX = 3
+PARTITION_INDEX_PATH = 4
+# Column labels in the same order as the indexes above.
+PARTITION_HEADER = ("Node", "Disk", "Virtual", "Index", "Path")
+
+def get_disk_partition_paths(node_id, partitions, base_paths, key="partitions"):
+    partition_paths = []
+    for scheme in get_disk_partition_scheme(node_id, partitions, base_paths, key):
+        partition_paths.append(scheme[PARTITION_INDEX_PATH])
+    return partition_paths
+
+def get_disk_partition_scheme(node_id, virtual_disk_partitions, base_paths, key="partitions"):
+    partition_scheme = []
+    for i in range(0, virtual_disk_partitions):
+        for j in range(0, len(base_paths)):
+            new_partition_path = base_paths[j] + key + "/" + get_partition_folder(j, virtual_disk_partitions, i) + "/"
+            partition_scheme.append((node_id, j, virtual_disk_partitions, i, new_partition_path))
+    return partition_scheme
+
+def get_partition_folder(disks, partitions, index):        
+    return "d" + str(disks) + "_p" + str(partitions) + "_i" + str(index)
+
+def prepare_path(path, reset):
+    """Ensures the directory is available. If reset, then its a brand new directory."""
+    if os.path.isdir(path) and reset:
+        shutil.rmtree(path)
+                
+    if not os.path.isdir(path):
+        os.makedirs(path)
+
+def file_get_contents(filename):
+    with open(filename) as f:
+        return f.read()

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
new file mode 100644
index 0000000..fb59b50
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import sys
+import os.path
+import shutil
+import tarfile
+import urllib
+import zipfile
+
+# Custom modules.
+from weather_config_ghcnd import *
+from weather_config_mshr import *
+
+class WeatherDownloadFiles:
+
+    def __init__(self, save_path):
+        self.save_path = save_path
+        
+        if not os.path.isdir(save_path):
+            os.makedirs(save_path)
+
+
+    def download_ghcnd_files(self, reset=False):
+        """Download the complete list."""
+        for file_name in FILE_NAMES:
+            url = BASE_DOWNLOAD_URL + file_name
+            self.download_file(url, reset)
+
+    def download_mshr_files(self, reset=False):
+        for url in MSHR_URLS:
+            self.download_file(url, reset)
+
+    def download_file(self, url, reset=False):
+        """Download the file, unless it exists."""
+        file_name = self.save_path + "/" + url.split('/')[-1]
+
+        if not os.path.isfile(file_name) or reset:
+            print "Downloading: " + url
+            urllib.urlretrieve(url, file_name, report_download_status)
+            print
+
+    def unzip_ghcnd_package(self, package, reset=False):
+        """Extract the package's .tar.gz file, unless the extracted folder already exists."""
+        file_name = self.save_path + "/" + package + ".tar.gz"
+        unzipped_path = self.save_path + "/" + package
+        
+        if os.path.isdir(unzipped_path) and reset:
+            shutil.rmtree(unzipped_path)
+            
+        if not os.path.isdir(unzipped_path):
+            print "Unzipping: " + file_name
+            tar_file = tarfile.open(file_name, 'r:gz')
+            tar_file.extractall(unzipped_path)
+ 
+    def unzip_mshr_files(self, reset=False):
+        """Unzip every MSHR .zip file into the save path; the reset flag is currently unused."""
+        for url in MSHR_URLS:
+            if url.endswith('.zip'):
+                file_name = self.save_path + "/" + url.split('/')[-1]
+                print "Unzipping: " + file_name
+                with zipfile.ZipFile(file_name, 'r') as myzip:
+                    myzip.extractall(self.save_path)
+ 
+def report_download_status(count, block, size):
+    """Report download status."""
+    line_size = 50
+    erase = "\b" * line_size
+    sys.stdout.write(erase)
+    report = get_report_line((float(count) * block / size), line_size)
+    sys.stdout.write(report)
+
+def get_report_line(percentage, line_size):
+    """Creates a string to be used in reporting the percentage done."""
+    report = ""
+    for i in range(0, line_size):
+        if (float(i) / line_size < percentage):
+            report += "="
+        else:
+            report += "-"
+    return report
+            
+def download_file_save_as(url, new_file_name, reset=False):
+    """Download the file, unless it exists."""
+    if not os.path.isfile(new_file_name) or reset:
+        print "Downloading: " + url
+        urllib.urlretrieve(url, new_file_name, report_download_status)
+        print
+

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/util/README.md
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/README.md b/vxquery-benchmark/src/main/resources/util/README.md
new file mode 100644
index 0000000..8e2a204
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/util/README.md
@@ -0,0 +1,28 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+Utilities for Benchmark Operations
+=====================
+
+# Introduction
+
+Helpful scripts and configuration documents for working with the benchmarks.
+
+## Saxon Collection
+
+To test the data with other XQuery processors, the Saxon script
+(`build_saxon_collection_xml.py`) helps create a collection.xml file.
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/util/build_saxon_collection_xml.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/build_saxon_collection_xml.py b/vxquery-benchmark/src/main/resources/util/build_saxon_collection_xml.py
new file mode 100644
index 0000000..02f39ee
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/util/build_saxon_collection_xml.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import getopt, glob, os, sys
+ 
+def main(argv):
+    xml_folder = ""
+     
+    # Get the base folder
+    try:
+        opts, args = getopt.getopt(argv, "f:h", ["folder="])
+    except getopt.GetoptError:
+        print 'The file options for build_saxon_collection_xml.py were not correctly specified.'
+        print 'To see a full list of options try:'
+        print '  $ python build_saxon_collection_xml.py -h'
+        sys.exit(2)
+    for opt, arg in opts:
+        if opt == '-h':
+            print 'Options:'
+            print '    -f        The base folder to create collection XML file.'
+            sys.exit()
+        elif opt in ('-f', "--folder"):
+            # check if file exists.
+            if os.path.exists(arg):
+                xml_folder = arg
+            else:
+                print 'Error: Argument must be a folder name for --folder (-f).'
+                sys.exit()
+  
+    # Required fields to run the script.
+    if xml_folder == "" or not os.path.exists(xml_folder):
+        print 'Error: The folder path option must be supplied:  --folder (-f).'
+        sys.exit()
+      
+    # find all XML files in folder
+    collection_xml = "<collection>"
+    for i in range(1, 5):
+        # Search the ith directory level.
+        search_pattern = xml_folder + ('/*' * i) + '.xml'
+        for file_path in glob.iglob(search_pattern):
+            collection_xml += '<doc href="' + str.replace(file_path, xml_folder, '') + '"/>'
+    collection_xml += "</collection>"
+          
+    # create collection XML
+    file = open('collection.xml', 'w')
+    file.write(collection_xml)
+    file.close()
+
+if __name__ == "__main__":
+    main(sys.argv[1:])

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/util/diff_xml_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/diff_xml_files.py b/vxquery-benchmark/src/main/resources/util/diff_xml_files.py
new file mode 100644
index 0000000..8ad2e30
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/util/diff_xml_files.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import getopt, glob, os, sys
+ 
+def main(argv):
+    f1 = ""
+    f2 = ""
+     
+    # Get the two files to compare
+    try:
+        opts, args = getopt.getopt(argv, "h", ["f1=", "f2="])
+    except getopt.GetoptError:
+        print 'The file options for build_saxon_collection_xml.py were not correctly specified.'
+        print 'To see a full list of options try:'
+        print '  $ python build_saxon_collection_xml.py -h'
+        sys.exit(2)
+    for opt, arg in opts:
+        if opt == '-h':
+            print 'Options:'
+            print '    -f        The base folder to create collection XML file.'
+            sys.exit()
+        elif opt in ('--f1'):
+            # check if file exists.
+            if os.path.exists(arg):
+                f1 = arg
+            else:
+                print 'Error: Argument must be a file name for --f1.'
+                sys.exit()
+        elif opt in ('--f2'):
+            # check if file exists.
+            if os.path.exists(arg):
+                f2 = arg
+            else:
+                print 'Error: Argument must be a file name for --f2.'
+                sys.exit()
+
+    # Required fields to run the script.
+    if f1 == "" or not os.path.exists(f1):
+        print 'Error: The file path option must be supplied:  --f1.'
+        sys.exit()
+    if f2 == "" or not os.path.exists(f2):
+        print 'Error: The file path option must be supplied:  --f2.'
+        sys.exit()
+      
+    missing_in_f1 = []
+    missing_in_f2 = []
+    found_in_both = []
+    
+    with open(f1) as f:
+        content_f1 = f.readlines()
+    set_f1 = set(content_f1)
+    
+    
+    with open(f2) as f:
+        content_f2 = f.readlines()
+    set_f2 = set(content_f2)
+    
+    missing_in_f1 = set_f2.difference(set_f1)
+    missing_in_f2 = set_f1.difference(set_f2)
+    found_in_both = set_f1.intersection(set_f2)
+    
+    print ""
+    print "Missing files in " + f1
+    for f1_name in missing_in_f1:
+        print " + " + f1_name.strip()
+
+    print ""
+    print "Missing files in " + f2
+    for f2_name in missing_in_f2:
+        print " + " + f2_name.strip()
+    
+    offset = 40
+    print ""
+    print "XML Summary"
+    print (" - Found in both:").ljust(offset) + str(len(found_in_both))
+    print (" - " + f1 + " diff set vs list:").ljust(offset) + str(len(content_f1) - len(set_f1))
+    print (" - " + f2 + " diff set vs list:").ljust(offset) + str(len(content_f2) - len(set_f2))
+    print (" - " + f1 + " missing:").ljust(offset) + str(len(missing_in_f1))
+    print (" - " + f2 + " missing:").ljust(offset) + str(len(missing_in_f2))
+    
+
+if __name__ == "__main__":
+    main(sys.argv[1:])

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/util/find_averages_in_logs.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/find_averages_in_logs.py b/vxquery-benchmark/src/main/resources/util/find_averages_in_logs.py
new file mode 100644
index 0000000..1cd7939
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/util/find_averages_in_logs.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import fnmatch
+import getopt
+import glob
+import os
+import sys
+import csv
+
+SEARCH_STRING = 'Average execution time:'
+
+def find_files(directory, pattern):
+    for root, dirs, files in os.walk(directory):
+        for basename in files:
+            if fnmatch.fnmatch(basename, pattern):
+                yield (root, basename)
+    
+    
+def main(argv):
+    ''' Collect the "Average execution time:" lines from the *.log files under a folder into a CSV save file.
+    '''
+    log_folder = ""
+    save_file = ""
+    data_type = ""
+    
+    # Get the base folder
+    try:
+        opts, args = getopt.getopt(argv, "f:hs:t:", ["folder=", "save_file=", "data_type="])
+    except getopt.GetoptError:
+        print 'The file options for list_xml_files.py were not correctly specified.'
+        print 'To see a full list of options try:'
+        print '  $ python list_xml_files.py -h'
+        sys.exit(2)
+    for opt, arg in opts:
+        if opt == '-h':
+            print 'Options:'
+            print '    -f        The base folder to build XML file list.'
+            print '    -s        The save file.'
+            sys.exit()
+        elif opt in ('-f', "--folder"):
+            # check if file exists.
+            if os.path.exists(arg):
+                log_folder = arg
+            else:
+                print 'Error: Argument must be a folder name for --folder (-f).'
+                sys.exit()
+        elif opt in ('-s', "--save_file"):
+            save_file = arg
+        elif opt in ('-t', "--data_type"):
+            data_type = arg
+  
+    # Required fields to run the script.
+    if log_folder == "" or not os.path.exists(log_folder):
+        print 'Error: The folder path option must be supplied:  --folder (-f).'
+        sys.exit()
+    if save_file == "":
+        print 'Error: The folder path option must be supplied:  --save_file (-s).'
+        sys.exit()
+      
+    list_xml_csv = ''
+    with open(save_file, 'w') as outfile:
+        csvfile = csv.writer(outfile)
+        for path, filename in find_files(log_folder, '*.log'):
+            # Scan every log file found under the log folder.
+            with open(path + "/" + filename) as infile:
+                folders = path.replace(log_folder, "")
+                for line in infile:
+                    # Only process the lines that report the average execution time.
+                    if line.startswith(SEARCH_STRING):
+                        time_split = line.split(" ")
+                        name_split = filename.split(".")
+                        folder_split = folders.split("/")
+
+                        # Build data row
+                        row = folder_split
+                        row.append(name_split[0])
+                        row.append(time_split[3])
+                        row.append(name_split[2])
+                        csvfile.writerow(row)
+        
+          
+if __name__ == "__main__":
+    main(sys.argv[1:])

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/util/list_xml_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/list_xml_files.py b/vxquery-benchmark/src/main/resources/util/list_xml_files.py
new file mode 100644
index 0000000..750a95e
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/util/list_xml_files.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import fnmatch
+import getopt
+import glob
+import os
+import sys
+
+def find_files(directory, pattern):
+    for root, dirs, files in os.walk(directory):
+        for basename in files:
+            if fnmatch.fnmatch(basename, pattern):
+                yield (root, basename)
+    
+    
+def main(argv):
+    ''' Same as bash: find $FOLDER -type f -name "*.xml" -exec basename {} \; > list_xml.csv
+    '''
+    xml_folder = ""
+     
+    # Get the base folder
+    try:
+        opts, args = getopt.getopt(argv, "f:h", ["folder="])
+    except getopt.GetoptError:
+        print 'The file options for list_xml_files.py were not correctly specified.'
+        print 'To see a full list of options try:'
+        print '  $ python list_xml_files.py -h'
+        sys.exit(2)
+    for opt, arg in opts:
+        if opt == '-h':
+            print 'Options:'
+            print '    -f        The base folder to build XML file list.'
+            sys.exit()
+        elif opt in ('-f', "--folder"):
+            # check if file exists.
+            if os.path.exists(arg):
+                xml_folder = arg
+            else:
+                print 'Error: Argument must be a folder name for --folder (-f).'
+                sys.exit()
+  
+    # Required fields to run the script.
+    if xml_folder == "" or not os.path.exists(xml_folder):
+        print 'Error: The folder path option must be supplied:  --folder (-f).'
+        sys.exit()
+      
+    list_xml_csv = ''
+    for path, filename in find_files(xml_folder, '*.xml'):
+        list_xml_csv += filename + "\n"
+        #list_xml_csv += filename + "," + path + "\n"
+          
+    # create the CSV list of XML file names
+    file = open('list_xml.csv', 'w')
+    file.write(list_xml_csv)
+    file.close()
+
+if __name__ == "__main__":
+    main(sys.argv[1:])

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/util/merge_xml_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/merge_xml_files.py b/vxquery-benchmark/src/main/resources/util/merge_xml_files.py
new file mode 100644
index 0000000..2df026b
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/util/merge_xml_files.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import fnmatch
+import getopt
+import glob
+import os
+import sys
+
+XML_PREFIX = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?><root>' + "\n"
+XML_SUFFIX = '</root>' + "\n"
+
+def find_files(directory, pattern):
+    for root, dirs, files in os.walk(directory, followlinks=True):
+        for basename in files:
+            if fnmatch.fnmatch(basename, pattern):
+                yield (root, basename)
+    
+    
+def main(argv):
+    ''' Merge the *.xml files under a folder (whose path contains the data type) into one save file with a single root element.
+    '''
+    xml_folder = ""
+    save_file = ""
+    data_type = ""
+     
+    # Get the base folder
+    try:
+        opts, args = getopt.getopt(argv, "f:hs:t:", ["folder=", "save_file=", "data_type="])
+    except getopt.GetoptError:
+        print 'The file options for list_xml_files.py were not correctly specified.'
+        print 'To see a full list of options try:'
+        print '  $ python merge_xml_files.py -f /path/to/folder -s new.xml -t sensors'
+        sys.exit(2)
+    for opt, arg in opts:
+        if opt == '-h':
+            print 'Options:'
+            print '    -f        The base folder to build XML file list.'
+            print '    -s        The save file.'
+            sys.exit()
+        elif opt in ('-f', "--folder"):
+            # check if file exists.
+            if os.path.exists(arg):
+                xml_folder = arg
+            else:
+                print 'Error: Argument must be a folder name for --folder (-f).'
+                sys.exit()
+        elif opt in ('-s', "--save_file"):
+            save_file = arg
+        elif opt in ('-t', "--data_type"):
+            data_type = arg
+  
+    # Required fields to run the script.
+    if xml_folder == "" or not os.path.exists(xml_folder):
+        print 'Error: The folder path option must be supplied:  --folder (-f).'
+        sys.exit()
+    if save_file == "":
+        print 'Error: The folder path option must be supplied:  --save_file (-s).'
+        sys.exit()
+      
+    list_xml_csv = ''
+    with open(save_file, 'w') as outfile:
+        outfile.write(XML_PREFIX)
+        for path, filename in find_files(xml_folder, '*.xml'):
+            # Only write out a specific type of data xml documents found in a specific path.
+            if data_type in path:
+                with open(path + "/" + filename) as infile:
+                    for line in infile:
+                        # Skip the root tags.
+                        if line != XML_PREFIX and line != XML_SUFFIX:
+                            outfile.write(line)
+        outfile.write(XML_SUFFIX)
+          
+if __name__ == "__main__":
+    main(sys.argv[1:])

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/util/vxquery_functions.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/vxquery_functions.xq b/vxquery-benchmark/src/main/resources/util/vxquery_functions.xq
new file mode 100644
index 0000000..d0621eb
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/util/vxquery_functions.xq
@@ -0,0 +1,27 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: XQuery Function List :)
+(: VXQuery function list in csv with arguments and return types :)
+let $list := "../../../../../vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml"
+let $r :=
+    for $f in fn:doc($list)/functions/function
+        let $pl := 
+            for $p in $f/param
+            return $p/@type
+        return fn:string-join(($f/@name, fn:string-join($pl, ' '), $f/return/@type), ',')
+return fn:string-join($r , '|')
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/util/vxquery_operators.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/vxquery_operators.xq b/vxquery-benchmark/src/main/resources/util/vxquery_operators.xq
new file mode 100644
index 0000000..f485807
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/util/vxquery_operators.xq
@@ -0,0 +1,27 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: XQuery Operator List :)
+(: VXQuery operator list in csv with arguments and return types :)
+let $list := "../../../../../vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-operators.xml"
+let $r :=
+    for $f in fn:doc($list)/operators/operator
+        let $pl := 
+            for $p in $f/param
+            return $p/@type
+        return fn:string-join(($f/@name, fn:string-join($pl, ' '), $f/return/@type), ',')
+return fn:string-join($r , '|')
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java
----------------------------------------------------------------------
diff --git a/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java b/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java
index 8451bd5..c0ca612 100644
--- a/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java
+++ b/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java
@@ -375,6 +375,7 @@ public class VXQuery {
         ccConfig.clientNetPort = 39000;
         ccConfig.clusterNetIpAddress = "127.0.0.1";
         ccConfig.clusterNetPort = 39001;
+        ccConfig.httpPort = 39002;
         ccConfig.profileDumpPeriod = 10000;
         cc = new ClusterControllerService(ccConfig);
         cc.start();

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/pom.xml
----------------------------------------------------------------------
diff --git a/vxquery-server/pom.xml b/vxquery-server/pom.xml
index ef8f348..6c99712 100644
--- a/vxquery-server/pom.xml
+++ b/vxquery-server/pom.xml
@@ -47,6 +47,10 @@
             <configuration>
               <programs>
                 <program>
+                  <mainClass>org.apache.vxquery.cli.VXQueryClusterShutdown</mainClass>
+                  <name>vxqueryshutdown</name>
+                </program>
+                <program>
                   <mainClass>edu.uci.ics.hyracks.control.cc.CCDriver</mainClass>
                   <name>vxquerycc</name>
                 </program>

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/cluster_actions.py
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/scripts/cluster_actions.py b/vxquery-server/src/main/resources/scripts/cluster_actions.py
index a7cda17..deeee33 100644
--- a/vxquery-server/src/main/resources/scripts/cluster_actions.py
+++ b/vxquery-server/src/main/resources/scripts/cluster_actions.py
@@ -62,6 +62,10 @@ class ClusterActions:
         time.sleep(5)
         self.start_all_ncs()
     
+    def stop_cluster(self):
+        machine = self.ci.get_master_node_machine()
+        self.stop_cc_and_all_ncs(machine)
+    
     def stop(self):
         self.stop_all_ncs()
         time.sleep(2)
@@ -109,16 +113,22 @@ class ClusterActions:
     
     def start_cc(self, machine):
         print "Start Cluster Controller."
-        print "  " + machine.get_id() + " " + machine.get_ip() + ":" + machine.get_port()
-        command = "./vxquery-server/target/appassembler/bin/startcc.sh " + machine.get_ip() + " \"" + machine.get_port() + "\" \"" + machine.get_java_opts() + "\""
+        print "  " + machine.get_id() + " " + machine.get_client_ip() + ":" + machine.get_client_port()
+        command = "./vxquery-server/target/appassembler/bin/startcc.sh " + machine.get_client_ip() + " \"" + machine.get_client_port() + "\" \"" + machine.get_java_opts() + "\""
         self.run_remote_command(machine.get_username(), machine.get_id(), command)
     
     def start_nc(self, machine, cc):
         print "Start Node Controller."
         print "  " + machine.get_id() + " " + machine.get_ip()
-        command = "./vxquery-server/target/appassembler/bin/startnc.sh " + machine.get_id() + " " + machine.get_ip() + " " + cc.get_ip() + " \"" + cc.get_port() + "\" \"" + machine.get_java_opts() + "\""
+        command = "./vxquery-server/target/appassembler/bin/startnc.sh " + machine.get_id() + " " + machine.get_ip() + " " + cc.get_client_ip() + " \"" + cc.get_client_port() + "\" \"" + machine.get_java_opts() + "\""
         self.run_remote_command(machine.get_username(), machine.get_id(), command)
 
+    def stop_cc_and_all_ncs(self, machine):
+        print "Stop Cluster and Node Controllers."
+        print "  " + machine.get_id() + " " + machine.get_client_ip() + ":" + machine.get_client_port()
+        command = "./vxquery-server/target/appassembler/bin/stopcluster.sh " + machine.get_client_ip() + " \"" + machine.get_client_port() + "\" \"" + machine.get_java_opts() + "\""
+        self.run_remote_command(machine.get_username(), machine.get_id(), command)
+    
     def stop_cc(self, machine):
         print "Stop Cluster Controller."
         print "  " + machine.get_id() + " " + machine.get_ip()

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/cluster_cli.py
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/scripts/cluster_cli.py b/vxquery-server/src/main/resources/scripts/cluster_cli.py
index 089ad08..bd5efa6 100644
--- a/vxquery-server/src/main/resources/scripts/cluster_cli.py
+++ b/vxquery-server/src/main/resources/scripts/cluster_cli.py
@@ -39,10 +39,10 @@ def main(argv):
             sys.exit()
         elif opt in ('-a', "--action"):
             # check if file exists.
-            if arg in ('deploy', 'start', 'stop'):
+            if arg in ('deploy', 'start', 'stop', 'kill'):
                 action = arg
             else:
-                print 'Error: Argument must be a string ("deploy", "start", or "stop") for --action (-a).'
+                print 'Error: Argument must be a string ("deploy", "start", "stop", or "kill") for --action (-a).'
                 sys.exit()
         elif opt in ('-c', "--cluster"):
             # check if file exists.
@@ -72,6 +72,8 @@ def main(argv):
     if action == 'start':
         cluster.start()
     elif action == 'stop':
+        cluster.stop_cluster()
+    elif action == 'kill':
         cluster.stop()
     elif action == 'deploy':
         if deploy_path != "":

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/cluster_information.py
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/scripts/cluster_information.py b/vxquery-server/src/main/resources/scripts/cluster_information.py
index 677204b..94b231d 100644
--- a/vxquery-server/src/main/resources/scripts/cluster_information.py
+++ b/vxquery-server/src/main/resources/scripts/cluster_information.py
@@ -32,12 +32,13 @@ class ClusterInformation:
         master_node = self.config.getElementsByTagName("master_node")[0]
         id = NodeXmlReader.get_cluster_id(master_node)
         ip = NodeXmlReader.get_cluster_ip(master_node)
-        port = NodeXmlReader.get_cluster_port(master_node)
+        client_ip = NodeXmlReader.get_client_ip(master_node)
+        client_port = NodeXmlReader.get_client_port(master_node)
         java_opts = NodeXmlReader.get_java_opts(master_node)
         if java_opts is "":
             java_opts = self.get_java_opts()
         username = self.get_username()
-        return Machine(id, ip, username, port, java_opts)
+        return Machine(id, ip, username, client_ip, client_port, java_opts)
 
     def get_node_machine_list(self):
         nodes = []
@@ -48,7 +49,7 @@ class ClusterInformation:
             java_opts = NodeXmlReader.get_java_opts(node)
             if java_opts is "":
                 java_opts = self.get_java_opts()
-            nodes.append(Machine(id, ip, username, "", java_opts))
+            nodes.append(Machine(id, ip, username, "", "", java_opts))
         return nodes
 
 class NodeXmlReader(object):
@@ -64,8 +65,12 @@ class NodeXmlReader(object):
         return get_tag_text(node, "cluster_ip")
 
     @staticmethod
-    def get_cluster_port(node):
-        return get_tag_text(node, "cluster_port")
+    def get_client_ip(node):
+        return get_tag_text(node, "client_ip")
+
+    @staticmethod
+    def get_client_port(node):
+        return get_tag_text(node, "client_port")
 
     @staticmethod
     def get_java_opts(node):
@@ -90,11 +95,12 @@ class Machine:
     log_path = ""
     port = ""
     
-    def __init__(self, id, ip, username, port="", java_opts=""):
+    def __init__(self, id, ip, username, client_ip="", client_port="", java_opts=""):
         self.id = id
         self.ip = ip
         self.username = username
-        self.port = port
+        self.client_ip = client_ip
+        self.client_port = client_port
         self.java_opts = java_opts
     
     def get_id(self):
@@ -106,8 +112,11 @@ class Machine:
     def get_java_opts(self):
         return self.java_opts
     
-    def get_port(self):
-        return self.port
+    def get_client_ip(self):
+        return self.client_ip
+    
+    def get_client_port(self):
+        return self.client_port
     
     def get_username(self):
         return self.username

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/startcc.sh
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/scripts/startcc.sh b/vxquery-server/src/main/resources/scripts/startcc.sh
index 002055c..dd1e5be 100755
--- a/vxquery-server/src/main/resources/scripts/startcc.sh
+++ b/vxquery-server/src/main/resources/scripts/startcc.sh
@@ -23,7 +23,7 @@ CCHOST=$1
 CCPORT=$2
 J_OPTS=$3
 
-#Export JAVA_HOME
+# Export JAVA_HOME
 export JAVA_HOME=${JAVA_HOME}
 
 # java opts added parameters
@@ -43,8 +43,8 @@ mkdir -p ${CCLOGS_DIR}
 CC_OPTIONS=" -client-net-ip-address ${CCHOST} -cluster-net-ip-address ${CCHOST} "
 if [ ! -z "${CCPORT}" ]
 then
-    CC_OPTIONS=" ${CC_OPTIONS} -cluster-net-port ${CCPORT} "
+    CC_OPTIONS=" ${CC_OPTIONS} -client-net-port ${CCPORT} "
 fi
 
-#Launch hyracks cc script without toplogy
+# Launch hyracks cc script without topology
 ${VXQUERY_HOME}/vxquery-server/target/appassembler/bin/vxquerycc ${CC_OPTIONS} &> ${CCLOGS_DIR}/cc_$(date +%Y%m%d%H%M).log &

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/startnc.sh
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/scripts/startnc.sh b/vxquery-server/src/main/resources/scripts/startnc.sh
index c2bda3c..260512e 100755
--- a/vxquery-server/src/main/resources/scripts/startnc.sh
+++ b/vxquery-server/src/main/resources/scripts/startnc.sh
@@ -25,7 +25,7 @@ CCHOST=$3
 CCPORT=$4
 J_OPTS=$5
 
-#Set JAVA_HOME
+# Set JAVA_HOME
 export JAVA_HOME=$JAVA_HOME
 
 # java opts added parameters
@@ -49,5 +49,5 @@ then
 fi
 
 
-#Launch hyracks nc
+# Launch hyracks nc
 ${VXQUERY_HOME}/vxquery-server/target/appassembler/bin/vxquerync ${NC_OPTIONS} &> ${NCLOGS_DIR}/nc_$(date +%Y%m%d%H%M).log &

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/stopcc.sh
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/scripts/stopcc.sh b/vxquery-server/src/main/resources/scripts/stopcc.sh
index 3290ec6..f2b6883 100755
--- a/vxquery-server/src/main/resources/scripts/stopcc.sh
+++ b/vxquery-server/src/main/resources/scripts/stopcc.sh
@@ -21,8 +21,7 @@ hostname
 
 USER=$1
 
-#Kill process
-#Kill process
+# Kill process
 PID=`ps -ef|grep ${USER}|grep java|grep 'Dapp.name=vxquerycc'|awk '{print $2}'`
 
 if [ "$PID" == "" ]; then

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/stopcluster.sh
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/scripts/stopcluster.sh b/vxquery-server/src/main/resources/scripts/stopcluster.sh
new file mode 100755
index 0000000..238da7f
--- /dev/null
+++ b/vxquery-server/src/main/resources/scripts/stopcluster.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#      http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+CCHOST=$1
+CCPORT=$2
+J_OPTS=$3
+
+# Export JAVA_HOME
+export JAVA_HOME=${JAVA_HOME}
+
+# java opts added parameters
+if [ ! -z "${J_OPTS}" ]
+then
+    JAVA_OPTS="${JAVA_OPTS} ${J_OPTS}"
+    export JAVA_OPTS
+fi
+
+VXQUERY_HOME=`pwd`
+CCLOGS_DIR=${VXQUERY_HOME}/logs
+
+# logs dir
+mkdir -p ${CCLOGS_DIR}
+
+# Set up the options for the cc.
+CC_OPTIONS=" -client-net-ip-address ${CCHOST} "
+if [ ! -z "${CCPORT}" ]
+then
+    CC_OPTIONS=" ${CC_OPTIONS} -client-net-port ${CCPORT} "
+fi
+
+# Echo, then launch, the vxquery shutdown script with the cc client options
+echo "${VXQUERY_HOME}/vxquery-server/target/appassembler/bin/vxqueryshutdown ${CC_OPTIONS} &> ${CCLOGS_DIR}/shutdown_$(date +%Y%m%d%H%M).log &"
+${VXQUERY_HOME}/vxquery-server/target/appassembler/bin/vxqueryshutdown ${CC_OPTIONS} &> ${CCLOGS_DIR}/shutdown_$(date +%Y%m%d%H%M).log &

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/stopnc.sh
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/scripts/stopnc.sh b/vxquery-server/src/main/resources/scripts/stopnc.sh
index 56ffc66..8f29de5 100755
--- a/vxquery-server/src/main/resources/scripts/stopnc.sh
+++ b/vxquery-server/src/main/resources/scripts/stopnc.sh
@@ -21,7 +21,7 @@ hostname
 
 USER=$1
 
-#Kill process
+# Kill process
 PID=`ps -ef|grep ${USER}|grep java|grep 'Dapp.name=vxquerync'|awk '{print $2}'`
 
 if [ "$PID" == "" ]; then


[03/14] git commit: Remove benchmark files to allow easy copy from other branch.

Posted by pr...@apache.org.
Remove benchmark files to allow easy copy from other branch.


Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/3167366d
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/3167366d
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/3167366d

Branch: refs/heads/master
Commit: 3167366d02d6170a99d2f8b7818d07347cc32049
Parents: 7f73fe9
Author: Preston Carman <pr...@apache.org>
Authored: Mon Oct 6 14:36:53 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Mon Oct 6 14:36:53 2014 -0700

----------------------------------------------------------------------
 .../main/resources/noaa-ghcn-daily/README.md    |  40 --
 .../other_systems/mrql_gsn/q00.mrql             |  23 -
 .../other_systems/mrql_gsn/q01.mrql             |  21 -
 .../other_systems/mrql_gsn/q02.mrql             |  24 -
 .../other_systems/mrql_gsn/q03.mrql             |  22 -
 .../other_systems/mrql_gsn/q04.mrql             |  24 -
 .../other_systems/mrql_gsn/q04_sensor.mrql      |  21 -
 .../other_systems/mrql_gsn/q04_station.mrql     |  24 -
 .../other_systems/mrql_gsn/q05.mrql             |  27 -
 .../other_systems/mrql_gsn/q05_sensor.mrql      |  23 -
 .../other_systems/mrql_gsn/q05_station.mrql     |  23 -
 .../other_systems/mrql_gsn/q06.mrql             |  26 -
 .../other_systems/mrql_gsn/q06_sensor.mrql      |  23 -
 .../other_systems/mrql_gsn/q06_station.mrql     |  23 -
 .../other_systems/mrql_gsn/q07.mrql             |  26 -
 .../other_systems/mrql_gsn/q07_join_count.mrql  |  26 -
 .../other_systems/mrql_gsn/q07_tmax.mrql        |  22 -
 .../other_systems/mrql_gsn/q07_tmin.mrql        |  22 -
 .../other_systems/mrql_hcn/q00.mrql             |  23 -
 .../other_systems/mrql_hcn/q01.mrql             |  21 -
 .../other_systems/mrql_hcn/q02.mrql             |  24 -
 .../other_systems/mrql_hcn/q03.mrql             |  22 -
 .../other_systems/mrql_hcn/q04.mrql             |  24 -
 .../other_systems/mrql_hcn/q04_sensor.mrql      |  21 -
 .../other_systems/mrql_hcn/q04_station.mrql     |  24 -
 .../other_systems/mrql_hcn/q05.mrql             |  27 -
 .../other_systems/mrql_hcn/q05_sensor.mrql      |  23 -
 .../other_systems/mrql_hcn/q05_station.mrql     |  23 -
 .../other_systems/mrql_hcn/q06.mrql             |  28 -
 .../other_systems/mrql_hcn/q06_sensor.mrql      |  23 -
 .../other_systems/mrql_hcn/q06_station.mrql     |  23 -
 .../other_systems/mrql_hcn/q07.mrql             |  26 -
 .../other_systems/mrql_hcn/q07_join_count.mrql  |  26 -
 .../other_systems/mrql_hcn/q07_tmax.mrql        |  22 -
 .../other_systems/mrql_hcn/q07_tmin.mrql        |  22 -
 .../other_systems/mrql_test/q00.mrql            |  23 -
 .../other_systems/mrql_test/q01.mrql            |  21 -
 .../other_systems/mrql_test/q02.mrql            |  24 -
 .../other_systems/mrql_test/q03.mrql            |  22 -
 .../other_systems/mrql_test/q04.mrql            |  24 -
 .../other_systems/mrql_test/q05.mrql            |  27 -
 .../other_systems/mrql_test/q06.mrql            |  27 -
 .../other_systems/mrql_test/q07.mrql            |  26 -
 .../noaa-ghcn-daily/other_systems/saxon/q04.xq  |  30 -
 .../noaa-ghcn-daily/other_systems/saxon/q05.xq  |  33 --
 .../noaa-ghcn-daily/other_systems/saxon/q06.xq  |  30 -
 .../noaa-ghcn-daily/queries/no_result.xq        |  24 -
 .../resources/noaa-ghcn-daily/queries/q00.xq    |  31 --
 .../resources/noaa-ghcn-daily/queries/q01.xq    |  25 -
 .../resources/noaa-ghcn-daily/queries/q02.xq    |  30 -
 .../resources/noaa-ghcn-daily/queries/q03.xq    |  25 -
 .../resources/noaa-ghcn-daily/queries/q04.xq    |  30 -
 .../noaa-ghcn-daily/queries/q04_sensor.xq       |  27 -
 .../noaa-ghcn-daily/queries/q04_station.xq      |  25 -
 .../resources/noaa-ghcn-daily/queries/q05.xq    |  33 --
 .../noaa-ghcn-daily/queries/q05_sensor.xq       |  28 -
 .../noaa-ghcn-daily/queries/q05_station.xq      |  25 -
 .../resources/noaa-ghcn-daily/queries/q06.xq    |  30 -
 .../noaa-ghcn-daily/queries/q06_sensor.xq       |  27 -
 .../noaa-ghcn-daily/queries/q06_station.xq      |  24 -
 .../resources/noaa-ghcn-daily/queries/q07.xq    |  33 --
 .../noaa-ghcn-daily/queries/q07_tmax.xq         |  26 -
 .../noaa-ghcn-daily/queries/q07_tmin.xq         |  26 -
 .../noaa-ghcn-daily/queries/sensor_count.xq     |  24 -
 .../noaa-ghcn-daily/queries/station_count.xq    |  24 -
 .../resources/noaa-ghcn-daily/scripts/README.md |  51 --
 .../noaa-ghcn-daily/scripts/run_benchmark.sh    |  67 ---
 .../scripts/run_benchmark_cluster.sh            |  90 ---
 .../noaa-ghcn-daily/scripts/run_group_test.sh   |  51 --
 .../noaa-ghcn-daily/scripts/run_mrql_tests.sh   |  42 --
 .../scripts/weather_benchmark.py                | 377 -------------
 .../noaa-ghcn-daily/scripts/weather_cli.py      | 236 --------
 .../noaa-ghcn-daily/scripts/weather_config.py   | 134 -----
 .../scripts/weather_config_ghcnd.py             |  95 ----
 .../scripts/weather_config_mshr.py              |  78 ---
 .../scripts/weather_convert_to_xml.py           | 554 -------------------
 .../scripts/weather_data_files.py               | 416 --------------
 .../scripts/weather_download_files.py           | 102 ----
 .../src/main/resources/util/README.md           |  28 -
 .../util/build_saxon_collection_xml.py          |  63 ---
 .../resources/util/find_averages_in_logs.py     |  97 ----
 .../src/main/resources/util/log_top.sh          |  35 --
 .../src/main/resources/util/merge_xml_files.py  |  88 ---
 .../main/resources/util/vxquery_functions.xq    |  27 -
 .../main/resources/util/vxquery_operators.xq    |  27 -
 85 files changed, 4304 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/README.md
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/README.md b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/README.md
deleted file mode 100644
index 9b512dd..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/README.md
+++ /dev/null
@@ -1,40 +0,0 @@
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one or more
-  contributor license agreements.  See the NOTICE file distributed with
-  this work for additional information regarding copyright ownership.
-  The ASF licenses this file to You under the Apache License, Version 2.0
-  (the "License"); you may not use this file except in compliance with
-  the License.  You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
--->
-
-NOAA GHCN-Daily Benchmark
-=====================
-
-# Introduction
-
-The NOAA has hosted DAILY GLOBAL HISTORICAL CLIMATOLOGY NETWORK (GHCN-DAILY) 
-.dat files. Weather.gov has an RSS/XML feed that gives current weather sensor 
-readings. Using the RSS feed as a template, the GHCN-DAILY historical 
-information is used to generate past RSS feed XML documents. The process allows 
-testing on a large set of information with out having to continually monitor 
-the weather.gov site for all the weather details for years.
-
-# Detailed Description
-
-Detailed GHDN-DAILY information: 
-<http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt>
-
-# Folders
-
- * conf
- * other_systems
- * queries
- * scripts
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q00.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q00.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q00.mrql
deleted file mode 100644
index 8e83879..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q00.mrql
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-select (r)
-from r in source(xml, "sample_xml/gsn_sensors.xml", {"data"})
-where text(r.station) = "GHCND:USW00012836"
-    and toInt(substring(text(r.date), 0, 4)) >= 2003
-    and toInt(substring(text(r.date), 5, 7)) = 12
-    and toInt(substring(text(r.date), 8, 10)) = 25
-;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q01.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q01.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q01.mrql
deleted file mode 100644
index 643c47b..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q01.mrql
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-select (r)
-from r in source(xml, "sample_xml/gsn_sensors.xml", {"data"})
-where text(r.dataType) = "AWND"
-    and toFloat(text(r.value)) > 491.744
-;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q02.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q02.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q02.mrql
deleted file mode 100644
index 8dc9c4b..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q02.mrql
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-sum(
-    select (toInt(text(r.value)))
-    from r in source(xml, "sample_xml/gsn_sensors.xml", {"data"})
-    where text(r.station) = "GHCND:USW00014771"
-        and toInt(substring(text(r.date), 0, 4)) = 1999
-        and text(r.dataType) = "PRCP"
-) / 10
-;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q03.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q03.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q03.mrql
deleted file mode 100644
index f81e914..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q03.mrql
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-max(
-    select (toInt(text(r.value)))
-    from r in source(xml, "sample_xml/gsn_sensors.xml", {"data"})
-    where text(r.dataType) = "TMAX"
-) / 10
-;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04.mrql
deleted file mode 100644
index aaa8599..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04.mrql
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-select (sensors)
-from sensors in source(xml, "sample_xml/gsn_sensors.xml", {"data"}),
-    stations in source(xml, "sample_xml/gsn_stations.xml", {"station"}),
-    l in stations.locationLabels
-where text(stations.id) = text(sensors.station) 
-    and text(sensors.date) = "1976-07-04T00:00:00.000"
-    and text(l.displayName) = "Washington"
-;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04_sensor.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04_sensor.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04_sensor.mrql
deleted file mode 100644
index 69fb35a..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04_sensor.mrql
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-select (r)
-from r in source(xml, "sample_xml/gsn_sensors.xml", {"data"})
-where text(r.date) = "1976-07-04T00:00:00.000"
-  and text(r.dataType) = "TMAX"
-;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04_station.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04_station.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04_station.mrql
deleted file mode 100644
index 7c3c8bb..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04_station.mrql
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count(
-    select (r)
-    from r in source(xml, "sample_xml/gsn_sensors.xml", {"data"}),
-        t in source(xml, "sample_xml/gsn_stations.xml", {"station"}),
-        l in t.locationLabels
-    where text(l.displayName) = "WASHINGTON"
-)
-;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q05.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q05.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q05.mrql
deleted file mode 100644
index ca5a558..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q05.mrql
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-min(
-    select (toInt(text(sensors.value)))
-    from sensors in source(xml, "sample_xml/gsn_sensors.xml", {"data"}),
-        stations in source(xml, "sample_xml/gsn_stations.xml", {"station"}),
-        l in stations.locationLabels
-    where text(stations.id) = text(sensors.station) 
-        and toInt(substring(text(sensors.date), 0, 4)) = 2001
-        and text(sensors.dataType) = "TMIN"
-        and text(l.id) = "FIPS:US"
-) / 10
-;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q05_sensor.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q05_sensor.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q05_sensor.mrql
deleted file mode 100644
index 95ea398..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q05_sensor.mrql
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count(
-    select (toInt(text(r.value)))
-    from r in source(xml, "sample_xml/gsn_sensors.xml", {"data"})
-    where toInt(substring(text(r.date), 0, 4)) = 2001
-        and text(r.dataType) = "TMIN"
-)
-;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q05_station.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q05_station.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q05_station.mrql
deleted file mode 100644
index 1f41e1e..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q05_station.mrql
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count(
-    select (t)
-    from t in source(xml, "sample_xml/gsn_stations.xml", {"station"}),
-        l in t.locationLabels
-    where text(l.id) = "FIPS:US"
-)
-;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06.mrql
deleted file mode 100644
index b46049f..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06.mrql
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-select (n, d, v)
-from sensors in source(xml, "sample_xml/gsn_sensors.xml", {"data"}),
-    d in sensors.date,
-    v in sensors.value,
-    stations in source(xml, "sample_xml/gsn_stations.xml", {"station"}),
-    n in stations.displayName,
-where text(stations.id) = text(sensors.station) 
-    and toInt(substring(text(d), 0, 4)) = 2000
-    and text(sensors.dataType) = "TMAX"
-;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06_sensor.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06_sensor.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06_sensor.mrql
deleted file mode 100644
index 2b21287..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06_sensor.mrql
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count(
-    select (r.date, r.value)
-    from r in source(xml, "sample_xml/gsn_sensors.xml", {"data"})
-    where toInt(substring(text(r.date), 0, 4)) = 2000
-        and text(r.dataType) = "TMAX"
-)
-;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06_station.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06_station.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06_station.mrql
deleted file mode 100644
index fbc1ea3..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06_station.mrql
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count(
-    select (t.displayName)
-    from t in source(xml, "sample_xml/gsn_stations.xml", {"station"}),
-        l in t.locationLabels
-    where text(l.displayName) = "WASHINGTON"
-)
-;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07.mrql
deleted file mode 100644
index 5d94e6c..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07.mrql
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-avg(
-    select (toInt(text(rtmax.value))-toInt(text(rtmin.value)))
-    from rtmax in source(xml, "sample_xml/gsn_sensors.xml", {"data"}),
-        rtmin in source(xml, "sample_xml/gsn_sensors.xml", {"data"})
-    where text(rtmax.date) = text(rtmin.date)
-        and text(rtmax.station) = text(rtmin.station)
-        and text(rtmax.dataType) = "TMAX"
-        and text(rtmin.dataType) = "TMIN"
-) / 10
-;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07_join_count.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07_join_count.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07_join_count.mrql
deleted file mode 100644
index 1c3a87b..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07_join_count.mrql
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count(
-    select (toInt(text(rtmax.value))-toInt(text(rtmin.value)))
-    from rtmax in source(xml, "sample_xml/gsn_sensors.xml", {"data"}),
-        rtmin in source(xml, "sample_xml/gsn_sensors.xml", {"data"})
-    where text(rtmax.date) = text(rtmin.date)
-        and text(rtmax.station) = text(rtmin.station)
-        and text(r.dataType) = "TMAX"
-        and text(r.dataType) = "TMIN"
-)
-;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07_tmax.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07_tmax.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07_tmax.mrql
deleted file mode 100644
index 6863dc1..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07_tmax.mrql
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count(
-    select (rtmax)
-    from rtmax in source(xml, "sample_xml/gsn_sensors.xml", {"data"})
-    where text(r.dataType) = "TMAX"
-)
-;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07_tmin.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07_tmin.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07_tmin.mrql
deleted file mode 100644
index f2b98b7..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07_tmin.mrql
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count(
-    select (rtmin)
-    from rtmin in source(xml, "sample_xml/gsn_sensors.xml", {"data"})
-    where text(r.dataType) = "TMIN"
-)
-;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q00.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q00.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q00.mrql
deleted file mode 100644
index c3e9ddd..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q00.mrql
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-select (r)
-from r in source(xml, "sample_xml/hcn_sensors.xml", {"data"})
-where text(r.station) = "GHCND:USW00012836"
-    and toInt(substring(text(r.date), 0, 4)) >= 2003
-    and toInt(substring(text(r.date), 5, 7)) = 12
-    and toInt(substring(text(r.date), 8, 10)) = 25
-;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q01.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q01.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q01.mrql
deleted file mode 100644
index 206b391..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q01.mrql
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-select (r)
-from r in source(xml, "sample_xml/hcn_sensors.xml", {"data"})
-where text(r.dataType) = "AWND"
-    and toInt(text(r.value)) > 491.744
-;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q02.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q02.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q02.mrql
deleted file mode 100644
index d80a259..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q02.mrql
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-sum(
-    select (toInt(text(r.value)))
-    from r in source(xml, "sample_xml/hcn_sensors.xml", {"data"})
-    where text(r.station) = "GHCND:USW00014771"
-        and toInt(substring(text(r.date), 0, 4)) = 1999
-        and text(r.dataType) = "PRCP"
-) / 10
-;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q03.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q03.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q03.mrql
deleted file mode 100644
index 0ac697e..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q03.mrql
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-max(
-    select (toInt(text(r.value)))
-    from r in source(xml, "sample_xml/hcn_sensors.xml", {"data"})
-    where text(r.dataType) = "TMAX"
-) / 10
-;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q04.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q04.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q04.mrql
deleted file mode 100644
index 1084afb..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q04.mrql
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-select (r)
-from r in source(xml, "sample_xml/hcn_sensors.xml", {"data"}),
-    t in source(xml, "sample_xml/hcn_stations.xml", {"station"}),
-    l in t.locationLabels
-where text(t.id) = text(r.station) 
-    and text(r.date) = "1976-07-04T00:00:00.000"
-    and text(l.displayName) = "WASHINGTON"
-;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q04_sensor.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q04_sensor.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q04_sensor.mrql
deleted file mode 100644
index 7f4b065..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q04_sensor.mrql
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-select (r)
-from r in source(xml, "sample_xml/hcn_sensors.xml", {"data"})
-where text(r.date) = "1976-07-04T00:00:00.000"
-  and text(r.dataType) = "TMAX"
-;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q04_station.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q04_station.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q04_station.mrql
deleted file mode 100644
index 53013f5..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q04_station.mrql
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count(
-    select (r)
-    from r in source(xml, "sample_xml/hcn_sensors.xml", {"data"}),
-        t in source(xml, "sample_xml/hcn_stations.xml", {"station"}),
-        l in t.locationLabels
-    where text(l.displayName) = "WASHINGTON"
-)
-;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q05.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q05.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q05.mrql
deleted file mode 100644
index 6dcd0b7..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q05.mrql
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-min(
-  select (toInt(text(r.value)))
-  from r in source(xml, "sample_xml/hcn_sensors.xml", {"data"}),
-    t in source(xml, "sample_xml/hcn_stations.xml", {"station"}),
-    l in t.locationLabels
-  where text(t.id) = text(r.station) 
-    and toInt(substring(text(r.date), 0, 4)) = 2001
-    and text(r.dataType) = "TMIN"
-    and text(l.id) = "FIPS:US"
-) / 10
-;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q05_sensor.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q05_sensor.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q05_sensor.mrql
deleted file mode 100644
index c481632..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q05_sensor.mrql
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count(
-    select (toInt(text(r.value)))
-    from r in source(xml, "sample_xml/hcn_sensors.xml", {"data"})
-    where toInt(substring(text(r.date), 0, 4)) = 2001
-      and text(r.dataType) = "TMIN"
-)
-;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q05_station.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q05_station.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q05_station.mrql
deleted file mode 100644
index a040b7c..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q05_station.mrql
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count(
-    select (t)
-    from t in source(xml, "sample_xml/hcn_stations.xml", {"station"}),
-        l in t.locationLabels
-    where text(l.id) = "FIPS:US"
-)
-;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q06.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q06.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q06.mrql
deleted file mode 100644
index a68ab4a..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q06.mrql
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-select (n, d, v)
-from r in source(xml, "sample_xml/hcn_sensors.xml", {"data"}),
-    d in r.date,
-    v in r.value,
-    t in source(xml, "sample_xml/hcn_stations.xml", {"station"}),
-    n in t.displayName,
-    l in t.locationLabels
-where text(t.id) = text(r.station) 
-    and toInt(substring(text(d), 0, 4)) = 2000
-    and text(r.dataType) = "TMAX"
-    and text(l.displayName) = "WASHINGTON"
-;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q06_sensor.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q06_sensor.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q06_sensor.mrql
deleted file mode 100644
index e5e26cd..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q06_sensor.mrql
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count(
-    select (r.date, r.value)
-    from r in source(xml, "sample_xml/hcn_sensors.xml", {"data"})
-    where toInt(substring(text(r.date), 0, 4)) = 2000
-        and text(r.dataType) = "TMAX"
-)
-;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q06_station.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q06_station.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q06_station.mrql
deleted file mode 100644
index 99aaed6..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q06_station.mrql
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count(
-    select (t.displayName)
-    from t in source(xml, "sample_xml/hcn_stations.xml", {"station"}),
-        l in t.locationLabels
-    where text(l.displayName) = "WASHINGTON"
-)
-;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07.mrql
deleted file mode 100644
index e6d680c..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07.mrql
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-avg(
-    select (toInt(text(rtmax.value))-toInt(text(rtmin.value)))
-    from rtmax in source(xml, "sample_xml/hcn_sensors.xml", {"data"}),
-        rtmin in source(xml, "sample_xml/hcn_sensors.xml", {"data"})
-    where text(rtmax.date) = text(rtmin.date)
-        and text(rtmax.station) = text(rtmin.station)
-        and text(rtmax.dataType) = "TMAX"
-        and text(rtmin.dataType) = "TMIN"
-) / 10
-;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07_join_count.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07_join_count.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07_join_count.mrql
deleted file mode 100644
index 5d83f85..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07_join_count.mrql
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count( /* Counts the TMAX/TMIN pairs matched on date and station. */
-    select (toInt(text(rtmax.value))-toInt(text(rtmin.value)))
-    from rtmax in source(xml, "sample_xml/hcn_sensors.xml", {"data"}),
-        rtmin in source(xml, "sample_xml/hcn_sensors.xml", {"data"})
-    where text(rtmax.date) = text(rtmin.date)
-        and text(rtmax.station) = text(rtmin.station)
-        and text(rtmax.dataType) = "TMAX" /* filter each side of the join by its own binding */
-        and text(rtmin.dataType) = "TMIN"
-)
-;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07_tmax.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07_tmax.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07_tmax.mrql
deleted file mode 100644
index 579c855..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07_tmax.mrql
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count( /* Counts the sensor records whose dataType is TMAX. */
-    select (rtmax)
-    from rtmax in source(xml, "sample_xml/hcn_sensors.xml", {"data"})
-    where text(rtmax.dataType) = "TMAX" /* rtmax is the only variable bound in this query */
-)
-;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07_tmin.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07_tmin.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07_tmin.mrql
deleted file mode 100644
index bf5f423..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07_tmin.mrql
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count( /* Counts the sensor records whose dataType is TMIN. */
-    select (rtmin)
-    from rtmin in source(xml, "sample_xml/hcn_sensors.xml", {"data"})
-    where text(rtmin.dataType) = "TMIN" /* rtmin is the only variable bound in this query */
-)
-;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q00.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q00.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q00.mrql
deleted file mode 100644
index d4bd10b..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q00.mrql
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-select (r)
-from r in source(xml, "sample_xml/nano_sensors.xml", {"data"})
-where text(r.station) = "GHCND:AS000000003"
-    and toInt(substring(text(r.date), 0, 4)) >= 2000
-    and toInt(substring(text(r.date), 5, 7)) = 3
-    and toInt(substring(text(r.date), 8, 10)) = 3
-;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q01.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q01.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q01.mrql
deleted file mode 100644
index 8f100df..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q01.mrql
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-select (r)
-from r in source(xml, "sample_xml/nano_sensors.xml", {"data"})
-where text(r.dataType) = "AWND"
-    and toFloat(text(r.value)) > 491.744
-;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q02.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q02.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q02.mrql
deleted file mode 100644
index 3bba05f..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q02.mrql
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-sum(
-    select (toFloat(text(r.value)))
-    from r in source(xml, "sample_xml/nano_sensors.xml", {"data"})
-    where text(r.station) = "GHCND:US000000002"
-        and toInt(substring(text(r.date), 0, 4)) = 2002
-        and text(r.dataType) = "PRCP"
-) / 10
-;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q03.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q03.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q03.mrql
deleted file mode 100644
index a6f9afe..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q03.mrql
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-max(
-    select (toFloat(text(r.value)))
-    from r in source(xml, "sample_xml/nano_sensors.xml", {"data"})
-    where text(r.dataType) = "TMAX"
-) / 10
-;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q04.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q04.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q04.mrql
deleted file mode 100644
index 4d24016..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q04.mrql
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-select (sensors)
-from sensors in source(xml, "sample_xml/nano_sensors.xml", {"data"}),
-    stations in source(xml, "sample_xml/nano_stations.xml", {"station"}),
-    l in stations.locationLabels
-where text(stations.id) = text(sensors.station) 
-    and text(sensors.date) = "2002-02-02T00:00:00.000"
-    and text(l.displayName) = "State 1"
-;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q05.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q05.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q05.mrql
deleted file mode 100644
index 0fdb641..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q05.mrql
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-min(
-    select (toFloat(text(sensors.value)))
-    from sensors in source(xml, "sample_xml/nano_sensors.xml", {"data"}),
-        stations in source(xml, "sample_xml/nano_stations.xml", {"station"}),
-        l in stations.locationLabels
-    where text(stations.id) = text(sensors.station) 
-        and toInt(substring(text(sensors.date), 0, 4)) = 2001
-        and text(sensors.dataType) = "TMIN"
-        and text(l.id) = "FIPS:US"
-) / 10
-;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q06.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q06.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q06.mrql
deleted file mode 100644
index c44f70c..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q06.mrql
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-select (n, d, v)
-from sensors in source(xml, "sample_xml/nano_sensors.xml", {"data"}),
-    d in sensors.date,
-    v in sensors.value,
-    stations in source(xml, "sample_xml/nano_stations.xml", {"station"}),
-    n in stations.displayName,
-    l in stations.locationLabels
-where text(stations.id) = text(sensors.station) 
-    and toInt(substring(text(d), 0, 4)) = 2002
-    and text(sensors.dataType) = "TMAX"
-;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q07.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q07.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q07.mrql
deleted file mode 100644
index 9046181..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q07.mrql
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-avg(
-    select (toFloat(text(rtmax.value))-toFloat(text(rtmin.value)))
-    from rtmax in source(xml, "sample_xml/nano_sensors.xml", {"data"}),
-        rtmin in source(xml, "sample_xml/nano_sensors.xml", {"data"})
-    where text(rtmax.date) = text(rtmin.date)
-        and text(rtmax.station) = text(rtmin.station)
-        and text(rtmax.dataType) = "TMAX"
-        and text(rtmin.dataType) = "TMIN"
-) / 10
-;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04.xq
deleted file mode 100644
index 8f513ce..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04.xq
+++ /dev/null
@@ -1,30 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Join Query :)
-(: Find all the weather readings for Washington state for a specific day :)
-(: 1976/7/4.                                                                  :)
-let $sensor_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
-for $r in collection($sensor_collection)/root/dataCollection/data
-
-let $station_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/stations/?select=*.xml;recurse=yes"
-for $s in collection($station_collection)/root/stationCollection/station
-
-where $s/id eq $r/station 
-    and (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "WASHINGTON"))
-    and xs:dateTime(fn:data($r/date)) eq xs:dateTime("1976-07-04T00:00:00.000")
-return $r

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05.xq
deleted file mode 100644
index 5f452c0..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05.xq
+++ /dev/null
@@ -1,33 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Join Aggregate Query :)
-(: Find the lowest recorded temperature (TMIN) in the United States for       :)
-(: 2001.                                                                      :)
-fn:min(
-    let $sensor_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
-    for $r in collection($sensor_collection)/root/dataCollection/data
-        
-    let $station_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/stations/?select=*.xml;recurse=yes"
-    for $s in collection($station_collection)/root/stationCollection/station
-    
-    where $s/id eq $r/station
-        and (some $x in $s/locationLabels satisfies ($x/type eq "CNTRY" and $x/id eq "FIPS:US"))
-        and $r/dataType eq "TMIN" 
-        and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2001
-    return $r/value
-) div 10

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06.xq
deleted file mode 100644
index 2c02bc7..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06.xq
+++ /dev/null
@@ -1,30 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Join Query :)
-(: Find the highest recorded temperature (TMAX) for each station for each     :)
-(: day over the year 2000.                                                    :)
-let $sensor_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
-for $r in collection($sensor_collection)/root/dataCollection/data
-
-let $station_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/stations/?select=*.xml;recurse=yes"
-for $s in collection($station_collection)/root/stationCollection/station
-
-where $s/id eq $r/station
-    and $r/dataType eq "TMAX" 
-    and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2000
-return ($s/displayName, $r/date, $r/value)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/no_result.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/no_result.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/no_result.xq
deleted file mode 100644
index c1363e3..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/no_result.xq
+++ /dev/null
@@ -1,24 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: 
-Used to have VXQuery parse all files without producing any results.
-:)
-let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-for $r in collection($sensor_collection)/dataCollection/data
-where fn:false()
-return $r

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q00.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q00.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q00.xq
deleted file mode 100644
index 5006a21..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q00.xq
+++ /dev/null
@@ -1,31 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-(: 
-XQuery Filter Query
--------------------
-See historical data for Key West International Airport, FL (USW00012836)
-station by selecting  the weather readings for December 25 over the last 
-10 years. 
-:)
-let $collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-for $r in collection($collection)/dataCollection/data
-let $datetime := xs:dateTime(fn:data($r/date))
-where $r/station eq "GHCND:USW00012836" 
-    and fn:year-from-dateTime($datetime) ge 2003
-    and fn:month-from-dateTime($datetime) eq 12 
-    and fn:day-from-dateTime($datetime) eq 25
-return $r
\ No newline at end of file


[10/14] git commit: Getting MRQL scripts ready for prime time.

Posted by pr...@apache.org.
Getting MRQL scripts ready for prime time.


Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/eefadb25
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/eefadb25
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/eefadb25

Branch: refs/heads/master
Commit: eefadb254a1a30a191d01778fa62c2d7d9582593
Parents: 17bedfa
Author: Preston Carman <pr...@apache.org>
Authored: Wed Oct 8 15:11:40 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Wed Oct 8 15:11:40 2014 -0700

----------------------------------------------------------------------
 .../noaa-ghcn-daily/other_systems/mrql/q06.mrql |  2 +-
 .../other_systems/mrql_scripts/clear.sh         |  9 ++++
 .../mrql_scripts/run_group_test.sh              | 55 ++++++++++++++++++++
 .../mrql_scripts/run_mrql_tests.sh              |  5 +-
 .../noaa-ghcn-daily/scripts/run_mrql_tests.sh   | 42 ---------------
 5 files changed, 69 insertions(+), 44 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/vxquery/blob/eefadb25/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06.mrql
index 583a5b9..a50dfe2 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06.mrql
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06.mrql
@@ -19,7 +19,7 @@ from sensors in source(xml, args[0], {"data"}),
     d in sensors.date,
     v in sensors.value,
     stations in source(xml, args[1], {"station"}),
-    n in stations.displayName,
+    n in stations.displayName
 where text(stations.id) = text(sensors.station) 
     and toInt(substring(text(d), 0, 4)) = 2000
     and text(sensors.dataType) = "TMAX"

http://git-wip-us.apache.org/repos/asf/vxquery/blob/eefadb25/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/clear.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/clear.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/clear.sh
index da7cabe..b775de2 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/clear.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/clear.sh
@@ -16,4 +16,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+
+# Hadoop data reset
 hadoop namenode -format
+
+# Remove data
+rm -rf disk1/hadoop/data
+rm -rf disk2/hadoop/data
+rm -rf disk1/hadoop/tmp
+rm -rf disk1/hadoop/logs
+

http://git-wip-us.apache.org/repos/asf/vxquery/blob/eefadb25/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
new file mode 100755
index 0000000..c34ec95
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#      http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+NODES=2
+REPEAT=1
+
+# Start Hadoop
+sh saved/hadoop/hadoop-1.2.1/bin/start-all.sh
+
+# Prepare hadoop file system
+hadoop fs -mkdir all
+hadoop fs -mkdir all/sensors
+hadoop fs -mkdir all/stations
+
+
+# Upload test data
+n=0
+while [ ${n} -lt ${NODES} ];
+do
+    # Add each sensor block
+    cp saved/backups/mr/all_sensors_${n}.xml.gz disk1/hadoop/upload/
+    gunzip disk1/hadoop/upload/all_sensors_${n}.xml.gz
+    hadoop fs -copyFromLocal disk1/hadoop/upload/all_sensors_${n}.xml all/sensors
+    rm -f disk1/hadoop/upload/all_sensors_${n}.xml
+    
+    # Add each station block
+    cp saved/backups/mr/all_stations_${n}.xml.gz disk1/hadoop/upload/
+    gunzip disk1/hadoop/upload/all_stations_${n}.xml.gz
+    hadoop fs -copyFromLocal disk1/hadoop/upload/all_stations_${n}.xml all/stations
+    rm -f disk1/hadoop/upload/all_stations_${n}.xml
+done
+
+
+# Start test
+sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/ ${NODES} ${REPEAT}
+
+
+# Stop Hadoop
+sh saved/hadoop/hadoop-1.2.1/bin/stop-all.sh
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/eefadb25/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
index 10ab4d9..1e512e1 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
@@ -25,11 +25,14 @@ REPEAT=${3}
 DATASET="all"
 
 
+# Make log folder
+mkdir -p ~/disk1/weather_data/mrql/query_logs/${NODES}nodes/
+
 for j in $(find ${1} -name '*q??.mrql')
 do
     date
     echo "Running MRQL query: ${j}"
-    time for i in {1..${REPEAT}}; do ~/mrql/incubator-mrql/bin/mrql -dist -nodes ${NODES} ${j} ${DATASET}/sensors.xml ${DATASET}/stations.xml >> ~/disk1/weather_data/mrql/query_logs/$(basename "${j}").log 2>&1; done; 
+    time for i in {1..${REPEAT}}; do ~/mrql/incubator-mrql/bin/mrql -dist -nodes ${NODES} ${j} ${DATASET}/sensors/ ${DATASET}/stations/ >> ~/disk1/weather_data/mrql/query_logs/${NODES}nodes/$(basename "${j}").log 2>&1; done; 
 done
 
 

http://git-wip-us.apache.org/repos/asf/vxquery/blob/eefadb25/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
deleted file mode 100755
index a6788be..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#      http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-export JAVA_HOME=/home/ecarm002/java/jdk1.6.0_45
-REPEAT=${1}
-DATASET="hcn"
-
-for n in `seq 0 7`
-#for n in 0
-do
-    date
-    echo "Running q0${n} on ${DATASET} for MRQL."
-    time for i in {1..${REPEAT}}; do ~/mrql/incubator-mrql/bin/mrql -dist -nodes 5 ~/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_${DATASET}/q0${n}.mrql >> weather_data/mrql/query_logs/${DATASET}/q0${n}.mrql.log 2>&1; done; 
-done
-
-if which programname >/dev/null;
-then
-    echo "Sending out e-mail notification."
-    SUBJECT="MRQL Tests Finished (${DATASET})"
-    EMAIL="ecarm002@ucr.edu"
-    /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
-    Completed all MRQL tests on ${DATASET}.
-    EOM
-else
-    echo "No mail command to use."
-fi;
\ No newline at end of file


[05/14] copy of all changes in exrt benchmark queries that is copyright free.

Posted by pr...@apache.org.
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_sensor.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_sensor.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_sensor.xq
new file mode 100644
index 0000000..15b5160
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_sensor.xq
@@ -0,0 +1,29 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(:
+XQuery Join Query
+-------------------
+Count all the weather sensor readings on 1976-07-04.
+:)
+count(
+    let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r in collection($sensor_collection)/dataCollection/data
+    let $date := xs:date(fn:substring(xs:string(fn:data($r/date)), 0, 11))
+    where $date eq xs:date("1976-07-04")
+    return $r
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_station.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_station.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_station.xq
new file mode 100644
index 0000000..d21fe37
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_station.xq
@@ -0,0 +1,28 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(:
+XQuery Join Query
+-------------------
+Count all the weather stations for Washington state.
+:)
+count(
+    let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
+    for $s in collection($station_collection)/stationCollection/station
+    where (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "WASHINGTON"))
+    return $s
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq
new file mode 100644
index 0000000..c95f3f5
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq
@@ -0,0 +1,33 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: XQuery Join Aggregate Query :)
+(: Find the lowest recorded temperature (TMIN) in the United States for     :)
+(: 2001.                                                                      :)
+fn:min(
+    let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
+    for $s in collection($station_collection)/stationCollection/station
+    
+    let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r in collection($sensor_collection)/dataCollection/data
+    
+    where $s/id eq $r/station
+        and (some $x in $s/locationLabels satisfies ($x/type eq "CNTRY" and $x/id eq "FIPS:US"))
+        and $r/dataType eq "TMIN" 
+        and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2001
+    return $r/value
+) div 10
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_join.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_join.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_join.xq
new file mode 100644
index 0000000..76e3458
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_join.xq
@@ -0,0 +1,35 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(:
+XQuery Join Aggregate Query
+-------------------
+Count the TMIN sensor readings joined to United States stations for 2001.
+:)
+fn:count(
+    let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
+    for $s in collection($station_collection)/stationCollection/station
+    
+    let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r in collection($sensor_collection)/dataCollection/data
+    
+    where $s/id eq $r/station
+        and (some $x in $s/locationLabels satisfies ($x/type eq "CNTRY" and $x/id eq "FIPS:US"))
+        and $r/dataType eq "TMIN" 
+        and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2001
+    return $r
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_sensor.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_sensor.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_sensor.xq
new file mode 100644
index 0000000..3b1046b
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_sensor.xq
@@ -0,0 +1,31 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: 
+XQuery Join Aggregate Query
+-------------------
+Count all sensor readings for TMIN in 2001.
+:)
+count(
+    let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r in collection($sensor_collection)/dataCollection/data
+    
+    let $date := xs:date(fn:substring(xs:string(fn:data($r/date)), 0, 11))
+    where $r/dataType eq "TMIN" 
+        and fn:year-from-date($date) eq 2001
+    return $r/value
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_station.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_station.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_station.xq
new file mode 100644
index 0000000..7c2a7ef
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_station.xq
@@ -0,0 +1,28 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: 
+XQuery Join Aggregate Query
+-------------------
+Count all stations in the United States.
+:)
+count(
+    let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
+    for $s in collection($station_collection)/stationCollection/station
+    where (some $x in $s/locationLabels satisfies ($x/type eq "CNTRY" and $x/id eq "FIPS:US"))
+    return $s
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06.xq
new file mode 100644
index 0000000..5c8ed54
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06.xq
@@ -0,0 +1,30 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: XQuery Join Query :)
+(: Find the highest recorded temperature (TMAX) for each station for each     :)
+(: day over the year 2000.                                                    :)
+let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
+for $s in collection($station_collection)/stationCollection/station
+
+let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+for $r in collection($sensor_collection)/dataCollection/data
+
+where $s/id eq $r/station
+    and $r/dataType eq "TMAX" 
+    and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2000
+return ($s/displayName, $r/date, $r/value)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_join.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_join.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_join.xq
new file mode 100644
index 0000000..bad6406
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_join.xq
@@ -0,0 +1,34 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(:
+XQuery Join Query
+-------------------
+Count the TMAX sensor readings joined to their stations for the year 2000.
+:)
+fn:count(
+    let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
+    for $s in collection($station_collection)/stationCollection/station
+    
+    let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r in collection($sensor_collection)/dataCollection/data
+    
+    where $s/id eq $r/station
+        and $r/dataType eq "TMAX" 
+        and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2000
+    return $r
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_sensor.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_sensor.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_sensor.xq
new file mode 100644
index 0000000..54d81c6
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_sensor.xq
@@ -0,0 +1,29 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(:
+XQuery Join Query
+-------------------
+Count max temperature (TMAX) readings for the year 2000.
+:)
+count(
+    let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r in collection($sensor_collection)/dataCollection/data
+    where $r/dataType eq "TMAX" 
+    	and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2000
+    return $r
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_station.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_station.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_station.xq
new file mode 100644
index 0000000..c94dc78
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_station.xq
@@ -0,0 +1,27 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(:
+XQuery Join Query
+-------------------
+Count all the stations.
+:)
+count(
+    let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
+    for $s in collection($station_collection)/stationCollection/station
+    return $s
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07.xq
new file mode 100644
index 0000000..5b1f2ac
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07.xq
@@ -0,0 +1,33 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: XQuery Self Join Query :)
+(: Self join with all stations finding the difference in min and max       :)
+(: temperature and get the average.                                        :)
+fn:avg(
+    let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_min in collection($sensor_collection_min)/dataCollection/data
+    
+    let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_max in collection($sensor_collection_max)/dataCollection/data
+    
+    where $r_min/station eq $r_max/station
+        and $r_min/date eq $r_max/date
+        and $r_min/dataType eq "TMIN"
+        and $r_max/dataType eq "TMAX"
+    return $r_max/value - $r_min/value
+) div 10
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_join.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_join.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_join.xq
new file mode 100644
index 0000000..0ddada0
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_join.xq
@@ -0,0 +1,35 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(:
+XQuery Join Query
+-------------------
+Count the TMIN/TMAX record pairs that share the same station and date.
+:)
+fn:count(
+    let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_min in collection($sensor_collection_min)/dataCollection/data
+    
+    let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_max in collection($sensor_collection_max)/dataCollection/data
+    
+    where $r_min/station eq $r_max/station
+        and $r_min/date eq $r_max/date
+        and $r_min/dataType eq "TMIN"
+        and $r_max/dataType eq "TMAX"
+    return $r_max
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_tmax.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_tmax.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_tmax.xq
new file mode 100644
index 0000000..0b5511f
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_tmax.xq
@@ -0,0 +1,28 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(:
+XQuery Count Query
+-------------------
+Count all the records for TMAX.
+:)
+count(
+    let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_max in collection($sensor_collection_max)/dataCollection/data
+    where $r_max/dataType eq "TMAX"
+    return $r_max
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_tmin.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_tmin.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_tmin.xq
new file mode 100644
index 0000000..fda029a
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_tmin.xq
@@ -0,0 +1,28 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(:
+XQuery Count Query
+-------------------
+Count all the records for TMIN.
+:)
+count(
+    let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_min in collection($sensor_collection_min)/dataCollection/data
+    where $r_min/dataType eq "TMIN"
+    return $r_min
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/README.md
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/README.md b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/README.md
new file mode 100644
index 0000000..58bea51
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/README.md
@@ -0,0 +1,51 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+Weather Data Conversion To XML
+=====================
+
+# Introduction
+
+NOAA hosts the Daily Global Historical Climatology Network (GHCN-DAILY) 
+.dat files. Weather.gov has an RSS/XML feed that gives current weather sensor 
+readings. Using the RSS feed as a template, the GHCN-DAILY historical 
+information is used to generate past RSS feed XML documents. The process allows 
+testing on a large set of information without having to continually monitor 
+the weather.gov site for all the weather details for years.
+
+# Detailed Description
+
+Detailed GHCN-DAILY information: 
+<http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt>
+
+The process takes a save folder for the data. The folder contains several 
+folders:
+
+ - all_xml_files (The generated xml files for a given package)
+ - downloads (All files taken from the NOAA HTTP site)
+ - dataset-[name] (all files related to a single dataset)
+     
+     
+# Example commands
+
+Building
+
+
+Partitioning
+python weather_cli.py -x weather_example.xml
+
+Linking
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties
new file mode 100644
index 0000000..2fb0af0
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties
@@ -0,0 +1 @@
+java.util.logging.ConsoleHandler.level=OFF
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
new file mode 100755
index 0000000..88339bd
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#      http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Run all the queries and save a log. 
+# First argument: Supply the folder which houses all the queries (recursive).
+# Second argument: adds options to the VXQuery CLI.
+# Third argument (optional): only run queries whose path matches it (e.g. q03).
+# run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/
+# run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "-client-net-ip-address 169.235.27.138"
+# run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "" q03
+#
+REPEAT=5
+IGNORE=2
+FRAME_SIZE=$((8*1024))
+BUFFER_SIZE=$((32*1024*1024))
+JOIN_HASH_SIZE=-1
+
+if [ -z "${1}" ]
+then
+    echo "Please supply a directory for query files to be found."
+    exit
+fi
+
+export JAVA_OPTS="$JAVA_OPTS -server -Xmx8G -XX:+HeapDumpOnOutOfMemoryError -Djava.util.logging.config.file=./vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties"
+
+for j in $(find ${1} -name '*q??.xq')
+do
+    if [ -z "${3}" ] || [[ "${j}" =~ "${3}" ]] 
+    then
+        date
+        echo "Running query: ${j}"
+        log_file="$(basename ${j}).$(date +%Y%m%d%H%M).log"
+        log_base_path=$(dirname ${j/queries/query_logs})
+        mkdir -p ${log_base_path}
+        time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${2} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -join-hash-size ${JOIN_HASH_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
+        echo "\nBuffer Size: ${BUFFER_SIZE}" >> ${log_base_path}/${log_file}
+        echo "\nFrame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file}
+        echo "\nJoin Hash Size: ${JOIN_HASH_SIZE}" >> ${log_base_path}/${log_file}
+        fi;
+done
+
+if which programname >/dev/null;
+then
+    echo "Sending out e-mail notification."
+    SUBJECT="Benchmark Tests Finished"
+    EMAIL="ecarm002@ucr.edu"
+    /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
+    Completed all tests in folder ${1}.
+    EOM
+else
+    echo "No mail command to use."
+fi;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
new file mode 100755
index 0000000..98ab04b
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#      http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Run all the queries for one cluster size and save a log. 
+# Arguments: 1) folder which houses all the queries (recursive), 2) number of
+# nodes (start at 0), 3) options for the VXQuery CLI, 4) query filter (optional).
+#
+# run_benchmark_cluster.sh <query_folder> 2
+# run_benchmark_cluster.sh <query_folder> 2 "-client-net-ip-address 169.235.27.138"
+# run_benchmark_cluster.sh <query_folder> 2 "" q03
+#
+CLUSTER="uci"
+REPEAT=5
+FRAME_SIZE=$((8*1024))
+BUFFER_SIZE=$((32*1024*1024))
+#JOIN_HASH_SIZE=$((256*1024*1024))
+JOIN_HASH_SIZE=-1
+
+if [ -z "${1}" ]
+then
+    echo "Please supply a directory for query files to be found."
+    exit
+fi
+
+if [ -z "${2}" ]
+then
+    echo "Please the number of nodes (start at 0)."
+    exit
+fi
+
+# Run queries for the specified number of nodes.
+echo "Starting ${2} cluster nodes"
+python vxquery-server/src/main/resources/scripts/cluster_cli.py -c vxquery-server/src/main/resources/conf/${CLUSTER}/${2}nodes.xml -a start
+
+# wait for cluster to finish setting up  
+sleep 5
+
+export JAVA_OPTS="$JAVA_OPTS -server -Xmx8G -XX:+HeapDumpOnOutOfMemoryError -Djava.util.logging.config.file=./vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties"
+
+for j in $(find ${1} -name '*q??.xq')
+do
+    # Only run queries whose path contains "<node count>nodes".
+    if [[ "${j}" =~ "${2}nodes" ]]
+    then
+        # Only run for specified queries.
+        if [ -z "${4}" ] || [[ "${j}" =~ "${4}" ]]
+        then
+            date
+            echo "Running query: ${j}"
+            log_file="$(basename ${j}).$(date +%Y%m%d%H%M).log"
+            log_base_path=$(dirname ${j/queries/query_logs})
+            mkdir -p ${log_base_path}
+            time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${3} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -join-hash-size ${JOIN_HASH_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
+            echo "\nBuffer Size: ${BUFFER_SIZE}" >> ${log_base_path}/${log_file}
+            echo "\nFrame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file}
+            echo "\nJoin Hash Size: ${JOIN_HASH_SIZE}" >> ${log_base_path}/${log_file}
+        fi;
+    fi;
+done
+    
+# Stop cluster.
+python vxquery-server/src/main/resources/scripts/cluster_cli.py -c vxquery-server/src/main/resources/conf/${CLUSTER}/${2}nodes.xml -a stop
+
+if which programname >/dev/null;
+then
+    echo "Sending out e-mail notification."
+    SUBJECT="Benchmark Cluster Tests Finished"
+    EMAIL="ecarm002@ucr.edu"
+    /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
+    Completed all tests in folder ${1} for a ${2} node cluster using ${HOSTNAME}.
+    EOM
+else
+    echo "No mail command to use."
+fi;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh
new file mode 100755
index 0000000..58976b7
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#      http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+DATASET="dataset-hcn-d2"
+cluster_ip=${1}
+base_weather_folder=${2}
+
+for n in 7 6 5 3 4 2 1 0
+do
+    #for t in "batch_scale_out" "speed_up"
+    for t in "batch_scale_out"
+    #for t in "speed_up"
+    do 
+        for p in 2 
+        do 
+            for c in 4
+            do 
+                echo " ==== node ${n} test ${t} partition ${p} cores ${c} ===="
+                sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh ${base_weather_folder}/${DATASET}/queries/${t}/${n}nodes/d2_p${p}/ ${n} "-client-net-ip-address ${cluster_ip} -available-processors ${c}"
+            done
+        done
+    done
+done
+
+if which programname >/dev/null;
+then
+    echo "Sending out e-mail notification."
+    SUBJECT="Benchmark Group Tests Finished"
+    EMAIL="ecarm002@ucr.edu"
+    /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
+    Completed all tests in the predefined group for ${DATASET}.
+    EOM
+else
+    echo "No mail command to use."
+fi;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
new file mode 100755
index 0000000..a6788be
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#      http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export JAVA_HOME=/home/ecarm002/java/jdk1.6.0_45
+REPEAT=${1}
+DATASET="hcn"
+
+for n in `seq 0 7`
+#for n in 0
+do
+    date
+    echo "Running q0${n} on ${DATASET} for MRQL."
+    time for i in {1..${REPEAT}}; do ~/mrql/incubator-mrql/bin/mrql -dist -nodes 5 ~/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_${DATASET}/q0${n}.mrql >> weather_data/mrql/query_logs/${DATASET}/q0${n}.mrql.log 2>&1; done; 
+done
+
+if which programname >/dev/null;
+then
+    echo "Sending out e-mail notification."
+    SUBJECT="MRQL Tests Finished (${DATASET})"
+    EMAIL="ecarm002@ucr.edu"
+    /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
+    Completed all MRQL tests on ${DATASET}.
+    EOM
+else
+    echo "No mail command to use."
+fi;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
new file mode 100644
index 0000000..4f81f86
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
@@ -0,0 +1,377 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os.path
+import linecache
+import distutils.core
+import fileinput
+import socket
+
+from weather_config import *
+from weather_data_files import *
+
+# Weather data files created to manage the conversion process.
+# Allows partition and picking up where you left off.
+#
+# benchmark_name/
+#   data/
+#   queries/
+#   logs/
+class WeatherBenchmark:
+
+    DATA_LINKS_FOLDER = "data_links/"
+    LARGE_FILE_ROOT_TAG = WeatherDataFiles.LARGE_FILE_ROOT_TAG
+    QUERY_REPLACEMENT_KEY = "/tmp/1.0_partition_ghcnd_all_xml/"
+    QUERY_MASTER_FOLDER = "../queries/"
+    QUERY_FILE_LIST = [
+                       "q00.xq",
+                       "q01.xq",
+                       "q02.xq",
+                       "q03.xq",
+                       "q04.xq",
+                       "q05.xq",
+                       "q06.xq",
+                       "q07.xq"
+                       ] 
+    QUERY_UTILITY_LIST = [
+                          "no_result.xq",
+                          "sensor_count.xq",
+                          "station_count.xq",
+                          "q04_sensor.xq",
+                          "q04_station.xq",
+                          "q05_sensor.xq",
+                          "q05_station.xq",
+                          "q06_sensor.xq",
+                          "q06_station.xq",
+                          "q07_tmin.xq",
+                          "q07_tmax.xq",
+                          ] 
+    BENCHMARK_LOCAL_TESTS = ["local_speed_up", "local_batch_scale_out"] 
+    BENCHMARK_CLUSTER_TESTS = ["speed_up", "batch_scale_out"] 
+    QUERY_COLLECTIONS = ["sensors", "stations"]
+
+    SEPERATOR = "|"
+    
+    def __init__(self, base_paths, partitions, dataset, nodes):
+        self.base_paths = base_paths
+        self.partitions = partitions
+        self.dataset = dataset
+        self.nodes = nodes
+        
+    def print_partition_scheme(self):
+        if (len(self.base_paths) == 0):
+            return
+        for test in self.dataset.get_tests():
+            if test in self.BENCHMARK_LOCAL_TESTS:
+                self.print_local_partition_schemes(test)
+            elif test in self.BENCHMARK_CLUSTER_TESTS:
+                self.print_cluster_partition_schemes(test)
+            else:
+                print "Unknown test."
+                exit()
+            
+    def print_local_partition_schemes(self, test):
+        node_index = 0
+        virtual_disk_partitions = get_local_virtual_disk_partitions(self.partitions)
+        for p in self.partitions:
+            scheme = self.get_local_partition_scheme(test, p)
+            self.print_partition_schemes(virtual_disk_partitions, scheme, test, p, node_index)
+        
+    def print_cluster_partition_schemes(self, test):
+        node_index = self.get_current_node_index()
+        virtual_disk_partitions = get_cluster_virtual_disk_partitions(self.nodes, self.partitions)
+        for p in self.partitions:
+            scheme = self.get_cluster_partition_scheme(test, p)
+            self.print_partition_schemes(virtual_disk_partitions, scheme, test, p, node_index)
+        
+    def print_partition_schemes(self, virtual_partitions, scheme, test, partitions, node_id):
+        print
+        print "---------------- Partition Scheme --------------------"
+        print "    Test: " + test
+        print "    Virtual Partitions: " + str(virtual_partitions)
+        print "    Disks: " + str(len(self.base_paths))
+        print "    Partitions: " + str(partitions)
+        print "    Node Id: " + str(node_id)
+        
+        if isinstance(scheme, (tuple, list, dict, set)) and len(scheme) > 0:
+            folder_length = len(scheme[0][3]) + 5
+            row_format = "{:>5} {:>5} {:>5} {:<" + str(folder_length) + "} {:<" + str(folder_length) + "}"
+            HEADER = ("Disk", "Index", "Link", "Data Path", "Link Path")
+            print row_format.format(*HEADER)
+            for row in scheme:
+                print row_format.format(*row)
+            print
+        else:
+            print "    Scheme is EMPTY."
+
+    def get_local_partition_scheme(self, test, partition):
+        scheme = []
+        virtual_disk_partitions = get_local_virtual_disk_partitions(self.partitions)
+        data_schemes = get_disk_partition_scheme(0, virtual_disk_partitions, self.base_paths)
+        link_base_schemes = get_disk_partition_scheme(0, partition, self.base_paths, self.DATA_LINKS_FOLDER + test)
+
+        # Match link paths to real data paths.
+        group_size = len(data_schemes) / len(link_base_schemes)
+        for d in range(len(self.base_paths)):
+            offset = 0
+            for link_node, link_disk, link_virtual, link_index, link_path in link_base_schemes:
+                if d == link_disk:
+                    # Only consider a single disk at a time.
+                    for data_node, data_disk, data_virtual, data_index, data_path in data_schemes:
+                        if test == "local_speed_up" and data_disk == link_disk \
+                                and offset <= data_index and data_index < offset + group_size:
+                            scheme.append([data_disk, data_index, link_index, data_path, link_path])
+                        elif test == "local_batch_scale_out" and data_disk == link_disk \
+                                and data_index == link_index:
+                            scheme.append([data_disk, data_index, link_index, data_path, link_path])
+                    offset += group_size
+        return scheme
+    
+    def get_cluster_partition_scheme(self, test, partition):
+        node_index = self.get_current_node_index()
+        if node_index == -1:
+            print "Unknown host."
+            return 
+        
+        scheme = []
+        virtual_disk_partitions = get_cluster_virtual_disk_partitions(self.nodes, self.partitions)
+        data_schemes = get_disk_partition_scheme(node_index, virtual_disk_partitions, self.base_paths)
+        link_base_schemes = get_cluster_link_scheme(len(self.nodes), partition, self.base_paths, self.DATA_LINKS_FOLDER + test)
+
+        # Match link paths to real data paths.
+        for link_node, link_disk, link_virtual, link_index, link_path in link_base_schemes:
+            # Prep
+            if test == "speed_up":
+                group_size = virtual_disk_partitions / (link_node + 1) / partition
+            elif test == "batch_scale_out":
+                group_size = virtual_disk_partitions / len(self.nodes) / partition
+            else:
+                print "Unknown test."
+                return
+            
+            node_offset = group_size * node_index * partition
+            node_offset += group_size * link_index
+            has_data = True
+            if link_node < node_index:
+                has_data = False
+    
+            # Make links
+            for date_node, data_disk, data_virtual, data_index, data_path in data_schemes:
+                if has_data and data_disk == link_disk \
+                        and node_offset <= data_index and data_index < node_offset + group_size:
+                    scheme.append([link_disk, data_index, link_index, data_path, link_path])
+            scheme.append([link_disk, -1, link_index, "", link_path])
+        return scheme
+    
+    def build_data_links(self, reset):
+        if (len(self.base_paths) == 0):
+            return
+        if reset:
+            shutil.rmtree(self.base_paths[0] + self.DATA_LINKS_FOLDER)
+        for test in self.dataset.get_tests():
+            if test in self.BENCHMARK_LOCAL_TESTS:
+                for i in self.partitions:
+                    scheme = self.get_local_partition_scheme(test, i)
+                    self.build_data_links_scheme(scheme)
+                if 1 in self.partitions and len(self.base_paths) > 1:
+                    scheme = self.build_data_links_local_zero_partition(test)
+                    self.build_data_links_scheme(scheme)
+            elif test in self.BENCHMARK_CLUSTER_TESTS:
+                for i in self.partitions:
+                    scheme = self.get_cluster_partition_scheme(test, i)
+                    self.build_data_links_scheme(scheme)
+                if 1 in self.partitions and len(self.base_paths) > 1:
+                    scheme = self.build_data_links_cluster_zero_partition(test)
+                    self.build_data_links_scheme(scheme)
+            else:
+                print "Unknown test."
+                exit()
+    
+    def build_data_links_scheme(self, scheme):
+        '''Build all the data links based on the scheme information.'''
+        for (data_disk, data_index, partition, data_path, link_path) in scheme:
+            self.add_collection_links_for(data_path, link_path, data_index)
+    
+    def build_data_links_cluster_zero_partition(self, test):
+        '''Build a scheme for all data in one symbolically linked folder. (0 partition)'''
+        scheme = []
+        link_base_schemes = get_cluster_link_scheme(len(self.nodes), 1, self.base_paths, self.DATA_LINKS_FOLDER + test)
+        for link_node, link_disk, link_virtual, link_index, link_path in link_base_schemes:
+            new_link_path = self.get_zero_partition_path(link_node, self.DATA_LINKS_FOLDER + test + "/" + str(link_node) + "nodes")
+            scheme.append([0, link_disk, 0, link_path, new_link_path])
+        return scheme
+
+    def build_data_links_local_zero_partition(self, test):
+        '''Build a scheme for all data in one symbolically linked folder. (0 partition)'''
+        scheme = []
+        index = 0
+        link_base_schemes = get_disk_partition_scheme(0, 1, self.base_paths, self.DATA_LINKS_FOLDER + test)
+        for link_node, link_disk, link_virtual, link_index, link_path in link_base_schemes:
+            if test == "local_batch_scale_out" and index > 0:
+                continue
+            new_link_path = self.get_zero_partition_path(link_node, self.DATA_LINKS_FOLDER + test)
+            scheme.append([0, index, 0, link_path, new_link_path])
+            index += 1
+        return scheme
+
+    def get_zero_partition_path(self, node, key):
+        '''Return a partition path for the zero partition.'''
+        base_path = self.base_paths[0]
+        new_link_path = get_disk_partition_scheme(node, 1, [base_path], key)[0][PARTITION_INDEX_PATH]
+        return new_link_path.replace("p1", "p0")
+        
+    def get_current_node_index(self):
+        '''Return the index in self.nodes of the first machine whose node name
+        is a prefix of this host's name, or -1 when no machine matches.'''
+        for position, machine in enumerate(self.nodes):
+            if socket.gethostname().startswith(machine.get_node_name()):
+                return position
+        return -1
+    
+    def add_collection_links_for(self, real_path, link_path, index):
+        '''Create the collection folders under link_path and, for a
+        non-negative index, (re)create the "index<N>" symbolic link in each
+        folder pointing at the matching collection under real_path.'''
+        for collection in self.QUERY_COLLECTIONS:
+            collection_path = link_path + collection + "/"
+            if not os.path.isdir(collection_path):
+                os.makedirs(collection_path)
+            if index < 0:
+                # Negative index: only the folder is wanted.
+                continue
+            collection_index = collection_path + "index" + str(index)
+            # Drop a stale link so symlink() does not fail on an existing name.
+            if os.path.islink(collection_index):
+                os.unlink(collection_index)
+            os.symlink(real_path + collection + "/", collection_index)
+            
+    def copy_query_files(self, reset):
+        '''Copy the query files for every test of the dataset, using the
+        local or cluster copier that matches the test. An unknown test prints
+        a message and exits.'''
+        for test in self.dataset.get_tests():
+            if test in self.BENCHMARK_LOCAL_TESTS:
+                copy_files = self.copy_local_query_files
+            elif test in self.BENCHMARK_CLUSTER_TESTS:
+                copy_files = self.copy_cluster_query_files
+            else:
+                print "Unknown test."
+                exit()
+            copy_files(test, reset)
+            
+    def copy_cluster_query_files(self, test, reset):
+        '''Prepare the query folders of a cluster test and copy the queries.
+
+        For every node index and every configured partition count the query
+        folder is prepared and the query files are copied with the collection
+        location set to the data link paths of that node. When partition count
+        1 is configured and more than one base path exists, the zero partition
+        folders of all nodes are written first.
+        '''
+        node_ids = range(len(self.nodes))
+        links_root = self.DATA_LINKS_FOLDER + test + "/"
+
+        if len(self.base_paths) > 1 and 1 in self.partitions:
+            for n in node_ids:
+                zero_query_path = get_cluster_query_path(self.base_paths, test, 0, n)
+                prepare_path(zero_query_path, reset)
+
+                # Copy query files.
+                zero_link_path = self.get_zero_partition_path(n, links_root + str(n) + "nodes")
+                self.copy_and_replace_query(zero_query_path, [zero_link_path])
+
+        for n in node_ids:
+            node_key = links_root + str(n) + "nodes"
+            for p in self.partitions:
+                query_path = get_cluster_query_path(self.base_paths, test, p, n)
+                prepare_path(query_path, reset)
+
+                # Copy query files.
+                self.copy_and_replace_query(query_path, get_disk_partition_paths(n, p, self.base_paths, node_key))
+
+    def copy_local_query_files(self, test, reset):
+        '''Prepare the query folders of a local test and copy the queries.
+
+        For every configured partition count the query folder is prepared and
+        the query files are copied with the collection location set to the
+        data link paths. When partition count 1 is configured and more than
+        one base path exists, the zero partition folder is written first.
+        '''
+        links_key = self.DATA_LINKS_FOLDER + test
+
+        if len(self.base_paths) > 1 and 1 in self.partitions:
+            zero_query_path = get_local_query_path(self.base_paths, test, 0)
+            prepare_path(zero_query_path, reset)
+
+            # Copy query files.
+            zero_link_path = self.get_zero_partition_path(0, links_key)
+            self.copy_and_replace_query(zero_query_path, [zero_link_path])
+
+        for p in self.partitions:
+            query_path = get_local_query_path(self.base_paths, test, p)
+            prepare_path(query_path, reset)
+
+            # Copy query files.
+            self.copy_and_replace_query(query_path, get_disk_partition_paths(0, p, self.base_paths, links_key))
+
+    def copy_and_replace_query(self, query_path, replacement_list):
+        '''Copy every query and utility file from the master folder into
+        query_path and rewrite the copies in place.
+
+        For each collection the replacement key followed by the collection
+        name is replaced by the entries of replacement_list, each suffixed
+        with the collection name and joined with the separator. For the
+        "large_files" partition type the station and data collection steps
+        are additionally prefixed with the large file root tag.
+        '''
+        for query_file in self.QUERY_FILE_LIST + self.QUERY_UTILITY_LIST:
+            target = query_path + query_file
+            shutil.copyfile(self.QUERY_MASTER_FOLDER + query_file, target)
+
+            # Make a search replace for each collection.
+            for collection in self.QUERY_COLLECTIONS:
+                replace_string = self.SEPERATOR.join([replace + collection for replace in replacement_list])
+                # In-place mode: whatever is written to stdout becomes the file.
+                for line in fileinput.input(target, True):
+                    sys.stdout.write(line.replace(self.QUERY_REPLACEMENT_KEY + collection, replace_string))
+
+            # Make a search replace for partition type.
+            if self.dataset.get_partition_type() != "large_files":
+                continue
+            for step in ("/stationCollection", "/dataCollection"):
+                for line in fileinput.input(target, True):
+                    sys.stdout.write(line.replace(step, "/" + self.LARGE_FILE_ROOT_TAG + step))
+                    
+    def get_number_of_slices_per_disk(self):
+        '''Return the virtual disk partition count for the first dataset test.
+
+        Only the first test is inspected: a local test uses the partition list
+        alone, a cluster test also uses the nodes. Without any test a message
+        is printed and None is returned; an unknown test prints a message and
+        exits.
+        '''
+        tests = self.dataset.get_tests()
+        if len(tests) == 0:
+            print "No test has been defined in config file."
+            return None
+
+        first_test = tests[0]
+        if first_test in self.BENCHMARK_LOCAL_TESTS:
+            return get_local_virtual_disk_partitions(self.partitions)
+        if first_test in self.BENCHMARK_CLUSTER_TESTS:
+            return get_cluster_virtual_disk_partitions(self.nodes, self.partitions)
+        print "Unknown test."
+        exit()
+
+def get_cluster_link_scheme(nodes, partition, base_paths, key="partitions"):
+    '''Return the concatenated disk partition schemes of node 0..nodes-1,
+    each built under the key "<key>/<node>nodes".'''
+    scheme = []
+    for node in range(nodes):
+        node_key = key + "/" + str(node) + "nodes"
+        scheme += get_disk_partition_scheme(node, partition, base_paths, node_key)
+    return scheme
+
+def get_local_query_path(base_paths, test, partition):
+    '''Return "<first base path>queries/<test>/<query folder>/" for a local test.'''
+    folder = get_local_query_folder(len(base_paths), partition)
+    return "".join([base_paths[0], "queries/", test, "/", folder, "/"])
+
+def get_local_query_folder(disks, partitions):
+    '''Return the query folder name "d<disks>_p<partitions>".'''
+    return "".join(["d", str(disks), "_p", str(partitions)])
+
+def get_cluster_query_path(base_paths, test, partition, nodes):
+    '''Return "<first base path>queries/<test>/<nodes>nodes/<query folder>/"
+    for a cluster test.'''
+    folder = get_local_query_folder(len(base_paths), partition)
+    return "".join([base_paths[0], "queries/", test, "/", str(nodes), "nodes/", folder, "/"])
+
+def get_cluster_virtual_disk_partitions(nodes, partitions):
+    '''Return the local virtual partition count multiplied by the value
+    calculate_partitions gives for the node counts 1..len(nodes).'''
+    node_counts = range(1, len(nodes) + 1, 1)
+    per_disk = get_local_virtual_disk_partitions(partitions)
+    return per_disk * calculate_partitions(node_counts)
+
+def get_local_virtual_disk_partitions(partitions):
+    '''Return the value calculate_partitions gives for the partition counts.'''
+    virtual_partitions = calculate_partitions(partitions)
+    return virtual_partitions
+
+def calculate_partitions(list):
+    '''Return a number that every value in list divides evenly.
+
+    Starting from 1, a value that already divides the running result is
+    skipped, a value that is a multiple of the result replaces it, and any
+    other value is multiplied in. The result is a common multiple but not
+    always the least one (for example [4, 6] gives 24). An empty list gives 1.
+    '''
+    multiple = 1
+    for count in list:
+        if multiple % count == 0:
+            continue
+        multiple = count if count % multiple == 0 else multiple * count
+    return multiple

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
new file mode 100644
index 0000000..eeae25c
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
@@ -0,0 +1,236 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import sys, getopt
+
+# Custom modules.
+from weather_data_files import *
+from weather_download_files import *
+from weather_convert_to_xml import *
+from weather_config import *
+from weather_benchmark import *
+
+DEBUG_OUTPUT = False
+
+#
+# Weather conversion for GHCN-DAILY files to xml.
+#
+# http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt
+#
+def main(argv):
+    '''Run the weather benchmark build steps selected on the command line.
+
+    argv is the argument list without the program name. The locality option
+    (-l) selects a single section to process; the default "all" runs the
+    sections in order. The XML config option (-x) is required and the save
+    path named in the config must exist. Invalid or missing options print a
+    message and exit.
+    '''
+    append = False
+    max_records = 0
+    process_file_name = ""
+    reset = False
+    section = "all"
+    token = ""
+    update = False
+    xml_config_path = ""
+
+    try:
+        # "append" is registered so the --append form handled below is accepted.
+        opts, args = getopt.getopt(argv, "af:hl:m:ruvw:x:", ["append", "file=", "locality=", "max_station_files=", "web_service=", "xml_config="])
+    except getopt.GetoptError:
+        print 'The file options for weather_cli.py were not correctly specified.'
+        print 'To see a full list of options try:'
+        print '  $ python weather_cli.py -h'
+        sys.exit(2)
+    for opt, arg in opts:
+        if opt == '-h':
+            print 'Converting weather daily files to xml options:'
+            print '    -a        Append the results to the progress file.'
+            print '    -f (str)  The file name of a specific station to process.'
+            print '              * Helpful when testing a single stations XML file output.'
+            print '    -l (str)  Select the locality of the scripts execution (download, progress_file, sensor_build, station_build, partition, partition_scheme, test_links, queries, inventory, statistics).'
+            print '    -m (int)  Limits the number of files created for each station.'
+            print '              * Helpful when testing to make sure all elements are supported for each station.'
+            print '              Alternate form: --max_station_files=(int)'
+            print '    -r        Reset the build process. (For one section or all sections depending on other parameters.)'
+            print '    -u        Recalculate the file count and data size for each data source file.'
+            print '    -v        Extra debug information.'
+            print '    -w (str)  Downloads the station XML file form the web service.'
+            print '    -x (str)  XML config file for weather data.'
+            sys.exit()
+        elif opt in ('-a', "--append"):
+            append = True
+        elif opt in ('-f', "--file"):
+            # check if file exists.
+            if os.path.exists(arg):
+                process_file_name = arg
+            else:
+                print 'Error: Argument must be a file name for --file (-f).'
+                sys.exit()
+        elif opt in ('-l', "--locality"):
+            if arg in ("download", "progress_file", "sensor_build", "station_build", "partition", "partition_scheme", "test_links", "queries", "inventory", "statistics"):
+                section = arg
+            else:
+                print 'Error: Argument must be a string for --locality (-l) and a valid locality.'
+                sys.exit()
+        elif opt in ('-m', "--max_station_files"):
+            if arg.isdigit():
+                max_records = int(arg)
+            else:
+                print 'Error: Argument must be an integer for --max_station_files (-m).'
+                sys.exit()
+        elif opt == '-r':
+            reset = True
+        elif opt == '-u':
+            update = True
+        elif opt == '-v':
+            global DEBUG_OUTPUT
+            DEBUG_OUTPUT = True
+        elif opt in ('-w', "--web_service"):
+            # check that a token was supplied (compare by value, not identity).
+            if arg != "":
+                token = arg
+            else:
+                print 'Error: Argument must be a string --web_service (-w).'
+                sys.exit()
+        elif opt in ('-x', "--xml_config"):
+            # check if file exists.
+            if os.path.exists(arg):
+                xml_config_path = arg
+            else:
+                print 'Error: Argument must be a xml file for --xml_config (-x).'
+                sys.exit()
+
+    # Required fields to run the script.
+    if xml_config_path == "" or not os.path.exists(xml_config_path):
+        print 'Error: The xml config option must be supplied: --xml_config (-x).'
+        sys.exit()
+    config = WeatherConfig(xml_config_path)
+
+    # Required fields to run the script.
+    if config.get_save_path() == "" or not os.path.exists(config.get_save_path()):
+        print 'Error: The save directory option must be supplied in the config file.'
+        sys.exit()
+
+    # Set up downloads folder.
+    download_path = config.get_save_path() + "/downloads"
+    if section in ("all", "download"):
+        print 'Processing the download section.'
+        download = WeatherDownloadFiles(download_path)
+        download.download_ghcnd_files(reset)
+        download.download_mshr_files(reset)
+
+        # Unzip the required file.
+        download.unzip_ghcnd_package(config.get_package(), reset)
+        download.unzip_mshr_files(reset)
+
+
+    # Create some basic paths for save files and references.
+    ghcnd_data_dly_path = download_path + '/' + config.get_package() + '/' + config.get_package()
+    xml_data_save_path = config.get_save_path() + '/all_xml_files/'
+
+    # Make sure the xml folder is available.
+    if not os.path.isdir(xml_data_save_path):
+        os.makedirs(xml_data_save_path)
+
+    # Set up the XML build objects.
+    convert = WeatherWebServiceMonthlyXMLFile(download_path, xml_data_save_path, DEBUG_OUTPUT)
+    progress_file = xml_data_save_path + "_data_progress.csv"
+    data = WeatherDataFiles(ghcnd_data_dly_path, progress_file)
+    if section in ("all", "progress_file"):
+        print 'Processing the progress_file section.'
+        options = list()
+        if append:
+            options.append('append')
+        if update:
+            options.append('recalculate')
+        if reset:
+            options.append('reset')
+        data.build_progress_file(options, convert)
+
+    if section in ("all", "sensor_build"):
+        print 'Processing the sensor_build section.'
+        if process_file_name != "":
+            # process a single file
+            if os.path.exists(process_file_name):
+                (file_count, data_size) = convert.process_sensor_file(process_file_name, max_records, 4)
+                data.update_file_sensor_status(process_file_name, WeatherDataFiles.DATA_FILE_GENERATED, file_count, data_size)
+            else:
+                data.update_file_sensor_status(process_file_name, WeatherDataFiles.DATA_FILE_MISSING)
+        else:
+            # process directory
+            data.reset()
+            data.set_type("sensor")
+            data.set_data_reset(reset)
+            for file_name in data:
+                file_path = ghcnd_data_dly_path + '/' + file_name
+                if os.path.exists(file_path):
+                    (file_count, data_size) = convert.process_sensor_file(file_path, max_records, 4)
+                    data.update_file_sensor_status(file_name, WeatherDataFiles.DATA_FILE_GENERATED, file_count, data_size)
+                else:
+                    data.update_file_sensor_status(file_name, WeatherDataFiles.DATA_FILE_MISSING)
+
+    if section in ("all", "station_build"):
+        print 'Processing the station_build section.'
+        data.reset()
+        data.set_type("station")
+        data.set_data_reset(reset)
+        if token != "":
+            convert.set_token(token)
+        for file_name in data:
+            file_path = ghcnd_data_dly_path + '/' + file_name
+            if os.path.exists(file_path):
+                return_status = convert.process_station_file(file_path)
+                status = data.get_station_status(return_status)
+                data.update_file_station_status(file_name, status)
+            else:
+                data.update_file_station_status(file_name, WeatherDataFiles.DATA_FILE_MISSING)
+
+    for dataset in config.get_dataset_list():
+        # Set up the setting for each dataset.
+        dataset_folder = "/dataset-" + dataset.get_name()
+        progress_file = config.get_save_path() + dataset_folder + "/_data_progress.csv"
+        data = WeatherDataFiles(ghcnd_data_dly_path, progress_file)
+
+        base_paths = []
+        for paths in dataset.get_save_paths():
+            base_paths.append(paths + dataset_folder + "/")
+        benchmark = WeatherBenchmark(base_paths, dataset.get_partitions(), dataset, config.get_node_machine_list())
+
+        if section in ("all", "partition", "partition_scheme"):
+            slices = benchmark.get_number_of_slices_per_disk()
+            print 'Processing the partition section (' + dataset.get_name() + ':d' + str(len(base_paths)) + ':s' + str(slices) + ').'
+            data.reset()
+            if section == "partition_scheme":
+                benchmark.print_partition_scheme()
+            else:
+                if dataset.get_partition_type() == "large_files":
+                    data.build_to_n_partition_files(xml_data_save_path, slices, base_paths, reset)
+                else:
+                    data.copy_to_n_partitions(xml_data_save_path, slices, base_paths, reset)
+
+        if section in ("all", "test_links"):
+            # TODO determine current node 
+            print 'Processing the test links section (' + dataset.get_name() + ').'
+            benchmark.print_partition_scheme()
+            benchmark.build_data_links(reset)
+
+        if section in ("all", "queries"):
+            print 'Processing the queries section (' + dataset.get_name() + ').'
+            benchmark.copy_query_files(reset)
+
+    # Plain equality: ("inventory") is a string, so "in" was a substring test.
+    if section == "inventory":
+        print 'Processing the inventory section.'
+        convert.process_inventory_file()
+                  
+#     if section in ("statistics"):
+#         print 'Processing the statistics section.'
+#         data.print_progress_file_stats(convert)
+                  
+# Script entry point: pass the command line arguments without the program name.
+if __name__ == "__main__":
+    main(sys.argv[1:])

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py
new file mode 100644
index 0000000..80607b8
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py
@@ -0,0 +1,134 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from xml.dom.minidom import parse
+
+class WeatherConfig:
+    '''Accessors for the values stored in the weather XML config file.'''
+
+    def __init__(self, config_xml_file):
+        # The file is parsed once; every getter reads from this DOM.
+        self.config_xml_file = config_xml_file
+        self.config = parse(self.config_xml_file)
+
+    def _first_text(self, parent, tag):
+        '''Return the text of the first tag element found below parent.'''
+        return self.get_text(parent.getElementsByTagName(tag)[0])
+
+    def _all_text(self, parent, tag):
+        '''Return the text of every tag element found below parent.'''
+        return [self.get_text(item) for item in parent.getElementsByTagName(tag)]
+
+    def get_save_path(self):
+        '''Return the text of the first save_path element in the document.'''
+        return self._first_text(self.config, "save_path")
+
+    def get_package(self):
+        '''Return the text of the first package element in the document.'''
+        return self._first_text(self.config, "package")
+
+    def get_node_machine_list(self):
+        '''Return a Machine for every node element in the document.'''
+        return [Machine(self.get_node_name(node), self.get_node_ip(node))
+                for node in self.config.getElementsByTagName("node")]
+
+    def get_dataset_list(self):
+        '''Return a Dataset for every dataset element in the document.'''
+        datasets = []
+        for element in self.config.getElementsByTagName("dataset"):
+            name = self.get_dataset_name(element)
+            save_paths = self.get_dataset_save_paths(element)
+            partition_type = self.get_dataset_partition_type(element)
+            partitions = self.get_dataset_partitions(element)
+            tests = self.get_dataset_tests(element)
+            datasets.append(Dataset(name, save_paths, partition_type, partitions, tests))
+        return datasets
+
+
+    # --------------------------------------------------------------------------
+    # Node Specific Functions
+    # --------------------------------------------------------------------------
+    def get_node_ip(self, node):
+        '''Return the text of the node's first cluster_ip element.'''
+        return self._first_text(node, "cluster_ip")
+
+    def get_node_name(self, node):
+        '''Return the text of the node's first id element.'''
+        return self._first_text(node, "id")
+
+
+    # --------------------------------------------------------------------------
+    # Dataset Specific Functions
+    # --------------------------------------------------------------------------
+    def get_dataset_name(self, node):
+        '''Return the text of the dataset's first name element.'''
+        return self._first_text(node, "name")
+
+    def get_dataset_save_paths(self, node):
+        '''Return the text of every save_path element of the dataset.'''
+        return self._all_text(node, "save_path")
+
+    def get_dataset_partition_type(self, node):
+        '''Return the text of the dataset's first partition_type element.'''
+        return self._first_text(node, "partition_type")
+
+    def get_dataset_partitions(self, node):
+        '''Return every partitions_per_path value of the dataset as an int.'''
+        return [int(text) for text in self._all_text(node, "partitions_per_path")]
+
+    def get_dataset_tests(self, node):
+        '''Return the text of every test element of the dataset.'''
+        return self._all_text(node, "test")
+
+    def get_text(self, xml_node):
+        '''Return the concatenated data of the direct text children of xml_node.'''
+        return ''.join([child.data for child in xml_node.childNodes
+                        if child.nodeType == child.TEXT_NODE])
+
+class Machine:
+    '''A cluster node described by its id and ip address.'''
+
+    def __init__(self, id, ip):
+        self.id, self.ip = id, ip
+
+    def get_node_name(self):
+        '''Return the id given at construction.'''
+        return self.id
+
+    def get_node_ip(self):
+        '''Return the ip given at construction.'''
+        return self.ip
+
+    def __repr__(self):
+        # Rendered as "<id>(<ip>)".
+        return "".join([self.id, "(", self.ip, ")"])
+    
+class Dataset:
+    '''The settings of one dataset entry of the config file.'''
+
+    def __init__(self, name, save_paths, partition_type, partitions, tests):
+        self.name = name
+        self.save_paths = save_paths
+        self.partition_type = partition_type
+        self.partitions = partitions
+        self.tests = tests
+
+    def get_name(self):
+        '''Return the dataset name.'''
+        return self.name
+
+    def get_save_paths(self):
+        '''Return the save paths given at construction.'''
+        return self.save_paths
+
+    def get_partitions(self):
+        '''Return the partitions given at construction.'''
+        return self.partitions
+
+    def get_partition_type(self):
+        '''Return the partition type given at construction.'''
+        return self.partition_type
+
+    def get_tests(self):
+        '''Return the tests given at construction.'''
+        return self.tests
+
+    def __repr__(self):
+        # Rendered as "<name>:<save paths>:<partitions>".
+        return ":".join([self.name, str(self.save_paths), str(self.partitions)])
+    

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
new file mode 100644
index 0000000..04fff52
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Base URL used to get all the required files.
+# Base URL used to get all the required files.
+BASE_DOWNLOAD_URL = 'http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/'
+
+# List of required files for a build.
+FILE_NAMES = [
+    'ghcnd-countries.txt',
+    'ghcnd-inventory.txt',
+    'ghcnd-states.txt',
+    'ghcnd-stations.txt',
+    'ghcnd-version.txt',
+    'ghcnd_all.tar.gz',
+    'ghcnd_gsn.tar.gz',
+    'ghcnd_hcn.tar.gz',
+    'readme.txt',
+    'status.txt',
+]
+
+# Every field description below is a list of [name, start, end, type].
+
+# Index values of each field details.
+FIELD_INDEX_NAME = 0
+FIELD_INDEX_START = 1
+FIELD_INDEX_END = 2
+FIELD_INDEX_TYPE = 3
+
+# Positions of the leading fields inside DLY_FIELDS.
+DLY_FIELD_ID = 0
+DLY_FIELD_YEAR = 1
+DLY_FIELD_MONTH = 2
+DLY_FIELD_ELEMENT = 3
+
+# The day fields start at this position and come in groups of this size.
+DLY_FIELD_DAY_OFFSET = 4
+DLY_FIELD_DAY_FIELDS = 4
+
+# Details about the row.
+DLY_FIELDS = [
+    ['ID', 1, 11, 'Character'],
+    ['YEAR', 12, 15, 'Integer'],
+    ['MONTH', 16, 17, 'Integer'],
+    ['ELEMENT', 18, 21, 'Character'],
+]
+
+# Days in each row: 31 groups of value and three flags, 8 columns per group.
+for i in range(1, 32):
+    start = 22 + ((i - 1) * 8)
+    DLY_FIELDS.extend([
+        ['VALUE' + str(i), (start + 0), (start + 4), 'Integer'],
+        ['MFLAG' + str(i), (start + 5), (start + 5), 'Character'],
+        ['QFLAG' + str(i), (start + 6), (start + 6), 'Character'],
+        ['SFLAG' + str(i), (start + 7), (start + 7), 'Character'],
+    ])
+
+# Details about the row.
+STATIONS_FIELDS = {
+    'ID': ['ID', 1, 11, 'Character'],
+    'LATITUDE': ['LATITUDE', 13, 20, 'Real'],
+    'LONGITUDE': ['LONGITUDE', 22, 30, 'Real'],
+    'ELEVATION': ['ELEVATION', 32, 37, 'Real'],
+    'STATE': ['STATE', 39, 40, 'Character'],
+    'NAME': ['NAME', 42, 71, 'Character'],
+    'GSNFLAG': ['GSNFLAG', 73, 75, 'Character'],
+    'HCNFLAG': ['HCNFLAG', 77, 79, 'Character'],
+    'WMOID': ['WMOID', 81, 85, 'Character'],
+}
+
+# Details about the row.
+COUNTRIES_FIELDS = {
+    'CODE': ['CODE', 1, 2, 'Character'],
+    'NAME': ['NAME', 4, 50, 'Character'],
+}
+
+# Details about the row.
+STATES_FIELDS = {
+    'CODE': ['CODE', 1, 2, 'Character'],
+    'NAME': ['NAME', 4, 50, 'Character'],
+}
+
+# Details about the row.
+INVENTORY_FIELDS = {
+    'ID': ['ID', 1, 11, 'Character'],
+    'LATITUDE': ['LATITUDE', 13, 20, 'Real'],
+    'LONGITUDE': ['LONGITUDE', 22, 30, 'Real'],
+    'ELEMENT': ['ELEMENT', 32, 35, 'Character'],
+    'FIRSTYEAR': ['FIRSTYEAR', 37, 40, 'Integer'],
+    'LASTYEAR': ['LASTYEAR', 42, 45, 'Integer'],
+}

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
new file mode 100644
index 0000000..7b1434f
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# List of required files for a build.
+# List of required files for a build.
+MSHR_URLS = [
+    'ftp://ftp.ncdc.noaa.gov/pub/data/homr/docs/MSHR_Enhanced_Table.txt',
+    'http://www.ncdc.noaa.gov/homr/file/mshr_enhanced.txt.zip',
+]
+
+# Index values of each field details.
+MSHR_FIELD_INDEX_NAME = 0
+MSHR_FIELD_INDEX_START = 1
+MSHR_FIELD_INDEX_END = 2
+MSHR_FIELD_INDEX_TYPE = 3
+
+# Store the row details here.
+# Each row is keyed by field name and holds [name, start, end, type]. The
+# table is expanded with a generator expression so that no helper names are
+# left behind as module globals.
+# NOTE(review): the OBS_ENV type keeps the trailing space of the original
+# value ('X(40) ') -- confirm whether that space is intended.
+MSHR_FIELDS = dict((name, [name, first, last, kind]) for (name, first, last, kind) in (
+    ('SOURCE_ID', 1, 20, 'X(20)'),
+    ('SOURCE', 22, 31, 'X(10)'),
+    ('BEGIN_DATE', 33, 40, 'YYYYMMDD'),
+    ('END_DATE', 42, 49, 'YYYYMMDD'),
+    ('STATION_STATUS', 51, 70, 'X(20)'),
+    ('NCDCSTN_ID', 72, 91, 'X(20)'),
+    ('ICAO_ID', 93, 112, 'X(20)'),
+    ('WBAN_ID', 114, 133, 'X(20)'),
+    ('FAA_ID', 135, 154, 'X(20)'),
+    ('NWSLI_ID', 156, 175, 'X(20)'),
+    ('WMO_ID', 177, 196, 'X(20)'),
+    ('COOP_ID', 198, 217, 'X(20)'),
+    ('TRANSMITTAL_ID', 219, 238, 'X(20)'),
+    ('GHCND_ID', 240, 259, 'X(20)'),
+    ('NAME_PRINCIPAL', 261, 360, 'X(100)'),
+    ('NAME_PRINCIPAL_SHORT', 362, 391, 'X(30)'),
+    ('NAME_COOP', 393, 492, 'X(100)'),
+    ('NAME_COOP_SHORT', 494, 523, 'X(30)'),
+    ('NAME_PUBLICATION', 525, 624, 'X(100)'),
+    ('NAME_ALIAS', 626, 725, 'X(100)'),
+    ('NWS_CLIM_DIV', 727, 736, 'X(10)'),
+    ('NWS_CLIM_DIV_NAME', 738, 777, 'X(40)'),
+    ('STATE_PROV', 779, 788, 'X(10)'),
+    ('COUNTY', 790, 839, 'X(50)'),
+    ('NWS_ST_CODE', 841, 842, 'X(2)'),
+    ('FIPS_COUNTRY_CODE', 844, 845, 'X(2)'),
+    ('FIPS_COUNTRY_NAME', 847, 946, 'X(100)'),
+    ('NWS_REGION', 948, 977, 'X(30)'),
+    ('NWS_WFO', 979, 988, 'X(10)'),
+    ('ELEV_GROUND', 990, 1029, 'X(40)'),
+    ('ELEV_GROUND_UNIT', 1031, 1050, 'X(20)'),
+    ('ELEV_BAROM', 1052, 1091, 'X(40)'),
+    ('ELEV_BAROM_UNIT', 1093, 1112, 'X(20)'),
+    ('ELEV_AIR', 1114, 1153, 'X(40)'),
+    ('ELEV_AIR_UNIT', 1155, 1174, 'X(20)'),
+    ('ELEV_ZERODAT', 1176, 1215, 'X(40)'),
+    ('ELEV_ZERODAT_UNIT', 1217, 1236, 'X(20)'),
+    ('ELEV_UNK', 1238, 1277, 'X(40)'),
+    ('ELEV_UNK_UNIT', 1279, 1298, 'X(20)'),
+    ('LAT_DEC', 1300, 1319, 'X(20)'),
+    ('LON_DEC', 1321, 1340, 'X(20)'),
+    ('LAT_LON_PRECISION', 1342, 1351, 'X(10)'),
+    ('RELOCATION', 1353, 1414, 'X(62)'),
+    ('UTC_OFFSET', 1416, 1431, '9(16)'),
+    ('OBS_ENV', 1433, 1472, 'X(40) '),
+    ('PLATFORM', 1474, 1573, 'X(100)'),
+))


[12/14] git commit: More tweaking to get the MRQL benchmark to work efficiently.

Posted by pr...@apache.org.
More tweaking to get the MRQL benchmark to work efficiently.


Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/7f06298f
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/7f06298f
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/7f06298f

Branch: refs/heads/master
Commit: 7f06298fa350ca8b0a81bb9ffb78aa29b26368c1
Parents: 31b3f4d
Author: Preston Carman <pr...@apache.org>
Authored: Thu Oct 9 15:29:16 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Thu Oct 9 15:29:16 2014 -0700

----------------------------------------------------------------------
 .../other_systems/mrql_scripts/README.md        | 23 ++++++++++++++++++++
 .../other_systems/mrql_scripts/clear.sh         |  4 ----
 .../mrql_scripts/load_node_file.sh              | 17 ++++++++-------
 .../mrql_scripts/run_group_test.sh              | 22 +++++++++++++------
 .../other_systems/mrql_scripts/start.sh         | 20 -----------------
 .../other_systems/mrql_scripts/stop.sh          | 20 -----------------
 6 files changed, 47 insertions(+), 59 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/vxquery/blob/7f06298f/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/README.md
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/README.md b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/README.md
new file mode 100644
index 0000000..53a7ecf
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/README.md
@@ -0,0 +1,23 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+
+clear.sh
+
+hadoop namenode -format
+
+run_group_test.sh
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/7f06298f/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/clear.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/clear.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/clear.sh
index b775de2..9a912b2 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/clear.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/clear.sh
@@ -17,12 +17,8 @@
 # limitations under the License.
 #
 
-# Hadoop data reset
-hadoop namenode -format
-
 # Remove data
 rm -rf disk1/hadoop/data
 rm -rf disk2/hadoop/data
 rm -rf disk1/hadoop/tmp
 rm -rf disk1/hadoop/logs
-

http://git-wip-us.apache.org/repos/asf/vxquery/blob/7f06298f/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
index 048274f..ead0902 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
@@ -23,15 +23,16 @@ then
     exit
 fi
 
+echo "Loading node ${1} data file in to cluster."
 
 # Add each sensor block
-cp saved/backups/mr/all_sensors_${1}.xml.gz disk1/hadoop/upload/
-gunzip disk1/hadoop/upload/all_sensors_${1}.xml.gz
-hadoop fs -copyFromLocal disk1/hadoop/upload/all_sensors_${1}.xml all/sensors
-rm -f disk1/hadoop/upload/all_sensors_${1}.xml
+cp saved/backups/mr/all_sensors_${1}.xml.gz disk1/hadoop/
+gunzip disk1/hadoop/all_sensors_${1}.xml.gz
+hadoop fs -copyFromLocal disk1/hadoop/all_sensors_${1}.xml all/sensors
+rm -f disk1/hadoop/all_sensors_${1}.xml
 
 # Add each station block
-cp saved/backups/mr/all_stations_${1}.xml.gz disk1/hadoop/upload/
-gunzip disk1/hadoop/upload/all_stations_${1}.xml.gz
-hadoop fs -copyFromLocal disk1/hadoop/upload/all_stations_${1}.xml all/stations
-rm -f disk1/hadoop/upload/all_stations_${1}.xml
+cp saved/backups/mr/all_stations_${1}.xml.gz disk1/hadoop/
+gunzip disk1/hadoop/all_stations_${1}.xml.gz
+hadoop fs -copyFromLocal disk1/hadoop/all_stations_${1}.xml all/stations
+rm -f disk1/hadoop/all_stations_${1}.xml

http://git-wip-us.apache.org/repos/asf/vxquery/blob/7f06298f/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
index 60dc255..f42a451 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
@@ -17,28 +17,36 @@
 # limitations under the License.
 #
 
-NODES=2
+if [ -z "${1}" ]
+then
+    echo "Please enter the number of nodes."
+    exit
+fi
+
+NODES=${1}
 REPEAT=1
 
 # Start Hadoop
 sh saved/hadoop/hadoop-1.2.1/bin/start-all.sh
 
+sleep 10
+
 # Prepare hadoop file system
 hadoop fs -mkdir all
+hadoop fs -ls 
 hadoop fs -mkdir all/sensors
 hadoop fs -mkdir all/stations
+hadoop fs -ls all
 
 
 # Upload test data
-n=0
-while [ ${n} -lt ${NODES} ];
+COUNTER=0
+while [ ${COUNTER} -lt ${NODES} ];
 do
-    sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh ${n} &
+    sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh ${COUNTER}
+    let COUNTER=COUNTER+1 
 done
 
-# After all files have been uploaded, continue.
-wait
-
 
 # Start test
 sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/ ${NODES} ${REPEAT}

http://git-wip-us.apache.org/repos/asf/vxquery/blob/7f06298f/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/start.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/start.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/start.sh
deleted file mode 100755
index a1766c9..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/start.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#      http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-bin/start-all.sh

http://git-wip-us.apache.org/repos/asf/vxquery/blob/7f06298f/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/stop.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/stop.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/stop.sh
deleted file mode 100755
index e49d818..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/stop.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#      http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-bin/stop-all.sh


[14/14] git commit: Merge branch 'prestonc/september_update'

Posted by pr...@apache.org.
Merge branch 'prestonc/september_update'


Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/e97888ed
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/e97888ed
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/e97888ed

Branch: refs/heads/master
Commit: e97888ed881f8adc749e1e695c9700cc9d4c797f
Parents: 72fd5c6 9e0133a
Author: Preston Carman <pr...@apache.org>
Authored: Tue Oct 21 12:35:39 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Tue Oct 21 12:35:39 2014 -0700

----------------------------------------------------------------------
 .../noaa-ghcn-daily/conf/weather_example.xml    | 35 +++++++
 .../conf/weather_example_cluster.xml            | 58 ++++++++++++
 .../noaa-ghcn-daily/other_systems/mrql/q00.mrql | 23 +++++
 .../noaa-ghcn-daily/other_systems/mrql/q01.mrql | 21 +++++
 .../noaa-ghcn-daily/other_systems/mrql/q02.mrql | 24 +++++
 .../noaa-ghcn-daily/other_systems/mrql/q03.mrql | 22 +++++
 .../noaa-ghcn-daily/other_systems/mrql/q04.mrql | 24 +++++
 .../other_systems/mrql/q04_count_sensor.mrql    | 21 +++++
 .../other_systems/mrql/q04_count_station.mrql   | 23 +++++
 .../noaa-ghcn-daily/other_systems/mrql/q05.mrql | 27 ++++++
 .../other_systems/mrql/q05_count_sensor.mrql    | 23 +++++
 .../other_systems/mrql/q05_count_station.mrql   | 23 +++++
 .../noaa-ghcn-daily/other_systems/mrql/q06.mrql | 26 ++++++
 .../other_systems/mrql/q06_count_sensor.mrql    | 23 +++++
 .../other_systems/mrql/q06_count_station.mrql   | 23 +++++
 .../noaa-ghcn-daily/other_systems/mrql/q07.mrql | 26 ++++++
 .../other_systems/mrql/q07_count_join.mrql      | 26 ++++++
 .../other_systems/mrql/q07_count_tmax.mrql      | 22 +++++
 .../other_systems/mrql/q07_count_tmin.mrql      | 22 +++++
 .../other_systems/mrql_gsn/q00.mrql             | 23 -----
 .../other_systems/mrql_gsn/q01.mrql             | 21 -----
 .../other_systems/mrql_gsn/q02.mrql             | 24 -----
 .../other_systems/mrql_gsn/q03.mrql             | 22 -----
 .../other_systems/mrql_gsn/q04.mrql             | 24 -----
 .../other_systems/mrql_gsn/q04_sensor.mrql      | 21 -----
 .../other_systems/mrql_gsn/q04_station.mrql     | 24 -----
 .../other_systems/mrql_gsn/q05.mrql             | 27 ------
 .../other_systems/mrql_gsn/q05_sensor.mrql      | 23 -----
 .../other_systems/mrql_gsn/q05_station.mrql     | 23 -----
 .../other_systems/mrql_gsn/q06.mrql             | 26 ------
 .../other_systems/mrql_gsn/q06_sensor.mrql      | 23 -----
 .../other_systems/mrql_gsn/q06_station.mrql     | 23 -----
 .../other_systems/mrql_gsn/q07.mrql             | 26 ------
 .../other_systems/mrql_gsn/q07_join_count.mrql  | 26 ------
 .../other_systems/mrql_gsn/q07_tmax.mrql        | 22 -----
 .../other_systems/mrql_gsn/q07_tmin.mrql        | 22 -----
 .../other_systems/mrql_hcn/q00.mrql             | 23 -----
 .../other_systems/mrql_hcn/q01.mrql             | 21 -----
 .../other_systems/mrql_hcn/q02.mrql             | 24 -----
 .../other_systems/mrql_hcn/q03.mrql             | 22 -----
 .../other_systems/mrql_hcn/q04.mrql             | 24 -----
 .../other_systems/mrql_hcn/q04_sensor.mrql      | 21 -----
 .../other_systems/mrql_hcn/q04_station.mrql     | 24 -----
 .../other_systems/mrql_hcn/q05.mrql             | 27 ------
 .../other_systems/mrql_hcn/q05_sensor.mrql      | 23 -----
 .../other_systems/mrql_hcn/q05_station.mrql     | 23 -----
 .../other_systems/mrql_hcn/q06.mrql             | 28 ------
 .../other_systems/mrql_hcn/q06_sensor.mrql      | 23 -----
 .../other_systems/mrql_hcn/q06_station.mrql     | 23 -----
 .../other_systems/mrql_hcn/q07.mrql             | 26 ------
 .../other_systems/mrql_hcn/q07_join_count.mrql  | 26 ------
 .../other_systems/mrql_hcn/q07_tmax.mrql        | 22 -----
 .../other_systems/mrql_hcn/q07_tmin.mrql        | 22 -----
 .../other_systems/mrql_scripts/README.md        | 23 +++++
 .../other_systems/mrql_scripts/clear.sh         | 24 +++++
 .../mrql_scripts/load_node_file.sh              | 47 ++++++++++
 .../mrql_scripts/run_group_test.sh              | 65 +++++++++++++
 .../mrql_scripts/run_mrql_tests.sh              | 49 ++++++++++
 .../other_systems/mrql_test/q00.mrql            | 23 -----
 .../other_systems/mrql_test/q01.mrql            | 21 -----
 .../other_systems/mrql_test/q02.mrql            | 24 -----
 .../other_systems/mrql_test/q03.mrql            | 22 -----
 .../other_systems/mrql_test/q04.mrql            | 24 -----
 .../other_systems/mrql_test/q05.mrql            | 27 ------
 .../other_systems/mrql_test/q06.mrql            | 27 ------
 .../other_systems/mrql_test/q07.mrql            | 26 ------
 .../other_systems/saxon/count_sensor.xq         |  7 ++
 .../other_systems/saxon/count_station.xq        |  7 ++
 .../noaa-ghcn-daily/other_systems/saxon/q00.xq  | 15 +++
 .../noaa-ghcn-daily/other_systems/saxon/q01.xq  |  8 ++
 .../noaa-ghcn-daily/other_systems/saxon/q02.xq  | 14 +++
 .../noaa-ghcn-daily/other_systems/saxon/q03.xq  |  8 ++
 .../other_systems/saxon/q04_count_sensor.xq     | 10 ++
 .../other_systems/saxon/q04_count_station.xq    |  8 ++
 .../other_systems/saxon/q05_count_sensor.xq     | 11 +++
 .../other_systems/saxon/q05_count_station.xq    |  8 ++
 .../other_systems/saxon/q06_count_sensor.xq     |  8 ++
 .../other_systems/saxon/q06_count_station.xq    |  5 +
 .../noaa-ghcn-daily/other_systems/saxon/q07.xq  | 15 +++
 .../other_systems/saxon/q07_count_tmax.xq       |  9 ++
 .../other_systems/saxon/q07_count_tmin.xq       |  9 ++
 .../saxon_scripts/run_saxon_tests.sh            | 44 +++++++++
 .../noaa-ghcn-daily/queries/count_sensor.xq     | 27 ++++++
 .../noaa-ghcn-daily/queries/count_station.xq    | 27 ++++++
 .../resources/noaa-ghcn-daily/queries/q04.xq    |  8 +-
 .../noaa-ghcn-daily/queries/q04_count_join.xq   | 34 +++++++
 .../noaa-ghcn-daily/queries/q04_count_sensor.xq | 29 ++++++
 .../queries/q04_count_station.xq                | 28 ++++++
 .../noaa-ghcn-daily/queries/q04_sensor.xq       | 27 ------
 .../noaa-ghcn-daily/queries/q04_station.xq      | 25 -----
 .../noaa-ghcn-daily/queries/q05_count_join.xq   | 35 +++++++
 .../noaa-ghcn-daily/queries/q05_count_sensor.xq | 31 +++++++
 .../queries/q05_count_station.xq                | 28 ++++++
 .../noaa-ghcn-daily/queries/q05_sensor.xq       | 28 ------
 .../noaa-ghcn-daily/queries/q05_station.xq      | 25 -----
 .../noaa-ghcn-daily/queries/q06_count_join.xq   | 34 +++++++
 .../noaa-ghcn-daily/queries/q06_count_sensor.xq | 29 ++++++
 .../queries/q06_count_station.xq                | 27 ++++++
 .../noaa-ghcn-daily/queries/q06_sensor.xq       | 27 ------
 .../noaa-ghcn-daily/queries/q06_station.xq      | 24 -----
 .../noaa-ghcn-daily/queries/q07_count_join.xq   | 35 +++++++
 .../noaa-ghcn-daily/queries/q07_count_tmax.xq   | 28 ++++++
 .../noaa-ghcn-daily/queries/q07_count_tmin.xq   | 28 ++++++
 .../noaa-ghcn-daily/queries/q07_tmax.xq         | 26 ------
 .../noaa-ghcn-daily/queries/q07_tmin.xq         | 26 ------
 .../noaa-ghcn-daily/queries/sensor_count.xq     | 24 -----
 .../noaa-ghcn-daily/queries/station_count.xq    | 24 -----
 .../scripts/benchmark_logging.properties        |  1 +
 .../noaa-ghcn-daily/scripts/run_benchmark.sh    |  3 +-
 .../noaa-ghcn-daily/scripts/run_mrql_tests.sh   | 42 ---------
 .../scripts/weather_benchmark.py                | 10 +-
 .../scripts/weather_data_files.py               | 14 +--
 .../src/main/resources/util/diff_xml_files.py   | 97 ++++++++++++++++++++
 .../src/main/resources/util/list_xml_files.py   | 72 +++++++++++++++
 .../src/main/resources/util/log_top.sh          | 35 -------
 .../src/main/resources/util/merge_xml_files.py  |  2 +-
 .../java/org/apache/vxquery/cli/VXQuery.java    |  1 +
 .../RemoveUnusedSortDistinctNodesRule.java      | 19 +---
 .../rules/util/CardinalityRuleToolbox.java      | 13 ---
 .../rewriter/rules/util/OperatorToolbox.java    | 72 ---------------
 .../vxquery/functions/builtin-functions.xml     |  1 +
 .../xmlquery/query/XMLQueryCompiler.java        |  2 +-
 vxquery-server/pom.xml                          |  4 +
 .../vxquery/cli/VXQueryClusterShutdown.java     | 76 +++++++++++++++
 .../src/main/resources/conf/cluster_example.xml | 12 +--
 .../src/main/resources/conf/local.xml           | 18 ++--
 .../main/resources/scripts/cluster_actions.py   | 16 +++-
 .../src/main/resources/scripts/cluster_cli.py   |  6 +-
 .../resources/scripts/cluster_information.py    | 27 ++++--
 .../src/main/resources/scripts/startcc.sh       |  6 +-
 .../src/main/resources/scripts/startnc.sh       |  4 +-
 .../src/main/resources/scripts/stopcc.sh        |  3 +-
 .../src/main/resources/scripts/stopcluster.sh   | 48 ++++++++++
 .../src/main/resources/scripts/stopnc.sh        |  2 +-
 134 files changed, 1681 insertions(+), 1491 deletions(-)
----------------------------------------------------------------------



[08/14] git commit: Removed debug output.

Posted by pr...@apache.org.
Removed debug output.


Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/0e666fc5
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/0e666fc5
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/0e666fc5

Branch: refs/heads/master
Commit: 0e666fc5dea4268d0fa8e2ce2d9d310159ebd162
Parents: 44d07d9
Author: Preston Carman <pr...@apache.org>
Authored: Mon Oct 6 15:40:00 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Mon Oct 6 15:40:00 2014 -0700

----------------------------------------------------------------------
 vxquery-server/src/main/resources/scripts/stopcluster.sh | 1 -
 1 file changed, 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/vxquery/blob/0e666fc5/vxquery-server/src/main/resources/scripts/stopcluster.sh
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/scripts/stopcluster.sh b/vxquery-server/src/main/resources/scripts/stopcluster.sh
index 238da7f..5172a2e 100755
--- a/vxquery-server/src/main/resources/scripts/stopcluster.sh
+++ b/vxquery-server/src/main/resources/scripts/stopcluster.sh
@@ -45,5 +45,4 @@ then
 fi
 
 # Launch hyracks cc script without topology
-echo "${VXQUERY_HOME}/vxquery-server/target/appassembler/bin/vxqueryshutdown ${CC_OPTIONS} &> ${CCLOGS_DIR}/shutdown_$(date +%Y%m%d%H%M).log &"
 ${VXQUERY_HOME}/vxquery-server/target/appassembler/bin/vxqueryshutdown ${CC_OPTIONS} &> ${CCLOGS_DIR}/shutdown_$(date +%Y%m%d%H%M).log &