You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@vxquery.apache.org by pr...@apache.org on 2014/10/21 21:36:02 UTC
[01/14] Remove benchmark files to allow easy copy from other branch.
Repository: vxquery
Updated Branches:
refs/heads/master 72fd5c645 -> e97888ed8
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
deleted file mode 100644
index 5db090a..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
+++ /dev/null
@@ -1,554 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import textwrap
-from datetime import date
-import os
-from collections import OrderedDict
-
-# Custom modules.
-from weather_config_ghcnd import *
-from weather_config_mshr import *
-from weather_download_files import *
-
-class WeatherConvertToXML:
-
- # Two-letter code -> display name.  Built from a list of pairs so that the
- # insertion order is the order written here: a dict literal hands its
- # entries to OrderedDict in arbitrary order on Python 2, which makes the
- # positional lookup STATES.keys().index(code) used for the state label
- # unstable between runs.
- STATES = OrderedDict([
-     ('AK', 'Alaska'),
-     ('AL', 'Alabama'),
-     ('AR', 'Arkansas'),
-     ('AS', 'American Samoa'),
-     ('AZ', 'Arizona'),
-     ('CA', 'California'),
-     ('CO', 'Colorado'),
-     ('CT', 'Connecticut'),
-     ('DC', 'District of Columbia'),
-     ('DE', 'Delaware'),
-     ('FL', 'Florida'),
-     ('GA', 'Georgia'),
-     ('GU', 'Guam'),
-     ('HI', 'Hawaii'),
-     ('IA', 'Iowa'),
-     ('ID', 'Idaho'),
-     ('IL', 'Illinois'),
-     ('IN', 'Indiana'),
-     ('KS', 'Kansas'),
-     ('KY', 'Kentucky'),
-     ('LA', 'Louisiana'),
-     ('MA', 'Massachusetts'),
-     ('MD', 'Maryland'),
-     ('ME', 'Maine'),
-     ('MI', 'Michigan'),
-     ('MN', 'Minnesota'),
-     ('MO', 'Missouri'),
-     ('MP', 'Northern Mariana Islands'),
-     ('MS', 'Mississippi'),
-     ('MT', 'Montana'),
-     ('NA', 'National'),
-     ('NC', 'North Carolina'),
-     ('ND', 'North Dakota'),
-     ('NE', 'Nebraska'),
-     ('NH', 'New Hampshire'),
-     ('NJ', 'New Jersey'),
-     ('NM', 'New Mexico'),
-     ('NV', 'Nevada'),
-     ('NY', 'New York'),
-     ('OH', 'Ohio'),
-     ('OK', 'Oklahoma'),
-     ('OR', 'Oregon'),
-     ('PA', 'Pennsylvania'),
-     ('PR', 'Puerto Rico'),
-     ('RI', 'Rhode Island'),
-     ('SC', 'South Carolina'),
-     ('SD', 'South Dakota'),
-     ('TN', 'Tennessee'),
-     ('TX', 'Texas'),
-     ('UT', 'Utah'),
-     ('VA', 'Virginia'),
-     ('VI', 'Virgin Islands'),
-     ('VT', 'Vermont'),
-     ('WA', 'Washington'),
-     ('WI', 'Wisconsin'),
-     ('WV', 'West Virginia'),
-     ('WY', 'Wyoming'),
- ])
-
- # Month names, January first.
- MONTHS = [
-     "January",
-     "February",
-     "March",
-     "April",
-     "May",
-     "June",
-     "July",
-     "August",
-     "September",
-     "October",
-     "November",
-     "December",
- ]
-
- # Web service token; replaced through set_token().
- token = ""
-
- def __init__(self, base_path, save_path, debug_output):
-     """Store the output settings and derive the support-file paths.
-
-     base_path -- folder prefix of the GHCN-Daily and MSHR text files.
-     save_path -- stored unchanged as self.save_path.
-     debug_output -- stored unchanged as self.debug_output.
-     """
-     self.debug_output = debug_output
-     self.save_path = save_path
-
-     # Extra support files: <base_path>/ghcnd-<name>.txt
-     for name in ('countries', 'inventory', 'states', 'stations'):
-         setattr(self, 'ghcnd_' + name, base_path + '/ghcnd-' + name + '.txt')
-
-     # MSHR support files.
-     self.mshr_stations = base_path + '/mshr_enhanced_201402.txt'
-
- def set_token(self, token):
-     """Replace the web service token held by this instance."""
-     setattr(self, 'token', token)
-
- def get_field_from_definition(self, row, field_definition):
-     """Slice one fixed-width field out of row.
-
-     The definition's start position is treated as 1-based and its end
-     position as inclusive.
-     """
-     first = field_definition[FIELD_INDEX_START] - 1
-     last = field_definition[FIELD_INDEX_END]
-     return row[first:last]
-
- def get_field(self, fields_array, row, index):
-     """Slice the field described by fields_array[index] out of row."""
-     definition = fields_array[index]
-     first = definition[FIELD_INDEX_START] - 1
-     last = definition[FIELD_INDEX_END]
-     return row[first:last]
-
- def get_dly_field(self, row, index):
-     """Slice the DLY_FIELDS[index] field out of a daily-file row."""
-     dly_value = self.get_field(DLY_FIELDS, row, index)
-     return dly_value
-
- def print_row_files(self, row):
-     """Print every DLY_FIELDS entry of row as "<name> = '<value>'"."""
-     for definition in DLY_FIELDS:
-         first = definition[FIELD_INDEX_START] - 1
-         last = definition[FIELD_INDEX_END]
-         label = str(definition[FIELD_INDEX_NAME])
-         print(label + " = '" + row[first:last] + "'")
-
- def save_file(self, filename, contents):
-     """Write contents to filename, replacing any existing file.
-
-     Returns filename so callers can test the result.
-     """
-     # The with-block closes the handle even when write() raises.
-     with open(filename, 'w') as output:
-         output.write(contents)
-     return filename
-
- def get_folder_size(self, folder_name):
-     """Return the summed size in bytes of every file below folder_name."""
-     return sum(
-         os.path.getsize(os.path.join(parent, name))
-         for parent, _, names in os.walk(folder_name)
-         for name in names
-     )
-
- def process_one_month_sensor_set(self, records, page):
-     """Default hook: writes nothing and reports zero files.
-
-     WeatherWebServiceMonthlyXMLFile overrides this.
-     """
-     files_written = 0
-     return files_written
-
- def process_station_data(self, row):
-     """Default hook: writes nothing and reports zero.
-
-     WeatherWebServiceMonthlyXMLFile overrides this.
-     """
-     result = 0
-     return result
-
- def get_base_folder(self, station_id, data_type="sensors"):
-     """Return the save folder for station_id under self.save_path."""
-     folder = build_base_save_folder(self.save_path, station_id, data_type)
-     return folder
-
- def process_inventory_file(self):
-     """Summarise the inventory file into <save_path>/inventory.csv.
-
-     One output row is produced per station id, holding the comma-joined
-     sensor ids, the sensor count, the longest (LASTYEAR - FIRSTYEAR) span
-     and the sum of all spans.
-     """
-     print("Processing inventory file")
-
-     csv_header = ['ID', 'SENSORS', 'SENSORS_COUNT', 'MAX_YEARS', 'TOTAL_YEARS_FOR_ALL_SENSORS']
-     csv_inventory = {}
-     with open(self.ghcnd_inventory, 'r') as file_stream:
-         # The first line is discarded before iterating, as before.
-         # NOTE(review): confirm the inventory file really starts with a
-         # header line; otherwise the first sensor record is lost here.
-         file_stream.readline()
-         for row in file_stream:
-             station_id = self.get_field_from_definition(row, INVENTORY_FIELDS['ID'])
-             sensor_id = self.get_field_from_definition(row, INVENTORY_FIELDS['ELEMENT'])
-             start = int(self.get_field_from_definition(row, INVENTORY_FIELDS['FIRSTYEAR']))
-             end = int(self.get_field_from_definition(row, INVENTORY_FIELDS['LASTYEAR']))
-             years = end - start
-             if station_id in csv_inventory:
-                 previous = csv_inventory[station_id]
-                 csv_inventory[station_id] = [
-                     station_id,
-                     previous[1] + "," + sensor_id,
-                     str(int(previous[2]) + 1),
-                     str(max(int(previous[3]), years)),
-                     # Column 4 is the running total; the old code added
-                     # to column 3 (the maximum) by mistake.
-                     str(int(previous[4]) + years),
-                 ]
-             else:
-                 csv_inventory[station_id] = [station_id, sensor_id, str(1), str(years), str(years)]
-
-     path = self.save_path + "/inventory.csv"
-     self.save_csv_file(path, csv_inventory, csv_header)
-
- def save_csv_file(self, path, csv_inventory, header):
-     """Write header and every csv_inventory row to path, "|"-separated.
-
-     Rows are emitted in the mapping's iteration order, one per line.
-     """
-     lines = ["|".join(header)]
-     lines.extend("|".join(csv_inventory[key]) for key in csv_inventory)
-     self.save_file(path, "\n".join(lines) + "\n")
-
-
- def process_station_file(self, file_name):
-     """Read the first line of file_name and hand it to process_station_data().
-
-     Returns whatever process_station_data() returns.
-     """
-     print("Processing station file: " + file_name)
-     # Only the first row is needed; close the handle as soon as it is read.
-     with open(file_name, 'r') as file_stream:
-         row = file_stream.readline()
-     return self.process_station_data(row)
-
- def process_sensor_file(self, file_name, max_files, sensor_max=99):
-     """Group the rows of a sensor file by month and convert each group.
-
-     Rows are collected until the month or year changes, or until
-     sensor_max rows have been collected for the same month (which starts
-     a new page).  Every completed group goes to
-     process_one_month_sensor_set().
-
-     max_files -- stop once this many files were created; 0 means no limit.
-     Returns (file_count, data_size); (0, 0) for an empty file.
-     """
-     print("Processing sensor file: " + file_name)
-
-     month_last = 0
-     year_last = 0
-     records = []
-     page = 0
-     sensor_count = 0
-     file_count = 0
-     limit_reached = False
-
-     with open(file_name, 'r') as file_stream:
-         for row in file_stream:
-             month = self.get_dly_field(row, DLY_FIELD_MONTH)
-             year = self.get_dly_field(row, DLY_FIELD_YEAR)
-             same_month = (month == month_last and year == year_last)
-
-             if (month_last != 0 and year_last != 0) and (sensor_count >= sensor_max or not same_month):
-                 # process set
-                 file_count += self.process_one_month_sensor_set(records, page)
-                 records = []
-                 if sensor_count >= sensor_max and same_month:
-                     # start a new page.
-                     page += 1
-                 else:
-                     # start over.
-                     page = 0
-                 sensor_count = 0
-
-             records.append(row)
-             sensor_count += 1
-             if max_files != 0 and file_count >= max_files:
-                 # Stop creating more files after the max is reached.
-                 limit_reached = True
-                 break
-
-             month_last = month
-             year_last = year
-
-     if not records:
-         # Empty input: there is no row to read a station id from.
-         return (0, 0)
-
-     station_id = self.get_dly_field(records[0], DLY_FIELD_ID)
-     if not limit_reached:
-         # A group is only flushed when the next row belongs to another
-         # month, so the last group of the file has to be flushed here.
-         file_count += self.process_one_month_sensor_set(records, page)
-
-     data_size = self.get_folder_size(self.get_base_folder(station_id) + "/" + station_id)
-     print("Created " + str(file_count) + " XML files for a data size of " + str(data_size) + ".")
-
-     return (file_count, data_size)
-
- def convert_c2f(self, c):
-     """Convert a temperature from degrees Celsius to degrees Fahrenheit."""
-     # Float literals keep the 9/5 factor from truncating to 1 under
-     # Python 2 integer division, which used to yield c + 32.
-     return (c * 9.0 / 5.0) + 32
-
- def default_xml_web_service_start(self):
-     """Return the XML declaration line, newline-terminated."""
-     return "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n"
-
- def default_xml_data_start(self, total_records):
-     """Return the opening dataCollection tag carrying total_records."""
-     pieces = ["<dataCollection pageCount=\"1\" totalCount=\"", str(total_records), "\">\n"]
-     return "".join(pieces)
-
- def default_xml_station_start(self):
-     """Return the opening stationCollection tag for a single station."""
-     return "<stationCollection pageSize=\"100\" pageCount=\"1\" totalCount=\"1\">\n"
-
- def default_xml_field_date(self, report_date, indent=2):
-     """Return an indented <date> element for report_date at midnight.
-
-     Month and day are zero-padded to two digits; the year is not padded.
-     """
-     stamp = "-".join([
-         str(report_date.year),
-         str(report_date.month).zfill(2),
-         str(report_date.day).zfill(2),
-     ])
-     return self.get_indent_space(indent) + "<date>" + stamp + "T00:00:00.000</date>\n"
-
- def default_xml_mshr_station_additional(self, station_id):
-     """The web service station data is generated from the MSHR data supplemented with GHCN-Daily.
-
-     Looks up the first MSHR row whose GHCND_ID field equals station_id
-     and returns locationLabels XML for its county and country, or ""
-     when no row matches.
-     """
-     station_mshr_row = ""
-     # The with-block closes the MSHR file, including on the early break.
-     with open(self.mshr_stations, 'r') as stations_mshr_file:
-         for line in stations_mshr_file:
-             if station_id == self.get_field_from_definition(line, MSHR_FIELDS['GHCND_ID']).strip():
-                 station_mshr_row = line
-                 break
-
-     if station_mshr_row == "":
-         return ""
-
-     additional_xml = ""
-
-     county = self.get_field_from_definition(station_mshr_row, MSHR_FIELDS['COUNTY']).strip()
-     if county != "":
-         additional_xml += self.default_xml_location_labels("CNTY", "FIPS:-9999", county)
-
-     country_code = self.get_field_from_definition(station_mshr_row, MSHR_FIELDS['FIPS_COUNTRY_CODE']).strip()
-     country_name = self.get_field_from_definition(station_mshr_row, MSHR_FIELDS['FIPS_COUNTRY_NAME']).strip()
-     # The country label is only emitted when both code and name are present.
-     if country_code != "" and country_name != "":
-         additional_xml += self.default_xml_location_labels("CNTRY", "FIPS:" + country_code, country_name)
-
-     return additional_xml
-
- def default_xml_location_labels(self, type, id, display_name):
-     """Return a locationLabels element holding type, id and displayName."""
-     parts = [
-         self.default_xml_start_tag("locationLabels", 2),
-         self.default_xml_element("type", type, 3),
-         self.default_xml_element("id", id, 3),
-         self.default_xml_element("displayName", display_name, 3),
-         self.default_xml_end_tag("locationLabels", 2),
-     ]
-     return "".join(parts)
-
-
- def default_xml_web_service_station(self, station_id):
-     """The web service station data is generated from available historical sources.
-
-     Builds a <station> element from the first row of the GHCN-Daily
-     stations file whose ID field equals station_id, followed by the MSHR
-     location labels.  When no row matches, the fields are read from an
-     empty row and come out empty.
-     """
-     station_ghcnd_row = ""
-     # The with-block closes the stations file, including on the early break.
-     with open(self.ghcnd_stations, 'r') as stations_ghcnd_file:
-         for line in stations_ghcnd_file:
-             if station_id == self.get_field_from_definition(line, STATIONS_FIELDS['ID']):
-                 station_ghcnd_row = line
-                 break
-
-     xml_station = ""
-     xml_station += self.default_xml_start_tag("station", 1)
-
-     xml_station += self.default_xml_element("id", "GHCND:" + station_id, 2)
-     xml_station += self.default_xml_element("displayName", self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['NAME']).strip(), 2)
-     xml_station += self.default_xml_element("latitude", self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['LATITUDE']).strip(), 2)
-     xml_station += self.default_xml_element("longitude", self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['LONGITUDE']).strip(), 2)
-
-     # "-999.9" is treated as "no elevation" and the element is left out.
-     elevation = self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['ELEVATION']).strip()
-     if elevation != "-999.9":
-         xml_station += self.default_xml_element("elevation", elevation, 2)
-
-     state_code = self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['STATE']).strip()
-     if state_code != "" and state_code in self.STATES:
-         # The label id is the position of the code within STATES; list()
-         # keeps the lookup working where keys() is not a list.
-         state_index = list(self.STATES.keys()).index(state_code)
-         xml_station += self.default_xml_location_labels("ST", "FIPS:" + str(state_index), self.STATES[state_code])
-
-     # Add the MSHR data to the station generated information.
-     xml_station += self.default_xml_mshr_station_additional(station_id)
-
-     xml_station += self.default_xml_end_tag("station", 1)
-     return xml_station
-
- def default_xml_day_reading_as_field(self, row, day):
-     """Return one day's reading as an element named after its sensor id.
-
-     Returns "" when the day's value is "-9999".  Temperature ids yield a
-     Celsius and a Fahrenheit element, ids stored in tenths are divided by
-     ten, a fixed list of ids is copied verbatim, and any other id yields
-     an <unknown> element.
-     """
-     temperature_ids = ("MDTN", "MDTX", "MNPN", "MXPN", "TMAX", "TMIN", "TOBS",)
-     tenths_ids = ("AWND", "EVAP", "PRCP", "THIC", "WESD", "WESF", "WSF1", "WSF2", "WSF5", "WSFG", "WSFI", "WSFM",)
-     # Percentages, HHMM time values and fields needing no alteration are
-     # all written out as they are, stripped of surrounding whitespace.
-     verbatim_ids = (
-         ("ACMC", "ACMH", "ACSC", "ACSH", "PSUN",)
-         + ("FMTM", "PGTM",)
-         + ("DAEV", "DAPR", "DASF", "DATN", "DATX", "DAWM", "DWPR", "FRGB", "FRGT", "FRTH", "GAHT", "MDSF", "MDWM", "MDEV", "MDPR", "SNOW", "SNWD", "TSUN", "WDF1", "WDF2", "WDF5", "WDFG", "WDFI", "WDFM", "WDMV",)
-     )
-
-     value = self.get_dly_field(row, DLY_FIELD_DAY_OFFSET + ((day - 1) * DLY_FIELD_DAY_FIELDS))
-     if value == "-9999":
-         return ""
-
-     field_id = self.get_dly_field(row, DLY_FIELD_ELEMENT)
-     open_tag = " <" + field_id
-     close_tag = "</" + field_id
-
-     if field_id in temperature_ids:
-         # Add both the celsius and fahrenheit temperatures.
-         celsius = float(value) / 10
-         fahrenheit = self.convert_c2f(celsius)
-         return (open_tag + "_c>" + str(celsius) + close_tag + "_c>\n"
-                 + open_tag + "_f>" + str(fahrenheit) + close_tag + "_f>\n")
-     if field_id in tenths_ids:
-         # Field values that are in tenths.
-         return open_tag + ">" + str(float(value) / 10) + close_tag + ">\n"
-     if field_id in verbatim_ids:
-         return open_tag + ">" + value.strip() + close_tag + ">\n"
-     return " <unknown>" + field_id + "</unknown>\n"
-
- def default_xml_day_reading(self, row, day, indent=2):
-     """Return one day's reading in the web service layout.
-
-     Emits dataType, station, value and an attributes element holding the
-     three flags that follow the value plus one empty attribute.  Returns
-     "" when the day's value is "-9999".
-     """
-     # The value is followed by its three flag fields.
-     first = DLY_FIELD_DAY_OFFSET + ((day - 1) * DLY_FIELD_DAY_FIELDS)
-     value, mflag, qflag, sflag = [self.get_dly_field(row, first + step) for step in range(4)]
-     if value == "-9999":
-         return ""
-
-     pad = self.get_indent_space(indent)
-     field_id = self.get_dly_field(row, DLY_FIELD_ELEMENT)
-     station_id = "GHCND:" + self.get_dly_field(row, DLY_FIELD_ID)
-
-     lines = [
-         pad + "<dataType>" + field_id + "</dataType>\n",
-         pad + "<station>" + station_id + "</station>\n",
-         pad + "<value>" + value.strip() + "</value>\n",
-         pad + "<attributes>\n",
-     ]
-     for flag in (mflag, qflag, sflag):
-         lines.append(pad + pad + "<attribute>" + flag.strip() + "</attribute>\n")
-     lines.append(pad + pad + "<attribute></attribute>\n")
-     lines.append(pad + "</attributes>\n")
-     return "".join(lines)
-
- def default_xml_end(self):
-     """Return the closing ghcnd_observation tag, without a newline."""
-     closing_tag = "</ghcnd_observation>"
-     return textwrap.dedent(closing_tag)
-
- def default_xml_data_end(self):
-     """Return the unindented closing dataCollection tag."""
-     closing = self.default_xml_end_tag("dataCollection", 0)
-     return closing
-
- def default_xml_station_end(self):
-     """Return the unindented closing stationCollection tag."""
-     closing = self.default_xml_end_tag("stationCollection", 0)
-     return closing
-
- def default_xml_element(self, tag, data, indent=1):
-     """Return "<tag>data</tag>" on one indented, newline-terminated line.
-
-     data is inserted as given; nothing is escaped.
-     """
-     pad = self.get_indent_space(indent)
-     opening = "<" + tag + ">"
-     closing = "</" + tag + ">\n"
-     return pad + opening + data + closing
-
- def default_xml_start_tag(self, tag, indent=1):
-     """Return the indented opening tag followed by a newline."""
-     pad = self.get_indent_space(indent)
-     return "".join([pad, "<", tag, ">\n"])
-
- def default_xml_end_tag(self, tag, indent=1):
-     """Return the indented closing tag followed by a newline."""
-     pad = self.get_indent_space(indent)
-     return "".join([pad, "</", tag, ">\n"])
-
- def get_indent_space(self, indent):
-     """Return the whitespace for indent levels, four characters per level."""
-     width = 4 * indent
-     return " " * width
-
-
-class WeatherWebServiceMonthlyXMLFile(WeatherConvertToXML):
- """The web service class details how to create files similar to the NOAA web service."""
- skip_downloading = False
- # Station data
- def process_station_data(self, row):
-     """Adds a single station record file either from downloading the data or generating a similar record.
-
-     Returns the download_station_data() result when it is non-zero;
-     otherwise 1 once a generated file has been saved, or 0.
-     """
-     station_id = self.get_dly_field(row, DLY_FIELD_ID)
-     download = 0
-     # Strings are compared by value: "is not" tests object identity, whose
-     # result for string literals depends on the interpreter.
-     if self.token != "" and not self.skip_downloading:
-         download = self.download_station_data(station_id, self.token, True)
-         if download == 0:
-             # After the first failed download, stop trying on this instance.
-             self.skip_downloading = True
-
-     if download != 0:
-         return download
-
-     # If not downloaded, generate.
-     station_xml_file = self.default_xml_web_service_start()
-     station_xml_file += self.default_xml_station_start()
-     station_xml_file += self.default_xml_web_service_station(station_id)
-     station_xml_file += self.default_xml_station_end()
-
-     # Remove white space.
-     station_xml_file = station_xml_file.replace("\n", "")
-     station_xml_file = station_xml_file.replace(self.get_indent_space(1), "")
-
-     # Make sure the station folder is available.
-     ghcnd_xml_station_path = self.get_base_folder(station_id, "stations")
-     if not os.path.isdir(ghcnd_xml_station_path):
-         os.makedirs(ghcnd_xml_station_path)
-
-     # Save XML string to disk.
-     save_file_name = ghcnd_xml_station_path + station_id + ".xml"
-     save_file_name = self.save_file(save_file_name, station_xml_file)
-
-     if save_file_name != "":
-         if self.debug_output:
-             print("Wrote file: " + save_file_name)
-         return 1
-     return 0
-
- # Station data
- def download_station_data(self, station_id, token, reset=False):
-     """Downloads the station data from the web service.
-
-     Returns 2 when the response was saved to the station folder, and 0
-     when the response contains "<cdoError>" or no file name came back.
-     The reset argument is accepted but not used.
-     """
-     import time
-     # Wait two seconds before every request.
-     time.sleep(2)
-     # Make sure the station folder is available.
-     ghcnd_xml_station_path = self.get_base_folder(station_id, "stations")
-     if not os.path.isdir(ghcnd_xml_station_path):
-         os.makedirs(ghcnd_xml_station_path)
-
-     # Build download URL.
-     url = "http://www.ncdc.noaa.gov/cdo-services/services/datasets/GHCND/stations/GHCND:" + station_id + ".xml?token=" + token
-     url_file = urllib.urlopen(url)
-     try:
-         # Read the whole response in one call.
-         station_xml_file = url_file.read()
-     finally:
-         # Release the connection even when reading fails.
-         url_file.close()
-
-     if station_xml_file.find("<cdoError>") != -1:
-         if self.debug_output:
-             print("Error in station download")
-         return 0
-
-     # Save XML string to disk.
-     save_file_name = ghcnd_xml_station_path + station_id + ".xml"
-     save_file_name = self.save_file(save_file_name, station_xml_file)
-
-     # Compare by value; "is not" on a string literal tests identity.
-     if save_file_name != "":
-         if self.debug_output:
-             print("Wrote file: " + save_file_name)
-         return 2
-     return 0
-
- # Sensor data
- def process_one_month_sensor_set(self, records, page):
-     """Generates records for a station using the web service xml layout.
-
-     records -- rows for one station and month; year, month and station id
-                are read from records[0].
-     page -- page number used in the saved file name.
-     Returns 1 when a file was written, 0 when no day held data.
-     """
-     year = int(self.get_dly_field(records[0], DLY_FIELD_YEAR))
-     month = int(self.get_dly_field(records[0], DLY_FIELD_MONTH))
-     station_id = self.get_dly_field(records[0], DLY_FIELD_ID)
-
-     # Information for each daily file.
-     count = 0
-     data_snippets = []
-     report_date = None
-
-     for day in range(1, 32):
-         # TODO find out what is a valid python date range? 1889?
-         try:
-             # Only the date construction is guarded: it rejects days
-             # that do not exist in this month.
-             current_date = date(year, month, day)
-         except ValueError:
-             continue
-         report_date = current_date
-
-         for record in records:
-             record_xml_snip = self.default_xml_day_reading(record, report_date.day)
-             # Compare by value; "is not" on a string literal tests identity.
-             if record_xml_snip != "":
-                 data_snippets.append(self.default_xml_start_tag("data"))
-                 data_snippets.append(self.default_xml_field_date(report_date))
-                 data_snippets.append(record_xml_snip)
-                 data_snippets.append(self.default_xml_end_tag("data"))
-                 count += 1
-
-     if count == 0:
-         return 0
-
-     daily_xml_file = (self.default_xml_web_service_start()
-                       + self.default_xml_data_start(count)
-                       + "".join(data_snippets)
-                       + self.default_xml_data_end())
-     # Remove white space.
-     daily_xml_file = daily_xml_file.replace("\n", "")
-     daily_xml_file = daily_xml_file.replace(self.get_indent_space(1), "")
-
-     # Make sure the station folder is available.
-     ghcnd_xml_station_path = self.get_base_folder(station_id) + "/" + station_id + "/" + str(report_date.year) + "/"
-     if not os.path.isdir(ghcnd_xml_station_path):
-         os.makedirs(ghcnd_xml_station_path)
-
-     # Save XML string to disk.
-     save_file_name = ghcnd_xml_station_path + build_sensor_save_filename(station_id, report_date, page)
-     save_file_name = self.save_file(save_file_name, daily_xml_file)
-
-     if save_file_name != "":
-         if self.debug_output:
-             print("Wrote file: " + save_file_name)
-         return 1
-     return 0
-
-def build_base_save_folder(save_path, station_id, data_type="sensors"):
-    """Return save_path + data_type + "/" + first three id characters + "/".
-
-    No separator is inserted between save_path and data_type.
-    """
-    return "".join([save_path, data_type, "/", station_id[:3], "/"])
-
-def build_sensor_save_filename(station_id, report_date, page):
-    """Return "<station_id>_<YYYY><MM>_<page>.xml" for report_date."""
-    year_text = str(report_date.year).zfill(4)
-    month_text = str(report_date.month).zfill(2)
-    return "".join([station_id, "_", year_text, month_text, "_", str(page), ".xml"])
-
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
deleted file mode 100644
index 4877120..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
+++ /dev/null
@@ -1,416 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import glob
-import os.path
-import linecache
-import distutils.core
-
-from weather_convert_to_xml import *
-from collections import OrderedDict
-
-# Weather data files created to manage the conversion process.
-# Allows partition and picking up where you left off.
-class WeatherDataFiles:
-
- LARGE_FILE_ROOT_TAG = "root"
-
- INDEX_DATA_FILE_NAME = 0
- INDEX_DATA_SENSORS_STATUS = 1
- INDEX_DATA_STATION_STATUS = 2
- INDEX_DATA_FILE_COUNT = 3
- INDEX_DATA_FOLDER_DATA = 4
-
- DATA_FILE_START_INDEX = 0
- DATA_FILE_EXTENSION = ".dly"
- DATA_FILE_MISSING = "missing"
- DATA_FILE_INITIAL = "initialized"
- DATA_FILE_DOWNLOADED = "downloaded"
- DATA_FILE_GENERATED = "generated"
- SEPERATOR = ","
-
- type = "sensor"
- data_reset = False
-
- def __init__(self, base_path, progress_file_name="/tmp/_weather_data.csv"):
-     """Remember the data folder and progress file; start with no rows loaded."""
-     self.base_path = base_path
-     self.progress_file_name = progress_file_name
-
-     # Iteration state: the current position and the loaded progress rows.
-     self.progress_data = []
-     self.current = self.DATA_FILE_START_INDEX
-
- def get_file_list_iterator(self):
-     """Return the list of files one at a time."""
-     pattern = self.base_path + "/*" + self.DATA_FILE_EXTENSION
-     return glob.iglob(pattern)
-
- # Save Functions
- def build_progress_file(self, options, convert):
-     """Create or refresh the progress CSV, then reload it.
-
-     options -- container of option names; 'reset', 'append' and
-                'recalculate' are recognised.
-     convert -- object whose get_base_folder(station_id) names the folder
-                to measure when recalculating.
-
-     A missing file or 'reset' rebuilds the CSV from the data folder.
-     'recalculate' refreshes file count and size of every existing row,
-     and 'append' adds a row for each data file not listed yet.
-     """
-     if not os.path.isfile(self.progress_file_name) or 'reset' in options:
-         # Build a new file.
-         with open(self.progress_file_name, 'w') as progress_file:
-             progress_file.write(self.get_default_progress_file_csv())
-     elif 'append' in options or 'recalculate' in options:
-         self.open_progress_data()
-
-         if 'recalculate' in options:
-             for row in range(len(self.progress_data)):
-                 row_contents = self.progress_data[row].rsplit(self.SEPERATOR)
-                 file_name = row_contents[self.INDEX_DATA_FILE_NAME]
-                 # The folder is hard coded
-                 station_id = os.path.basename(file_name).split('.')[0]
-                 folder_name = convert.get_base_folder(station_id)
-                 if os.path.exists(folder_name):
-                     sensor_status = row_contents[self.INDEX_DATA_SENSORS_STATUS]
-                     station_status = row_contents[self.INDEX_DATA_STATION_STATUS]
-                     file_count = self.get_file_count(folder_name)
-                     data_size = self.get_folder_size(folder_name)
-                     self.progress_data[row] = self.get_progress_csv_row(file_name, sensor_status, station_status, file_count, data_size)
-                 else:
-                     self.progress_data[row] = self.get_progress_csv_row(file_name, self.DATA_FILE_INITIAL, self.DATA_FILE_INITIAL)
-
-         if 'append' in options:
-             # Look for new files in the data folder.  The previous check
-             # searched for names taken from the progress data itself, so
-             # it always found them and never appended anything.
-             for path in self.get_file_list_iterator():
-                 file_name = os.path.basename(path)
-                 if self.get_file_row(file_name) < 0:
-                     self.progress_data.append(self.get_progress_csv_row(file_name, self.DATA_FILE_INITIAL, self.DATA_FILE_INITIAL))
-
-         # Save file
-         self.close_progress_data(True)
-     self.reset()
-
- def copy_to_n_partitions(self, save_path, partitions, base_paths, reset):
-     """Once the initial data has been generated, the data can be copied into a set number of partitions.
-
-     Sensor XML files and station XML files are each dealt out round-robin
-     over the paths returned by get_partition_paths().  Does nothing when
-     base_paths is empty.  reset is passed on to prepare_path().
-     """
-     if len(base_paths) == 0:
-         return
-
-     import fnmatch
-     import os
-     # Imported locally, like the modules above: no shutil import is
-     # visible in this file's import block.
-     import shutil
-
-     # Initialize the partition paths.
-     partition_paths = get_partition_paths(0, partitions, base_paths)
-     for path in partition_paths:
-         # Make sure the xml folder is available.
-         prepare_path(path, reset)
-     partition_count = len(partition_paths)
-
-     # copy stations and sensors into each partition
-     current_sensor_partition = 0
-     current_station_partition = 0
-     self.open_progress_data()
-     for progress_row in self.progress_data:
-         file_name = progress_row.rsplit(self.SEPERATOR)[self.INDEX_DATA_FILE_NAME]
-         station_id = os.path.basename(file_name).split('.')[0]
-
-         # Copy sensor files
-         sensor_folder = build_base_save_folder(save_path, station_id, "sensors") + station_id
-         for root, dirnames, filenames in os.walk(sensor_folder):
-             for filename in fnmatch.filter(filenames, '*.xml'):
-                 xml_path = os.path.join(root, filename)
-                 new_file_base = build_base_save_folder(partition_paths[current_sensor_partition], station_id, "sensors") + station_id
-                 if not os.path.isdir(new_file_base):
-                     os.makedirs(new_file_base)
-                 shutil.copyfile(xml_path, new_file_base + "/" + filename)
-                 current_sensor_partition = (current_sensor_partition + 1) % partition_count
-
-         # Copy station files
-         station_file = build_base_save_folder(save_path, station_id, "stations") + station_id + ".xml"
-         if os.path.isfile(station_file):
-             new_file_base = build_base_save_folder(partition_paths[current_station_partition], station_id, "stations")
-             if not os.path.isdir(new_file_base):
-                 os.makedirs(new_file_base)
-             shutil.copyfile(station_file, new_file_base + station_id + ".xml")
-             current_station_partition = (current_station_partition + 1) % partition_count
-
- def build_to_n_partition_files(self, save_path, partitions, base_paths, reset):
-     """Once the initial data has been generated, the data can be divided into partitions
-     and stored in single files.
-
-     For "sensors" and for "stations", one partition.xml per partition
-     path is written, wrapped in the LARGE_FILE_ROOT_TAG element; the
-     source XML files are appended round-robin with their XML declaration
-     removed.  Does nothing when base_paths is empty or no partition path
-     is returned.
-     """
-     if len(base_paths) == 0:
-         return
-
-     import fnmatch
-     import os
-
-     XML_START = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>"
-
-     partition_paths = get_partition_paths(0, partitions, base_paths)
-     if not partition_paths:
-         return
-
-     for path in partition_paths:
-         prepare_path(path, reset)
-
-     self.open_progress_data()
-
-     for data_type in ("sensors", "stations"):
-         partition_files = []
-         try:
-             # Initialize the partition files.
-             for path in partition_paths:
-                 # Make sure the xml folder is available.
-                 prepare_path(path + data_type + "/", False)
-                 partition_file = open(path + data_type + "/partition.xml", 'w')
-                 partition_files.append(partition_file)
-                 partition_file.write(XML_START + "<" + self.LARGE_FILE_ROOT_TAG + ">\n")
-
-             # copy into each partition
-             current_partition = 0
-             for progress_row in self.progress_data:
-                 file_name = progress_row.rsplit(self.SEPERATOR)[self.INDEX_DATA_FILE_NAME]
-                 station_id = os.path.basename(file_name).split('.')[0]
-
-                 if data_type == "sensors":
-                     sensor_folder = build_base_save_folder(save_path, station_id, data_type) + station_id
-                     xml_paths = [
-                         os.path.join(root, filename)
-                         for root, dirnames, filenames in os.walk(sensor_folder)
-                         for filename in fnmatch.filter(filenames, '*.xml')
-                     ]
-                 else:
-                     # Use the station path directly.  It used to be
-                     # joined onto a "root" left over from the sensors
-                     # walk, which may be stale or undefined.
-                     station_file = build_base_save_folder(save_path, station_id, data_type) + station_id + ".xml"
-                     # Stations without a file are skipped, as in
-                     # copy_to_n_partitions().
-                     xml_paths = [station_file] if os.path.isfile(station_file) else []
-
-                 for xml_path in xml_paths:
-                     xml_data = file_get_contents(xml_path).replace(XML_START, "") + "\n"
-                     partition_files[current_partition].write(xml_data)
-                     current_partition = (current_partition + 1) % len(partition_files)
-
-             for partition_file in partition_files:
-                 partition_file.write("</" + self.LARGE_FILE_ROOT_TAG + ">\n")
-         finally:
-             # Close every opened partition file, also after an error.
-             for partition_file in partition_files:
-                 partition_file.close()
-
- def get_file_row(self, file_name):
-     """Return the index of the first progress row starting with file_name, or -1."""
-     for position, line in enumerate(self.progress_data):
-         if line.startswith(file_name):
-             return position
-     return -1
-
- def get_default_progress_file_csv(self):
-     """Return one "initialized" progress row per data file found."""
-     initial = self.DATA_FILE_INITIAL
-     return "".join(
-         self.get_progress_csv_row(os.path.basename(path), initial, initial)
-         for path in self.get_file_list_iterator()
-     )
-
- def print_progress_file_stats(self, convert):
-     """Print totals from the progress CSV next to totals measured on disk.
-
-     convert -- object whose get_base_folder(station_id) names the folder
-                to measure for each progress row.
-     """
-     sensor_count_missing = 0
-     sensor_count = 0
-     file_count = 0
-     data_size = 0
-
-     sensor_count_actual = 0
-     file_count_actual = 0
-     data_size_actual = 0
-
-     station_count_missing = 0
-     station_count_generated = 0
-     station_count_downloaded = 0
-
-     self.open_progress_data()
-     for progress_row in self.progress_data:
-         row_contents = progress_row.rsplit(self.SEPERATOR)
-         row_file_count = int(row_contents[self.INDEX_DATA_FILE_COUNT])
-         row_data_size = int(row_contents[self.INDEX_DATA_FOLDER_DATA])
-         # -1 in either column marks a row without recorded sensor data.
-         if row_file_count != -1 and row_data_size != -1:
-             sensor_count += 1
-             file_count += row_file_count
-             data_size += row_data_size
-         else:
-             sensor_count_missing += 1
-
-         # One chain, so a generated station is not also counted as missing.
-         station_status = row_contents[self.INDEX_DATA_STATION_STATUS]
-         if station_status == "generated":
-             station_count_generated += 1
-         elif station_status == "downloaded":
-             station_count_downloaded += 1
-         else:
-             station_count_missing += 1
-
-         file_name = row_contents[self.INDEX_DATA_FILE_NAME]
-         station_id = os.path.basename(file_name).split('.')[0]
-         folder_name = convert.get_base_folder(station_id)
-         if os.path.exists(folder_name):
-             sensor_count_actual += 1
-             file_count_actual += self.get_file_count(folder_name)
-             data_size_actual += self.get_folder_size(folder_name)
-
-     print("Progress File:\t" + self.progress_file_name + "\n")
-
-     print("CSV DETAILS OF PROCESSED SENSORS")
-     print("Number of stations:\t" + "{:,}".format(sensor_count))
-     print("Number of files:\t" + "{:,}".format(file_count))
-     print("Data size:\t\t" + "{:,}".format(data_size) + " Bytes\n")
-
-     print("CSV DETAILS OF unPROCESSED SENSORS")
-     print("Number of stations:\t" + "{:,}".format(sensor_count_missing) + "\n")
-
-     print("CSV DETAILS OF PROCESSED STATIONS")
-     print("Generated:\t\t" + "{:,}".format(station_count_generated))
-     print("Downloaded:\t\t" + "{:,}".format(station_count_downloaded))
-     print("Missing:\t\t" + "{:,}".format(station_count_missing) + "\n")
-
-     print("FOLDER DETAILS")
-     print("Number of stations:\t" + "{:,}".format(sensor_count_actual))
-     print("Number of files:\t" + "{:,}".format(file_count_actual))
-     print("Data size:\t\t" + "{:,}".format(data_size_actual) + " Bytes\n")
-
-
- def get_progress_csv_row(self, file_name, sensors_status, station_status, file_count=-1, data_size=-1):
-     """Return one newline-terminated progress row.
-
-     Column order: file name, sensor status, station status, file count,
-     data size.
-     """
-     columns = [file_name, sensors_status, station_status, str(file_count), str(data_size)]
-     return self.SEPERATOR.join(columns) + "\n"
-
- def update_file_sensor_status(self, file_name, sensors_status, file_count=-1, data_size=-1):
-     """Rewrite the first row starting with file_name, then save the progress file.
-
-     The row keeps its station status; sensor status, file count and data
-     size are replaced.  The file is saved even when no row matches.
-     """
-     for position, line in enumerate(self.progress_data):
-         if not line.startswith(file_name):
-             continue
-         station_status = line.rsplit(self.SEPERATOR)[self.INDEX_DATA_STATION_STATUS]
-         self.progress_data[position] = self.get_progress_csv_row(file_name, sensors_status, station_status, file_count, data_size)
-         break
-
-     # Save the file
-     self.close_progress_data(True)
-
- def update_file_station_status(self, file_name, station_status):
- for row in range(0, len(self.progress_data)):
- if self.progress_data[row].startswith(file_name):
- row_contents = self.progress_data[row].rsplit(self.SEPERATOR)
- sensors_status = row_contents[self.INDEX_DATA_SENSORS_STATUS]
- file_count = int(row_contents[self.INDEX_DATA_FILE_COUNT])
- data_size = int(row_contents[self.INDEX_DATA_FOLDER_DATA])
- self.progress_data[row] = self.get_progress_csv_row(file_name, sensors_status, station_status, file_count, data_size)
- break
-
- # Save the file
- self.close_progress_data(True)
-
- def get_file_count(self, folder_name):
- count = 0
- for dirpath, dirnames, filenames in os.walk(folder_name):
- for f in filenames:
- count += 1
- return count
-
- def get_folder_size(self, folder_name):
- total_size = 0
- for dirpath, dirnames, filenames in os.walk(folder_name):
- for f in filenames:
- fp = os.path.join(dirpath, f)
- total_size += os.path.getsize(fp)
- return total_size
-
- def get_station_status(self, return_value):
- if return_value == 2:
- return self.DATA_FILE_DOWNLOADED
- elif return_value == 1:
- return self.DATA_FILE_GENERATED
- return self.DATA_FILE_MISSING
-
-
- def open_progress_data(self):
- with open(self.progress_file_name, 'r') as file:
- self.progress_data = file.readlines()
-
- def close_progress_data(self, force=False):
- if len(self.progress_data) > 0 or force:
- with open(self.progress_file_name, 'w') as file:
- file.writelines(self.progress_data)
-
-
- def reset(self):
- self.close_progress_data()
-
- self.current = self.DATA_FILE_START_INDEX
- self.open_progress_data()
-
- def set_type(self, type):
- self.type = type
-
- def set_data_reset(self, data_reset):
- self.data_reset = data_reset
-
-
- # Iterator Functions
- def __iter__(self):
- return self
-
- def next(self):
- columns = []
- while True:
- # find a row that has not been created.
- if self.current >= len(self.progress_data):
- raise StopIteration
- row = self.progress_data[self.current]
- self.current += 1
- columns = row.rsplit(self.SEPERATOR)
- if self.type == "sensor" and (columns[self.INDEX_DATA_SENSORS_STATUS].strip() != self.DATA_FILE_GENERATED or self.data_reset):
- break
- elif self.type == "station" and (columns[self.INDEX_DATA_STATION_STATUS].strip() != self.DATA_FILE_DOWNLOADED or self.data_reset):
- break
- return columns[self.INDEX_DATA_FILE_NAME]
-
-
-# Index values of each field details.
-PARTITION_INDEX_NODE = 0
-PARTITION_INDEX_DISK = 1
-PARTITION_INDEX_VIRTUAL = 2
-PARTITION_INDEX = 3
-PARTITION_INDEX_PATH = 4
-PARTITION_HEADER = ("Node", "Disk", "Virtual", "Index", "Path")
-
-def get_partition_paths(node_id, partitions, base_paths, key="partitions"):
- partition_paths = []
- for scheme in get_partition_scheme(node_id, partitions, base_paths, key):
- partition_paths.append(scheme[PARTITION_INDEX_PATH])
- return partition_paths
-
-def get_partition_scheme(node_id, virtual_partitions, base_paths, key="partitions"):
- partitions_per_disk = virtual_partitions / len(base_paths)
- return get_disk_partition_scheme(node_id, partitions_per_disk, base_paths, key)
-
-def get_disk_partition_paths(node_id, partitions, base_paths, key="partitions"):
- partition_paths = []
- for scheme in get_disk_partition_scheme(node_id, partitions, base_paths, key):
- partition_paths.append(scheme[PARTITION_INDEX_PATH])
- return partition_paths
-
-def get_disk_partition_scheme(node_id, virtual_disk_partitions, base_paths, key="partitions"):
- partition_scheme = []
- for i in range(0, virtual_disk_partitions):
- for j in range(0, len(base_paths)):
- new_partition_path = base_paths[j] + key + "/" + get_partition_folder(j, virtual_disk_partitions, i) + "/"
- partition_scheme.append((node_id, j, virtual_disk_partitions, i, new_partition_path))
- return partition_scheme
-
-def get_partition_folder(disks, partitions, index):
- return "d" + str(disks) + "_p" + str(partitions) + "_i" + str(index)
-
-def prepare_path(path, reset):
- """Ensures the directory is available. If reset, then its a brand new directory."""
- if os.path.isdir(path) and reset:
- shutil.rmtree(path)
-
- if not os.path.isdir(path):
- os.makedirs(path)
-
-def file_get_contents(filename):
- with open(filename) as f:
- return f.read()
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
deleted file mode 100644
index fb59b50..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
+++ /dev/null
@@ -1,102 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import sys
-import os.path
-import shutil
-import tarfile
-import urllib
-import zipfile
-
-# Custom modules.
-from weather_config_ghcnd import *
-from weather_config_mshr import *
-
-class WeatherDownloadFiles:
-
- def __init__(self, save_path):
- self.save_path = save_path
-
- if not os.path.isdir(save_path):
- os.makedirs(save_path)
-
-
- def download_ghcnd_files(self, reset=False):
- """Download the complete list."""
- for file_name in FILE_NAMES:
- url = BASE_DOWNLOAD_URL + file_name
- self.download_file(url, reset)
-
- def download_mshr_files(self, reset=False):
- for url in MSHR_URLS:
- self.download_file(url, reset)
-
- def download_file(self, url, reset=False):
- """Download the file, unless it exists."""
- file_name = self.save_path + "/" + url.split('/')[-1]
-
- if not os.path.isfile(file_name) or reset:
- print "Downloading: " + url
- urllib.urlretrieve(url, file_name, report_download_status)
- print
-
- def unzip_ghcnd_package(self, package, reset=False):
- """Unzip the package file, unless it exists."""
- file_name = self.save_path + "/" + package + ".tar.gz"
- unzipped_path = self.save_path + "/" + package
-
- if os.path.isdir(unzipped_path) and reset:
- shutil.rmtree(unzipped_path)
-
- if not os.path.isdir(unzipped_path):
- print "Unzipping: " + file_name
- tar_file = tarfile.open(file_name, 'r:gz')
- tar_file.extractall(unzipped_path)
-
- def unzip_mshr_files(self, reset=False):
- """Unzip the package file, unless it exists."""
- for url in MSHR_URLS:
- if url.endswith('.zip'):
- file_name = self.save_path + "/" + url.split('/')[-1]
- print "Unzipping: " + file_name
- with zipfile.ZipFile(file_name, 'r') as myzip:
- myzip.extractall(self.save_path)
-
-def report_download_status(count, block, size):
- """Report download status."""
- line_size = 50
- erase = "\b" * line_size
- sys.stdout.write(erase)
- report = get_report_line((float(count) * block / size), line_size)
- sys.stdout.write(report)
-
-def get_report_line(percentage, line_size):
- """Creates a string to be used in reporting the percentage done."""
- report = ""
- for i in range(0, line_size):
- if (float(i) / line_size < percentage):
- report += "="
- else:
- report += "-"
- return report
-
-def download_file_save_as(url, new_file_name, reset=False):
- """Download the file, unless it exists."""
- if not os.path.isfile(new_file_name) or reset:
- print "Downloading: " + url
- urllib.urlretrieve(url, new_file_name, report_download_status)
- print
-
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/util/README.md
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/README.md b/vxquery-benchmark/src/main/resources/util/README.md
deleted file mode 100644
index 8e2a204..0000000
--- a/vxquery-benchmark/src/main/resources/util/README.md
+++ /dev/null
@@ -1,28 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-Utilities for Benchmark Operations
-=====================
-
-# Introduction
-
-Helpful scripts or configuration document to work with the benchmarks.
-
-## Saxon Collection
-
-To test the data with other XQuery processors, the saxon script helps with
-creating a collection.xml file.
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/util/build_saxon_collection_xml.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/build_saxon_collection_xml.py b/vxquery-benchmark/src/main/resources/util/build_saxon_collection_xml.py
deleted file mode 100644
index 02f39ee..0000000
--- a/vxquery-benchmark/src/main/resources/util/build_saxon_collection_xml.py
+++ /dev/null
@@ -1,63 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import getopt, glob, os, sys
-
-def main(argv):
- xml_folder = ""
-
- # Get the base folder
- try:
- opts, args = getopt.getopt(argv, "f:h", ["folder="])
- except getopt.GetoptError:
- print 'The file options for build_saxon_collection_xml.py were not correctly specified.'
- print 'To see a full list of options try:'
- print ' $ python build_saxon_collection_xml.py -h'
- sys.exit(2)
- for opt, arg in opts:
- if opt == '-h':
- print 'Options:'
- print ' -f The base folder to create collection XML file.'
- sys.exit()
- elif opt in ('-f', "--folder"):
- # check if file exists.
- if os.path.exists(arg):
- xml_folder = arg
- else:
- print 'Error: Argument must be a folder name for --folder (-f).'
- sys.exit()
-
- # Required fields to run the script.
- if xml_folder == "" or not os.path.exists(xml_folder):
- print 'Error: The folder path option must be supplied: --folder (-f).'
- sys.exit()
-
- # find all XML files in folder
- collection_xml = "<collection>"
- for i in range(1, 5):
- # Search the ith directory level.
- search_pattern = xml_folder + ('/*' * i) + '.xml'
- for file_path in glob.iglob(search_pattern):
- collection_xml += '<doc href="' + str.replace(file_path, xml_folder, '') + '"/>'
- collection_xml += "</collection>"
-
- # create collection XML
- file = open('collection.xml', 'w')
- file.write(collection_xml)
- file.close()
-
-if __name__ == "__main__":
- main(sys.argv[1:])
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/util/find_averages_in_logs.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/find_averages_in_logs.py b/vxquery-benchmark/src/main/resources/util/find_averages_in_logs.py
deleted file mode 100644
index 1cd7939..0000000
--- a/vxquery-benchmark/src/main/resources/util/find_averages_in_logs.py
+++ /dev/null
@@ -1,97 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import fnmatch
-import getopt
-import glob
-import os
-import sys
-import csv
-
-SEARCH_STRING = 'Average execution time:'
-
-def find_files(directory, pattern):
- for root, dirs, files in os.walk(directory):
- for basename in files:
- if fnmatch.fnmatch(basename, pattern):
- yield (root, basename)
-
-
-def main(argv):
- ''' Same as bash: find $FOLDER -type f -name "*.xml" -exec basename {} \; > list_xml.csv
- '''
- log_folder = ""
- save_file = ""
- data_type = ""
-
- # Get the base folder
- try:
- opts, args = getopt.getopt(argv, "f:hs:t:", ["folder=", "save_file=", "data_type="])
- except getopt.GetoptError:
- print 'The file options for list_xml_files.py were not correctly specified.'
- print 'To see a full list of options try:'
- print ' $ python list_xml_files.py -h'
- sys.exit(2)
- for opt, arg in opts:
- if opt == '-h':
- print 'Options:'
- print ' -f The base folder to build XML file list.'
- print ' -s The save file.'
- sys.exit()
- elif opt in ('-f', "--folder"):
- # check if file exists.
- if os.path.exists(arg):
- log_folder = arg
- else:
- print 'Error: Argument must be a folder name for --folder (-f).'
- sys.exit()
- elif opt in ('-s', "--save_file"):
- save_file = arg
- elif opt in ('-t', "--data_type"):
- data_type = arg
-
- # Required fields to run the script.
- if log_folder == "" or not os.path.exists(log_folder):
- print 'Error: The folder path option must be supplied: --folder (-f).'
- sys.exit()
- if save_file == "":
- print 'Error: The folder path option must be supplied: --save_file (-s).'
- sys.exit()
-
- list_xml_csv = ''
- with open(save_file, 'w') as outfile:
- csvfile = csv.writer(outfile)
- for path, filename in find_files(log_folder, '*.log'):
- # Only write out a specific type of data xml documents found in a specific path.
- with open(path + "/" + filename) as infile:
- folders = path.replace(log_folder, "")
- for line in infile:
- # Skip the root tags.
- if line.startswith(SEARCH_STRING):
- time_split = line.split(" ")
- name_split = filename.split(".")
- folder_split = folders.split("/")
-
- # Build data row
- row = folder_split
- row.append(name_split[0])
- row.append(time_split[3])
- row.append(name_split[2])
- csvfile.writerow(row)
-
-
-if __name__ == "__main__":
- main(sys.argv[1:])
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/util/log_top.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/log_top.sh b/vxquery-benchmark/src/main/resources/util/log_top.sh
deleted file mode 100755
index 4a2f7e1..0000000
--- a/vxquery-benchmark/src/main/resources/util/log_top.sh
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-LOG_FILE=logs/top.log
-
-# Reset counters
-iostat >> /dev/null
-sar -n DEV 1 1 >> /dev/null
-
-# Save IO, CPU and Network snapshot to a log file.
-while (sleep 7)
-do
- echo "---------------------------------------------" >> ${LOG_FILE}
- date >> ${LOG_FILE}
- echo >> ${LOG_FILE}
- iostat -y 1 1 >> ${LOG_FILE}
- top -n 1 -b | head -11 | tail -6 >> ${LOG_FILE}
- sar -n DEV 1 1 >> ${LOG_FILE}
-done;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/util/merge_xml_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/merge_xml_files.py b/vxquery-benchmark/src/main/resources/util/merge_xml_files.py
deleted file mode 100644
index 9238a19..0000000
--- a/vxquery-benchmark/src/main/resources/util/merge_xml_files.py
+++ /dev/null
@@ -1,88 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import fnmatch
-import getopt
-import glob
-import os
-import sys
-
-XML_PREFIX = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?><root>' + "\n"
-XML_SUFFIX = '</root>' + "\n"
-
-def find_files(directory, pattern):
- for root, dirs, files in os.walk(directory):
- for basename in files:
- if fnmatch.fnmatch(basename, pattern):
- yield (root, basename)
-
-
-def main(argv):
- ''' Same as bash: find $FOLDER -type f -name "*.xml" -exec basename {} \; > list_xml.csv
- '''
- xml_folder = ""
- save_file = ""
- data_type = ""
-
- # Get the base folder
- try:
- opts, args = getopt.getopt(argv, "f:hs:t:", ["folder=", "save_file=", "data_type="])
- except getopt.GetoptError:
- print 'The file options for list_xml_files.py were not correctly specified.'
- print 'To see a full list of options try:'
- print ' $ python merge_xml_files.py -f /path/to/folder -s new.xml -t sensors'
- sys.exit(2)
- for opt, arg in opts:
- if opt == '-h':
- print 'Options:'
- print ' -f The base folder to build XML file list.'
- print ' -s The save file.'
- sys.exit()
- elif opt in ('-f', "--folder"):
- # check if file exists.
- if os.path.exists(arg):
- xml_folder = arg
- else:
- print 'Error: Argument must be a folder name for --folder (-f).'
- sys.exit()
- elif opt in ('-s', "--save_file"):
- save_file = arg
- elif opt in ('-t', "--data_type"):
- data_type = arg
-
- # Required fields to run the script.
- if xml_folder == "" or not os.path.exists(xml_folder):
- print 'Error: The folder path option must be supplied: --folder (-f).'
- sys.exit()
- if save_file == "":
- print 'Error: The folder path option must be supplied: --save_file (-s).'
- sys.exit()
-
- list_xml_csv = ''
- with open(save_file, 'w') as outfile:
- outfile.write(XML_PREFIX)
- for path, filename in find_files(xml_folder, '*.xml'):
- # Only write out a specific type of data xml documents found in a specific path.
- if data_type in path:
- with open(path + "/" + filename) as infile:
- for line in infile:
- # Skip the root tags.
- if line != XML_PREFIX and line != XML_SUFFIX:
- outfile.write(line)
- outfile.write(XML_SUFFIX)
-
-if __name__ == "__main__":
- main(sys.argv[1:])
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/util/vxquery_functions.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/vxquery_functions.xq b/vxquery-benchmark/src/main/resources/util/vxquery_functions.xq
deleted file mode 100644
index d0621eb..0000000
--- a/vxquery-benchmark/src/main/resources/util/vxquery_functions.xq
+++ /dev/null
@@ -1,27 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License. :)
-
-(: XQuery Function List :)
-(: VXQuery function list in csv with arguments and return types :)
-let $list := "../../../../../vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml"
-let $r :=
- for $f in fn:doc($list)/functions/function
- let $pl :=
- for $p in $f/param
- return $p/@type
- return fn:string-join(($f/@name, fn:string-join($pl, ' '), $f/return/@type), ',')
-return fn:string-join($r , '|')
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/util/vxquery_operators.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/vxquery_operators.xq b/vxquery-benchmark/src/main/resources/util/vxquery_operators.xq
deleted file mode 100644
index f485807..0000000
--- a/vxquery-benchmark/src/main/resources/util/vxquery_operators.xq
+++ /dev/null
@@ -1,27 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License. :)
-
-(: XQuery Function List :)
-(: VXQuery function list in csv with arguments and return types :)
-let $list := "../../../../../vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-operators.xml"
-let $r :=
- for $f in fn:doc($list)/operators/operator
- let $pl :=
- for $p in $f/param
- return $p/@type
- return fn:string-join(($f/@name, fn:string-join($pl, ' '), $f/return/@type), ',')
-return fn:string-join($r , '|')
\ No newline at end of file
[07/14] git commit: Found missing file.
Posted by pr...@apache.org.
Found missing file.
Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/44d07d98
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/44d07d98
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/44d07d98
Branch: refs/heads/master
Commit: 44d07d98e404bb8786b62c14a9284660e36fc228
Parents: c182925
Author: Preston Carman <pr...@apache.org>
Authored: Mon Oct 6 15:18:00 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Mon Oct 6 15:18:00 2014 -0700
----------------------------------------------------------------------
.../vxquery/cli/VXQueryClusterShutdown.java | 76 ++++++++++++++++++++
1 file changed, 76 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/vxquery/blob/44d07d98/vxquery-server/src/main/java/org/apache/vxquery/cli/VXQueryClusterShutdown.java
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/java/org/apache/vxquery/cli/VXQueryClusterShutdown.java b/vxquery-server/src/main/java/org/apache/vxquery/cli/VXQueryClusterShutdown.java
new file mode 100644
index 0000000..37fe4f6
--- /dev/null
+++ b/vxquery-server/src/main/java/org/apache/vxquery/cli/VXQueryClusterShutdown.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.vxquery.cli;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.kohsuke.args4j.Argument;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+
+import edu.uci.ics.hyracks.api.client.HyracksConnection;
+import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;
+
+public class VXQueryClusterShutdown {
+ /**
+ * Main method to get command line options and execute query process.
+ *
+ * @param args
+ * @throws Exception
+ */
+ public static void main(String[] args) throws Exception {
+ final CmdLineOptions opts = new CmdLineOptions();
+ CmdLineParser parser = new CmdLineParser(opts);
+
+ // parse command line options
+ try {
+ parser.parseArgument(args);
+ } catch (Exception e) {
+ parser.printUsage(System.err);
+ return;
+ }
+
+ // give error message if missing arguments
+ if (opts.clientNetIpAddress == null) {
+ parser.printUsage(System.err);
+ return;
+ }
+
+ try {
+ IHyracksClientConnection hcc = new HyracksConnection(opts.clientNetIpAddress, opts.clientNetPort);
+ hcc.stopCluster();
+ } catch (Exception e) {
+ System.err.println("Unable to connect and shutdown the Hyracks cluster.");
+ System.err.println(e);
+ return;
+ }
+ }
+
+ /**
+ * Helper class with fields and methods to handle all command line options
+ */
+ private static class CmdLineOptions {
+ @Option(name = "-client-net-ip-address", usage = "IP Address of the ClusterController", required = true)
+ private String clientNetIpAddress;
+
+ @Option(name = "-client-net-port", usage = "Port of the ClusterController")
+ private int clientNetPort = 1098;
+
+ @Argument
+ private List<String> arguments = new ArrayList<String>();
+ }
+
+}
[13/14] git commit: Continuing to tweak the MRQL scripts.
Posted by pr...@apache.org.
Continuing to tweak the MRQL scripts.
Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/9e0133ad
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/9e0133ad
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/9e0133ad
Branch: refs/heads/master
Commit: 9e0133adc899f580d89a47765da92b53c6d3ee17
Parents: 7f06298
Author: Preston Carman <pr...@apache.org>
Authored: Tue Oct 21 11:08:28 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Tue Oct 21 11:08:28 2014 -0700
----------------------------------------------------------------------
.../mrql_scripts/load_node_file.sh | 29 +++++---
.../mrql_scripts/run_group_test.sh | 25 ++++---
.../mrql_scripts/run_mrql_tests.sh | 2 +-
.../noaa-ghcn-daily/scripts/run_benchmark.sh | 2 +-
.../RemoveUnusedSortDistinctNodesRule.java | 19 +-----
.../rules/util/CardinalityRuleToolbox.java | 13 ----
.../rewriter/rules/util/OperatorToolbox.java | 72 --------------------
.../vxquery/functions/builtin-functions.xml | 1 +
.../xmlquery/query/XMLQueryCompiler.java | 2 +-
.../src/main/resources/conf/cluster_example.xml | 12 ++--
.../src/main/resources/conf/local.xml | 18 ++---
11 files changed, 57 insertions(+), 138 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
index ead0902..206c38b 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
@@ -19,20 +19,29 @@
if [ -z "${1}" ]
then
- echo "Please enter the node number."
+ echo "Please enter the data set as the first argument."
exit
fi
-echo "Loading node ${1} data file in to cluster."
+if [ -z "${2}" ]
+then
+ echo "Please enter the node number as the second argument."
+ exit
+fi
+
+DATASET=${1}
+NODES=${2}
+
+echo "Loading ${NODES} node ${DATASET} data file in to cluster."
# Add each sensor block
-cp saved/backups/mr/all_sensors_${1}.xml.gz disk1/hadoop/
-gunzip disk1/hadoop/all_sensors_${1}.xml.gz
-hadoop fs -copyFromLocal disk1/hadoop/all_sensors_${1}.xml all/sensors
-rm -f disk1/hadoop/all_sensors_${1}.xml
+cp saved/backups/mr/${DATASET}_sensors_${NODES}.xml.gz disk1/hadoop/
+gunzip disk1/hadoop/${DATASET}_sensors_${NODES}.xml.gz
+hadoop fs -copyFromLocal disk1/hadoop/${DATASET}_sensors_${NODES}.xml ${DATASET}/sensors
+rm -f disk1/hadoop/${DATASET}_sensors_${NODES}.xml
# Add each station block
-cp saved/backups/mr/all_stations_${1}.xml.gz disk1/hadoop/
-gunzip disk1/hadoop/all_stations_${1}.xml.gz
-hadoop fs -copyFromLocal disk1/hadoop/all_stations_${1}.xml all/stations
-rm -f disk1/hadoop/all_stations_${1}.xml
+cp saved/backups/mr/${DATASET}_stations_${NODES}.xml.gz disk1/hadoop/
+gunzip disk1/hadoop/${DATASET}_stations_${NODES}.xml.gz
+hadoop fs -copyFromLocal disk1/hadoop/${DATASET}_stations_${NODES}.xml ${DATASET}/stations
+rm -f disk1/hadoop/${DATASET}_stations_${NODES}.xml
http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
index f42a451..0208beb 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
@@ -19,11 +19,18 @@
if [ -z "${1}" ]
then
- echo "Please enter the number of nodes."
+ echo "Please enter the data set as the first argument."
exit
fi
-NODES=${1}
+if [ -z "${2}" ]
+then
+ echo "Please enter the node number as the second argument."
+ exit
+fi
+
+DATASET=${1}
+NODES=${2}
REPEAT=1
# Start Hadoop
@@ -32,24 +39,26 @@ sh saved/hadoop/hadoop-1.2.1/bin/start-all.sh
sleep 10
# Prepare hadoop file system
-hadoop fs -mkdir all
+hadoop fs -mkdir ${DATASET}
hadoop fs -ls
-hadoop fs -mkdir all/sensors
-hadoop fs -mkdir all/stations
-hadoop fs -ls all
+hadoop fs -mkdir ${DATASET}/sensors
+hadoop fs -mkdir ${DATASET}/stations
+hadoop fs -ls ${DATASET}
+
+hadoop balancer
# Upload test data
COUNTER=0
while [ ${COUNTER} -lt ${NODES} ];
do
- sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh ${COUNTER}
+ sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh ${DATASET} ${COUNTER}
let COUNTER=COUNTER+1
done
# Start test
-sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/ ${NODES} ${REPEAT}
+sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/ ${NODES} ${REPEAT} ${DATASET}
# Stop Hadoop
http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
index 1e512e1..d6bc9ab 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
@@ -22,7 +22,7 @@
NODES=${2}
REPEAT=${3}
-DATASET="all"
+DATASET=${4}
# Make log folder
http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
index 88339bd..5146586 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
@@ -52,7 +52,7 @@ do
echo "\nBuffer Size: ${BUFFER_SIZE}" >> ${log_base_path}/${log_file}
echo "\nFrame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file}
echo "\nJoin Hash Size: ${JOIN_HASH_SIZE}" >> ${log_base_path}/${log_file}
- fi;
+ fi;
done
if which programname >/dev/null;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveUnusedSortDistinctNodesRule.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveUnusedSortDistinctNodesRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveUnusedSortDistinctNodesRule.java
index 43d636b..43e2603 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveUnusedSortDistinctNodesRule.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveUnusedSortDistinctNodesRule.java
@@ -384,8 +384,8 @@ public class RemoveUnusedSortDistinctNodesRule implements IAlgebraicRewriteRule
// Find the last operator to set a variable and call this function again.
SubplanOperator subplan = (SubplanOperator) op;
for (int index = 0; index < subplan.getNestedPlans().size(); index++) {
- AbstractLogicalOperator lastOperator = (AbstractLogicalOperator) subplan.getNestedPlans().get(index)
- .getRoots().get(0).getValue();
+ AbstractLogicalOperator lastOperator = (AbstractLogicalOperator) subplan.getNestedPlans()
+ .get(index).getRoots().get(0).getValue();
updateVariableMap(lastOperator, cardinalityVariable, documentOrderVariables, uniqueNodesVariables,
vxqueryContext);
}
@@ -437,21 +437,6 @@ public class RemoveUnusedSortDistinctNodesRule implements IAlgebraicRewriteRule
break;
// The following operators' analysis has not yet been implemented.
- case CLUSTER:
- case DISTINCT:
- case EXTENSION_OPERATOR:
- case GROUP:
- case INDEX_INSERT_DELETE:
- case INSERT_DELETE:
- case LIMIT:
- case PARTITIONINGSPLIT:
- case REPLICATE:
- case RUNNINGAGGREGATE:
- case SCRIPT:
- case SINK:
- case UNIONALL:
- case UNNEST_MAP:
- case UPDATE:
default:
throw new RuntimeException("Operator (" + op.getOperatorTag()
+ ") has not been implemented in rewrite rule.");
http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/CardinalityRuleToolbox.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/CardinalityRuleToolbox.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/CardinalityRuleToolbox.java
index a586c06..5b4594e 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/CardinalityRuleToolbox.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/CardinalityRuleToolbox.java
@@ -81,19 +81,6 @@ public class CardinalityRuleToolbox {
break;
// The following operators' analysis has not yet been implemented.
- case CLUSTER:
- case DISTINCT:
- case EXTENSION_OPERATOR:
- case INDEX_INSERT_DELETE:
- case INSERT_DELETE:
- case PARTITIONINGSPLIT:
- case REPLICATE:
- case RUNNINGAGGREGATE:
- case SCRIPT:
- case SINK:
- case UNIONALL:
- case UNNEST_MAP:
- case UPDATE:
default:
throw new RuntimeException("Operator (" + op.getOperatorTag()
+ ") has not been implemented in rewrite rule.");
http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/OperatorToolbox.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/OperatorToolbox.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/OperatorToolbox.java
index da85f2d..725a082 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/OperatorToolbox.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/OperatorToolbox.java
@@ -84,29 +84,6 @@ public class OperatorToolbox {
AbstractUnnestOperator auo = (AbstractUnnestOperator) op;
result.add(auo.getExpressionRef());
break;
- case CLUSTER:
- case DATASOURCESCAN:
- case DISTINCT:
- case DISTRIBUTE_RESULT:
- case EMPTYTUPLESOURCE:
- case EXCHANGE:
- case EXTENSION_OPERATOR:
- case GROUP:
- case INDEX_INSERT_DELETE:
- case INSERT_DELETE:
- case LIMIT:
- case NESTEDTUPLESOURCE:
- case ORDER:
- case PARTITIONINGSPLIT:
- case PROJECT:
- case REPLICATE:
- case SCRIPT:
- case SINK:
- case SUBPLAN:
- case UNIONALL:
- case UPDATE:
- case WRITE:
- case WRITE_RESULT:
default:
// TODO Not yet implemented.
break;
@@ -129,32 +106,6 @@ public class OperatorToolbox {
case UNNEST_MAP:
AbstractUnnestOperator ano = (AbstractUnnestOperator) op;
return ano.getExpressionRef();
- case CLUSTER:
- case DATASOURCESCAN:
- case DISTINCT:
- case DISTRIBUTE_RESULT:
- case EMPTYTUPLESOURCE:
- case EXCHANGE:
- case EXTENSION_OPERATOR:
- case GROUP:
- case INDEX_INSERT_DELETE:
- case INNERJOIN:
- case INSERT_DELETE:
- case LEFTOUTERJOIN:
- case LIMIT:
- case NESTEDTUPLESOURCE:
- case ORDER:
- case PARTITIONINGSPLIT:
- case PROJECT:
- case REPLICATE:
- case SCRIPT:
- case SELECT:
- case SINK:
- case SUBPLAN:
- case UNIONALL:
- case UPDATE:
- case WRITE:
- case WRITE_RESULT:
default:
// TODO Not yet implemented.
break;
@@ -196,29 +147,6 @@ public class OperatorToolbox {
case EMPTYTUPLESOURCE:
case NESTEDTUPLESOURCE:
return null;
- case CLUSTER:
- case DISTINCT:
- case DISTRIBUTE_RESULT:
- case EXCHANGE:
- case EXTENSION_OPERATOR:
- case GROUP:
- case INDEX_INSERT_DELETE:
- case INNERJOIN:
- case INSERT_DELETE:
- case LEFTOUTERJOIN:
- case LIMIT:
- case ORDER:
- case PARTITIONINGSPLIT:
- case PROJECT:
- case REPLICATE:
- case SCRIPT:
- case SELECT:
- case SINK:
- case SUBPLAN:
- case UNIONALL:
- case UPDATE:
- case WRITE:
- case WRITE_RESULT:
default:
// Skip operators and go look at input.
for (Mutable<ILogicalOperator> input : op.getInputs()) {
http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml b/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml
index b439a83..38f03a4 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml
+++ b/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml
@@ -125,6 +125,7 @@
<function name="fn:collection">
<param name="arg" type="xs:string?"/>
<return type="node()*"/>
+ <!-- Collection operator is added during the rewrite rules phase. -->
</function>
<!-- fn:compare($comparand1 as xs:string?, $comparand2 as xs:string?) as xs:integer? -->
http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java b/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java
index 966bd87..3cdc492 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java
@@ -113,7 +113,7 @@ public class XMLQueryCompiler {
});
builder.getPhysicalOptimizationConfig().setFrameSize(this.frameSize);
if (joinHashSize > 0) {
- builder.getPhysicalOptimizationConfig().setInMemHashJoinTableSize(joinHashSize);
+ builder.getPhysicalOptimizationConfig().setMaxFramesHybridHash(joinHashSize);
}
builder.setLogicalRewrites(buildDefaultLogicalRewrites());
builder.setPhysicalRewrites(buildDefaultPhysicalRewrites());
http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-server/src/main/resources/conf/cluster_example.xml
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/conf/cluster_example.xml b/vxquery-server/src/main/resources/conf/cluster_example.xml
index 41d642d..18d9173 100644
--- a/vxquery-server/src/main/resources/conf/cluster_example.xml
+++ b/vxquery-server/src/main/resources/conf/cluster_example.xml
@@ -15,13 +15,13 @@
limitations under the License.
-->
<cluster xmlns="cluster">
- <name>local</name>
+ <name>local</name>
<username>joe</username>
- <master_node>
- <id>master</id>
- <client_ip>128.195.52.177</client_ip>
- <cluster_ip>192.168.100.0</cluster_ip>
- </master_node>
+ <master_node>
+ <id>master</id>
+ <client_ip>128.195.52.177</client_ip>
+ <cluster_ip>192.168.100.0</cluster_ip>
+ </master_node>
<node>
<id>nodeA</id>
<cluster_ip>192.168.100.1</cluster_ip>
http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-server/src/main/resources/conf/local.xml
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/conf/local.xml b/vxquery-server/src/main/resources/conf/local.xml
index 5c27e11..4a48620 100644
--- a/vxquery-server/src/main/resources/conf/local.xml
+++ b/vxquery-server/src/main/resources/conf/local.xml
@@ -15,14 +15,14 @@
limitations under the License.
-->
<cluster xmlns="cluster">
- <name>local</name>
- <master_node>
- <id>master</id>
- <client_ip>127.0.0.1</client_ip>
- <cluster_ip>127.0.0.1</cluster_ip>
- </master_node>
- <node>
- <id>node1</id>
- <cluster_ip>127.0.0.1</cluster_ip>
+ <name>local</name>
+ <master_node>
+ <id>master</id>
+ <client_ip>127.0.0.1</client_ip>
+ <cluster_ip>127.0.0.1</cluster_ip>
+ </master_node>
+ <node>
+ <id>node1</id>
+ <cluster_ip>127.0.0.1</cluster_ip>
</node>
</cluster>
[06/14] git commit: copy of all changes in exrt benchmark queries
that is copyright free.
Posted by pr...@apache.org.
copy of all changes in exrt benchmark queries that is copyright free.
Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/c182925c
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/c182925c
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/c182925c
Branch: refs/heads/master
Commit: c182925cc23f8662077150da7f6c9a3d67a6fbc8
Parents: 3167366
Author: Preston Carman <pr...@apache.org>
Authored: Mon Oct 6 15:15:03 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Mon Oct 6 15:15:03 2014 -0700
----------------------------------------------------------------------
.../main/resources/noaa-ghcn-daily/README.md | 40 ++
.../noaa-ghcn-daily/conf/weather_example.xml | 35 ++
.../conf/weather_example_cluster.xml | 58 ++
.../noaa-ghcn-daily/other_systems/mrql/q00.mrql | 23 +
.../noaa-ghcn-daily/other_systems/mrql/q01.mrql | 21 +
.../noaa-ghcn-daily/other_systems/mrql/q02.mrql | 24 +
.../noaa-ghcn-daily/other_systems/mrql/q03.mrql | 22 +
.../noaa-ghcn-daily/other_systems/mrql/q04.mrql | 24 +
.../other_systems/mrql/q04_count_sensor.mrql | 21 +
.../other_systems/mrql/q04_count_station.mrql | 23 +
.../noaa-ghcn-daily/other_systems/mrql/q05.mrql | 27 +
.../other_systems/mrql/q05_count_sensor.mrql | 23 +
.../other_systems/mrql/q05_count_station.mrql | 23 +
.../noaa-ghcn-daily/other_systems/mrql/q06.mrql | 26 +
.../other_systems/mrql/q06_count_sensor.mrql | 23 +
.../other_systems/mrql/q06_count_station.mrql | 23 +
.../noaa-ghcn-daily/other_systems/mrql/q07.mrql | 26 +
.../other_systems/mrql/q07_count_join.mrql | 26 +
.../other_systems/mrql/q07_count_tmax.mrql | 22 +
.../other_systems/mrql/q07_count_tmin.mrql | 22 +
.../other_systems/mrql_scripts/clear.sh | 19 +
.../mrql_scripts/run_mrql_tests.sh | 46 ++
.../other_systems/mrql_scripts/start.sh | 20 +
.../other_systems/mrql_scripts/stop.sh | 20 +
.../other_systems/saxon/count_sensor.xq | 7 +
.../other_systems/saxon/count_station.xq | 7 +
.../noaa-ghcn-daily/other_systems/saxon/q00.xq | 15 +
.../noaa-ghcn-daily/other_systems/saxon/q01.xq | 8 +
.../noaa-ghcn-daily/other_systems/saxon/q02.xq | 14 +
.../noaa-ghcn-daily/other_systems/saxon/q03.xq | 8 +
.../noaa-ghcn-daily/other_systems/saxon/q04.xq | 30 +
.../other_systems/saxon/q04_count_sensor.xq | 10 +
.../other_systems/saxon/q04_count_station.xq | 8 +
.../noaa-ghcn-daily/other_systems/saxon/q05.xq | 33 ++
.../other_systems/saxon/q05_count_sensor.xq | 11 +
.../other_systems/saxon/q05_count_station.xq | 8 +
.../noaa-ghcn-daily/other_systems/saxon/q06.xq | 30 +
.../other_systems/saxon/q06_count_sensor.xq | 8 +
.../other_systems/saxon/q06_count_station.xq | 5 +
.../noaa-ghcn-daily/other_systems/saxon/q07.xq | 15 +
.../other_systems/saxon/q07_count_tmax.xq | 9 +
.../other_systems/saxon/q07_count_tmin.xq | 9 +
.../saxon_scripts/run_saxon_tests.sh | 44 ++
.../noaa-ghcn-daily/queries/count_sensor.xq | 24 +
.../noaa-ghcn-daily/queries/count_station.xq | 24 +
.../noaa-ghcn-daily/queries/no_result.xq | 24 +
.../resources/noaa-ghcn-daily/queries/q00.xq | 31 ++
.../resources/noaa-ghcn-daily/queries/q01.xq | 25 +
.../resources/noaa-ghcn-daily/queries/q02.xq | 30 +
.../resources/noaa-ghcn-daily/queries/q03.xq | 25 +
.../resources/noaa-ghcn-daily/queries/q04.xq | 32 ++
.../noaa-ghcn-daily/queries/q04_count_join.xq | 34 ++
.../noaa-ghcn-daily/queries/q04_count_sensor.xq | 29 +
.../queries/q04_count_station.xq | 28 +
.../resources/noaa-ghcn-daily/queries/q05.xq | 33 ++
.../noaa-ghcn-daily/queries/q05_count_join.xq | 35 ++
.../noaa-ghcn-daily/queries/q05_count_sensor.xq | 31 ++
.../queries/q05_count_station.xq | 28 +
.../resources/noaa-ghcn-daily/queries/q06.xq | 30 +
.../noaa-ghcn-daily/queries/q06_count_join.xq | 34 ++
.../noaa-ghcn-daily/queries/q06_count_sensor.xq | 29 +
.../queries/q06_count_station.xq | 27 +
.../resources/noaa-ghcn-daily/queries/q07.xq | 33 ++
.../noaa-ghcn-daily/queries/q07_count_join.xq | 35 ++
.../noaa-ghcn-daily/queries/q07_count_tmax.xq | 28 +
.../noaa-ghcn-daily/queries/q07_count_tmin.xq | 28 +
.../resources/noaa-ghcn-daily/scripts/README.md | 51 ++
.../scripts/benchmark_logging.properties | 1 +
.../noaa-ghcn-daily/scripts/run_benchmark.sh | 68 +++
.../scripts/run_benchmark_cluster.sh | 90 +++
.../noaa-ghcn-daily/scripts/run_group_test.sh | 51 ++
.../noaa-ghcn-daily/scripts/run_mrql_tests.sh | 42 ++
.../scripts/weather_benchmark.py | 377 +++++++++++++
.../noaa-ghcn-daily/scripts/weather_cli.py | 236 ++++++++
.../noaa-ghcn-daily/scripts/weather_config.py | 134 +++++
.../scripts/weather_config_ghcnd.py | 95 ++++
.../scripts/weather_config_mshr.py | 78 +++
.../scripts/weather_convert_to_xml.py | 554 +++++++++++++++++++
.../scripts/weather_data_files.py | 406 ++++++++++++++
.../scripts/weather_download_files.py | 102 ++++
.../src/main/resources/util/README.md | 28 +
.../util/build_saxon_collection_xml.py | 63 +++
.../src/main/resources/util/diff_xml_files.py | 97 ++++
.../resources/util/find_averages_in_logs.py | 97 ++++
.../src/main/resources/util/list_xml_files.py | 72 +++
.../src/main/resources/util/merge_xml_files.py | 88 +++
.../main/resources/util/vxquery_functions.xq | 27 +
.../main/resources/util/vxquery_operators.xq | 27 +
.../java/org/apache/vxquery/cli/VXQuery.java | 1 +
vxquery-server/pom.xml | 4 +
.../main/resources/scripts/cluster_actions.py | 16 +-
.../src/main/resources/scripts/cluster_cli.py | 6 +-
.../resources/scripts/cluster_information.py | 27 +-
.../src/main/resources/scripts/startcc.sh | 6 +-
.../src/main/resources/scripts/startnc.sh | 4 +-
.../src/main/resources/scripts/stopcc.sh | 3 +-
.../src/main/resources/scripts/stopcluster.sh | 49 ++
.../src/main/resources/scripts/stopnc.sh | 2 +-
98 files changed, 4473 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/README.md
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/README.md b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/README.md
new file mode 100644
index 0000000..9b512dd
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/README.md
@@ -0,0 +1,40 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+NOAA GHCN-Daily Benchmark
+=====================
+
+# Introduction
+
+The NOAA has hosted DAILY GLOBAL HISTORICAL CLIMATOLOGY NETWORK (GHCN-DAILY)
+.dat files. Weather.gov has an RSS/XML feed that gives current weather sensor
+readings. Using the RSS feed as a template, the GHCN-DAILY historical
+information is used to generate past RSS feed XML documents. The process allows
+testing on a large set of information without having to continually monitor
+the weather.gov site for all the weather details for years.
+
+# Detailed Description
+
+Detailed GHCN-DAILY information:
+<http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt>
+
+# Folders
+
+ * conf
+ * other_systems
+ * queries
+ * scripts
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example.xml
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example.xml b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example.xml
new file mode 100644
index 0000000..2c15a33
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example.xml
@@ -0,0 +1,35 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<data xmlns="data">
+ <name>Local Example</name>
+ <save_path>/data</save_path>
+ <package>ghcnd_all</package>
+ <node>
+ <id>localhost</id>
+ <cluster_ip>127.0.0.1</cluster_ip>
+ </node>
+ <dataset>
+ <name>tiny-example</name>
+ <test>local_speed_up</test>
+ <save_path>/data</save_path>
+ <partition_type>small_files</partition_type>
+ <partitions_per_path>1</partitions_per_path>
+ <partitions_per_path>2</partitions_per_path>
+ <partitions_per_path>4</partitions_per_path>
+ <partitions_per_path>8</partitions_per_path>
+ </dataset>
+</data>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example_cluster.xml
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example_cluster.xml b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example_cluster.xml
new file mode 100644
index 0000000..7d05ac0
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example_cluster.xml
@@ -0,0 +1,58 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<data xmlns="data">
+ <name>Cluster Example</name>
+ <save_path>/data</save_path>
+ <package>ghcnd_all</package>
+ <node>
+ <id>machine1</id>
+ <cluster_ip>127.0.0.1</cluster_ip>
+ </node>
+ <node>
+ <id>machine2</id>
+ <cluster_ip>127.0.0.2</cluster_ip>
+ </node>
+ <node>
+ <id>machine3</id>
+ <cluster_ip>127.0.0.3</cluster_ip>
+ </node>
+ <node>
+ <id>machine4</id>
+ <cluster_ip>127.0.0.4</cluster_ip>
+ </node>
+ <node>
+ <id>machine5</id>
+ <cluster_ip>127.0.0.5</cluster_ip>
+ </node>
+ <dataset>
+ <name>tiny-1drive</name>
+ <test>speed_up</test>
+ <test>batch_scale_out</test>
+ <save_path>/data</save_path>
+ <partition_type>small_files</partition_type>
+ <partitions_per_path>1</partitions_per_path>
+ </dataset>
+ <dataset>
+ <name>small-2drives</name>
+ <test>speed_up</test>
+ <test>batch_scale_out</test>
+ <save_path>/data</save_path>
+ <save_path>/data2</save_path>
+ <partition_type>large_files</partition_type>
+ <partitions_per_path>1</partitions_per_path>
+ </dataset>
+</data>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q00.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q00.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q00.mrql
new file mode 100644
index 0000000..49d005e
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q00.mrql
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+select (r)
+from r in source(xml, args[0], {"data"})
+where text(r.station) = "GHCND:USW00012836"
+ and toInt(substring(text(r.date), 0, 4)) >= 2003
+ and toInt(substring(text(r.date), 5, 7)) = 12
+ and toInt(substring(text(r.date), 8, 10)) = 25
+;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q01.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q01.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q01.mrql
new file mode 100644
index 0000000..f4cbd45
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q01.mrql
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+select (r)
+from r in source(xml, args[0], {"data"})
+where text(r.dataType) = "AWND"
+ and toFloat(text(r.value)) > 491.744
+;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q02.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q02.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q02.mrql
new file mode 100644
index 0000000..5b7b507
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q02.mrql
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+sum(
+ select (toInt(text(r.value)))
+ from r in source(xml, args[0], {"data"})
+ where text(r.station) = "GHCND:USW00014771"
+ and toInt(substring(text(r.date), 0, 4)) = 1999
+ and text(r.dataType) = "PRCP"
+) / 10
+;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q03.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q03.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q03.mrql
new file mode 100644
index 0000000..b444e55
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q03.mrql
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+max(
+ select (toInt(text(r.value)))
+ from r in source(xml, args[0], {"data"})
+ where text(r.dataType) = "TMAX"
+) / 10
+;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q04.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q04.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q04.mrql
new file mode 100644
index 0000000..1b36852
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q04.mrql
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+select (obs) /* Readings dated 1976-07-04 joined to stations that carry a "Washington" location label. */
+from obs in source(xml, args[0], {"data"}),
+     stn in source(xml, args[1], {"station"}),
+     lbl in stn.locationLabels
+where text(stn.id) = text(obs.station)
+  and text(obs.date) = "1976-07-04T00:00:00.000"
+  and text(lbl.displayName) = "Washington"
+;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q04_count_sensor.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q04_count_sensor.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q04_count_sensor.mrql
new file mode 100644
index 0000000..da0eae3
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q04_count_sensor.mrql
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+select (obs) /* TMAX readings dated 1976-07-04. NOTE(review): returns rows, not count(...) like the sibling *_count_* queries; confirm. */
+from obs in source(xml, args[0], {"data"})
+where text(obs.date) = "1976-07-04T00:00:00.000"
+  and text(obs.dataType) = "TMAX"
+;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q04_count_station.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q04_count_station.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q04_count_station.mrql
new file mode 100644
index 0000000..16bbe5a
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q04_count_station.mrql
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+count( /* Number of station/label pairs in args[1] whose label displayName is "WASHINGTON". */
+    select (t)
+    from t in source(xml, args[1], {"station"}),
+         l in t.locationLabels
+    where text(l.displayName) = "WASHINGTON" /* NOTE(review): q04.mrql compares against "Washington"; confirm the casing used by the data. */
+)
+;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q05.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q05.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q05.mrql
new file mode 100644
index 0000000..230f6e7
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q05.mrql
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+min( /* Lowest TMIN reading from 2001 at stations with a "FIPS:US" label; the integer minimum is then divided by 10. */
+    select (toInt(text(obs.value)))
+    from obs in source(xml, args[0], {"data"}),
+         stn in source(xml, args[1], {"station"}),
+         lbl in stn.locationLabels
+    where text(stn.id) = text(obs.station)
+      and toInt(substring(text(obs.date), 0, 4)) = 2001
+      and text(obs.dataType) = "TMIN"
+      and text(lbl.id) = "FIPS:US"
+) / 10
+;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q05_count_sensor.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q05_count_sensor.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q05_count_sensor.mrql
new file mode 100644
index 0000000..dd801ed
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q05_count_sensor.mrql
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+count( /* Number of TMIN readings in args[0] whose first four date characters equal 2001. */
+    select (toInt(text(obs.value)))
+    from obs in source(xml, args[0], {"data"})
+    where toInt(substring(text(obs.date), 0, 4)) = 2001
+      and text(obs.dataType) = "TMIN"
+)
+;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q05_count_station.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q05_count_station.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q05_count_station.mrql
new file mode 100644
index 0000000..d8fcacc
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q05_count_station.mrql
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+count( /* Number of station/label pairs in args[1] whose label id is "FIPS:US". */
+    select (stn)
+    from stn in source(xml, args[1], {"station"}),
+         lbl in stn.locationLabels
+    where text(lbl.id) = "FIPS:US"
+)
+;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06.mrql
new file mode 100644
index 0000000..583a5b9
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06.mrql
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+select (n, d, v) /* Station displayName, date and value for every TMAX reading dated in 2000. */
+from sensors in source(xml, args[0], {"data"}),
+     d in sensors.date,
+     v in sensors.value,
+     stations in source(xml, args[1], {"station"}),
+     n in stations.displayName
+where text(stations.id) = text(sensors.station)
+  and toInt(substring(text(d), 0, 4)) = 2000
+  and text(sensors.dataType) = "TMAX"
+;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06_count_sensor.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06_count_sensor.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06_count_sensor.mrql
new file mode 100644
index 0000000..22e5918
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06_count_sensor.mrql
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+count( /* Number of TMAX readings in args[0] whose first four date characters equal 2000. */
+    select (obs.date, obs.value)
+    from obs in source(xml, args[0], {"data"})
+    where toInt(substring(text(obs.date), 0, 4)) = 2000
+      and text(obs.dataType) = "TMAX"
+)
+;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06_count_station.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06_count_station.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06_count_station.mrql
new file mode 100644
index 0000000..9fb9e84
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06_count_station.mrql
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+count( /* Number of station/label pairs in args[1] whose label displayName is "WASHINGTON". */
+    select (stn.displayName)
+    from stn in source(xml, args[1], {"station"}),
+         lbl in stn.locationLabels
+    where text(lbl.displayName) = "WASHINGTON" /* NOTE(review): q06.mrql applies no location-label filter; confirm this one is intended. */
+)
+;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07.mrql
new file mode 100644
index 0000000..cdb0b0c
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07.mrql
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+avg( /* Mean of TMAX minus TMIN over reading pairs that share station and date; the average is then divided by 10. */
+    select (toInt(text(tmax.value))-toInt(text(tmin.value)))
+    from tmax in source(xml, args[0], {"data"}),
+         tmin in source(xml, args[0], {"data"})
+    where text(tmax.date) = text(tmin.date)
+      and text(tmax.station) = text(tmin.station)
+      and text(tmax.dataType) = "TMAX"
+      and text(tmin.dataType) = "TMIN"
+) / 10
+;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_join.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_join.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_join.mrql
new file mode 100644
index 0000000..8dec470
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_join.mrql
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+count( /* Number of TMAX/TMIN reading pairs that share station and date (the join evaluated by q07.mrql). */
+    select (toInt(text(rtmax.value))-toInt(text(rtmin.value)))
+    from rtmax in source(xml, args[0], {"data"}),
+         rtmin in source(xml, args[0], {"data"})
+    where text(rtmax.date) = text(rtmin.date)
+      and text(rtmax.station) = text(rtmin.station)
+      and text(rtmax.dataType) = "TMAX"
+      and text(rtmin.dataType) = "TMIN"
+)
+;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmax.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmax.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmax.mrql
new file mode 100644
index 0000000..ca8ab4c
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmax.mrql
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+count( /* Number of TMAX readings in args[0]. */
+    select (rtmax)
+    from rtmax in source(xml, args[0], {"data"})
+    where text(rtmax.dataType) = "TMAX"
+)
+;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmin.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmin.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmin.mrql
new file mode 100644
index 0000000..fe17ebe
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmin.mrql
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+count( /* Number of TMIN readings in args[0]. */
+    select (rtmin)
+    from rtmin in source(xml, args[0], {"data"})
+    where text(rtmin.dataType) = "TMIN"
+)
+;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/clear.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/clear.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/clear.sh
new file mode 100755
index 0000000..da7cabe
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/clear.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+hadoop namenode -format  # Formats the HDFS NameNode; NOTE(review): this is expected to discard existing HDFS metadata - confirm before running on a live cluster.
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
new file mode 100755
index 0000000..10ab4d9
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Examples
+# run_mrql_tests.sh mrql_all/ 1 2
+
+NODES=${2}
+REPEAT=${3}
+DATASET="all"
+
+
+for j in $(find ${1} -name '*q??.mrql')
+do
+ date
+ echo "Running MRQL query: ${j}"
+ time for i in {1..${REPEAT}}; do ~/mrql/incubator-mrql/bin/mrql -dist -nodes ${NODES} ${j} ${DATASET}/sensors.xml ${DATASET}/stations.xml >> ~/disk1/weather_data/mrql/query_logs/$(basename "${j}").log 2>&1; done;
+done
+
+
+if which programname >/dev/null;
+then
+ echo "Sending out e-mail notification."
+ SUBJECT="MRQL Tests Finished (${DATASET})"
+ EMAIL="ecarm002@ucr.edu"
+ /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
+ Completed all MRQL tests on ${DATASET}.
+ EOM
+else
+ echo "No mail command to use."
+fi;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/start.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/start.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/start.sh
new file mode 100755
index 0000000..a1766c9
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/start.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+bin/start-all.sh
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/stop.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/stop.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/stop.sh
new file mode 100755
index 0000000..e49d818
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/stop.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+bin/stop-all.sh
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/count_sensor.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/count_sensor.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/count_sensor.xq
new file mode 100644
index 0000000..1e8e312
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/count_sensor.xq
@@ -0,0 +1,7 @@
+(: XQuery Count Query :)
+(: Counts every data element under /root/dataCollection in the sensor collection. :)
+fn:count(
+    let $sensors_uri := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+    for $reading in fn:collection($sensors_uri)/root/dataCollection/data
+    return $reading
+)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/count_station.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/count_station.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/count_station.xq
new file mode 100644
index 0000000..fe6ec8f
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/count_station.xq
@@ -0,0 +1,7 @@
+(: XQuery Count Query :)
+(: Counts every station element under /root/stationCollection in the station collection. :)
+fn:count(
+    let $stations_uri := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/stations/?select=*.xml;recurse=yes"
+    for $station in fn:collection($stations_uri)/root/stationCollection/station
+    return $station
+)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q00.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q00.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q00.xq
new file mode 100644
index 0000000..09c5b79
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q00.xq
@@ -0,0 +1,15 @@
+(:
+XQuery Filter Query
+-------------------
+Historical data for the Key West International Airport, FL station
+(GHCND:USW00012836): returns every weather reading taken on December 25
+in 2003 or any later year.
+:)
+let $sensors_uri := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+for $reading in fn:collection($sensors_uri)/root/dataCollection/data
+let $taken := xs:dateTime(fn:data($reading/date))
+where $reading/station eq "GHCND:USW00012836"
+  and fn:year-from-dateTime($taken) ge 2003
+  and fn:month-from-dateTime($taken) eq 12
+  and fn:day-from-dateTime($taken) eq 25
+return $reading
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q01.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q01.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q01.xq
new file mode 100644
index 0000000..39d7e20
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q01.xq
@@ -0,0 +1,8 @@
+(: XQuery Filter Query :)
+(: Find all readings that would trigger a hurricane force or extreme wind warning. :)
+(: A warning applies when the wind speed (AWND) exceeds 110 mph (49.1744 meters :)
+(: per second); values are stored in tenths of a meter per second, hence 491.744. :)
+let $sensors_uri := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+for $reading in fn:collection($sensors_uri)/root/dataCollection/data
+where $reading/dataType eq "AWND" and xs:decimal($reading/value) gt 491.744
+return $reading
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q02.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q02.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q02.xq
new file mode 100644
index 0000000..3fb0975
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q02.xq
@@ -0,0 +1,14 @@
+(:
+XQuery Aggregate Query
+----------------------
+Find the annual precipitation (PRCP) for Syracuse, NY using the airport
+weather station (USW00014771) report for 1999.
+:)
+fn:sum(
+ let $collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+ for $r in collection($collection)/root/dataCollection/data
+ where $r/station eq "GHCND:USW00014771"
+ and $r/dataType eq "PRCP"
+ and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 1999
+ return $r/value
+) div 10 (: NOTE(review): presumably PRCP values are stored in tenths of a unit, hence the division - confirm :)
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q03.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q03.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q03.xq
new file mode 100644
index 0000000..cda344a
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q03.xq
@@ -0,0 +1,8 @@
+(: XQuery Aggregate Query :)
+(: Find the highest recorded temperature (TMAX) in Celsius. :)
+fn:max(
+ let $collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+ for $r in collection($collection)/root/dataCollection/data
+ where $r/dataType eq "TMAX"
+ return $r/value
+) div 10 (: NOTE(review): presumably TMAX values are stored in tenths of a degree, hence the division - confirm :)
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04.xq
new file mode 100644
index 0000000..8f513ce
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04.xq
@@ -0,0 +1,30 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+
+(: XQuery Join Query :)
+(: Find all the weather readings for Washington state for a specific day :)
+(: 1976/7/4. :)
+let $sensor_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+for $r in collection($sensor_collection)/root/dataCollection/data
+
+let $station_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/stations/?select=*.xml;recurse=yes"
+for $s in collection($station_collection)/root/stationCollection/station
+
+where $s/id eq $r/station
+ and (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "WASHINGTON"))
+ and xs:dateTime(fn:data($r/date)) eq xs:dateTime("1976-07-04T00:00:00.000") (: exact match on the midnight timestamp :)
+return $r
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04_count_sensor.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04_count_sensor.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04_count_sensor.xq
new file mode 100644
index 0000000..6c927a4
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04_count_sensor.xq
@@ -0,0 +1,10 @@
+(: XQuery Join Query :)
+(: Count all the weather sensor readings on 1976-07-04 (sensor side only; no join is performed here). :)
+count(
+ let $sensor_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+ for $r in collection($sensor_collection)/root/dataCollection/data
+
+ let $date := xs:date(fn:substring(xs:string(fn:data($r/date)), 0, 11)) (: fn:substring is 1-based: start 0, length 11 keeps characters 1-10 :)
+ where $date eq xs:date("1976-07-04")
+ return $r
+)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04_count_station.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04_count_station.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04_count_station.xq
new file mode 100644
index 0000000..957aec0
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04_count_station.xq
@@ -0,0 +1,8 @@
+(: XQuery Join Query :)
+(: Count all the weather stations having a county (CNTY) label whose upper-cased name contains "KING". :)
+count(
+ let $station_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/stations/?select=*.xml;recurse=yes"
+ for $s in collection($station_collection)/root/stationCollection/station
+ where (some $x in $s/locationLabels satisfies ($x/type eq "CNTY" and fn:contains(fn:upper-case(fn:data($x/displayName)), "KING")))
+ return $s
+)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05.xq
new file mode 100644
index 0000000..5f452c0
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05.xq
@@ -0,0 +1,33 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+
+(: XQuery Join Aggregate Query :)
+(: Find the lowest recorded temperature (TMIN) in the United States :)
+(: (stations with a CNTRY label of FIPS:US) for 2001. :)
+fn:min(
+ let $sensor_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+ for $r in collection($sensor_collection)/root/dataCollection/data
+
+ let $station_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/stations/?select=*.xml;recurse=yes"
+ for $s in collection($station_collection)/root/stationCollection/station
+
+ where $s/id eq $r/station
+ and (some $x in $s/locationLabels satisfies ($x/type eq "CNTRY" and $x/id eq "FIPS:US"))
+ and $r/dataType eq "TMIN"
+ and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2001
+ return $r/value
+) div 10 (: NOTE(review): presumably TMIN values are stored in tenths of a degree, hence the division - confirm :)
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05_count_sensor.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05_count_sensor.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05_count_sensor.xq
new file mode 100644
index 0000000..4ac353b
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05_count_sensor.xq
@@ -0,0 +1,11 @@
+(: XQuery Join Aggregate Query :)
+(: Count all sensor readings for TMIN in 2001 (sensor side only; no join is performed here). :)
+count(
+ let $sensor_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+ for $r in collection($sensor_collection)/root/dataCollection/data
+
+ let $date := xs:date(fn:substring(xs:string(fn:data($r/date)), 0, 11)) (: fn:substring is 1-based: start 0, length 11 keeps characters 1-10 :)
+ where $r/dataType eq "TMIN"
+ and fn:year-from-date($date) eq 2001
+ return $r/value
+)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05_count_station.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05_count_station.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05_count_station.xq
new file mode 100644
index 0000000..4349805
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05_count_station.xq
@@ -0,0 +1,8 @@
+(: XQuery Join Aggregate Query :)
+(: Count all stations in the state of Oregon (ST label whose upper-cased name equals "OREGON"). :)
+count(
+ let $station_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/stations/?select=*.xml;recurse=yes"
+ for $s in collection($station_collection)/root/stationCollection/station
+ where (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "OREGON"))
+ return $s
+)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06.xq
new file mode 100644
index 0000000..2c02bc7
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06.xq
@@ -0,0 +1,30 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+
+(: XQuery Join Query :)
+(: List every TMAX reading of the year 2000 together with its station: no :)
+(: aggregation is done, each matching reading is returned as recorded. :)
+let $sensor_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+for $r in collection($sensor_collection)/root/dataCollection/data
+
+let $station_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/stations/?select=*.xml;recurse=yes"
+for $s in collection($station_collection)/root/stationCollection/station
+
+where $s/id eq $r/station
+ and $r/dataType eq "TMAX"
+ and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2000
+return ($s/displayName, $r/date, $r/value) (: flat sequence: station name, reading date, undivided value :)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06_count_sensor.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06_count_sensor.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06_count_sensor.xq
new file mode 100644
index 0000000..5ca3329
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06_count_sensor.xq
@@ -0,0 +1,8 @@
+count( (: number of TMAX sensor readings dated in the year 2000; no join is performed :)
+ let $sensor_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+ for $r in collection($sensor_collection)/root/dataCollection/data
+
+ where $r/dataType eq "TMAX"
+ and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2000
+ return $r
+)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06_count_station.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06_count_station.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06_count_station.xq
new file mode 100644
index 0000000..ef1e732
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06_count_station.xq
@@ -0,0 +1,5 @@
+count( (: number of station elements in the station collection; no filter is applied :)
+ let $station_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/stations/?select=*.xml;recurse=yes"
+ for $s in collection($station_collection)/root/stationCollection/station
+ return $s
+)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q07.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q07.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q07.xq
new file mode 100644
index 0000000..35e5ea2
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q07.xq
@@ -0,0 +1,15 @@
+(: XQuery Join Aggregate Query :)
+(: Self join of the sensor readings: average of (TMAX - TMIN) over every TMIN/TMAX pair sharing a station and date. No year filter is applied. :)
+fn:avg(
+let $sensor_collection_min := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+for $r_min in collection($sensor_collection_min)/root/dataCollection/data
+
+let $sensor_collection_max := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+for $r_max in collection($sensor_collection_max)/root/dataCollection/data
+
+where $r_min/station eq $r_max/station
+ and $r_min/date eq $r_max/date
+ and $r_min/dataType eq "TMIN"
+ and $r_max/dataType eq "TMAX"
+return ($r_max/value - $r_min/value)
+) div 10 (: NOTE(review): presumably the values are stored in tenths of a degree, hence the division - confirm :)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q07_count_tmax.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q07_count_tmax.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q07_count_tmax.xq
new file mode 100644
index 0000000..3245746
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q07_count_tmax.xq
@@ -0,0 +1,9 @@
+(: XQuery Join Query :)
+(: Count all the records for TMAX (no join is performed here). :)
+count(
+ let $sensor_collection_max := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+ for $r_max in collection($sensor_collection_max)/root/dataCollection/data
+
+ where $r_max/dataType eq "TMAX"
+ return $r_max
+)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q07_count_tmin.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q07_count_tmin.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q07_count_tmin.xq
new file mode 100644
index 0000000..6fcd276
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q07_count_tmin.xq
@@ -0,0 +1,9 @@
+(: XQuery Join Query :)
+(: Count all the records for TMIN (no join is performed here). :)
+count(
+ let $sensor_collection_min := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
+ for $r_min in collection($sensor_collection_min)/root/dataCollection/data
+
+ where $r_min/dataType eq "TMIN"
+ return $r_min
+)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon_scripts/run_saxon_tests.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon_scripts/run_saxon_tests.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon_scripts/run_saxon_tests.sh
new file mode 100755
index 0000000..c1c2132
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon_scripts/run_saxon_tests.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Args: $1 = directory searched for *q??.xq, $3 = repeat count ($2 is not read).
+REPEAT=${3:-1}
+DATASET="hcn"
+
+mkdir -p ~/logs/saxon/
+
+for j in $(find "${1:-.}" -name '*q??.xq')
+do
+    date
+    echo "Running Saxon query: ${j}"
+    # Saxon's -repeat option performs the repetitions. A {1..${REPEAT}} brace
+    # loop is never expanded by bash, so a single timed invocation is used.
+    # The java launcher does not read JAVA_OPTS; pass the heap limit directly.
+    time java -Xmx8g -cp saxon9he.jar net.sf.saxon.Query -t "-repeat:${REPEAT}" "-q:${j}" >> ~/logs/saxon/$(basename "${j}").log 2>&1
+done
+
+# Only send the notification when a mail client is actually installed.
+if command -v mail >/dev/null 2>&1
+then
+    echo "Sending out e-mail notification."
+    SUBJECT="Saxon Tests Finished (${DATASET})"
+    EMAIL="ecarm002@ucr.edu"
+    echo "Completed all Saxon tests on ${DATASET}." | mail -s "${SUBJECT}" "${EMAIL}"
+else
+    echo "No mail command to use."
+fi
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_sensor.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_sensor.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_sensor.xq
new file mode 100644
index 0000000..6fa981b
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_sensor.xq
@@ -0,0 +1,24 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+
+(: XQuery Join Query :)
+(: Count all the weather sensor readings available (every dataCollection/data element; no join or filter). :)
+count(
+ let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+ for $r in collection($sensor_collection)/dataCollection/data
+ return $r
+)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_station.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_station.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_station.xq
new file mode 100644
index 0000000..1958ec6
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_station.xq
@@ -0,0 +1,24 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+
+(: XQuery Join Query :)
+(: Count all the weather stations available (every stationCollection/station element; no join or filter). :)
+count(
+ let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
+ for $s in collection($station_collection)/stationCollection/station
+ return $s
+)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/no_result.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/no_result.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/no_result.xq
new file mode 100644
index 0000000..c1363e3
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/no_result.xq
@@ -0,0 +1,24 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+
+(:
+Makes VXQuery parse every sensor file without producing any results: the where clause is always false.
+:)
+let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+for $r in collection($sensor_collection)/dataCollection/data
+where fn:false()
+return $r
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q00.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q00.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q00.xq
new file mode 100644
index 0000000..5006a21
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q00.xq
@@ -0,0 +1,31 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+(:
+XQuery Filter Query
+-------------------
+See historical data for Key West International Airport, FL (USW00012836)
+station by selecting the weather readings for December 25 over the last
+10 years (hard-coded below as every year from 2003 onwards).
+:)
+let $collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+for $r in collection($collection)/dataCollection/data
+let $datetime := xs:dateTime(fn:data($r/date))
+where $r/station eq "GHCND:USW00012836"
+ and fn:year-from-dateTime($datetime) ge 2003
+ and fn:month-from-dateTime($datetime) eq 12
+ and fn:day-from-dateTime($datetime) eq 25
+return $r
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q01.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q01.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q01.xq
new file mode 100644
index 0000000..0827c45
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q01.xq
@@ -0,0 +1,25 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+
+(: XQuery Filter Query :)
+(: Find all readings for hurricane force wind warning or extreme wind warning. :)
+(: The warnings occur when the wind speed (AWND) exceeds 110 mph (49.1744 :)
+(: meters per second). (Wind value is in tenths of a meter per second) :)
+let $collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+for $r in collection($collection)/dataCollection/data
+where $r/dataType eq "AWND" and xs:decimal(fn:data($r/value)) gt 491.744 (: 49.1744 m/s expressed in tenths of a meter per second :)
+return $r
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q02.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q02.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q02.xq
new file mode 100644
index 0000000..0635618
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q02.xq
@@ -0,0 +1,30 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+(:
+XQuery Aggregate Query
+----------------------
+Find the annual precipitation (PRCP) for Syracuse, NY using the airport
+weather station (USW00014771) report for 1999.
+:)
+fn:sum(
+ let $collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+ for $r in collection($collection)/dataCollection/data
+ where $r/station eq "GHCND:USW00014771"
+ and $r/dataType eq "PRCP"
+ and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 1999
+ return $r/value
+) div 10 (: NOTE(review): presumably PRCP values are stored in tenths of a unit, hence the division - confirm :)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q03.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q03.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q03.xq
new file mode 100644
index 0000000..c58b0a3
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q03.xq
@@ -0,0 +1,25 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+
+(: XQuery Aggregate Query :)
+(: Find the highest recorded temperature (TMAX) in Celsius. :)
+fn:max(
+ let $collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+ for $r in collection($collection)/dataCollection/data
+ where $r/dataType eq "TMAX"
+ return $r/value
+) div 10 (: NOTE(review): presumably TMAX values are stored in tenths of a degree, hence the division - confirm :)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04.xq
new file mode 100644
index 0000000..7d5fd77
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04.xq
@@ -0,0 +1,32 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+
+(:
+XQuery Join Query
+-------------------
+Find all the weather readings for Washington state for a specific day 1976/7/4.
+:)
+let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
+for $s in collection($station_collection)/stationCollection/station
+
+let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+for $r in collection($sensor_collection)/dataCollection/data
+
+where $s/id eq $r/station
+ and (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "WASHINGTON"))
+ and xs:dateTime(fn:data($r/date)) eq xs:dateTime("1976-07-04T00:00:00.000")
+return $r
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_join.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_join.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_join.xq
new file mode 100644
index 0000000..b28312e
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_join.xq
@@ -0,0 +1,34 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+
+(:
+XQuery Join Query
+-------------------
+Count all the weather readings for Washington state for a specific day 1976/7/4.
+:)
+fn:count(
+ let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
+ for $s in collection($station_collection)/stationCollection/station
+
+ let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+ for $r in collection($sensor_collection)/dataCollection/data
+
+ where $s/id eq $r/station
+ and (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "WASHINGTON"))
+ and xs:dateTime(fn:data($r/date)) eq xs:dateTime("1976-07-04T00:00:00.000")
+ return $r
+)
\ No newline at end of file
[09/14] git commit: New comments on renamed files.
Posted by pr...@apache.org.
New comments on renamed files.
Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/17bedfa9
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/17bedfa9
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/17bedfa9
Branch: refs/heads/master
Commit: 17bedfa9ff55b5b17b480773818db938d6184923
Parents: 0e666fc
Author: Preston Carman <pr...@apache.org>
Authored: Mon Oct 6 15:42:29 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Mon Oct 6 15:42:29 2014 -0700
----------------------------------------------------------------------
.../main/resources/noaa-ghcn-daily/queries/count_sensor.xq | 7 +++++--
.../main/resources/noaa-ghcn-daily/queries/count_station.xq | 7 +++++--
2 files changed, 10 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/vxquery/blob/17bedfa9/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_sensor.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_sensor.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_sensor.xq
index 6fa981b..a93ad66 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_sensor.xq
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_sensor.xq
@@ -15,8 +15,11 @@
specific language governing permissions and limitations
under the License. :)
-(: XQuery Join Query :)
-(: Count all the weather sensor readings available. :)
+(:
+XQuery Join Query
+-------------------
+Count all the weather sensor readings available.
+:)
count(
let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
for $r in collection($sensor_collection)/dataCollection/data
http://git-wip-us.apache.org/repos/asf/vxquery/blob/17bedfa9/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_station.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_station.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_station.xq
index 1958ec6..ebce764 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_station.xq
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/count_station.xq
@@ -15,8 +15,11 @@
specific language governing permissions and limitations
under the License. :)
-(: XQuery Join Query :)
-(: Count all the weather stations available. :)
+(:
+XQuery Join Query
+-------------------
+Count all the weather stations available.
+:)
count(
let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
for $s in collection($station_collection)/stationCollection/station
[02/14] Remove benchmark files to allow easy copy from other branch.
Posted by pr...@apache.org.
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q01.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q01.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q01.xq
deleted file mode 100644
index 0827c45..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q01.xq
+++ /dev/null
@@ -1,25 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License. :)
-
-(: XQuery Filter Query :)
-(: Find all readings for hurricane force wind warning or extreme wind warning. :)
-(: The warnings occur when the wind speed (AWND) exceeds 110 mph (49.1744 :)
-(: meters per second). (Wind value is in tenths of a meter per second) :)
-let $collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-for $r in collection($collection)/dataCollection/data
-where $r/dataType eq "AWND" and xs:decimal(fn:data($r/value)) gt 491.744
-return $r
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q02.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q02.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q02.xq
deleted file mode 100644
index 0635618..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q02.xq
+++ /dev/null
@@ -1,30 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License. :)
-(:
-XQuery Aggregate Query
-----------------------
-Find the annual precipitation (PRCP) for Syracuse, NY using the airport
-weather station (USW00014771) report for 1999.
-:)
-fn:sum(
- let $collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
- for $r in collection($collection)/dataCollection/data
- where $r/station eq "GHCND:USW00014771"
- and $r/dataType eq "PRCP"
- and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 1999
- return $r/value
-) div 10
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q03.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q03.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q03.xq
deleted file mode 100644
index c58b0a3..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q03.xq
+++ /dev/null
@@ -1,25 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License. :)
-
-(: XQuery Aggregate Query :)
-(: Find the highest recorded temperature (TMAX) in Celsius. :)
-fn:max(
- let $collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
- for $r in collection($collection)/dataCollection/data
- where $r/dataType eq "TMAX"
- return $r/value
-) div 10
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04.xq
deleted file mode 100644
index 5b7246d..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04.xq
+++ /dev/null
@@ -1,30 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License. :)
-
-(: XQuery Join Query :)
-(: Find all the weather readings for Washington state for a specific day :)
-(: 1976/7/4. :)
-let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
-for $s in collection($station_collection)/stationCollection/station
-
-let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-for $r in collection($sensor_collection)/dataCollection/data
-
-where $s/id eq $r/station
- and (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "WASHINGTON"))
- and xs:dateTime(fn:data($r/date)) eq xs:dateTime("1976-07-04T00:00:00.000")
-return $r
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_sensor.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_sensor.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_sensor.xq
deleted file mode 100644
index 6c7810a..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_sensor.xq
+++ /dev/null
@@ -1,27 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License. :)
-
-(: XQuery Join Query :)
-(: Count all the weather sensor readings on 1976-07-04. :)
-count(
- let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
- for $r in collection($sensor_collection)/dataCollection/data
-
- let $date := xs:date(fn:substring(xs:string(fn:data($r/date)), 0, 11))
- where $date eq xs:date("1976-07-04")
- return $r
-)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_station.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_station.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_station.xq
deleted file mode 100644
index 18e627a..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_station.xq
+++ /dev/null
@@ -1,25 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License. :)
-
-(: XQuery Join Query :)
-(: Count all the weather stations for Washington state. :)
-count(
- let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
- for $s in collection($station_collection)/stationCollection/station
- where (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "WASHINGTON"))
- return $s
-)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq
deleted file mode 100644
index c95f3f5..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq
+++ /dev/null
@@ -1,33 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License. :)
-
-(: XQuery Join Aggregate Query :)
-(: Find the lowest recorded temperature (TMIN) in the United States for :)
-(: 2001. :)
-fn:min(
- let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
- for $s in collection($station_collection)/stationCollection/station
-
- let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
- for $r in collection($sensor_collection)/dataCollection/data
-
- where $s/id eq $r/station
- and (some $x in $s/locationLabels satisfies ($x/type eq "CNTRY" and $x/id eq "FIPS:US"))
- and $r/dataType eq "TMIN"
- and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2001
- return $r/value
-) div 10
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_sensor.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_sensor.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_sensor.xq
deleted file mode 100644
index 8548742..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_sensor.xq
+++ /dev/null
@@ -1,28 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License. :)
-
-(: XQuery Join Aggregate Query :)
-(: Count all sensor readings for TMIN in 2001. :)
-count(
- let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
- for $r in collection($sensor_collection)/dataCollection/data
-
- let $date := xs:date(fn:substring(xs:string(fn:data($r/date)), 0, 11))
- where $r/dataType eq "TMIN"
- and fn:year-from-date($date) eq 2001
- return $r/value
-)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_station.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_station.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_station.xq
deleted file mode 100644
index 6f3a6b8..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_station.xq
+++ /dev/null
@@ -1,25 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License. :)
-
-(: XQuery Join Aggregate Query :)
-(: Count all stations in the United States. :)
-count(
- let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
- for $s in collection($station_collection)/stationCollection/station
- where (some $x in $s/locationLabels satisfies ($x/type eq "CNTRY" and $x/id eq "FIPS:US"))
- return $s
-)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06.xq
deleted file mode 100644
index 5c8ed54..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06.xq
+++ /dev/null
@@ -1,30 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License. :)
-
-(: XQuery Join Query :)
-(: Find the highest recorded temperature (TMAX) for each station for each :)
-(: day over the year 2000. :)
-let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
-for $s in collection($station_collection)/stationCollection/station
-
-let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-for $r in collection($sensor_collection)/dataCollection/data
-
-where $s/id eq $r/station
- and $r/dataType eq "TMAX"
- and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2000
-return ($s/displayName, $r/date, $r/value)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_sensor.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_sensor.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_sensor.xq
deleted file mode 100644
index 1938151..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_sensor.xq
+++ /dev/null
@@ -1,27 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License. :)
-
-(: XQuery Join Query :)
-(: Count max temperature (TMAX) readings for the year 2000. :)
-count(
- let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
- for $r in collection($sensor_collection)/dataCollection/data
-
- where $r/dataType eq "TMAX"
- and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2000
- return $r
-)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_station.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_station.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_station.xq
deleted file mode 100644
index 3c1dc98..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_station.xq
+++ /dev/null
@@ -1,24 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License. :)
-
-(: XQuery Join Query :)
-(: Count all the stations. :)
-count(
- let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
- for $s in collection($station_collection)/stationCollection/station
- return $s
-)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07.xq
deleted file mode 100644
index 5b1f2ac..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07.xq
+++ /dev/null
@@ -1,33 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License. :)
-
-(: XQuery Self Join Query :)
-(: Self join with all stations finding the difference in min and max :)
-(: temperature and get the average. :)
-fn:avg(
- let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
- for $r_min in collection($sensor_collection_min)/dataCollection/data
-
- let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
- for $r_max in collection($sensor_collection_max)/dataCollection/data
-
- where $r_min/station eq $r_max/station
- and $r_min/date eq $r_max/date
- and $r_min/dataType eq "TMIN"
- and $r_max/dataType eq "TMAX"
- return $r_max/value - $r_min/value
-) div 10
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmax.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmax.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmax.xq
deleted file mode 100644
index a48cad5..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmax.xq
+++ /dev/null
@@ -1,26 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License. :)
-
-(: XQuery Join Query :)
-(: Count all the records for TMAX. :)
-count(
- let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
- for $r_max in collection($sensor_collection_max)/dataCollection/data
-
- where $r_max/dataType eq "TMAX"
- return $r_max
-)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmin.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmin.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmin.xq
deleted file mode 100644
index 4a72d0f..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmin.xq
+++ /dev/null
@@ -1,26 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License. :)
-
-(: XQuery Join Query :)
-(: Find all the records for TMIN. :)
-count(
- let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
- for $r_min in collection($sensor_collection_min)/dataCollection/data
-
- where $r_min/dataType eq "TMIN"
- return $r_min
-)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/sensor_count.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/sensor_count.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/sensor_count.xq
deleted file mode 100644
index 6fa981b..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/sensor_count.xq
+++ /dev/null
@@ -1,24 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License. :)
-
-(: XQuery Join Query :)
-(: Count all the weather sensor readings available. :)
-count(
- let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
- for $r in collection($sensor_collection)/dataCollection/data
- return $r
-)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/station_count.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/station_count.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/station_count.xq
deleted file mode 100644
index 1958ec6..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/station_count.xq
+++ /dev/null
@@ -1,24 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License. :)
-
-(: XQuery Join Query :)
-(: Count all the weather stations available. :)
-count(
- let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
- for $s in collection($station_collection)/stationCollection/station
- return $s
-)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/README.md
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/README.md b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/README.md
deleted file mode 100644
index 58bea51..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/README.md
+++ /dev/null
@@ -1,51 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-Weather Data Conversion To XML
-=====================
-
-# Introduction
-
-The NOAA has hosted DAILY GLOBAL HISTORICAL CLIMATOLOGY NETWORK (GHCN-DAILY)
-.dat files. Weather.gov has an RSS/XML feed that gives current weather sensor
-readings. Using the RSS feed as a template, the GHCN-DAILY historical
-information is used to generate past RSS feed XML documents. The process allows
-testing on a large set of information without having to continually monitor
-the weather.gov site for all the weather details for years.
-
-# Detailed Description
-
-Detailed GHCN-DAILY information:
-<http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt>
-
-The process takes a save folder for the data. The folder contains several
-folders:
-
- - all_xml_files (The generated xml files for a given package)
- - downloads (All files taken from the NOAA HTTP site)
- - dataset-[name] (all files related to a single dataset)
-
-
-# Example commands
-
-Building
-
-
-Partitioning
-python weather_cli.py -x weather_example.xml
-
-Linking
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
deleted file mode 100755
index 632dbcb..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
+++ /dev/null
@@ -1,67 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# Run all the queries and save a log.
-# First argument: Supply the folder which houses all the queries (recursive).
-# Second argument: adds options to the VXQuery CLI. Third argument (optional): only run queries whose path matches it.
-#
-# run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/
-# run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "-client-net-ip-address 169.235.27.138"
-# run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "" q03
-#
-REPEAT=5
-FRAME_SIZE=$((8*1024))
-BUFFER_SIZE=$((32*1024*1024))
-JOIN_HASH_SIZE=-1
-
-if [ -z "${1}" ]
-then
- echo "Please supply a directory for query files to be found."
- exit
-fi
-
-export JAVA_OPTS="$JAVA_OPTS -server -Xmx8G -XX:+HeapDumpOnOutOfMemoryError -Djava.util.logging.config.file=./vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties"
-
-for j in $(find ${1} -name '*q??.xq')
-do
- if [ -z "${3}" ] || [[ "${j}" =~ "${3}" ]]
- then
- date
- echo "Running query: ${j}"
- log_file="$(basename ${j}).$(date +%Y%m%d%H%M).log"
- log_base_path=$(dirname ${j/queries/query_logs})
- mkdir -p ${log_base_path}
- time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${2} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -join-hash-size ${JOIN_HASH_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
- echo "\nBuffer Size: ${BUFFER_SIZE}" >> ${log_base_path}/${log_file}
- echo "\nFrame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file}
- echo "\nJoin Hash Size: ${JOIN_HASH_SIZE}" >> ${log_base_path}/${log_file}
- fi;
-done
-
-if which programname >/dev/null;
-then
- echo "Sending out e-mail notification."
- SUBJECT="Benchmark Tests Finished"
- EMAIL="ecarm002@ucr.edu"
- /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
- Completed all tests in folder ${1}.
- EOM
-else
- echo "No mail command to use."
-fi;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
deleted file mode 100755
index 98ab04b..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
+++ /dev/null
@@ -1,90 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# Run all the queries and save a log.
-# First argument: Supply the folder which houses all the queries (recursive).
-# Second argument: the number of cluster nodes. Third argument: adds options to the VXQuery CLI. Fourth argument (optional): only run queries whose path matches it.
-#
-# run_benchmark_cluster.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ 2
-# run_benchmark_cluster.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ 2 "-client-net-ip-address 169.235.27.138"
-# run_benchmark_cluster.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ 2 "" q03
-#
-CLUSTER="uci"
-REPEAT=5
-FRAME_SIZE=$((8*1024))
-BUFFER_SIZE=$((32*1024*1024))
-#JOIN_HASH_SIZE=$((256*1024*1024))
-JOIN_HASH_SIZE=-1
-
-if [ -z "${1}" ]
-then
- echo "Please supply a directory for query files to be found."
- exit
-fi
-
-if [ -z "${2}" ]
-then
- echo "Please the number of nodes (start at 0)."
- exit
-fi
-
-# Run queries for the specified number of nodes.
-echo "Starting ${2} cluster nodes"
-python vxquery-server/src/main/resources/scripts/cluster_cli.py -c vxquery-server/src/main/resources/conf/${CLUSTER}/${2}nodes.xml -a start
-
-# wait for cluster to finish setting up
-sleep 5
-
-export JAVA_OPTS="$JAVA_OPTS -server -Xmx8G -XX:+HeapDumpOnOutOfMemoryError -Djava.util.logging.config.file=./vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties"
-
-for j in $(find ${1} -name '*q??.xq')
-do
- # Only work with i nodes.
- if [[ "${j}" =~ "${2}nodes" ]]
- then
- # Only run for specified queries.
- if [ -z "${4}" ] || [[ "${j}" =~ "${4}" ]]
- then
- date
- echo "Running query: ${j}"
- log_file="$(basename ${j}).$(date +%Y%m%d%H%M).log"
- log_base_path=$(dirname ${j/queries/query_logs})
- mkdir -p ${log_base_path}
- time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${3} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -join-hash-size ${JOIN_HASH_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
- echo "\nBuffer Size: ${BUFFER_SIZE}" >> ${log_base_path}/${log_file}
- echo "\nFrame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file}
- echo "\nJoin Hash Size: ${JOIN_HASH_SIZE}" >> ${log_base_path}/${log_file}
- fi;
- fi;
-done
-
-# Stop cluster.
-python vxquery-server/src/main/resources/scripts/cluster_cli.py -c vxquery-server/src/main/resources/conf/${CLUSTER}/${2}nodes.xml -a stop
-
-if which programname >/dev/null;
-then
- echo "Sending out e-mail notification."
- SUBJECT="Benchmark Cluster Tests Finished"
- EMAIL="ecarm002@ucr.edu"
- /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
- Completed all tests in folder ${1} for a ${2} node cluster using ${HOSTNAME}.
- EOM
-else
- echo "No mail command to use."
-fi;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh
deleted file mode 100755
index 58976b7..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-DATASET="dataset-hcn-d2"
-cluster_ip=${1}
-base_weather_folder=${2}
-
-for n in 7 6 5 3 4 2 1 0
-do
- #for t in "batch_scale_out" "speed_up"
- for t in "batch_scale_out"
- #for t in "speed_up"
- do
- for p in 2
- do
- for c in 4
- do
- echo " ==== node ${n} test ${t} partition ${p} cores ${c} ===="
- sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh ${base_weather_folder}/${DATASET}/queries/${t}/${n}nodes/d2_p${p}/ ${n} "-client-net-ip-address ${cluster_ip} -available-processors ${c}"
- done
- done
- done
-done
-
-if which programname >/dev/null;
-then
- echo "Sending out e-mail notification."
- SUBJECT="Benchmark Group Tests Finished"
- EMAIL="ecarm002@ucr.edu"
- /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
- Completed all tests in the predefined group for ${DATASET}.
- EOM
-else
- echo "No mail command to use."
-fi;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
deleted file mode 100755
index a6788be..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-export JAVA_HOME=/home/ecarm002/java/jdk1.6.0_45
-REPEAT=${1}
-DATASET="hcn"
-
-for n in `seq 0 7`
-#for n in 0
-do
- date
- echo "Running q0${n} on ${DATASET} for MRQL."
- time for i in {1..${REPEAT}}; do ~/mrql/incubator-mrql/bin/mrql -dist -nodes 5 ~/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_${DATASET}/q0${n}.mrql >> weather_data/mrql/query_logs/${DATASET}/q0${n}.mrql.log 2>&1; done;
-done
-
-if which programname >/dev/null;
-then
- echo "Sending out e-mail notification."
- SUBJECT="MRQL Tests Finished (${DATASET})"
- EMAIL="ecarm002@ucr.edu"
- /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
- Completed all MRQL tests on ${DATASET}.
- EOM
-else
- echo "No mail command to use."
-fi;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
deleted file mode 100644
index 8021b2c..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
+++ /dev/null
@@ -1,377 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import os.path
-import linecache
-import distutils.core
-import fileinput
-import socket
-
-from weather_config import *
-from weather_data_files import *
-
-# Weather data files created to manage the conversion process.
-# Allows partition and picking up where you left off.
-#
-# benchmark_name/
-# data/
-# queries/
-# logs/
-class WeatherBenchmark:
-
- DATA_LINKS_FOLDER = "data_links/"
- LARGE_FILE_ROOT_TAG = WeatherDataFiles.LARGE_FILE_ROOT_TAG
- QUERY_REPLACEMENT_KEY = "/tmp/1.0_partition_ghcnd_all_xml/"
- QUERY_MASTER_FOLDER = "../queries/"
- QUERY_FILE_LIST = [
- "q00.xq",
- "q01.xq",
- "q02.xq",
- "q03.xq",
- "q04.xq",
- "q05.xq",
- "q06.xq",
- "q07.xq"
- ]
- QUERY_UTILITY_LIST = [
- "no_result.xq",
- "sensor_count.xq",
- "station_count.xq",
- "q04_sensor.xq",
- "q04_station.xq",
- "q05_sensor.xq",
- "q05_station.xq",
- "q06_sensor.xq",
- "q06_station.xq",
- "q07_tmin.xq",
- "q07_tmax.xq",
- ]
- BENCHMARK_LOCAL_TESTS = ["local_speed_up", "local_batch_scale_out"]
- BENCHMARK_CLUSTER_TESTS = ["speed_up", "batch_scale_out"]
- QUERY_COLLECTIONS = ["sensors", "stations"]
-
- SEPERATOR = "|"
-
- def __init__(self, base_paths, partitions, dataset, nodes):
- self.base_paths = base_paths
- self.partitions = partitions
- self.dataset = dataset
- self.nodes = nodes
-
- def print_partition_scheme(self):
- if (len(self.base_paths) == 0):
- return
- for test in self.dataset.get_tests():
- if test in self.BENCHMARK_LOCAL_TESTS:
- self.print_local_partition_schemes(test)
- elif test in self.BENCHMARK_CLUSTER_TESTS:
- self.print_cluster_partition_schemes(test)
- else:
- print "Unknown test."
- exit()
-
- def print_local_partition_schemes(self, test):
- node_index = 0
- virtual_disk_partitions = get_local_virtual_disk_partitions(self.partitions)
- for p in self.partitions:
- scheme = self.get_local_partition_scheme(test, p)
- self.print_partition_schemes(virtual_disk_partitions, scheme, test, p, node_index)
-
- def print_cluster_partition_schemes(self, test):
- node_index = self.get_current_node_index()
- virtual_disk_partitions = get_cluster_virtual_disk_partitions(self.nodes, self.partitions)
- for p in self.partitions:
- scheme = self.get_cluster_partition_scheme(test, p)
- self.print_partition_schemes(virtual_disk_partitions, scheme, test, p, node_index)
-
- def print_partition_schemes(self, virtual_partitions, scheme, test, partitions, node_id):
- print
- print "---------------- Partition Scheme --------------------"
- print " Test: " + test
- print " Virtual Partitions: " + str(virtual_partitions)
- print " Disks: " + str(len(self.base_paths))
- print " Partitions: " + str(partitions)
- print " Node Id: " + str(node_id)
-
- if isinstance(scheme, (tuple, list, dict, set)) and len(scheme) > 0:
- folder_length = len(scheme[0][3]) + 5
- row_format = "{:>5} {:>5} {:>5} {:<" + str(folder_length) + "} {:<" + str(folder_length) + "}"
- HEADER = ("Disk", "Index", "Link", "Data Path", "Link Path")
- print row_format.format(*HEADER)
- for row in scheme:
- print row_format.format(*row)
- print
- else:
- print " Scheme is EMPTY."
-
- def get_local_partition_scheme(self, test, partition):
- scheme = []
- virtual_partitions = get_local_virtual_disk_partitions(self.partitions)
- data_schemes = get_partition_scheme(0, virtual_partitions, self.base_paths)
- link_base_schemes = get_partition_scheme(0, virtual_partitions, self.base_paths, self.DATA_LINKS_FOLDER + test)
-
- # Match link paths to real data paths.
- group_size = len(data_schemes) / len(link_base_schemes)
- for d in range(len(self.base_paths)):
- offset = 0
- for link_node, link_disk, link_virtual, link_index, link_path in link_base_schemes:
- if d == link_disk:
- # Only consider a single disk at a time.
- for data_node, data_disk, data_virtual, data_index, data_path in data_schemes:
- if test == "local_speed_up" and data_disk == link_disk \
- and offset <= data_index and data_index < offset + group_size:
- scheme.append([data_disk, data_index, link_index, data_path, link_path])
- elif test == "local_batch_scale_out" and data_disk == link_disk \
- and data_index == link_index:
- scheme.append([data_disk, data_index, link_index, data_path, link_path])
- offset += group_size
- return scheme
-
- def get_cluster_partition_scheme(self, test, partition):
- node_index = self.get_current_node_index()
- if node_index == -1:
- print "Unknown host."
- return
-
- scheme = []
- virtual_disk_partitions = get_cluster_virtual_disk_partitions(self.nodes, self.partitions)
- data_schemes = get_disk_partition_scheme(node_index, virtual_disk_partitions, self.base_paths)
- link_base_schemes = get_cluster_link_scheme(len(self.nodes), partition, self.base_paths, self.DATA_LINKS_FOLDER + test)
-
- # Match link paths to real data paths.
- for link_node, link_disk, link_virtual, link_index, link_path in link_base_schemes:
- # Prep
- if test == "speed_up":
- group_size = virtual_disk_partitions / (link_node + 1) / partition
- elif test == "batch_scale_out":
- group_size = virtual_disk_partitions / len(self.nodes) / partition
- else:
- print "Unknown test."
- return
-
- node_offset = group_size * node_index * partition
- node_offset += group_size * link_index
- has_data = True
- if link_node < node_index:
- has_data = False
-
- # Make links
- for date_node, data_disk, data_virtual, data_index, data_path in data_schemes:
- if has_data and data_disk == link_disk \
- and node_offset <= data_index and data_index < node_offset + group_size:
- scheme.append([link_disk, data_index, link_index, data_path, link_path])
- scheme.append([link_disk, -1, link_index, "", link_path])
- return scheme
-
- def build_data_links(self, reset):
- if (len(self.base_paths) == 0):
- return
- if reset:
- shutil.rmtree(self.base_paths[0] + self.DATA_LINKS_FOLDER)
- for test in self.dataset.get_tests():
- if test in self.BENCHMARK_LOCAL_TESTS:
- for i in self.partitions:
- scheme = self.get_local_partition_scheme(test, i)
- self.build_data_links_scheme(scheme)
- if 1 in self.partitions and len(self.base_paths) > 1:
- scheme = self.build_data_links_local_zero_partition(test)
- self.build_data_links_scheme(scheme)
- elif test in self.BENCHMARK_CLUSTER_TESTS:
- for i in self.partitions:
- scheme = self.get_cluster_partition_scheme(test, i)
- self.build_data_links_scheme(scheme)
- if 1 in self.partitions and len(self.base_paths) > 1:
- scheme = self.build_data_links_cluster_zero_partition(test)
- self.build_data_links_scheme(scheme)
- else:
- print "Unknown test."
- exit()
-
- def build_data_links_scheme(self, scheme):
- '''Build all the data links based on the scheme information.'''
- for (data_disk, data_index, partition, data_path, link_path) in scheme:
- self.add_collection_links_for(data_path, link_path, data_index)
-
- def build_data_links_cluster_zero_partition(self, test):
- '''Build a scheme for all data in one symbolically linked folder. (0 partition)'''
- scheme = []
- link_base_schemes = get_cluster_link_scheme(len(self.nodes), 1, self.base_paths, self.DATA_LINKS_FOLDER + test)
- for link_node, link_disk, link_virtual, link_index, link_path in link_base_schemes:
- new_link_path = self.get_zero_partition_path(link_node, self.DATA_LINKS_FOLDER + test + "/" + str(link_node) + "nodes")
- scheme.append([0, link_disk, 0, link_path, new_link_path])
- return scheme
-
- def build_data_links_local_zero_partition(self, test):
- '''Build a scheme for all data in one symbolically linked folder. (0 partition)'''
- scheme = []
- index = 0
- link_base_schemes = get_partition_scheme(0, 1, self.base_paths, self.DATA_LINKS_FOLDER + test)
- for link_node, link_disk, link_virtual, link_index, link_path in link_base_schemes:
- if test == "local_batch_scale_out" and index > 0:
- continue
- new_link_path = self.get_zero_partition_path(link_node, self.DATA_LINKS_FOLDER + test)
- scheme.append([0, index, 0, link_path, new_link_path])
- index += 1
- return scheme
-
- def get_zero_partition_path(self, node, key):
- '''Return a partition path for the zero partition.'''
- base_path = self.base_paths[0]
- new_link_path = get_partition_scheme(node, 1, [base_path], key)[0][PARTITION_INDEX_PATH]
- return new_link_path.replace("p1", "p0")
-
- def get_current_node_index(self):
- found = False
- node_index = 0
- for machine in self.nodes:
- if socket.gethostname().startswith(machine.get_node_name()):
- found = True
- break
- node_index += 1
-
- if found:
- return node_index
- else:
- return -1
-
- def add_collection_links_for(self, real_path, link_path, index):
- for collection in self.QUERY_COLLECTIONS:
- collection_path = link_path + collection + "/"
- collection_index = collection_path + "index" + str(index)
- if not os.path.isdir(collection_path):
- os.makedirs(collection_path)
- if index >= 0:
- if os.path.islink(collection_index):
- os.unlink(collection_index)
- os.symlink(real_path + collection + "/", collection_index)
-
- def copy_query_files(self, reset):
- for test in self.dataset.get_tests():
- if test in self.BENCHMARK_LOCAL_TESTS:
- self.copy_local_query_files(test, reset)
- elif test in self.BENCHMARK_CLUSTER_TESTS:
- self.copy_cluster_query_files(test, reset)
- else:
- print "Unknown test."
- exit()
-
- def copy_cluster_query_files(self, test, reset):
- '''Determine the data_link path for cluster query files and copy with
- new location for collection.'''
- if 1 in self.partitions and len(self.base_paths) > 1:
- for n in range(len(self.nodes)):
- query_path = get_cluster_query_path(self.base_paths, test, 0, n)
- prepare_path(query_path, reset)
-
- # Copy query files.
- new_link_path = self.get_zero_partition_path(n, self.DATA_LINKS_FOLDER + test + "/" + str(n) + "nodes")
- self.copy_and_replace_query(query_path, [new_link_path])
- for n in range(len(self.nodes)):
- for p in self.partitions:
- query_path = get_cluster_query_path(self.base_paths, test, p, n)
- prepare_path(query_path, reset)
-
- # Copy query files.
- partition_paths = get_disk_partition_paths(n, p, self.base_paths, self.DATA_LINKS_FOLDER + test + "/" + str(n) + "nodes")
- self.copy_and_replace_query(query_path, partition_paths)
-
- def copy_local_query_files(self, test, reset):
- '''Determine the data_link path for local query files and copy with
- new location for collection.'''
- if 1 in self.partitions and len(self.base_paths) > 1:
- query_path = get_local_query_path(self.base_paths, test, 0)
- prepare_path(query_path, reset)
-
- # Copy query files.
- new_link_path = self.get_zero_partition_path(0, self.DATA_LINKS_FOLDER + test)
- self.copy_and_replace_query(query_path, [new_link_path])
- for p in self.partitions:
- query_path = get_local_query_path(self.base_paths, test, p)
- prepare_path(query_path, reset)
-
- # Copy query files.
- partition_paths = get_disk_partition_paths(0, p, self.base_paths, self.DATA_LINKS_FOLDER + test)
- self.copy_and_replace_query(query_path, partition_paths)
-
- def copy_and_replace_query(self, query_path, replacement_list):
- '''Copy the query files over to the query_path and replace the path
- for where the collection data is located.'''
- for query_file in self.QUERY_FILE_LIST + self.QUERY_UTILITY_LIST:
- shutil.copyfile(self.QUERY_MASTER_FOLDER + query_file, query_path + query_file)
-
- # Make a search replace for each collection.
- for collection in self.QUERY_COLLECTIONS:
- replacement_list_with_type = []
- for replace in replacement_list:
- replacement_list_with_type.append(replace + collection)
-
- replace_string = self.SEPERATOR.join(replacement_list_with_type)
- for line in fileinput.input(query_path + query_file, True):
- sys.stdout.write(line.replace(self.QUERY_REPLACEMENT_KEY + collection, replace_string))
-
- # Make a search replace for partition type.
- if self.dataset.get_partition_type() == "large_files":
- for line in fileinput.input(query_path + query_file, True):
- sys.stdout.write(line.replace("/stationCollection", "/" + self.LARGE_FILE_ROOT_TAG + "/stationCollection"))
- for line in fileinput.input(query_path + query_file, True):
- sys.stdout.write(line.replace("/dataCollection", "/" + self.LARGE_FILE_ROOT_TAG + "/dataCollection"))
-
- def get_number_of_slices_per_disk(self):
- if len(self.dataset.get_tests()) == 0:
- print "No test has been defined in config file."
- else:
- for test in self.dataset.get_tests():
- if test in self.BENCHMARK_LOCAL_TESTS:
- return get_local_virtual_disk_partitions(self.partitions)
- elif test in self.BENCHMARK_CLUSTER_TESTS:
- return get_cluster_virtual_disk_partitions(self.nodes, self.partitions)
- else:
- print "Unknown test."
- exit()
-
-def get_cluster_link_scheme(nodes, partition, base_paths, key="partitions"):
- link_paths = []
- for n in range(0, nodes):
- new_link_path = get_disk_partition_scheme(n, partition, base_paths, key + "/" + str(n) + "nodes")
- link_paths.extend(new_link_path)
- return link_paths
-
-def get_local_query_path(base_paths, test, partition):
- return base_paths[0] + "queries/" + test + "/" + get_local_query_folder(len(base_paths), partition) + "/"
-
-def get_local_query_folder(disks, partitions):
- return "d" + str(disks) + "_p" + str(partitions)
-
-def get_cluster_query_path(base_paths, test, partition, nodes):
- return base_paths[0] + "queries/" + test + "/" + str(nodes) + "nodes/" + get_local_query_folder(len(base_paths), partition) + "/"
-
-def get_cluster_virtual_disk_partitions(nodes, partitions):
- vp = get_local_virtual_disk_partitions(partitions)
- vn = calculate_partitions(range(1, len(nodes)+1, 1))
- return vp * vn
-
-def get_local_virtual_disk_partitions(partitions):
- return calculate_partitions(partitions)
-
-def calculate_partitions(list):
- x = 1
- for i in list:
- if x % i != 0:
- if i % x == 0:
- x = i
- else:
- x *= i
- return x
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
deleted file mode 100644
index eeae25c..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
+++ /dev/null
@@ -1,236 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import sys, getopt
-
-# Custom modules.
-from weather_data_files import *
-from weather_download_files import *
-from weather_convert_to_xml import *
-from weather_config import *
-from weather_benchmark import *
-
-DEBUG_OUTPUT = False
-
-#
-# Weather conversion for GHCN-DAILY files to xml.
-#
-# http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt
-#
-def main(argv):
- append = False
- max_records = 0
- process_file_name = ""
- reset = False
- section = "all"
- token = ""
- update = False
- xml_config_path = ""
-
- try:
- opts, args = getopt.getopt(argv, "af:hl:m:ruvw:x:", ["file=", "locality=", "max_station_files=", "web_service=", "xml_config="])
- except getopt.GetoptError:
- print 'The file options for weather_cli.py were not correctly specified.'
- print 'To see a full list of options try:'
- print ' $ python weather_cli.py -h'
- sys.exit(2)
- for opt, arg in opts:
- if opt == '-h':
- print 'Converting weather daily files to xml options:'
- print ' -a Append the results to the progress file.'
- print ' -f (str) The file name of a specific station to process.'
- print ' * Helpful when testing a single stations XML file output.'
- print ' -l (str) Select the locality of the scripts execution (download, progress_file, sensor_build, station_build, partition, partition_scheme, test_links, queries, inventory, statistics).'
- print ' -m (int) Limits the number of files created for each station.'
- print ' * Helpful when testing to make sure all elements are supported for each station.'
- print ' Alternate form: --max_station_files=(int)'
- print ' -r Reset the build process. (For one section or all sections depending on other parameters.)'
- print ' -u Recalculate the file count and data size for each data source file.'
- print ' -v Extra debug information.'
- print ' -w (str) Downloads the station XML file form the web service.'
- print ' -x (str) XML config file for weather data.'
- sys.exit()
- elif opt in ('-a', "--append"):
- append = True
- elif opt in ('-f', "--file"):
- # check if file exists.
- if os.path.exists(arg):
- process_file_name = arg
- else:
- print 'Error: Argument must be a file name for --file (-f).'
- sys.exit()
- elif opt in ('-l', "--locality"):
- if arg in ("download", "progress_file", "sensor_build", "station_build", "partition", "partition_scheme", "test_links", "queries", "inventory", "statistics"):
- section = arg
- else:
- print 'Error: Argument must be a string for --locality (-l) and a valid locality.'
- sys.exit()
- elif opt in ('-m', "--max_station_files"):
- if arg.isdigit():
- max_records = int(arg)
- else:
- print 'Error: Argument must be an integer for --max_station_files (-m).'
- sys.exit()
- elif opt == '-r':
- reset = True
- elif opt == '-u':
- update = True
- elif opt == '-v':
- global DEBUG_OUTPUT
- DEBUG_OUTPUT = True
- elif opt == '-w':
- # check if file exists.
- if arg is not "":
- token = arg
- else:
- print 'Error: Argument must be a string --web_service (-w).'
- sys.exit()
- elif opt in ('-x', "--xml_config"):
- # check if file exists.
- if os.path.exists(arg):
- xml_config_path = arg
- else:
- print 'Error: Argument must be a xml file for --xml_config (-x).'
- sys.exit()
-
- # Required fields to run the script.
- if xml_config_path == "" or not os.path.exists(xml_config_path):
- print 'Error: The xml config option must be supplied: --xml_config (-x).'
- sys.exit()
- config = WeatherConfig(xml_config_path)
-
- # Required fields to run the script.
- if config.get_save_path() == "" or not os.path.exists(config.get_save_path()):
- print 'Error: The save directory option must be supplied in the config file.'
- sys.exit()
-
- # Set up downloads folder.
- download_path = config.get_save_path() + "/downloads"
- if section in ("all", "download"):
- print 'Processing the download section.'
- download = WeatherDownloadFiles(download_path)
- download.download_ghcnd_files(reset)
- download.download_mshr_files(reset)
-
- # Unzip the required file.
- download.unzip_ghcnd_package(config.get_package(), reset)
- download.unzip_mshr_files(reset)
-
-
- # Create some basic paths for save files and references.
- ghcnd_data_dly_path = download_path + '/' + config.get_package() + '/' + config.get_package()
- xml_data_save_path = config.get_save_path() + '/all_xml_files/'
-
- # Make sure the xml folder is available.
- if not os.path.isdir(xml_data_save_path):
- os.makedirs(xml_data_save_path)
-
- # Set up the XML build objects.
- convert = WeatherWebServiceMonthlyXMLFile(download_path, xml_data_save_path, DEBUG_OUTPUT)
- progress_file = xml_data_save_path + "_data_progress.csv"
- data = WeatherDataFiles(ghcnd_data_dly_path, progress_file)
- if section in ("all", "progress_file"):
- print 'Processing the progress_file section.'
- options = list()
- if append:
- options.append('append')
- if update:
- options.append('recalculate')
- if reset:
- options.append('reset')
- data.build_progress_file(options, convert)
-
- if section in ("all", "sensor_build"):
- print 'Processing the sensor_build section.'
- if process_file_name is not "":
- # process a single file
- if os.path.exists(process_file_name):
- (file_count, data_size) = convert.process_sensor_file(process_file_name, max_records, 4)
- data.update_file_sensor_status(process_file_name, WeatherDataFiles.DATA_FILE_GENERATED, file_count, data_size)
- else:
- data.update_file_sensor_status(process_file_name, WeatherDataFiles.DATA_FILE_MISSING)
- else:
- # process directory
- data.reset()
- data.set_type("sensor")
- data.set_data_reset(reset)
- for file_name in data:
- file_path = ghcnd_data_dly_path + '/' + file_name
- if os.path.exists(file_path):
- (file_count, data_size) = convert.process_sensor_file(file_path, max_records, 4)
- data.update_file_sensor_status(file_name, WeatherDataFiles.DATA_FILE_GENERATED, file_count, data_size)
- else:
- data.update_file_sensor_status(file_name, WeatherDataFiles.DATA_FILE_MISSING)
-
- if section in ("all", "station_build"):
- print 'Processing the station_build section.'
- data.reset()
- data.set_type("station")
- data.set_data_reset(reset)
- if token is not "":
- convert.set_token(token)
- for file_name in data:
- file_path = ghcnd_data_dly_path + '/' + file_name
- if os.path.exists(file_path):
- return_status = convert.process_station_file(file_path)
- status = data.get_station_status(return_status)
- data.update_file_station_status(file_name, status)
- else:
- data.update_file_station_status(file_name, WeatherDataFiles.DATA_FILE_MISSING)
-
- for dataset in config.get_dataset_list():
- # Set up the setting for each dataset.
- dataset_folder = "/dataset-" + dataset.get_name()
- progress_file = config.get_save_path() + dataset_folder + "/_data_progress.csv"
- data = WeatherDataFiles(ghcnd_data_dly_path, progress_file)
-
- base_paths = []
- for paths in dataset.get_save_paths():
- base_paths.append(paths + dataset_folder + "/")
- benchmark = WeatherBenchmark(base_paths, dataset.get_partitions(), dataset, config.get_node_machine_list())
-
- if section in ("all", "partition", "partition_scheme"):
- slices = benchmark.get_number_of_slices_per_disk()
- print 'Processing the partition section (' + dataset.get_name() + ':d' + str(len(base_paths)) + ':s' + str(slices) + ').'
- data.reset()
- if section == "partition_scheme":
- benchmark.print_partition_scheme()
- else:
- if dataset.get_partition_type() == "large_files":
- data.build_to_n_partition_files(xml_data_save_path, slices, base_paths, reset)
- else:
- data.copy_to_n_partitions(xml_data_save_path, slices, base_paths, reset)
-
- if section in ("all", "test_links"):
- # TODO determine current node
- print 'Processing the test links section (' + dataset.get_name() + ').'
- benchmark.print_partition_scheme()
- benchmark.build_data_links(reset)
-
- if section in ("all", "queries"):
- print 'Processing the queries section (' + dataset.get_name() + ').'
- benchmark.copy_query_files(reset)
-
- if section in ("inventory"):
- print 'Processing the inventory section.'
- convert.process_inventory_file()
-
-# if section in ("statistics"):
-# print 'Processing the statistics section.'
-# data.print_progress_file_stats(convert)
-
-if __name__ == "__main__":
- main(sys.argv[1:])
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py
deleted file mode 100644
index 80607b8..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py
+++ /dev/null
@@ -1,134 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from xml.dom.minidom import parse
-
-class WeatherConfig:
- def __init__(self, config_xml_file):
- self.config_xml_file = config_xml_file
-
- self.config = parse(self.config_xml_file)
-
- def get_save_path(self):
- return self.get_text(self.config.getElementsByTagName("save_path")[0])
-
- def get_package(self):
- return self.get_text(self.config.getElementsByTagName("package")[0])
-
- def get_node_machine_list(self):
- nodes = []
- for node in self.config.getElementsByTagName("node"):
- id = self.get_node_name(node)
- ip = self.get_node_ip(node)
- nodes.append(Machine(id, ip))
- return nodes
-
- def get_dataset_list(self):
- nodes = []
- for node in self.config.getElementsByTagName("dataset"):
- name = self.get_dataset_name(node)
- save_paths = self.get_dataset_save_paths(node)
- partition_type = self.get_dataset_partition_type(node)
- partitions = self.get_dataset_partitions(node)
- tests = self.get_dataset_tests(node)
- nodes.append(Dataset(name, save_paths, partition_type, partitions, tests))
- return nodes
-
-
- # --------------------------------------------------------------------------
- # Node Specific Functions
- # --------------------------------------------------------------------------
- def get_node_ip(self, node):
- return self.get_text(node.getElementsByTagName("cluster_ip")[0])
-
- def get_node_name(self, node):
- return self.get_text(node.getElementsByTagName("id")[0])
-
-
- # --------------------------------------------------------------------------
- # Dataset Specific Functions
- # --------------------------------------------------------------------------
- def get_dataset_name(self, node):
- return self.get_text(node.getElementsByTagName("name")[0])
-
- def get_dataset_save_paths(self, node):
- paths = []
- for item in node.getElementsByTagName("save_path"):
- paths.append(self.get_text(item))
- return paths
-
- def get_dataset_partition_type(self, node):
- return self.get_text(node.getElementsByTagName("partition_type")[0])
-
- def get_dataset_partitions(self, node):
- paths = []
- for item in node.getElementsByTagName("partitions_per_path"):
- paths.append(int(self.get_text(item)))
- return paths
-
- def get_dataset_tests(self, node):
- tests = []
- for item in node.getElementsByTagName("test"):
- tests.append(self.get_text(item))
- return tests
-
- def get_text(self, xml_node):
- rc = []
- for node in xml_node.childNodes:
- if node.nodeType == node.TEXT_NODE:
- rc.append(node.data)
- return ''.join(rc)
-
-class Machine:
- def __init__(self, id, ip):
- self.id = id
- self.ip = ip
-
- def get_node_name(self):
- return self.id
-
- def get_node_ip(self):
- return self.ip
-
- def __repr__(self):
- return self.id + "(" + self.ip + ")"
-
-class Dataset:
- def __init__(self, name, save_paths, partition_type, partitions, tests):
- self.name = name
- self.save_paths = save_paths
- self.partitions = partitions
- self.partition_type = partition_type
- self.tests = tests
-
- def get_name(self):
- return self.name
-
- def get_save_paths(self):
- return self.save_paths
-
- def get_partitions(self):
- return self.partitions
-
- def get_partition_type(self):
- return self.partition_type
-
- def get_tests(self):
- return self.tests
-
- def __repr__(self):
- return self.name + ":" + str(self.save_paths) + ":" + str(self.partitions)
-
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
deleted file mode 100644
index 04fff52..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
+++ /dev/null
@@ -1,95 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Base URL used to get all the required files.
-BASE_DOWNLOAD_URL = 'http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/'
-
-# List of required files for a build.
-FILE_NAMES = []
-FILE_NAMES.append('ghcnd-countries.txt')
-FILE_NAMES.append('ghcnd-inventory.txt')
-FILE_NAMES.append('ghcnd-states.txt')
-FILE_NAMES.append('ghcnd-stations.txt')
-FILE_NAMES.append('ghcnd-version.txt')
-FILE_NAMES.append('ghcnd_all.tar.gz')
-FILE_NAMES.append('ghcnd_gsn.tar.gz')
-FILE_NAMES.append('ghcnd_hcn.tar.gz')
-FILE_NAMES.append('readme.txt')
-FILE_NAMES.append('status.txt')
-
-# Store the row details here.
-
-# Index values of each field details.
-FIELD_INDEX_NAME = 0
-FIELD_INDEX_START = 1
-FIELD_INDEX_END = 2
-FIELD_INDEX_TYPE = 3
-
-DLY_FIELD_ID = 0
-DLY_FIELD_YEAR = 1
-DLY_FIELD_MONTH = 2
-DLY_FIELD_ELEMENT = 3
-
-DLY_FIELD_DAY_OFFSET = 4
-DLY_FIELD_DAY_FIELDS = 4
-
-DLY_FIELDS = []
-
-# Details about the row.
-DLY_FIELDS.append(['ID', 1, 11, 'Character'])
-DLY_FIELDS.append(['YEAR', 12, 15, 'Integer'])
-DLY_FIELDS.append(['MONTH', 16, 17, 'Integer'])
-DLY_FIELDS.append(['ELEMENT', 18, 21, 'Character'])
-
-# Days in each row.
-for i in range(1, 32):
- start = 22 + ((i - 1) * 8)
- DLY_FIELDS.append(['VALUE' + str(i), (start + 0), (start + 4), 'Integer'])
- DLY_FIELDS.append(['MFLAG' + str(i), (start + 5), (start + 5), 'Character'])
- DLY_FIELDS.append(['QFLAG' + str(i), (start + 6), (start + 6), 'Character'])
- DLY_FIELDS.append(['SFLAG' + str(i), (start + 7), (start + 7), 'Character'])
-
-# Details about the row.
-STATIONS_FIELDS = {}
-STATIONS_FIELDS['ID'] = ['ID', 1, 11, 'Character']
-STATIONS_FIELDS['LATITUDE'] = ['LATITUDE', 13, 20, 'Real']
-STATIONS_FIELDS['LONGITUDE'] = ['LONGITUDE', 22, 30, 'Real']
-STATIONS_FIELDS['ELEVATION'] = ['ELEVATION', 32, 37, 'Real']
-STATIONS_FIELDS['STATE'] = ['STATE', 39, 40, 'Character']
-STATIONS_FIELDS['NAME'] = ['NAME', 42, 71, 'Character']
-STATIONS_FIELDS['GSNFLAG'] = ['GSNFLAG', 73, 75, 'Character']
-STATIONS_FIELDS['HCNFLAG'] = ['HCNFLAG', 77, 79, 'Character']
-STATIONS_FIELDS['WMOID'] = ['WMOID', 81, 85, 'Character']
-
-# Details about the row.
-COUNTRIES_FIELDS = {}
-COUNTRIES_FIELDS['CODE'] = ['CODE', 1, 2, 'Character']
-COUNTRIES_FIELDS['NAME'] = ['NAME', 4, 50, 'Character']
-
-# Details about the row.
-STATES_FIELDS = {}
-STATES_FIELDS['CODE'] = ['CODE', 1, 2, 'Character']
-STATES_FIELDS['NAME'] = ['NAME', 4, 50, 'Character']
-
-# Details about the row.
-INVENTORY_FIELDS = {}
-INVENTORY_FIELDS['ID'] = ['ID', 1, 11, 'Character']
-INVENTORY_FIELDS['LATITUDE'] = ['LATITUDE', 13, 20, 'Real']
-INVENTORY_FIELDS['LONGITUDE'] = ['LONGITUDE', 22, 30, 'Real']
-INVENTORY_FIELDS['ELEMENT'] = ['ELEMENT', 32, 35, 'Character']
-INVENTORY_FIELDS['FIRSTYEAR'] = ['FIRSTYEAR', 37, 40, 'Integer']
-INVENTORY_FIELDS['LASTYEAR'] = ['LASTYEAR', 42, 45, 'Integer']
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
deleted file mode 100644
index 7b1434f..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
+++ /dev/null
@@ -1,78 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# List of required files for a build.
-MSHR_URLS = []
-MSHR_URLS.append('ftp://ftp.ncdc.noaa.gov/pub/data/homr/docs/MSHR_Enhanced_Table.txt')
-MSHR_URLS.append('http://www.ncdc.noaa.gov/homr/file/mshr_enhanced.txt.zip')
-
-# Index values of each field details.
-MSHR_FIELD_INDEX_NAME = 0
-MSHR_FIELD_INDEX_START = 1
-MSHR_FIELD_INDEX_END = 2
-MSHR_FIELD_INDEX_TYPE = 3
-
-# Store the row details here.
-MSHR_FIELDS = {}
-
-# Details about the row.
-MSHR_FIELDS['SOURCE_ID'] = ['SOURCE_ID', 1, 20, 'X(20)']
-MSHR_FIELDS['SOURCE'] = ['SOURCE', 22, 31, 'X(10)']
-MSHR_FIELDS['BEGIN_DATE'] = ['BEGIN_DATE', 33, 40, 'YYYYMMDD']
-MSHR_FIELDS['END_DATE'] = ['END_DATE', 42, 49, 'YYYYMMDD']
-MSHR_FIELDS['STATION_STATUS'] = ['STATION_STATUS', 51, 70, 'X(20)']
-MSHR_FIELDS['NCDCSTN_ID'] = ['NCDCSTN_ID', 72, 91, 'X(20)']
-MSHR_FIELDS['ICAO_ID'] = ['ICAO_ID', 93, 112, 'X(20)']
-MSHR_FIELDS['WBAN_ID'] = ['WBAN_ID', 114, 133, 'X(20)']
-MSHR_FIELDS['FAA_ID'] = ['FAA_ID', 135, 154, 'X(20)']
-MSHR_FIELDS['NWSLI_ID'] = ['NWSLI_ID', 156, 175, 'X(20)']
-MSHR_FIELDS['WMO_ID'] = ['WMO_ID', 177, 196, 'X(20)']
-MSHR_FIELDS['COOP_ID'] = ['COOP_ID', 198, 217, 'X(20)']
-MSHR_FIELDS['TRANSMITTAL_ID'] = ['TRANSMITTAL_ID', 219, 238, 'X(20)']
-MSHR_FIELDS['GHCND_ID'] = ['GHCND_ID', 240, 259, 'X(20)']
-MSHR_FIELDS['NAME_PRINCIPAL'] = ['NAME_PRINCIPAL', 261, 360, 'X(100)']
-MSHR_FIELDS['NAME_PRINCIPAL_SHORT'] = ['NAME_PRINCIPAL_SHORT', 362, 391, 'X(30)']
-MSHR_FIELDS['NAME_COOP'] = ['NAME_COOP', 393, 492, 'X(100)']
-MSHR_FIELDS['NAME_COOP_SHORT'] = ['NAME_COOP_SHORT', 494, 523, 'X(30)']
-MSHR_FIELDS['NAME_PUBLICATION'] = ['NAME_PUBLICATION', 525, 624, 'X(100)']
-MSHR_FIELDS['NAME_ALIAS'] = ['NAME_ALIAS', 626, 725, 'X(100)']
-MSHR_FIELDS['NWS_CLIM_DIV'] = ['NWS_CLIM_DIV', 727, 736, 'X(10)']
-MSHR_FIELDS['NWS_CLIM_DIV_NAME'] = ['NWS_CLIM_DIV_NAME', 738, 777, 'X(40)']
-MSHR_FIELDS['STATE_PROV'] = ['STATE_PROV', 779, 788, 'X(10)']
-MSHR_FIELDS['COUNTY'] = ['COUNTY', 790, 839, 'X(50)']
-MSHR_FIELDS['NWS_ST_CODE'] = ['NWS_ST_CODE', 841, 842, 'X(2)']
-MSHR_FIELDS['FIPS_COUNTRY_CODE'] = ['FIPS_COUNTRY_CODE', 844, 845, 'X(2)']
-MSHR_FIELDS['FIPS_COUNTRY_NAME'] = ['FIPS_COUNTRY_NAME', 847, 946, 'X(100)']
-MSHR_FIELDS['NWS_REGION'] = ['NWS_REGION', 948, 977, 'X(30)']
-MSHR_FIELDS['NWS_WFO'] = ['NWS_WFO', 979, 988, 'X(10)']
-MSHR_FIELDS['ELEV_GROUND'] = ['ELEV_GROUND', 990, 1029, 'X(40)']
-MSHR_FIELDS['ELEV_GROUND_UNIT'] = ['ELEV_GROUND_UNIT', 1031, 1050, 'X(20)']
-MSHR_FIELDS['ELEV_BAROM'] = ['ELEV_BAROM', 1052, 1091, 'X(40)']
-MSHR_FIELDS['ELEV_BAROM_UNIT'] = ['ELEV_BAROM_UNIT', 1093, 1112, 'X(20)']
-MSHR_FIELDS['ELEV_AIR'] = ['ELEV_AIR', 1114, 1153, 'X(40)']
-MSHR_FIELDS['ELEV_AIR_UNIT'] = ['ELEV_AIR_UNIT', 1155, 1174, 'X(20)']
-MSHR_FIELDS['ELEV_ZERODAT'] = ['ELEV_ZERODAT', 1176, 1215, 'X(40)']
-MSHR_FIELDS['ELEV_ZERODAT_UNIT'] = ['ELEV_ZERODAT_UNIT', 1217, 1236, 'X(20)']
-MSHR_FIELDS['ELEV_UNK'] = ['ELEV_UNK', 1238, 1277, 'X(40)']
-MSHR_FIELDS['ELEV_UNK_UNIT'] = ['ELEV_UNK_UNIT', 1279, 1298, 'X(20)']
-MSHR_FIELDS['LAT_DEC'] = ['LAT_DEC', 1300, 1319, 'X(20)']
-MSHR_FIELDS['LON_DEC'] = ['LON_DEC', 1321, 1340, 'X(20)']
-MSHR_FIELDS['LAT_LON_PRECISION'] = ['LAT_LON_PRECISION', 1342, 1351, 'X(10)']
-MSHR_FIELDS['RELOCATION'] = ['RELOCATION', 1353, 1414, 'X(62)']
-MSHR_FIELDS['UTC_OFFSET'] = ['UTC_OFFSET', 1416, 1431, '9(16)']
-MSHR_FIELDS['OBS_ENV'] = ['OBS_ENV', 1433, 1472, 'X(40) ']
-MSHR_FIELDS['PLATFORM'] = ['PLATFORM', 1474, 1573, 'X(100)']
[11/14] git commit: Added background processes to help with
parallelizing the data loading.
Posted by pr...@apache.org.
Added background processes to help with parallelizing the data loading.
Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/31b3f4d9
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/31b3f4d9
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/31b3f4d9
Branch: refs/heads/master
Commit: 31b3f4d985c6344d1ccc15ce57fb57a1cb2e3011
Parents: eefadb2
Author: Preston Carman <pr...@apache.org>
Authored: Thu Oct 9 12:19:27 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Thu Oct 9 12:19:27 2014 -0700
----------------------------------------------------------------------
.../mrql_scripts/load_node_file.sh | 37 ++++++++++++++++++++
.../mrql_scripts/run_group_test.sh | 15 +++-----
2 files changed, 41 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/vxquery/blob/31b3f4d9/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
new file mode 100755
index 0000000..048274f
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+if [ -z "${1}" ]
+then
+ echo "Please enter the node number."
+ exit
+fi
+
+
+# Add each sensor block
+cp saved/backups/mr/all_sensors_${1}.xml.gz disk1/hadoop/upload/
+gunzip disk1/hadoop/upload/all_sensors_${1}.xml.gz
+hadoop fs -copyFromLocal disk1/hadoop/upload/all_sensors_${1}.xml all/sensors
+rm -f disk1/hadoop/upload/all_sensors_${1}.xml
+
+# Add each station block
+cp saved/backups/mr/all_stations_${1}.xml.gz disk1/hadoop/upload/
+gunzip disk1/hadoop/upload/all_stations_${1}.xml.gz
+hadoop fs -copyFromLocal disk1/hadoop/upload/all_stations_${1}.xml all/stations
+rm -f disk1/hadoop/upload/all_stations_${1}.xml
http://git-wip-us.apache.org/repos/asf/vxquery/blob/31b3f4d9/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
index c34ec95..60dc255 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
@@ -33,19 +33,12 @@ hadoop fs -mkdir all/stations
n=0
while [ ${n} -lt ${NODES} ];
do
- # Add each sensor block
- cp saved/backups/mr/all_sensors_${n}.xml.gz disk1/hadoop/upload/
- gunzip disk1/hadoop/upload/all_sensors_${n}.xml.gz
- hadoop fs -copyFromLocal disk1/hadoop/upload/all_sensors_${n}.xml all/sensors
- rm -f disk1/hadoop/upload/all_sensors_${n}.xml
-
- # Add each station block
- cp saved/backups/mr/all_stations_${n}.xml.gz disk1/hadoop/upload/
- gunzip disk1/hadoop/upload/all_stations_${n}.xml.gz
- hadoop fs -copyFromLocal disk1/hadoop/upload/all_stations_${n}.xml all/stations
- rm -f disk1/hadoop/upload/all_stations_${n}.xml
+ sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh ${n} &
done
+# After all files have been uploaded, continue.
+wait
+
# Start test
sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/ ${NODES} ${REPEAT}
[04/14] copy of all changes in exrt benchmark queries that is
copyright free.
Posted by pr...@apache.org.
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
new file mode 100644
index 0000000..5db090a
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
@@ -0,0 +1,554 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import textwrap
+from datetime import date
+import os
+from collections import OrderedDict
+
+# Custom modules.
+from weather_config_ghcnd import *
+from weather_config_mshr import *
+from weather_download_files import *
+
+class WeatherConvertToXML:
+
+ STATES = OrderedDict({
+ 'AK': 'Alaska',
+ 'AL': 'Alabama',
+ 'AR': 'Arkansas',
+ 'AS': 'American Samoa',
+ 'AZ': 'Arizona',
+ 'CA': 'California',
+ 'CO': 'Colorado',
+ 'CT': 'Connecticut',
+ 'DC': 'District of Columbia',
+ 'DE': 'Delaware',
+ 'FL': 'Florida',
+ 'GA': 'Georgia',
+ 'GU': 'Guam',
+ 'HI': 'Hawaii',
+ 'IA': 'Iowa',
+ 'ID': 'Idaho',
+ 'IL': 'Illinois',
+ 'IN': 'Indiana',
+ 'KS': 'Kansas',
+ 'KY': 'Kentucky',
+ 'LA': 'Louisiana',
+ 'MA': 'Massachusetts',
+ 'MD': 'Maryland',
+ 'ME': 'Maine',
+ 'MI': 'Michigan',
+ 'MN': 'Minnesota',
+ 'MO': 'Missouri',
+ 'MP': 'Northern Mariana Islands',
+ 'MS': 'Mississippi',
+ 'MT': 'Montana',
+ 'NA': 'National',
+ 'NC': 'North Carolina',
+ 'ND': 'North Dakota',
+ 'NE': 'Nebraska',
+ 'NH': 'New Hampshire',
+ 'NJ': 'New Jersey',
+ 'NM': 'New Mexico',
+ 'NV': 'Nevada',
+ 'NY': 'New York',
+ 'OH': 'Ohio',
+ 'OK': 'Oklahoma',
+ 'OR': 'Oregon',
+ 'PA': 'Pennsylvania',
+ 'PR': 'Puerto Rico',
+ 'RI': 'Rhode Island',
+ 'SC': 'South Carolina',
+ 'SD': 'South Dakota',
+ 'TN': 'Tennessee',
+ 'TX': 'Texas',
+ 'UT': 'Utah',
+ 'VA': 'Virginia',
+ 'VI': 'Virgin Islands',
+ 'VT': 'Vermont',
+ 'WA': 'Washington',
+ 'WI': 'Wisconsin',
+ 'WV': 'West Virginia',
+ 'WY': 'Wyoming'
+ })
+
+ MONTHS = [
+ "January",
+ "February",
+ "March",
+ "April",
+ "May",
+ "June",
+ "July",
+ "August",
+ "September",
+ "October",
+ "November",
+ "December"
+ ]
+
+ token = ""
+
+ def __init__(self, base_path, save_path, debug_output):
+ self.save_path = save_path
+ self.debug_output = debug_output
+
+ # Extra support files.
+ self.ghcnd_countries = base_path + '/ghcnd-countries.txt'
+ self.ghcnd_inventory = base_path + '/ghcnd-inventory.txt'
+ self.ghcnd_states = base_path + '/ghcnd-states.txt'
+ self.ghcnd_stations = base_path + '/ghcnd-stations.txt'
+
+ # MSHR support files.
+ self.mshr_stations = base_path + '/mshr_enhanced_201402.txt'
+
+ def set_token(self, token):
+ self.token = token
+
+ def get_field_from_definition(self, row, field_definition):
+ return row[(field_definition[FIELD_INDEX_START] - 1):field_definition[FIELD_INDEX_END]]
+
+ def get_field(self, fields_array, row, index):
+ return row[(fields_array[index][FIELD_INDEX_START] - 1):fields_array[index][FIELD_INDEX_END]]
+
+ def get_dly_field(self, row, index):
+ return self.get_field(DLY_FIELDS, row, index)
+
+ def print_row_files(self, row):
+ for field in DLY_FIELDS:
+ print str(field[FIELD_INDEX_NAME]) + " = '" + row[(field[FIELD_INDEX_START] - 1):field[FIELD_INDEX_END]] + "'"
+
+ def save_file(self, filename, contents):
+     """Write ``contents`` to ``filename`` (truncating it) and return ``filename``.
+
+     The ``with`` block closes the handle even when the write raises, and
+     the local name no longer shadows the ``file`` builtin.
+     """
+     with open(filename, 'w') as output_file:
+         output_file.write(contents)
+     return filename
+
+ def get_folder_size(self, folder_name):
+ total_size = 0
+ for dirpath, dirnames, filenames in os.walk(folder_name):
+ for f in filenames:
+ fp = os.path.join(dirpath, f)
+ total_size += os.path.getsize(fp)
+ return total_size
+
+ def process_one_month_sensor_set(self, records, page):
+ # Default
+ return 0
+
+ def process_station_data(self, row):
+ # Default
+ return 0
+
+ def get_base_folder(self, station_id, data_type="sensors"):
+ return build_base_save_folder(self.save_path, station_id, data_type)
+
+ def process_inventory_file(self):
+     """Summarize the GHCN-Daily inventory file into ``<save_path>/inventory.csv``.
+
+     One output row is built per station id: the id, a comma separated list
+     of its sensors, the sensor count, the longest single sensor span
+     (LASTYEAR - FIRSTYEAR) and the sum of all sensor spans.
+     """
+     print "Processing inventory file"
+
+     csv_header = ['ID', 'SENSORS', 'SENSORS_COUNT', 'MAX_YEARS', 'TOTAL_YEARS_FOR_ALL_SENSORS']
+     csv_inventory = {}
+     with open(self.ghcnd_inventory, 'r') as file_stream:
+         # NOTE(review): the first line is discarded exactly as before;
+         # confirm the inventory file really starts with a header row.
+         file_stream.readline()
+         for row in file_stream:
+             station_id = self.get_field_from_definition(row, INVENTORY_FIELDS['ID'])
+             sensor_id = self.get_field_from_definition(row, INVENTORY_FIELDS['ELEMENT'])
+             start = int(self.get_field_from_definition(row, INVENTORY_FIELDS['FIRSTYEAR']))
+             end = int(self.get_field_from_definition(row, INVENTORY_FIELDS['LASTYEAR']))
+             years = end - start
+             if station_id in csv_inventory:
+                 previous = csv_inventory[station_id]
+                 new_count = str(int(previous[2]) + 1)
+                 new_max = str(max(int(previous[3]), years))
+                 # Accumulate onto the running total (column 4), not the max (column 3).
+                 new_total = str(int(previous[4]) + years)
+                 csv_inventory[station_id] = [station_id, (previous[1] + "," + sensor_id), new_count, new_max, new_total]
+             else:
+                 csv_inventory[station_id] = [station_id, sensor_id, str(1), str(years), str(years)]
+
+     path = self.save_path + "/inventory.csv"
+     self.save_csv_file(path, csv_inventory, csv_header)
+
+ def save_csv_file(self, path, csv_inventory, header):
+ csv_content = "|".join(header) + "\n"
+ for row_id in csv_inventory:
+ csv_content += "|".join(csv_inventory[row_id]) + "\n"
+ self.save_file(path, csv_content)
+
+
+ def process_station_file(self, file_name):
+     """Read the first row of ``file_name`` and return ``process_station_data(row)``."""
+     print "Processing station file: " + file_name
+     # Only the first row is needed, so the handle is released right away.
+     with open(file_name, 'r') as file_stream:
+         row = file_stream.readline()
+     return self.process_station_data(row)
+
+ def process_sensor_file(self, file_name, max_files, sensor_max=99):
+     """Split one ``.dly`` file into month (and page) sized record sets.
+
+     Consecutive rows sharing a year and month are grouped, with a new page
+     started once ``sensor_max`` rows were collected, and every group is
+     passed to ``process_one_month_sensor_set``.  A ``max_files`` of 0 means
+     no limit.
+
+     Returns a ``(file_count, data_size)`` tuple; ``(0, 0)`` for an empty file.
+     """
+     print "Processing sensor file: " + file_name
+
+     month_last = 0
+     year_last = 0
+     records = []
+     page = 0
+     sensor_count = 0
+     file_count = 0
+     reached_max = False
+
+     with open(file_name, 'r') as file_stream:
+         for row in file_stream:
+             month = self.get_dly_field(row, DLY_FIELD_MONTH)
+             year = self.get_dly_field(row, DLY_FIELD_YEAR)
+
+             if (month_last != 0 and year_last != 0) and (sensor_count >= sensor_max or month != month_last or year != year_last):
+                 # The month changed or the page is full: write the pending set.
+                 file_count += self.process_one_month_sensor_set(records, page)
+                 records = []
+                 if sensor_count >= sensor_max and month == month_last and year == year_last:
+                     # start a new page.
+                     page += 1
+                 else:
+                     # start over.
+                     page = 0
+                 sensor_count = 0
+
+             records.append(row)
+             sensor_count += 1
+             if max_files != 0 and file_count >= max_files:
+                 # Stop creating more files after the max is reached.
+                 reached_max = True
+                 break
+
+             month_last = month
+             year_last = year
+
+     if not records:
+         # Empty input file: there is no row to read a station id from.
+         return (0, 0)
+
+     if not reached_max:
+         # The loop only writes a set when the next one starts, so the final
+         # month/page is still pending here.
+         file_count += self.process_one_month_sensor_set(records, page)
+
+     station_id = self.get_dly_field(records[0], DLY_FIELD_ID)
+     data_size = self.get_folder_size(self.get_base_folder(station_id) + "/" + station_id)
+     print "Created " + str(file_count) + " XML files for a data size of " + str(data_size) + "."
+
+     return (file_count, data_size)
+
+ def convert_c2f(self, c):
+     """Convert a temperature from degrees Celsius to degrees Fahrenheit.
+
+     Float literals are required: under Python 2, which this script
+     targets, ``9 / 5`` is integer division and evaluates to 1.
+     """
+     return (9.0 / 5.0 * c) + 32
+
+ def default_xml_web_service_start(self):
+ field_xml = ""
+ field_xml += "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n"
+ return field_xml
+
+ def default_xml_data_start(self, total_records):
+ field_xml = ""
+ field_xml += "<dataCollection pageCount=\"1\" totalCount=\"" + str(total_records) + "\">\n"
+ return field_xml
+
+ def default_xml_station_start(self):
+     """Return the opening ``stationCollection`` tag line of a station document."""
+     return "<stationCollection pageSize=\"100\" pageCount=\"1\" totalCount=\"1\">\n"
+
+ def default_xml_field_date(self, report_date, indent=2):
+ field_xml = ""
+ field_xml += self.get_indent_space(indent) + "<date>" + str(report_date.year) + "-" + str(report_date.month).zfill(2) + "-" + str(report_date.day).zfill(2) + "T00:00:00.000</date>\n"
+ return field_xml
+
+ def default_xml_mshr_station_additional(self, station_id):
+     """The web service station data is generated from the MSHR data supplemented with GHCN-Daily.
+
+     Returns the county and country ``locationLabels`` XML for the first
+     MSHR row whose GHCND_ID equals ``station_id``, or an empty string when
+     no such row exists.
+     """
+     station_mshr_row = ""
+     # The with block releases the MSHR file handle, including on the early break.
+     with open(self.mshr_stations, 'r') as stations_mshr_file:
+         for line in stations_mshr_file:
+             if station_id == self.get_field_from_definition(line, MSHR_FIELDS['GHCND_ID']).strip():
+                 station_mshr_row = line
+                 break
+
+     if station_mshr_row == "":
+         return ""
+
+     additional_xml = ""
+
+     county = self.get_field_from_definition(station_mshr_row, MSHR_FIELDS['COUNTY']).strip()
+     if county != "":
+         additional_xml += self.default_xml_location_labels("CNTY", "FIPS:-9999", county)
+
+     country_code = self.get_field_from_definition(station_mshr_row, MSHR_FIELDS['FIPS_COUNTRY_CODE']).strip()
+     country_name = self.get_field_from_definition(station_mshr_row, MSHR_FIELDS['FIPS_COUNTRY_NAME']).strip()
+     if country_code != "" and country_name != "":
+         additional_xml += self.default_xml_location_labels("CNTRY", "FIPS:" + country_code, country_name)
+
+     return additional_xml
+
+ def default_xml_location_labels(self, type, id, display_name):
+ label_xml = ""
+ label_xml += self.default_xml_start_tag("locationLabels", 2)
+ label_xml += self.default_xml_element("type", type, 3)
+ label_xml += self.default_xml_element("id", id, 3)
+ label_xml += self.default_xml_element("displayName", display_name, 3)
+ label_xml += self.default_xml_end_tag("locationLabels", 2)
+ return label_xml
+
+
+ def default_xml_web_service_station(self, station_id):
+ """The web service station data is generate from available historical sources."""
+ station_ghcnd_row = ""
+ stations_ghcnd_file = open(self.ghcnd_stations, 'r')
+ for line in stations_ghcnd_file:
+ if station_id == self.get_field_from_definition(line, STATIONS_FIELDS['ID']):
+ station_ghcnd_row = line
+ break
+
+ xml_station = ""
+ xml_station += self.default_xml_start_tag("station", 1)
+
+ xml_station += self.default_xml_element("id", "GHCND:" + station_id, 2)
+ xml_station += self.default_xml_element("displayName", self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['NAME']).strip(), 2)
+ xml_station += self.default_xml_element("latitude", self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['LATITUDE']).strip(), 2)
+ xml_station += self.default_xml_element("longitude", self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['LONGITUDE']).strip(), 2)
+
+ elevation = self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['ELEVATION']).strip()
+ if elevation != "-999.9":
+ xml_station += self.default_xml_element("elevation", elevation, 2)
+
+ state_code = self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['STATE']).strip()
+ if state_code != "" and state_code in self.STATES:
+ xml_station += self.default_xml_location_labels("ST", "FIPS:" + str(self.STATES.keys().index(state_code)), self.STATES[state_code])
+
+ # Add the MSHR data to the station generated information.
+ xml_station += self.default_xml_mshr_station_additional(station_id)
+
+ xml_station += self.default_xml_end_tag("station", 1)
+ return xml_station
+
+ def default_xml_day_reading_as_field(self, row, day):
+ day_index = DLY_FIELD_DAY_OFFSET + ((day - 1) * DLY_FIELD_DAY_FIELDS)
+ value = self.get_dly_field(row, day_index);
+ if value == "-9999":
+ return ""
+
+ field_xml = ""
+ field_id = self.get_dly_field(row, DLY_FIELD_ELEMENT)
+ if field_id in ("MDTN", "MDTX", "MNPN", "MXPN", "TMAX", "TMIN", "TOBS",):
+ # Add both the celcius and fahrenheit temperatures.
+ celcius = float(value) / 10
+ field_xml += " <" + field_id + "_c>" + str(celcius) + "</" + field_id + "_c>\n"
+ fahrenheit = self.convert_c2f(celcius)
+ field_xml += " <" + field_id + "_f>" + str(fahrenheit) + "</" + field_id + "_f>\n"
+ elif field_id in ("AWND", "EVAP", "PRCP", "THIC", "WESD", "WESF", "WSF1", "WSF2", "WSF5", "WSFG", "WSFI", "WSFM",):
+ # Field values that are in tenths.
+ converted_value = float(value) / 10
+ field_xml += " <" + field_id + ">" + str(converted_value) + "</" + field_id + ">\n"
+ elif field_id in ("ACMC", "ACMH", "ACSC", "ACSH", "PSUN",):
+ # Fields is a percentage.
+ field_xml += " <" + field_id + ">" + value.strip() + "</" + field_id + ">\n"
+ elif field_id in ("FMTM", "PGTM",):
+ # Fields is a time value HHMM.
+ field_xml += " <" + field_id + ">" + value.strip() + "</" + field_id + ">\n"
+ elif field_id in ("DAEV", "DAPR", "DASF", "DATN", "DATX", "DAWM", "DWPR", "FRGB", "FRGT", "FRTH", "GAHT", "MDSF", "MDWM", "MDEV", "MDPR", "SNOW", "SNWD", "TSUN", "WDF1", "WDF2", "WDF5", "WDFG", "WDFI", "WDFM", "WDMV",):
+ # Fields with no alternation needed.
+ field_xml += " <" + field_id + ">" + value.strip() + "</" + field_id + ">\n"
+ else:
+ field_xml += " <unknown>" + field_id + "</unknown>\n"
+
+ # print field_xml
+ return field_xml
+
+ def default_xml_day_reading(self, row, day, indent=2):
+ day_index = DLY_FIELD_DAY_OFFSET + ((day - 1) * DLY_FIELD_DAY_FIELDS)
+ value = self.get_dly_field(row, day_index);
+ mflag = self.get_dly_field(row, day_index + 1);
+ qflag = self.get_dly_field(row, day_index + 2);
+ sflag = self.get_dly_field(row, day_index + 3);
+
+ if value == "-9999":
+ return ""
+
+ indent_space = self.get_indent_space(indent)
+ field_id = self.get_dly_field(row, DLY_FIELD_ELEMENT)
+ station_id = "GHCND:" + self.get_dly_field(row, DLY_FIELD_ID)
+
+ field_xml = ""
+ field_xml += indent_space + "<dataType>" + field_id + "</dataType>\n"
+ field_xml += indent_space + "<station>" + station_id + "</station>\n"
+ field_xml += indent_space + "<value>" + value.strip() + "</value>\n"
+ field_xml += indent_space + "<attributes>\n"
+ field_xml += indent_space + indent_space + "<attribute>" + mflag.strip() + "</attribute>\n"
+ field_xml += indent_space + indent_space + "<attribute>" + qflag.strip() + "</attribute>\n"
+ field_xml += indent_space + indent_space + "<attribute>" + sflag.strip() + "</attribute>\n"
+ field_xml += indent_space + indent_space + "<attribute></attribute>\n"
+ field_xml += indent_space + "</attributes>\n"
+
+ # print field_xml
+ return field_xml
+
+ def default_xml_end(self):
+ return textwrap.dedent("""\
+ </ghcnd_observation>""")
+
+ def default_xml_data_end(self):
+ return self.default_xml_end_tag("dataCollection", 0)
+
+ def default_xml_station_end(self):
+ return self.default_xml_end_tag("stationCollection", 0)
+
+ def default_xml_element(self, tag, data, indent=1):
+ return self.get_indent_space(indent) + "<" + tag + ">" + data + "</" + tag + ">\n"
+
+ def default_xml_start_tag(self, tag, indent=1):
+ return self.get_indent_space(indent) + "<" + tag + ">\n"
+
+ def default_xml_end_tag(self, tag, indent=1):
+ return self.get_indent_space(indent) + "</" + tag + ">\n"
+
+ def get_indent_space(self, indent):
+ return (" " * (4 * indent))
+
+
+class WeatherWebServiceMonthlyXMLFile(WeatherConvertToXML):
+ """The web service class details how to create files similar to the NOAA web service."""
+ skip_downloading = False
+ # Station data
+ def process_station_data(self, row):
+     """Adds a single station record file either from downloading the data or generating a similar record.
+
+     Returns the result of ``download_station_data`` when it is non-zero,
+     otherwise 1 after a generated file was saved (0 if no file name came
+     back from ``save_file``).
+     """
+     station_id = self.get_dly_field(row, DLY_FIELD_ID)
+     download = 0
+     # Compare by value: "is not" tests object identity, which is not
+     # guaranteed to match string equality.
+     if self.token != "" and not self.skip_downloading:
+         download = self.download_station_data(station_id, self.token, True)
+         if download == 0:
+             # One failed download disables further download attempts.
+             self.skip_downloading = True
+
+     # If not downloaded, generate.
+     if download != 0:
+         return download
+
+     # Information for each daily file.
+     station_xml_file = self.default_xml_web_service_start()
+     station_xml_file += self.default_xml_station_start()
+     station_xml_file += self.default_xml_web_service_station(station_id)
+     station_xml_file += self.default_xml_station_end()
+
+     # Remove white space.
+     station_xml_file = station_xml_file.replace("\n", "")
+     station_xml_file = station_xml_file.replace(self.get_indent_space(1), "")
+
+     # Make sure the station folder is available.
+     ghcnd_xml_station_path = self.get_base_folder(station_id, "stations")
+     if not os.path.isdir(ghcnd_xml_station_path):
+         os.makedirs(ghcnd_xml_station_path)
+
+     # Save XML string to disk.
+     save_file_name = ghcnd_xml_station_path + station_id + ".xml"
+     save_file_name = self.save_file(save_file_name, station_xml_file)
+
+     if save_file_name != "":
+         if self.debug_output:
+             print "Wrote file: " + save_file_name
+         return 1
+     else:
+         return 0
+
+ # Station data
+ def download_station_data(self, station_id, token, reset=False):
+     """Downloads the station data from the web service.
+
+     Returns 2 when the downloaded XML was saved and 0 when the response
+     contains ``<cdoError>`` or no file name came back from ``save_file``.
+     ``reset`` is accepted for interface compatibility and is not used.
+     """
+     import time
+     # Pause before every request, as the original code did.
+     time.sleep(2)
+     # Make sure the station folder is available.
+     ghcnd_xml_station_path = self.get_base_folder(station_id, "stations")
+     if not os.path.isdir(ghcnd_xml_station_path):
+         os.makedirs(ghcnd_xml_station_path)
+
+     # Build download URL.
+     url = "http://www.ncdc.noaa.gov/cdo-services/services/datasets/GHCND/stations/GHCND:" + station_id + ".xml?token=" + token
+     url_file = urllib.urlopen(url)
+     try:
+         # Read the whole response at once instead of line by line.
+         station_xml_file = url_file.read()
+     finally:
+         # Always release the connection, even when the read fails.
+         url_file.close()
+
+     if "<cdoError>" in station_xml_file:
+         if self.debug_output:
+             print "Error in station download"
+         return 0
+
+     # Save XML string to disk.
+     save_file_name = ghcnd_xml_station_path + station_id + ".xml"
+     save_file_name = self.save_file(save_file_name, station_xml_file)
+
+     # Compare by value; "is not" tests object identity.
+     if save_file_name != "":
+         if self.debug_output:
+             print "Wrote file: " + save_file_name
+         return 2
+     else:
+         return 0
+
+ # Sensor data
+ def process_one_month_sensor_set(self, records, page):
+     """Generates records for a station using the web service xml layout.
+
+     The year, month and station id are taken from ``records[0]``; ``page``
+     is only used in the saved file name.  Returns 1 when a file was
+     written and 0 when no reading was found (or no file name came back
+     from ``save_file``).
+     """
+     found_data = False
+     year = int(self.get_dly_field(records[0], DLY_FIELD_YEAR))
+     month = int(self.get_dly_field(records[0], DLY_FIELD_MONTH))
+
+     station_id = self.get_dly_field(records[0], DLY_FIELD_ID)
+
+     # Information for each daily file.
+     count = 0
+     daily_xml_file = ""
+     report_date = None
+
+     for day in range(1, 32):
+         try:
+             # TODO find out what is a valid python date range? 1889?
+             # Only the date construction is guarded: days that do not exist
+             # in this month (e.g. February 30th) are skipped.
+             current_date = date(year, month, day)
+         except ValueError:
+             continue
+         report_date = current_date
+
+         for record in records:
+             record_xml_snip = self.default_xml_day_reading(record, report_date.day)
+             # Compare by value; "is not" tests object identity.
+             if record_xml_snip != "":
+                 daily_xml_file += self.default_xml_start_tag("data")
+                 daily_xml_file += self.default_xml_field_date(report_date)
+                 daily_xml_file += record_xml_snip
+                 daily_xml_file += self.default_xml_end_tag("data")
+                 found_data = True
+                 count += 1
+
+     daily_xml_file = self.default_xml_web_service_start() + self.default_xml_data_start(count) + daily_xml_file + self.default_xml_data_end()
+     daily_xml_file = daily_xml_file.replace("\n", "")
+     daily_xml_file = daily_xml_file.replace(self.get_indent_space(1), "")
+
+     if not found_data:
+         return 0
+
+     # Make sure the station folder is available.
+     # found_data implies at least one valid date, so report_date is set here.
+     ghcnd_xml_station_path = self.get_base_folder(station_id) + "/" + station_id + "/" + str(report_date.year) + "/"
+     if not os.path.isdir(ghcnd_xml_station_path):
+         os.makedirs(ghcnd_xml_station_path)
+
+     # Save XML string to disk.
+     save_file_name = ghcnd_xml_station_path + build_sensor_save_filename(station_id, report_date, page)
+     save_file_name = self.save_file(save_file_name, daily_xml_file)
+
+     if save_file_name != "":
+         if self.debug_output:
+             print "Wrote file: " + save_file_name
+         return 1
+     else:
+         return 0
+
+ def build_base_save_folder(save_path, station_id, data_type="sensors"):
+     """Return ``<save_path><data_type>/<first three characters of station_id>/``."""
+     prefix_folder = station_id[:3]
+     return "".join([save_path, data_type, "/", prefix_folder, "/"])
+
+ def build_sensor_save_filename(station_id, report_date, page):
+     """Return the sensor file name ``<station_id>_<YYYY><MM>_<page>.xml``."""
+     year_text = str(report_date.year).zfill(4)
+     month_text = str(report_date.month).zfill(2)
+     return station_id + "_" + year_text + month_text + "_" + str(page) + ".xml"
+
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
new file mode 100644
index 0000000..c8b0fa5
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
@@ -0,0 +1,406 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import glob
+import os.path
+import linecache
+import distutils.core
+
+from weather_convert_to_xml import *
+from collections import OrderedDict
+
+# Weather data files created to manage the conversion process.
+# Allows partition and picking up where you left off.
+class WeatherDataFiles:
+
+ LARGE_FILE_ROOT_TAG = "root"
+
+ INDEX_DATA_FILE_NAME = 0
+ INDEX_DATA_SENSORS_STATUS = 1
+ INDEX_DATA_STATION_STATUS = 2
+ INDEX_DATA_FILE_COUNT = 3
+ INDEX_DATA_FOLDER_DATA = 4
+
+ DATA_FILE_START_INDEX = 0
+ DATA_FILE_EXTENSION = ".dly"
+ DATA_FILE_MISSING = "missing"
+ DATA_FILE_INITIAL = "initialized"
+ DATA_FILE_DOWNLOADED = "downloaded"
+ DATA_FILE_GENERATED = "generated"
+ SEPERATOR = ","
+
+ type = "sensor"
+ data_reset = False
+
+ def __init__(self, base_path, progress_file_name="/tmp/_weather_data.csv"):
+ self.base_path = base_path
+
+ self.progress_file_name = progress_file_name
+
+ self.current = self.DATA_FILE_START_INDEX
+ self.progress_data = []
+
+ def get_file_list_iterator(self):
+ """Return the list of files one at a time."""
+ return glob.iglob(self.base_path + "/*" + self.DATA_FILE_EXTENSION)
+
+ # Save Functions
+ def build_progress_file(self, options, convert):
+ if not os.path.isfile(self.progress_file_name) or 'reset' in options:
+ # Build a new file.
+ file = open(self.progress_file_name, 'w')
+ contents = self.get_default_progress_file_csv()
+ file.write(contents)
+ file.close()
+ elif 'append' in options or 'recalculate' in options:
+ self.open_progress_data()
+ row_count = len(self.progress_data)
+ for row in range(0, row_count):
+ row_contents = self.progress_data[row].rsplit(self.SEPERATOR)
+ file_name = row_contents[self.INDEX_DATA_FILE_NAME]
+ if self.get_file_row(file_name) < 0 and 'append' in options:
+ self.progress_data.append(self.get_progress_csv_row(file_name, self.DATA_FILE_INITIAL, self.DATA_FILE_INITIAL))
+ elif 'recalculate' in options:
+ # The folder is hard coded
+ station_id = os.path.basename(file_name).split('.')[0]
+ folder_name = convert.get_base_folder(station_id)
+ if os.path.exists(folder_name):
+ row_contents = self.progress_data[row].rsplit(self.SEPERATOR)
+ sensor_status = row_contents[self.INDEX_DATA_SENSORS_STATUS]
+ station_status = row_contents[self.INDEX_DATA_STATION_STATUS]
+ file_count = self.get_file_count(folder_name)
+ data_size = self.get_folder_size(folder_name)
+ self.progress_data[row] = self.get_progress_csv_row(file_name, sensor_status, station_status, file_count, data_size)
+ else:
+ self.progress_data[row] = self.get_progress_csv_row(file_name, self.DATA_FILE_INITIAL, self.DATA_FILE_INITIAL)
+ # Save file
+ self.close_progress_data(True)
+ self.reset()
+
+ def copy_to_n_partitions(self, save_path, partitions, base_paths, reset):
+ """Once the initial data has been generated, the data can be copied into a set number of partitions. """
+ if (len(base_paths) == 0):
+ return
+
+ # Initialize the partition paths.
+ partition_paths = get_disk_partition_paths(0, partitions, base_paths)
+ for path in partition_paths:
+ # Make sure the xml folder is available.
+ prepare_path(path, reset)
+
+ import fnmatch
+ import os
+
+ # copy stations and sensors into each partition
+ current_sensor_partition = 0
+ current_station_partition = 0
+ self.open_progress_data()
+ row_count = len(self.progress_data)
+ for row in range(0, row_count):
+ row_contents = self.progress_data[row].rsplit(self.SEPERATOR)
+ file_name = row_contents[self.INDEX_DATA_FILE_NAME]
+ station_id = os.path.basename(file_name).split('.')[0]
+
+ # Copy sensor files
+ type = "sensors"
+ file_path = build_base_save_folder(save_path, station_id, type) + station_id
+ for root, dirnames, filenames in os.walk(file_path):
+ for filename in fnmatch.filter(filenames, '*.xml'):
+ xml_path = os.path.join(root, filename)
+ new_file_base = build_base_save_folder(partition_paths[current_sensor_partition], station_id, type) + station_id
+ if not os.path.isdir(new_file_base):
+ os.makedirs(new_file_base)
+ shutil.copyfile(xml_path, new_file_base + "/" + filename)
+ current_sensor_partition += 1
+ if current_sensor_partition >= len(partition_paths):
+ current_sensor_partition = 0
+
+ # Copy station files
+ type = "stations"
+ file_path = build_base_save_folder(save_path, station_id, type) + station_id + ".xml"
+ new_file_base = build_base_save_folder(partition_paths[current_station_partition], station_id, type)
+ new_file_path = new_file_base + station_id + ".xml"
+ if os.path.isfile(file_path):
+ if not os.path.isdir(new_file_base):
+ os.makedirs(new_file_base)
+ shutil.copyfile(file_path, new_file_path)
+ current_station_partition += 1
+ if current_station_partition >= len(partition_paths):
+ current_station_partition = 0
+
+ def build_to_n_partition_files(self, save_path, partitions, base_paths, reset):
+     """Once the initial data has been generated, the data can be divided into partitions
+     and stored in single files.
+
+     For the "sensors" and then the "stations" data type a ``partition.xml``
+     file is opened in every partition path, and the XML documents found
+     under ``save_path`` for each progress-file row are appended to those
+     files in round-robin order.
+     """
+     if (len(base_paths) == 0):
+         return
+
+     XML_START = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>"
+
+     partition_paths = get_disk_partition_paths(0, partitions, base_paths)
+     if not partition_paths:
+         # No partition to write to (e.g. partitions == 0).
+         return
+
+     import fnmatch
+     import os
+
+     for path in partition_paths:
+         prepare_path(path, reset)
+
+     # Initialize the partition paths.
+     for data_type in ["sensors", "stations"]:
+         partition_files = []
+         for path in partition_paths:
+             # Make sure the xml folder is available.
+             prepare_path(path + data_type + "/", False)
+             partition_files.append(open(path + data_type + "/partition.xml", 'w'))
+             partition_files[-1].write(XML_START + "<" + self.LARGE_FILE_ROOT_TAG + ">\n")
+
+         # copy into each partition
+         current_partition = 0
+         self.open_progress_data()
+         for progress_row in self.progress_data:
+             row_contents = progress_row.rsplit(self.SEPERATOR)
+             file_name = row_contents[self.INDEX_DATA_FILE_NAME]
+             station_id = os.path.basename(file_name).split('.')[0]
+
+             # Collect the XML documents that belong to this station.
+             xml_paths = []
+             if data_type == "sensors":
+                 file_path = build_base_save_folder(save_path, station_id, data_type) + station_id
+                 for root, dirnames, filenames in os.walk(file_path):
+                     for filename in fnmatch.filter(filenames, '*.xml'):
+                         xml_paths.append(os.path.join(root, filename))
+             elif data_type == "stations":
+                 # The station document is a single file.  It used to be joined
+                 # with a "root" left over from the sensors walk (or unbound),
+                 # and a missing file raised; missing files are now skipped,
+                 # as copy_to_n_partitions does.
+                 file_path = build_base_save_folder(save_path, station_id, data_type) + station_id + ".xml"
+                 if os.path.isfile(file_path):
+                     xml_paths.append(file_path)
+
+             for xml_path in xml_paths:
+                 xml_data = file_get_contents(xml_path).replace(XML_START, "") + "\n"
+                 partition_files[current_partition].write(xml_data)
+                 # Round-robin over the open partition files.
+                 current_partition = (current_partition + 1) % len(partition_files)
+
+         for partition_file in partition_files:
+             partition_file.write("</" + self.LARGE_FILE_ROOT_TAG + ">\n")
+             partition_file.close()
+
+ def get_file_row(self, file_name):
+ for i in range(0, len(self.progress_data)):
+ if self.progress_data[i].startswith(file_name):
+ return i
+ return -1
+
+ def get_default_progress_file_csv(self):
+ contents = ""
+ for path in self.get_file_list_iterator():
+ file_name = os.path.basename(path)
+ contents += self.get_progress_csv_row(file_name, self.DATA_FILE_INITIAL, self.DATA_FILE_INITIAL)
+ return contents
+
+ def print_progress_file_stats(self, convert):
+     """Print sensor, station and folder statistics for the progress file.
+
+     The CSV figures come from the progress-file rows; the folder figures
+     are measured from ``convert.get_base_folder(station_id)`` for every
+     row whose folder exists.
+     """
+     sensor_count_missing = 0
+     sensor_count = 0
+     file_count = 0
+     data_size = 0
+
+     sensor_count_actual = 0
+     file_count_actual = 0
+     data_size_actual = 0
+
+     station_count_missing = 0
+     station_count_generated = 0
+     station_count_downloaded = 0
+
+     self.open_progress_data()
+     for progress_row in self.progress_data:
+         row_contents = progress_row.rsplit(self.SEPERATOR)
+         if int(row_contents[self.INDEX_DATA_FILE_COUNT]) != -1 and int(row_contents[self.INDEX_DATA_FOLDER_DATA]) != -1:
+             sensor_count += 1
+             file_count += int(row_contents[self.INDEX_DATA_FILE_COUNT])
+             data_size += int(row_contents[self.INDEX_DATA_FOLDER_DATA])
+         else:
+             sensor_count_missing += 1
+
+         # One chain, so a generated station is no longer also counted as missing.
+         station_status = row_contents[self.INDEX_DATA_STATION_STATUS]
+         if station_status == "generated":
+             station_count_generated += 1
+         elif station_status == "downloaded":
+             station_count_downloaded += 1
+         else:
+             station_count_missing += 1
+
+         file_name = row_contents[self.INDEX_DATA_FILE_NAME]
+         station_id = os.path.basename(file_name).split('.')[0]
+         folder_name = convert.get_base_folder(station_id)
+         if os.path.exists(folder_name):
+             sensor_count_actual += 1
+             file_count_actual += self.get_file_count(folder_name)
+             data_size_actual += self.get_folder_size(folder_name)
+
+     print "Progress File:\t" + self.progress_file_name + "\n"
+
+     print "CSV DETAILS OF PROCESSED SENSORS"
+     print "Number of stations:\t" + "{:,}".format(sensor_count)
+     print "Number of files:\t" + "{:,}".format(file_count)
+     print "Data size:\t\t" + "{:,}".format(data_size) + " Bytes\n"
+
+     print "CSV DETAILS OF unPROCESSED SENSORS"
+     print "Number of stations:\t" + "{:,}".format(sensor_count_missing) + "\n"
+
+     print "CSV DETAILS OF PROCESSED STATIONS"
+     print "Generated:\t\t" + "{:,}".format(station_count_generated)
+     print "Downloaded:\t\t" + "{:,}".format(station_count_downloaded)
+     print "Missing:\t\t" + "{:,}".format(station_count_missing) + "\n"
+
+     print "FOLDER DETAILS"
+     print "Number of stations:\t" + "{:,}".format(sensor_count_actual)
+     print "Number of files:\t" + "{:,}".format(file_count_actual)
+     print "Data size:\t\t" + "{:,}".format(data_size_actual) + " Bytes\n"
+
+
+ def get_progress_csv_row(self, file_name, sensors_status, station_status, file_count=-1, data_size=-1):
+ return file_name + self.SEPERATOR + sensors_status + self.SEPERATOR + station_status + self.SEPERATOR + str(file_count) + self.SEPERATOR + str(data_size) + "\n"
+
+ def update_file_sensor_status(self, file_name, sensors_status, file_count=-1, data_size=-1):
+ for row in range(0, len(self.progress_data)):
+ if self.progress_data[row].startswith(file_name):
+ station_status = self.progress_data[row].rsplit(self.SEPERATOR)[self.INDEX_DATA_STATION_STATUS]
+ self.progress_data[row] = self.get_progress_csv_row(file_name, sensors_status, station_status, file_count, data_size)
+ break
+
+ # Save the file
+ self.close_progress_data(True)
+
+ def update_file_station_status(self, file_name, station_status):
+ for row in range(0, len(self.progress_data)):
+ if self.progress_data[row].startswith(file_name):
+ row_contents = self.progress_data[row].rsplit(self.SEPERATOR)
+ sensors_status = row_contents[self.INDEX_DATA_SENSORS_STATUS]
+ file_count = int(row_contents[self.INDEX_DATA_FILE_COUNT])
+ data_size = int(row_contents[self.INDEX_DATA_FOLDER_DATA])
+ self.progress_data[row] = self.get_progress_csv_row(file_name, sensors_status, station_status, file_count, data_size)
+ break
+
+ # Save the file
+ self.close_progress_data(True)
+
+ def get_file_count(self, folder_name):
+     """Count every file found under ``folder_name``, including sub-folders."""
+     return sum(len(names) for _, _, names in os.walk(folder_name))
+
+ def get_folder_size(self, folder_name):
+     """Return the combined size in bytes of every file beneath ``folder_name``."""
+     size_in_bytes = 0
+     for parent, _, names in os.walk(folder_name):
+         size_in_bytes += sum(os.path.getsize(os.path.join(parent, name)) for name in names)
+     return size_in_bytes
+
+ def get_station_status(self, return_value):
+     """Translate a station processing return code into a progress-file status.
+
+     2 maps to DATA_FILE_DOWNLOADED, 1 to DATA_FILE_GENERATED and anything
+     else to DATA_FILE_MISSING.
+     """
+     status = self.DATA_FILE_MISSING
+     if return_value == 1:
+         status = self.DATA_FILE_GENERATED
+     if return_value == 2:
+         status = self.DATA_FILE_DOWNLOADED
+     return status
+
+
+ def open_progress_data(self):
+ with open(self.progress_file_name, 'r') as file:
+ self.progress_data = file.readlines()
+
+ def close_progress_data(self, force=False):
+ if len(self.progress_data) > 0 or force:
+ with open(self.progress_file_name, 'w') as file:
+ file.writelines(self.progress_data)
+
+
+ def reset(self):
+ self.close_progress_data()
+
+ self.current = self.DATA_FILE_START_INDEX
+ self.open_progress_data()
+
+ def set_type(self, type):
+ self.type = type
+
+ def set_data_reset(self, data_reset):
+ self.data_reset = data_reset
+
+
+ # Iterator Functions
+ def __iter__(self):
+ return self
+
+ def next(self):
+ columns = []
+ while True:
+ # find a row that has not been created.
+ if self.current >= len(self.progress_data):
+ raise StopIteration
+ row = self.progress_data[self.current]
+ self.current += 1
+ columns = row.rsplit(self.SEPERATOR)
+ if self.type == "sensor" and (columns[self.INDEX_DATA_SENSORS_STATUS].strip() != self.DATA_FILE_GENERATED or self.data_reset):
+ break
+ elif self.type == "station" and (columns[self.INDEX_DATA_STATION_STATUS].strip() != self.DATA_FILE_DOWNLOADED or self.data_reset):
+ break
+ return columns[self.INDEX_DATA_FILE_NAME]
+
+
+# Index values of each field details.
+PARTITION_INDEX_NODE = 0
+PARTITION_INDEX_DISK = 1
+PARTITION_INDEX_VIRTUAL = 2
+PARTITION_INDEX = 3
+PARTITION_INDEX_PATH = 4
+PARTITION_HEADER = ("Node", "Disk", "Virtual", "Index", "Path")
+
+ def get_disk_partition_paths(node_id, partitions, base_paths, key="partitions"):
+     """Return only the path entry of every tuple in the disk partition scheme."""
+     scheme = get_disk_partition_scheme(node_id, partitions, base_paths, key)
+     return [entry[PARTITION_INDEX_PATH] for entry in scheme]
+
+ def get_disk_partition_scheme(node_id, virtual_disk_partitions, base_paths, key="partitions"):
+     """Build one ``(node, disk, virtual, index, path)`` tuple per partition and base path.
+
+     Tuples are ordered by partition index first and base path second; the
+     path is ``<base_path><key>/<partition folder>/``.
+     """
+     partition_scheme = []
+     for index in range(virtual_disk_partitions):
+         for disk, base_path in enumerate(base_paths):
+             folder = get_partition_folder(disk, virtual_disk_partitions, index)
+             partition_path = base_path + key + "/" + folder + "/"
+             partition_scheme.append((node_id, disk, virtual_disk_partitions, index, partition_path))
+     return partition_scheme
+
+ def get_partition_folder(disks, partitions, index):
+     """Return the partition folder name ``d<disks>_p<partitions>_i<index>``."""
+     return "_".join(["d" + str(disks), "p" + str(partitions), "i" + str(index)])
+
+ def prepare_path(path, reset):
+     """Make sure the directory ``path`` exists; with ``reset`` an existing directory is removed and recreated."""
+     already_there = os.path.isdir(path)
+     if already_there and reset:
+         shutil.rmtree(path)
+         already_there = os.path.isdir(path)
+
+     if not already_there:
+         os.makedirs(path)
+
+ def file_get_contents(filename):
+     """Read ``filename`` and return everything in it as one string."""
+     with open(filename) as handle:
+         contents = handle.read()
+     return contents
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
new file mode 100644
index 0000000..fb59b50
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import sys
+import os.path
+import shutil
+import tarfile
+import urllib
+import zipfile
+
+# Custom modules.
+from weather_config_ghcnd import *
+from weather_config_mshr import *
+
+class WeatherDownloadFiles:
+    """Download and unpack the GHCN-Daily and MSHR source files into one folder."""
+
+    def __init__(self, save_path):
+        """Remember save_path and create the folder when it does not exist yet."""
+        self.save_path = save_path
+
+        if not os.path.isdir(save_path):
+            os.makedirs(save_path)
+
+    def download_ghcnd_files(self, reset=False):
+        """Download every file named in FILE_NAMES from BASE_DOWNLOAD_URL."""
+        for file_name in FILE_NAMES:
+            url = BASE_DOWNLOAD_URL + file_name
+            self.download_file(url, reset)
+
+    def download_mshr_files(self, reset=False):
+        """Download every URL listed in MSHR_URLS."""
+        for url in MSHR_URLS:
+            self.download_file(url, reset)
+
+    def download_file(self, url, reset=False):
+        """Download url into the save folder.
+
+        The local file name is the last path segment of the URL. A file that
+        already exists is kept unless reset is true.
+        """
+        file_name = self.save_path + "/" + url.split('/')[-1]
+
+        if not os.path.isfile(file_name) or reset:
+            print("Downloading: " + url)
+            urllib.urlretrieve(url, file_name, report_download_status)
+            print("")
+
+    def unzip_ghcnd_package(self, package, reset=False):
+        """Extract <save_path>/<package>.tar.gz into <save_path>/<package>.
+
+        An existing extraction folder is reused unless reset is true, in
+        which case it is removed and the archive is extracted again.
+        """
+        file_name = self.save_path + "/" + package + ".tar.gz"
+        unzipped_path = self.save_path + "/" + package
+
+        if os.path.isdir(unzipped_path) and reset:
+            shutil.rmtree(unzipped_path)
+
+        if not os.path.isdir(unzipped_path):
+            print("Unzipping: " + file_name)
+            # The with block closes the archive even when extraction fails;
+            # previously the handle was never closed explicitly.
+            # NOTE(review): extractall() writes whatever member paths the
+            # archive contains - confirm the download source is trusted.
+            with tarfile.open(file_name, 'r:gz') as tar_file:
+                tar_file.extractall(unzipped_path)
+
+    def unzip_mshr_files(self, reset=False):
+        """Extract every MSHR .zip archive from the save folder into the save folder.
+
+        reset is accepted for symmetry with the other methods but is not
+        used: the archives are extracted on every call.
+        """
+        for url in MSHR_URLS:
+            if url.endswith('.zip'):
+                file_name = self.save_path + "/" + url.split('/')[-1]
+                print("Unzipping: " + file_name)
+                with zipfile.ZipFile(file_name, 'r') as myzip:
+                    myzip.extractall(self.save_path)
+
+def report_download_status(count, block, size):
+    """Redraw the download progress bar; passed to urlretrieve as its report hook.
+
+    count -- number of blocks transferred so far
+    block -- size of one block in bytes
+    size  -- total size in bytes; a value of zero or less (empty file, or a
+             server that reports no length) draws an empty bar instead of
+             dividing by it
+    """
+    line_size = 50
+    if size > 0:
+        percentage = float(count) * block / size
+    else:
+        percentage = 0.0
+    # Backspace over the previous bar so the new one is drawn in its place.
+    sys.stdout.write("\b" * line_size)
+    sys.stdout.write(get_report_line(percentage, line_size))
+    # The bar has no trailing newline, so push it out explicitly.
+    sys.stdout.flush()
+
+def get_report_line(percentage, line_size):
+    """Return a bar of line_size characters: "=" for the finished share, "-" for the rest."""
+    cells = []
+    for position in range(line_size):
+        finished = float(position) / line_size < percentage
+        cells.append("=" if finished else "-")
+    return "".join(cells)
+
+def download_file_save_as(url, new_file_name, reset=False):
+    """Fetch url into new_file_name; an existing file is kept unless reset is true."""
+    if os.path.isfile(new_file_name) and not reset:
+        return
+    print("Downloading: " + url)
+    urllib.urlretrieve(url, new_file_name, report_download_status)
+    print("")
+
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/util/README.md
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/README.md b/vxquery-benchmark/src/main/resources/util/README.md
new file mode 100644
index 0000000..8e2a204
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/util/README.md
@@ -0,0 +1,28 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+Utilities for Benchmark Operations
+=====================
+
+# Introduction
+
+Helpful scripts and configuration documents for working with the benchmarks.
+
+## Saxon Collection
+
+To test the data with other XQuery processors, the saxon script helps with
+creating a collection.xml file.
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/util/build_saxon_collection_xml.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/build_saxon_collection_xml.py b/vxquery-benchmark/src/main/resources/util/build_saxon_collection_xml.py
new file mode 100644
index 0000000..02f39ee
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/util/build_saxon_collection_xml.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import getopt, glob, os, sys
+
+def main(argv):
+    """Write collection.xml listing the XML files found below a folder.
+
+    argv holds the command line options without the program name:
+      -f, --folder  base folder to search (required)
+      -h            print the options and exit
+
+    Files up to four directory levels below the folder are listed. Each
+    href is the file path with the leading folder removed and XML special
+    characters escaped. The result is written to collection.xml in the
+    current working directory. Invalid or missing options end the script
+    with exit status 2.
+    """
+    # Imported locally: the module header only imports getopt, glob, os, sys.
+    from xml.sax.saxutils import escape
+
+    xml_folder = ""
+
+    # Get the base folder
+    try:
+        opts, args = getopt.getopt(argv, "f:h", ["folder="])
+    except getopt.GetoptError:
+        print('The file options for build_saxon_collection_xml.py were not correctly specified.')
+        print('To see a full list of options try:')
+        print(' $ python build_saxon_collection_xml.py -h')
+        sys.exit(2)
+    for opt, arg in opts:
+        if opt == '-h':
+            print('Options:')
+            print(' -f The base folder to create collection XML file.')
+            sys.exit()
+        elif opt in ('-f', "--folder"):
+            # The option must name a folder, not just any existing path.
+            if os.path.isdir(arg):
+                xml_folder = arg
+            else:
+                print('Error: Argument must be a folder name for --folder (-f).')
+                sys.exit(2)
+
+    # Required fields to run the script.
+    if xml_folder == "":
+        print('Error: The folder path option must be supplied: --folder (-f).')
+        sys.exit(2)
+
+    # find all XML files in folder
+    docs = []
+    for depth in range(1, 5):
+        # Search the depth-th directory level.
+        search_pattern = xml_folder + ('/*' * depth) + '.xml'
+        for file_path in glob.iglob(search_pattern):
+            # Drop only the leading folder; str.replace() would also remove
+            # later occurrences of the folder name inside the path.
+            if file_path.startswith(xml_folder):
+                href = file_path[len(xml_folder):]
+            else:
+                href = file_path
+            docs.append('<doc href="' + escape(href, {'"': '&quot;'}) + '"/>')
+    collection_xml = "<collection>" + "".join(docs) + "</collection>"
+
+    # create collection XML
+    with open('collection.xml', 'w') as outfile:
+        outfile.write(collection_xml)
+
+if __name__ == "__main__":
+ main(sys.argv[1:])
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/util/diff_xml_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/diff_xml_files.py b/vxquery-benchmark/src/main/resources/util/diff_xml_files.py
new file mode 100644
index 0000000..8ad2e30
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/util/diff_xml_files.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import getopt, glob, os, sys
+
+def main(argv):
+    """Compare two file lists and print what each one is missing.
+
+    argv holds the command line options without the program name:
+      --f1  path of the first list (required)
+      --f2  path of the second list (required)
+      -h    print the options and exit
+
+    Every line of a list is one entry. Line endings are ignored, so the
+    last line compares equal whether or not it ends with a newline. The
+    report names the entries found in only one list (sorted), followed by a
+    summary: entries found in both, duplicate lines per list and missing
+    entries per list. Invalid or missing options end the script with exit
+    status 2.
+    """
+    f1 = ""
+    f2 = ""
+
+    # Get the two list files
+    try:
+        opts, args = getopt.getopt(argv, "h", ["f1=", "f2="])
+    except getopt.GetoptError:
+        print('The file options for diff_xml_files.py were not correctly specified.')
+        print('To see a full list of options try:')
+        print(' $ python diff_xml_files.py -h')
+        sys.exit(2)
+    for opt, arg in opts:
+        if opt == '-h':
+            print('Options:')
+            print(' --f1 The first file list to compare.')
+            print(' --f2 The second file list to compare.')
+            sys.exit()
+        elif opt == '--f1':
+            # check if file exists.
+            if os.path.exists(arg):
+                f1 = arg
+            else:
+                print('Error: Argument must be a file name for --f1.')
+                sys.exit(2)
+        elif opt == '--f2':
+            # check if file exists.
+            if os.path.exists(arg):
+                f2 = arg
+            else:
+                print('Error: Argument must be a file name for --f2.')
+                sys.exit(2)
+
+    # Required fields to run the script.
+    if f1 == "":
+        print('Error: The file path option must be supplied: --f1.')
+        sys.exit(2)
+    if f2 == "":
+        print('Error: The file path option must be supplied: --f2.')
+        sys.exit(2)
+
+    # Strip line endings while reading so "name" and "name\n" are one entry.
+    with open(f1) as f:
+        content_f1 = [line.rstrip("\r\n") for line in f]
+    set_f1 = set(content_f1)
+
+    with open(f2) as f:
+        content_f2 = [line.rstrip("\r\n") for line in f]
+    set_f2 = set(content_f2)
+
+    missing_in_f1 = set_f2.difference(set_f1)
+    missing_in_f2 = set_f1.difference(set_f2)
+    found_in_both = set_f1.intersection(set_f2)
+
+    # Sorted so two runs over the same input print the same report.
+    print("")
+    print("Missing files in " + f1)
+    for f1_name in sorted(missing_in_f1):
+        print(" + " + f1_name.strip())
+
+    print("")
+    print("Missing files in " + f2)
+    for f2_name in sorted(missing_in_f2):
+        print(" + " + f2_name.strip())
+
+    offset = 40
+    print("")
+    print("XML Summary")
+    print((" - Found in both:").ljust(offset) + str(len(found_in_both)))
+    print((" - " + f1 + " diff set vs list:").ljust(offset) + str(len(content_f1) - len(set_f1)))
+    print((" - " + f2 + " diff set vs list:").ljust(offset) + str(len(content_f2) - len(set_f2)))
+    print((" - " + f1 + " missing:").ljust(offset) + str(len(missing_in_f1)))
+    print((" - " + f2 + " missing:").ljust(offset) + str(len(missing_in_f2)))
+
+
+if __name__ == "__main__":
+ main(sys.argv[1:])
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/util/find_averages_in_logs.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/find_averages_in_logs.py b/vxquery-benchmark/src/main/resources/util/find_averages_in_logs.py
new file mode 100644
index 0000000..1cd7939
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/util/find_averages_in_logs.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import fnmatch
+import getopt
+import glob
+import os
+import sys
+import csv
+
+SEARCH_STRING = 'Average execution time:'
+
+def find_files(directory, pattern):
+    """Yield (folder, file name) for every file below directory whose name matches pattern."""
+    for folder, _subfolders, file_names in os.walk(directory):
+        for matched_name in fnmatch.filter(file_names, pattern):
+            yield (folder, matched_name)
+
+
+def main(argv):
+    """Collect the "Average execution time:" values of *.log files in a CSV file.
+
+    argv holds the command line options without the program name:
+      -f, --folder     base folder searched recursively for *.log files (required)
+      -s, --save_file  CSV file to write (required)
+      -t, --data_type  accepted, but its value is not used by this script
+      -h               print the options and exit
+
+    One row is written for every log line that starts with SEARCH_STRING
+    and carries a value: the "/"-separated components of the log file's
+    folder relative to the base folder, the first dot-separated part of the
+    file name, the first value after SEARCH_STRING, and the third
+    dot-separated part of the file name (empty when the name has fewer
+    parts). Invalid or missing options end the script with exit status 2.
+    """
+    log_folder = ""
+    save_file = ""
+
+    # Get the base folder
+    try:
+        opts, args = getopt.getopt(argv, "f:hs:t:", ["folder=", "save_file=", "data_type="])
+    except getopt.GetoptError:
+        print('The file options for find_averages_in_logs.py were not correctly specified.')
+        print('To see a full list of options try:')
+        print(' $ python find_averages_in_logs.py -h')
+        sys.exit(2)
+    for opt, arg in opts:
+        if opt == '-h':
+            print('Options:')
+            print(' -f The base folder to search for log files.')
+            print(' -s The save file.')
+            sys.exit()
+        elif opt in ('-f', "--folder"):
+            # The option must name a folder, not just any existing path.
+            if os.path.isdir(arg):
+                log_folder = arg
+            else:
+                print('Error: Argument must be a folder name for --folder (-f).')
+                sys.exit(2)
+        elif opt in ('-s', "--save_file"):
+            save_file = arg
+        # -t/--data_type stays in the option list so existing command lines
+        # keep working; nothing reads its value.
+
+    # Required fields to run the script.
+    if log_folder == "":
+        print('Error: The folder path option must be supplied: --folder (-f).')
+        sys.exit(2)
+    if save_file == "":
+        print('Error: The save file option must be supplied: --save_file (-s).')
+        sys.exit(2)
+
+    with open(save_file, 'w') as outfile:
+        csvfile = csv.writer(outfile)
+        for path, filename in find_files(log_folder, '*.log'):
+            # Drop only the leading base folder; str.replace() would also
+            # remove later occurrences of its name inside the path.
+            if path.startswith(log_folder):
+                folders = path[len(log_folder):]
+            else:
+                folders = path
+            name_split = filename.split(".")
+            # A name such as "run.log" has no third part; use an empty
+            # column instead of failing with IndexError.
+            third_name_part = name_split[2] if len(name_split) > 2 else ""
+            with open(path + "/" + filename) as infile:
+                for line in infile:
+                    if not line.startswith(SEARCH_STRING):
+                        continue
+                    # First whitespace-separated value after the label,
+                    # without any trailing newline.
+                    time_fields = line[len(SEARCH_STRING):].split()
+                    if not time_fields:
+                        continue
+
+                    # Build data row
+                    row = folders.split("/")
+                    row.append(name_split[0])
+                    row.append(time_fields[0])
+                    row.append(third_name_part)
+                    csvfile.writerow(row)
+
+
+if __name__ == "__main__":
+ main(sys.argv[1:])
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/util/list_xml_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/list_xml_files.py b/vxquery-benchmark/src/main/resources/util/list_xml_files.py
new file mode 100644
index 0000000..750a95e
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/util/list_xml_files.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import fnmatch
+import getopt
+import glob
+import os
+import sys
+
+def find_files(directory, pattern):
+    """Walk directory and yield a (folder, file name) pair per file name matching pattern."""
+    for current_folder, _dir_names, file_names in os.walk(directory):
+        for candidate in file_names:
+            if not fnmatch.fnmatch(candidate, pattern):
+                continue
+            yield (current_folder, candidate)
+
+
+def main(argv):
+    r"""Write the names of all XML files below a folder to list_xml.csv.
+
+    Same as bash: find $FOLDER -type f -name "*.xml" -exec basename {} \; > list_xml.csv
+
+    argv holds the command line options without the program name:
+      -f, --folder  base folder to search recursively (required)
+      -h            print the options and exit
+
+    list_xml.csv is created in the current working directory with one file
+    name (without its folder) per line. Invalid or missing options end the
+    script with exit status 2.
+    """
+    xml_folder = ""
+
+    # Get the base folder
+    try:
+        opts, args = getopt.getopt(argv, "f:h", ["folder="])
+    except getopt.GetoptError:
+        print('The file options for list_xml_files.py were not correctly specified.')
+        print('To see a full list of options try:')
+        print(' $ python list_xml_files.py -h')
+        sys.exit(2)
+    for opt, arg in opts:
+        if opt == '-h':
+            print('Options:')
+            print(' -f The base folder to build XML file list.')
+            sys.exit()
+        elif opt in ('-f', "--folder"):
+            # The option must name a folder, not just any existing path.
+            if os.path.isdir(arg):
+                xml_folder = arg
+            else:
+                print('Error: Argument must be a folder name for --folder (-f).')
+                sys.exit(2)
+
+    # Required fields to run the script.
+    if xml_folder == "":
+        print('Error: The folder path option must be supplied: --folder (-f).')
+        sys.exit(2)
+
+    # Only the file name is listed; the folder it was found in is dropped.
+    names = [filename for path, filename in find_files(xml_folder, '*.xml')]
+
+    # create the list file
+    with open('list_xml.csv', 'w') as outfile:
+        outfile.write("".join(name + "\n" for name in names))
+
+if __name__ == "__main__":
+ main(sys.argv[1:])
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/util/merge_xml_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/merge_xml_files.py b/vxquery-benchmark/src/main/resources/util/merge_xml_files.py
new file mode 100644
index 0000000..2df026b
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/util/merge_xml_files.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import fnmatch
+import getopt
+import glob
+import os
+import sys
+
+XML_PREFIX = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?><root>' + "\n"
+XML_SUFFIX = '</root>' + "\n"
+
+def find_files(directory, pattern):
+    """Yield (folder, file name) for matching files below directory, following symlinked folders."""
+    walker = os.walk(directory, followlinks=True)
+    for folder, _subfolders, file_names in walker:
+        for matched_name in fnmatch.filter(file_names, pattern):
+            yield (folder, matched_name)
+
+
+def main(argv):
+    """Merge the XML files found below a folder into one file.
+
+    argv holds the command line options without the program name:
+      -f, --folder     base folder searched recursively for *.xml files (required)
+      -s, --save_file  file to write (required)
+      -t, --data_type  only merge files whose folder path contains this text;
+                       empty (the default) merges every file
+      -h               print the options and exit
+
+    The save file starts with XML_PREFIX and ends with XML_SUFFIX. In
+    between, every line of every selected file is copied, except lines equal
+    to XML_PREFIX or XML_SUFFIX. The save file itself is skipped when it
+    lies below the searched folder. Invalid or missing options end the
+    script with exit status 2.
+    """
+    xml_folder = ""
+    save_file = ""
+    data_type = ""
+
+    # Get the base folder
+    try:
+        opts, args = getopt.getopt(argv, "f:hs:t:", ["folder=", "save_file=", "data_type="])
+    except getopt.GetoptError:
+        print('The file options for merge_xml_files.py were not correctly specified.')
+        print('To see a full list of options try:')
+        print(' $ python merge_xml_files.py -f /path/to/folder -s new.xml -t sensors')
+        sys.exit(2)
+    for opt, arg in opts:
+        if opt == '-h':
+            print('Options:')
+            print(' -f The base folder to build XML file list.')
+            print(' -s The save file.')
+            print(' -t Only merge files whose path contains this text.')
+            sys.exit()
+        elif opt in ('-f', "--folder"):
+            # The option must name a folder, not just any existing path.
+            if os.path.isdir(arg):
+                xml_folder = arg
+            else:
+                print('Error: Argument must be a folder name for --folder (-f).')
+                sys.exit(2)
+        elif opt in ('-s', "--save_file"):
+            save_file = arg
+        elif opt in ('-t', "--data_type"):
+            data_type = arg
+
+    # Required fields to run the script.
+    if xml_folder == "":
+        print('Error: The folder path option must be supplied: --folder (-f).')
+        sys.exit(2)
+    if save_file == "":
+        print('Error: The save file option must be supplied: --save_file (-s).')
+        sys.exit(2)
+
+    save_path = os.path.abspath(save_file)
+    with open(save_file, 'w') as outfile:
+        outfile.write(XML_PREFIX)
+        for path, filename in find_files(xml_folder, '*.xml'):
+            # Only write out a specific type of data xml documents found in a specific path.
+            if data_type not in path:
+                continue
+            source = path + "/" + filename
+            # Never read the file being written: it would feed its own
+            # output back into itself.
+            if os.path.abspath(source) == save_path:
+                continue
+            with open(source) as infile:
+                for line in infile:
+                    # Skip the root tags.
+                    if line != XML_PREFIX and line != XML_SUFFIX:
+                        outfile.write(line)
+        outfile.write(XML_SUFFIX)
+
+if __name__ == "__main__":
+ main(sys.argv[1:])
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/util/vxquery_functions.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/vxquery_functions.xq b/vxquery-benchmark/src/main/resources/util/vxquery_functions.xq
new file mode 100644
index 0000000..d0621eb
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/util/vxquery_functions.xq
@@ -0,0 +1,27 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+
+(: XQuery Function List :)
+(: Lists the VXQuery built-in functions read from builtin-functions.xml.
+   Each function becomes one "name,parameter types,return type" record with
+   the parameter types separated by spaces; the records are joined with '|'. :)
+let $list := "../../../../../vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml"
+let $r :=
+ for $f in fn:doc($list)/functions/function
+ let $pl :=
+ for $p in $f/param
+ return $p/@type
+ return fn:string-join(($f/@name, fn:string-join($pl, ' '), $f/return/@type), ',')
+return fn:string-join($r , '|')
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/util/vxquery_operators.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/vxquery_operators.xq b/vxquery-benchmark/src/main/resources/util/vxquery_operators.xq
new file mode 100644
index 0000000..f485807
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/util/vxquery_operators.xq
@@ -0,0 +1,27 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+
+(: XQuery Operator List :)
+(: Lists the VXQuery built-in operators read from builtin-operators.xml.
+   Each operator becomes one "name,parameter types,return type" record with
+   the parameter types separated by spaces; the records are joined with '|'. :)
+let $list := "../../../../../vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-operators.xml"
+let $r :=
+ for $f in fn:doc($list)/operators/operator
+ let $pl :=
+ for $p in $f/param
+ return $p/@type
+ return fn:string-join(($f/@name, fn:string-join($pl, ' '), $f/return/@type), ',')
+return fn:string-join($r , '|')
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java
----------------------------------------------------------------------
diff --git a/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java b/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java
index 8451bd5..c0ca612 100644
--- a/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java
+++ b/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java
@@ -375,6 +375,7 @@ public class VXQuery {
ccConfig.clientNetPort = 39000;
ccConfig.clusterNetIpAddress = "127.0.0.1";
ccConfig.clusterNetPort = 39001;
+ ccConfig.httpPort = 39002;
ccConfig.profileDumpPeriod = 10000;
cc = new ClusterControllerService(ccConfig);
cc.start();
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/pom.xml
----------------------------------------------------------------------
diff --git a/vxquery-server/pom.xml b/vxquery-server/pom.xml
index ef8f348..6c99712 100644
--- a/vxquery-server/pom.xml
+++ b/vxquery-server/pom.xml
@@ -47,6 +47,10 @@
<configuration>
<programs>
<program>
+ <mainClass>org.apache.vxquery.cli.VXQueryClusterShutdown</mainClass>
+ <name>vxqueryshutdown</name>
+ </program>
+ <program>
<mainClass>edu.uci.ics.hyracks.control.cc.CCDriver</mainClass>
<name>vxquerycc</name>
</program>
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/cluster_actions.py
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/scripts/cluster_actions.py b/vxquery-server/src/main/resources/scripts/cluster_actions.py
index a7cda17..deeee33 100644
--- a/vxquery-server/src/main/resources/scripts/cluster_actions.py
+++ b/vxquery-server/src/main/resources/scripts/cluster_actions.py
@@ -62,6 +62,10 @@ class ClusterActions:
time.sleep(5)
self.start_all_ncs()
+    def stop_cluster(self):
+        """Pass the master node machine to stop_cc_and_all_ncs()."""
+        self.stop_cc_and_all_ncs(self.ci.get_master_node_machine())
+
def stop(self):
self.stop_all_ncs()
time.sleep(2)
@@ -109,16 +113,22 @@ class ClusterActions:
def start_cc(self, machine):
print "Start Cluster Controller."
- print " " + machine.get_id() + " " + machine.get_ip() + ":" + machine.get_port()
- command = "./vxquery-server/target/appassembler/bin/startcc.sh " + machine.get_ip() + " \"" + machine.get_port() + "\" \"" + machine.get_java_opts() + "\""
+ print " " + machine.get_id() + " " + machine.get_client_ip() + ":" + machine.get_client_port()
+ command = "./vxquery-server/target/appassembler/bin/startcc.sh " + machine.get_client_ip() + " \"" + machine.get_client_port() + "\" \"" + machine.get_java_opts() + "\""
self.run_remote_command(machine.get_username(), machine.get_id(), command)
def start_nc(self, machine, cc):
print "Start Node Controller."
print " " + machine.get_id() + " " + machine.get_ip()
- command = "./vxquery-server/target/appassembler/bin/startnc.sh " + machine.get_id() + " " + machine.get_ip() + " " + cc.get_ip() + " \"" + cc.get_port() + "\" \"" + machine.get_java_opts() + "\""
+ command = "./vxquery-server/target/appassembler/bin/startnc.sh " + machine.get_id() + " " + machine.get_ip() + " " + cc.get_client_ip() + " \"" + cc.get_client_port() + "\" \"" + machine.get_java_opts() + "\""
self.run_remote_command(machine.get_username(), machine.get_id(), command)
+    def stop_cc_and_all_ncs(self, machine):
+        """Run stopcluster.sh on machine, passing its client IP, client port and Java options."""
+        print("Stop Cluster and Node Controllers.")
+        print(" " + machine.get_id() + " " + machine.get_client_ip() + ":" + machine.get_client_port())
+        script = "./vxquery-server/target/appassembler/bin/stopcluster.sh "
+        # Port and Java options are wrapped in double quotes for the remote shell.
+        quoted_port = " \"" + machine.get_client_port() + "\""
+        quoted_java_opts = " \"" + machine.get_java_opts() + "\""
+        command = script + machine.get_client_ip() + quoted_port + quoted_java_opts
+        self.run_remote_command(machine.get_username(), machine.get_id(), command)
+
def stop_cc(self, machine):
print "Stop Cluster Controller."
print " " + machine.get_id() + " " + machine.get_ip()
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/cluster_cli.py
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/scripts/cluster_cli.py b/vxquery-server/src/main/resources/scripts/cluster_cli.py
index 089ad08..bd5efa6 100644
--- a/vxquery-server/src/main/resources/scripts/cluster_cli.py
+++ b/vxquery-server/src/main/resources/scripts/cluster_cli.py
@@ -39,10 +39,10 @@ def main(argv):
sys.exit()
elif opt in ('-a', "--action"):
# check if file exists.
- if arg in ('deploy', 'start', 'stop'):
+ if arg in ('deploy', 'start', 'stop', 'kill'):
action = arg
else:
- print 'Error: Argument must be a string ("deploy", "start", or "stop") for --action (-a).'
+ print 'Error: Argument must be a string ("deploy", "start", "stop", or "kill") for --action (-a).'
sys.exit()
elif opt in ('-c', "--cluster"):
# check if file exists.
@@ -72,6 +72,8 @@ def main(argv):
if action == 'start':
cluster.start()
elif action == 'stop':
+ cluster.stop_cluster()
+ elif action == 'kill':
cluster.stop()
elif action == 'deploy':
if deploy_path != "":
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/cluster_information.py
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/scripts/cluster_information.py b/vxquery-server/src/main/resources/scripts/cluster_information.py
index 677204b..94b231d 100644
--- a/vxquery-server/src/main/resources/scripts/cluster_information.py
+++ b/vxquery-server/src/main/resources/scripts/cluster_information.py
@@ -32,12 +32,13 @@ class ClusterInformation:
master_node = self.config.getElementsByTagName("master_node")[0]
id = NodeXmlReader.get_cluster_id(master_node)
ip = NodeXmlReader.get_cluster_ip(master_node)
- port = NodeXmlReader.get_cluster_port(master_node)
+ client_ip = NodeXmlReader.get_client_ip(master_node)
+ client_port = NodeXmlReader.get_client_port(master_node)
java_opts = NodeXmlReader.get_java_opts(master_node)
if java_opts is "":
java_opts = self.get_java_opts()
username = self.get_username()
- return Machine(id, ip, username, port, java_opts)
+ return Machine(id, ip, username, client_ip, client_port, java_opts)
def get_node_machine_list(self):
nodes = []
@@ -48,7 +49,7 @@ class ClusterInformation:
java_opts = NodeXmlReader.get_java_opts(node)
if java_opts is "":
java_opts = self.get_java_opts()
- nodes.append(Machine(id, ip, username, "", java_opts))
+ nodes.append(Machine(id, ip, username, "", "", java_opts))
return nodes
class NodeXmlReader(object):
@@ -64,8 +65,12 @@ class NodeXmlReader(object):
return get_tag_text(node, "cluster_ip")
@staticmethod
- def get_cluster_port(node):
- return get_tag_text(node, "cluster_port")
+ def get_client_ip(node):
+ return get_tag_text(node, "client_ip")
+
+ @staticmethod
+ def get_client_port(node):
+ return get_tag_text(node, "client_port")
@staticmethod
def get_java_opts(node):
@@ -90,11 +95,12 @@ class Machine:
log_path = ""
port = ""
- def __init__(self, id, ip, username, port="", java_opts=""):
+ def __init__(self, id, ip, username, client_ip="", client_port="", java_opts=""):
self.id = id
self.ip = ip
self.username = username
- self.port = port
+ self.client_ip = client_ip
+ self.client_port = client_port
self.java_opts = java_opts
def get_id(self):
@@ -106,8 +112,11 @@ class Machine:
def get_java_opts(self):
return self.java_opts
- def get_port(self):
- return self.port
+ def get_client_ip(self):
+ return self.client_ip
+
+ def get_client_port(self):
+ return self.client_port
def get_username(self):
return self.username
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/startcc.sh
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/scripts/startcc.sh b/vxquery-server/src/main/resources/scripts/startcc.sh
index 002055c..dd1e5be 100755
--- a/vxquery-server/src/main/resources/scripts/startcc.sh
+++ b/vxquery-server/src/main/resources/scripts/startcc.sh
@@ -23,7 +23,7 @@ CCHOST=$1
CCPORT=$2
J_OPTS=$3
-#Export JAVA_HOME
+# Export JAVA_HOME
export JAVA_HOME=${JAVA_HOME}
# java opts added parameters
@@ -43,8 +43,8 @@ mkdir -p ${CCLOGS_DIR}
CC_OPTIONS=" -client-net-ip-address ${CCHOST} -cluster-net-ip-address ${CCHOST} "
if [ ! -z "${CCPORT}" ]
then
- CC_OPTIONS=" ${CC_OPTIONS} -cluster-net-port ${CCPORT} "
+ CC_OPTIONS=" ${CC_OPTIONS} -client-net-port ${CCPORT} "
fi
-#Launch hyracks cc script without toplogy
+# Launch hyracks cc script without topology
${VXQUERY_HOME}/vxquery-server/target/appassembler/bin/vxquerycc ${CC_OPTIONS} &> ${CCLOGS_DIR}/cc_$(date +%Y%m%d%H%M).log &
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/startnc.sh
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/scripts/startnc.sh b/vxquery-server/src/main/resources/scripts/startnc.sh
index c2bda3c..260512e 100755
--- a/vxquery-server/src/main/resources/scripts/startnc.sh
+++ b/vxquery-server/src/main/resources/scripts/startnc.sh
@@ -25,7 +25,7 @@ CCHOST=$3
CCPORT=$4
J_OPTS=$5
-#Set JAVA_HOME
+# Set JAVA_HOME
export JAVA_HOME=$JAVA_HOME
# java opts added parameters
@@ -49,5 +49,5 @@ then
fi
-#Launch hyracks nc
+# Launch hyracks nc
${VXQUERY_HOME}/vxquery-server/target/appassembler/bin/vxquerync ${NC_OPTIONS} &> ${NCLOGS_DIR}/nc_$(date +%Y%m%d%H%M).log &
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/stopcc.sh
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/scripts/stopcc.sh b/vxquery-server/src/main/resources/scripts/stopcc.sh
index 3290ec6..f2b6883 100755
--- a/vxquery-server/src/main/resources/scripts/stopcc.sh
+++ b/vxquery-server/src/main/resources/scripts/stopcc.sh
@@ -21,8 +21,7 @@ hostname
USER=$1
-#Kill process
-#Kill process
+# Kill process
PID=`ps -ef|grep ${USER}|grep java|grep 'Dapp.name=vxquerycc'|awk '{print $2}'`
if [ "$PID" == "" ]; then
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/stopcluster.sh
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/scripts/stopcluster.sh b/vxquery-server/src/main/resources/scripts/stopcluster.sh
new file mode 100755
index 0000000..238da7f
--- /dev/null
+++ b/vxquery-server/src/main/resources/scripts/stopcluster.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+CCHOST=$1
+CCPORT=$2
+J_OPTS=$3
+
+# Export JAVA_HOME
+export JAVA_HOME=${JAVA_HOME}
+
+# java opts added parameters
+if [ ! -z "${J_OPTS}" ]
+then
+ JAVA_OPTS="${JAVA_OPTS} ${J_OPTS}"
+ export JAVA_OPTS
+fi
+
+VXQUERY_HOME=`pwd`
+CCLOGS_DIR=${VXQUERY_HOME}/logs
+
+# logs dir
+mkdir -p ${CCLOGS_DIR}
+
+# Set up the options for the cc.
+CC_OPTIONS=" -client-net-ip-address ${CCHOST} "
+if [ ! -z "${CCPORT}" ]
+then
+ CC_OPTIONS=" ${CC_OPTIONS} -client-net-port ${CCPORT} "
+fi
+
+# Run the vxquery shutdown script with the cc client options
+echo "${VXQUERY_HOME}/vxquery-server/target/appassembler/bin/vxqueryshutdown ${CC_OPTIONS} &> ${CCLOGS_DIR}/shutdown_$(date +%Y%m%d%H%M).log &"
+${VXQUERY_HOME}/vxquery-server/target/appassembler/bin/vxqueryshutdown ${CC_OPTIONS} &> ${CCLOGS_DIR}/shutdown_$(date +%Y%m%d%H%M).log &
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/stopnc.sh
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/scripts/stopnc.sh b/vxquery-server/src/main/resources/scripts/stopnc.sh
index 56ffc66..8f29de5 100755
--- a/vxquery-server/src/main/resources/scripts/stopnc.sh
+++ b/vxquery-server/src/main/resources/scripts/stopnc.sh
@@ -21,7 +21,7 @@ hostname
USER=$1
-#Kill process
+# Kill process
PID=`ps -ef|grep ${USER}|grep java|grep 'Dapp.name=vxquerync'|awk '{print $2}'`
if [ "$PID" == "" ]; then
[03/14] git commit: Remove benchmark files to allow easy copy from
other branch.
Posted by pr...@apache.org.
Remove benchmark files to allow easy copy from other branch.
Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/3167366d
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/3167366d
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/3167366d
Branch: refs/heads/master
Commit: 3167366d02d6170a99d2f8b7818d07347cc32049
Parents: 7f73fe9
Author: Preston Carman <pr...@apache.org>
Authored: Mon Oct 6 14:36:53 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Mon Oct 6 14:36:53 2014 -0700
----------------------------------------------------------------------
.../main/resources/noaa-ghcn-daily/README.md | 40 --
.../other_systems/mrql_gsn/q00.mrql | 23 -
.../other_systems/mrql_gsn/q01.mrql | 21 -
.../other_systems/mrql_gsn/q02.mrql | 24 -
.../other_systems/mrql_gsn/q03.mrql | 22 -
.../other_systems/mrql_gsn/q04.mrql | 24 -
.../other_systems/mrql_gsn/q04_sensor.mrql | 21 -
.../other_systems/mrql_gsn/q04_station.mrql | 24 -
.../other_systems/mrql_gsn/q05.mrql | 27 -
.../other_systems/mrql_gsn/q05_sensor.mrql | 23 -
.../other_systems/mrql_gsn/q05_station.mrql | 23 -
.../other_systems/mrql_gsn/q06.mrql | 26 -
.../other_systems/mrql_gsn/q06_sensor.mrql | 23 -
.../other_systems/mrql_gsn/q06_station.mrql | 23 -
.../other_systems/mrql_gsn/q07.mrql | 26 -
.../other_systems/mrql_gsn/q07_join_count.mrql | 26 -
.../other_systems/mrql_gsn/q07_tmax.mrql | 22 -
.../other_systems/mrql_gsn/q07_tmin.mrql | 22 -
.../other_systems/mrql_hcn/q00.mrql | 23 -
.../other_systems/mrql_hcn/q01.mrql | 21 -
.../other_systems/mrql_hcn/q02.mrql | 24 -
.../other_systems/mrql_hcn/q03.mrql | 22 -
.../other_systems/mrql_hcn/q04.mrql | 24 -
.../other_systems/mrql_hcn/q04_sensor.mrql | 21 -
.../other_systems/mrql_hcn/q04_station.mrql | 24 -
.../other_systems/mrql_hcn/q05.mrql | 27 -
.../other_systems/mrql_hcn/q05_sensor.mrql | 23 -
.../other_systems/mrql_hcn/q05_station.mrql | 23 -
.../other_systems/mrql_hcn/q06.mrql | 28 -
.../other_systems/mrql_hcn/q06_sensor.mrql | 23 -
.../other_systems/mrql_hcn/q06_station.mrql | 23 -
.../other_systems/mrql_hcn/q07.mrql | 26 -
.../other_systems/mrql_hcn/q07_join_count.mrql | 26 -
.../other_systems/mrql_hcn/q07_tmax.mrql | 22 -
.../other_systems/mrql_hcn/q07_tmin.mrql | 22 -
.../other_systems/mrql_test/q00.mrql | 23 -
.../other_systems/mrql_test/q01.mrql | 21 -
.../other_systems/mrql_test/q02.mrql | 24 -
.../other_systems/mrql_test/q03.mrql | 22 -
.../other_systems/mrql_test/q04.mrql | 24 -
.../other_systems/mrql_test/q05.mrql | 27 -
.../other_systems/mrql_test/q06.mrql | 27 -
.../other_systems/mrql_test/q07.mrql | 26 -
.../noaa-ghcn-daily/other_systems/saxon/q04.xq | 30 -
.../noaa-ghcn-daily/other_systems/saxon/q05.xq | 33 --
.../noaa-ghcn-daily/other_systems/saxon/q06.xq | 30 -
.../noaa-ghcn-daily/queries/no_result.xq | 24 -
.../resources/noaa-ghcn-daily/queries/q00.xq | 31 --
.../resources/noaa-ghcn-daily/queries/q01.xq | 25 -
.../resources/noaa-ghcn-daily/queries/q02.xq | 30 -
.../resources/noaa-ghcn-daily/queries/q03.xq | 25 -
.../resources/noaa-ghcn-daily/queries/q04.xq | 30 -
.../noaa-ghcn-daily/queries/q04_sensor.xq | 27 -
.../noaa-ghcn-daily/queries/q04_station.xq | 25 -
.../resources/noaa-ghcn-daily/queries/q05.xq | 33 --
.../noaa-ghcn-daily/queries/q05_sensor.xq | 28 -
.../noaa-ghcn-daily/queries/q05_station.xq | 25 -
.../resources/noaa-ghcn-daily/queries/q06.xq | 30 -
.../noaa-ghcn-daily/queries/q06_sensor.xq | 27 -
.../noaa-ghcn-daily/queries/q06_station.xq | 24 -
.../resources/noaa-ghcn-daily/queries/q07.xq | 33 --
.../noaa-ghcn-daily/queries/q07_tmax.xq | 26 -
.../noaa-ghcn-daily/queries/q07_tmin.xq | 26 -
.../noaa-ghcn-daily/queries/sensor_count.xq | 24 -
.../noaa-ghcn-daily/queries/station_count.xq | 24 -
.../resources/noaa-ghcn-daily/scripts/README.md | 51 --
.../noaa-ghcn-daily/scripts/run_benchmark.sh | 67 ---
.../scripts/run_benchmark_cluster.sh | 90 ---
.../noaa-ghcn-daily/scripts/run_group_test.sh | 51 --
.../noaa-ghcn-daily/scripts/run_mrql_tests.sh | 42 --
.../scripts/weather_benchmark.py | 377 -------------
.../noaa-ghcn-daily/scripts/weather_cli.py | 236 --------
.../noaa-ghcn-daily/scripts/weather_config.py | 134 -----
.../scripts/weather_config_ghcnd.py | 95 ----
.../scripts/weather_config_mshr.py | 78 ---
.../scripts/weather_convert_to_xml.py | 554 -------------------
.../scripts/weather_data_files.py | 416 --------------
.../scripts/weather_download_files.py | 102 ----
.../src/main/resources/util/README.md | 28 -
.../util/build_saxon_collection_xml.py | 63 ---
.../resources/util/find_averages_in_logs.py | 97 ----
.../src/main/resources/util/log_top.sh | 35 --
.../src/main/resources/util/merge_xml_files.py | 88 ---
.../main/resources/util/vxquery_functions.xq | 27 -
.../main/resources/util/vxquery_operators.xq | 27 -
85 files changed, 4304 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/README.md
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/README.md b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/README.md
deleted file mode 100644
index 9b512dd..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/README.md
+++ /dev/null
@@ -1,40 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-NOAA GHCN-Daily Benchmark
-=====================
-
-# Introduction
-
-The NOAA has hosted DAILY GLOBAL HISTORICAL CLIMATOLOGY NETWORK (GHCN-DAILY)
-.dat files. Weather.gov has an RSS/XML feed that gives current weather sensor
-readings. Using the RSS feed as a template, the GHCN-DAILY historical
-information is used to generate past RSS feed XML documents. The process allows
-testing on a large set of information with out having to continually monitor
-the weather.gov site for all the weather details for years.
-
-# Detailed Description
-
-Detailed GHDN-DAILY information:
-<http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt>
-
-# Folders
-
- * conf
- * other_systems
- * queries
- * scripts
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q00.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q00.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q00.mrql
deleted file mode 100644
index 8e83879..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q00.mrql
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-select (r)
-from r in source(xml, "sample_xml/gsn_sensors.xml", {"data"})
-where text(r.station) = "GHCND:USW00012836"
- and toInt(substring(text(r.date), 0, 4)) >= 2003
- and toInt(substring(text(r.date), 5, 7)) = 12
- and toInt(substring(text(r.date), 8, 10)) = 25
-;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q01.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q01.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q01.mrql
deleted file mode 100644
index 643c47b..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q01.mrql
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-select (r)
-from r in source(xml, "sample_xml/gsn_sensors.xml", {"data"})
-where text(r.dataType) = "AWND"
- and toFloat(text(r.value)) > 491.744
-;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q02.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q02.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q02.mrql
deleted file mode 100644
index 8dc9c4b..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q02.mrql
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-sum(
- select (toInt(text(r.value)))
- from r in source(xml, "sample_xml/gsn_sensors.xml", {"data"})
- where text(r.station) = "GHCND:USW00014771"
- and toInt(substring(text(r.date), 0, 4)) = 1999
- and text(r.dataType) = "PRCP"
-) / 10
-;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q03.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q03.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q03.mrql
deleted file mode 100644
index f81e914..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q03.mrql
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-max(
- select (toInt(text(r.value)))
- from r in source(xml, "sample_xml/gsn_sensors.xml", {"data"})
- where text(r.dataType) = "TMAX"
-) / 10
-;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04.mrql
deleted file mode 100644
index aaa8599..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04.mrql
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-select (sensors)
-from sensors in source(xml, "sample_xml/gsn_sensors.xml", {"data"}),
- stations in source(xml, "sample_xml/gsn_stations.xml", {"station"}),
- l in stations.locationLabels
-where text(stations.id) = text(sensors.station)
- and text(sensors.date) = "1976-07-04T00:00:00.000"
- and text(l.displayName) = "Washington"
-;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04_sensor.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04_sensor.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04_sensor.mrql
deleted file mode 100644
index 69fb35a..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04_sensor.mrql
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-select (r)
-from r in source(xml, "sample_xml/gsn_sensors.xml", {"data"})
-where text(r.date) = "1976-07-04T00:00:00.000"
- and text(r.dataType) = "TMAX"
-;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04_station.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04_station.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04_station.mrql
deleted file mode 100644
index 7c3c8bb..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q04_station.mrql
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count(
- select (r)
- from r in source(xml, "sample_xml/gsn_sensors.xml", {"data"}),
- t in source(xml, "sample_xml/gsn_stations.xml", {"station"}),
- l in t.locationLabels
- where text(l.displayName) = "WASHINGTON"
-)
-;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q05.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q05.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q05.mrql
deleted file mode 100644
index ca5a558..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q05.mrql
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-min(
- select (toInt(text(sensors.value)))
- from sensors in source(xml, "sample_xml/gsn_sensors.xml", {"data"}),
- stations in source(xml, "sample_xml/gsn_stations.xml", {"station"}),
- l in stations.locationLabels
- where text(stations.id) = text(sensors.station)
- and toInt(substring(text(sensors.date), 0, 4)) = 2001
- and text(sensors.dataType) = "TMIN"
- and text(l.id) = "FIPS:US"
-) / 10
-;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q05_sensor.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q05_sensor.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q05_sensor.mrql
deleted file mode 100644
index 95ea398..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q05_sensor.mrql
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count(
- select (toInt(text(r.value)))
- from r in source(xml, "sample_xml/gsn_sensors.xml", {"data"})
- where toInt(substring(text(r.date), 0, 4)) = 2001
- and text(r.dataType) = "TMIN"
-)
-;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q05_station.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q05_station.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q05_station.mrql
deleted file mode 100644
index 1f41e1e..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q05_station.mrql
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count(
- select (t)
- from t in source(xml, "sample_xml/gsn_stations.xml", {"station"}),
- l in t.locationLabels
- where text(l.id) = "FIPS:US"
-)
-;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06.mrql
deleted file mode 100644
index b46049f..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06.mrql
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-select (n, d, v)
-from sensors in source(xml, "sample_xml/gsn_sensors.xml", {"data"}),
- d in sensors.date,
- v in sensors.value,
- stations in source(xml, "sample_xml/gsn_stations.xml", {"station"}),
- n in stations.displayName,
-where text(stations.id) = text(sensors.station)
- and toInt(substring(text(d), 0, 4)) = 2000
- and text(sensors.dataType) = "TMAX"
-;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06_sensor.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06_sensor.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06_sensor.mrql
deleted file mode 100644
index 2b21287..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06_sensor.mrql
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count(
- select (r.date, r.value)
- from r in source(xml, "sample_xml/gsn_sensors.xml", {"data"})
- where toInt(substring(text(r.date), 0, 4)) = 2000
- and text(r.dataType) = "TMAX"
-)
-;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06_station.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06_station.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06_station.mrql
deleted file mode 100644
index fbc1ea3..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q06_station.mrql
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count(
- select (t.displayName)
- from t in source(xml, "sample_xml/gsn_stations.xml", {"station"}),
- l in t.locationLabels
- where text(l.displayName) = "WASHINGTON"
-)
-;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07.mrql
deleted file mode 100644
index 5d94e6c..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07.mrql
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-avg(
- select (toInt(text(rtmax.value))-toInt(text(rtmin.value)))
- from rtmax in source(xml, "sample_xml/gsn_sensors.xml", {"data"}),
- rtmin in source(xml, "sample_xml/gsn_sensors.xml", {"data"})
- where text(rtmax.date) = text(rtmin.date)
- and text(rtmax.station) = text(rtmin.station)
- and text(rtmax.dataType) = "TMAX"
- and text(rtmin.dataType) = "TMIN"
-) / 10
-;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07_join_count.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07_join_count.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07_join_count.mrql
deleted file mode 100644
index 1c3a87b..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07_join_count.mrql
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count(
- select (toInt(text(rtmax.value))-toInt(text(rtmin.value)))
- from rtmax in source(xml, "sample_xml/gsn_sensors.xml", {"data"}),
- rtmin in source(xml, "sample_xml/gsn_sensors.xml", {"data"})
- where text(rtmax.date) = text(rtmin.date)
- and text(rtmax.station) = text(rtmin.station)
- and text(r.dataType) = "TMAX"
- and text(r.dataType) = "TMIN"
-)
-;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07_tmax.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07_tmax.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07_tmax.mrql
deleted file mode 100644
index 6863dc1..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07_tmax.mrql
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count(
- select (rtmax)
- from rtmax in source(xml, "sample_xml/gsn_sensors.xml", {"data"})
- where text(r.dataType) = "TMAX"
-)
-;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07_tmin.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07_tmin.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07_tmin.mrql
deleted file mode 100644
index f2b98b7..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_gsn/q07_tmin.mrql
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count(
- select (rtmin)
- from rtmin in source(xml, "sample_xml/gsn_sensors.xml", {"data"})
- where text(r.dataType) = "TMIN"
-)
-;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q00.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q00.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q00.mrql
deleted file mode 100644
index c3e9ddd..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q00.mrql
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-select (r)
-from r in source(xml, "sample_xml/hcn_sensors.xml", {"data"})
-where text(r.station) = "GHCND:USW00012836"
- and toInt(substring(text(r.date), 0, 4)) >= 2003
- and toInt(substring(text(r.date), 5, 7)) = 12
- and toInt(substring(text(r.date), 8, 10)) = 25
-;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q01.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q01.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q01.mrql
deleted file mode 100644
index 206b391..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q01.mrql
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-select (r)
-from r in source(xml, "sample_xml/hcn_sensors.xml", {"data"})
-where text(r.dataType) = "AWND"
- and toInt(text(r.value)) > 491.744
-;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q02.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q02.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q02.mrql
deleted file mode 100644
index d80a259..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q02.mrql
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-sum(
- select (toInt(text(r.value)))
- from r in source(xml, "sample_xml/hcn_sensors.xml", {"data"})
- where text(r.station) = "GHCND:USW00014771"
- and toInt(substring(text(r.date), 0, 4)) = 1999
- and text(r.dataType) = "PRCP"
-) / 10
-;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q03.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q03.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q03.mrql
deleted file mode 100644
index 0ac697e..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q03.mrql
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-max(
- select (toInt(text(r.value)))
- from r in source(xml, "sample_xml/hcn_sensors.xml", {"data"})
- where text(r.dataType) = "TMAX"
-) / 10
-;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q04.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q04.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q04.mrql
deleted file mode 100644
index 1084afb..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q04.mrql
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-select (r)
-from r in source(xml, "sample_xml/hcn_sensors.xml", {"data"}),
- t in source(xml, "sample_xml/hcn_stations.xml", {"station"}),
- l in t.locationLabels
-where text(t.id) = text(r.station)
- and text(r.date) = "1976-07-04T00:00:00.000"
- and text(l.displayName) = "WASHINGTON"
-;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q04_sensor.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q04_sensor.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q04_sensor.mrql
deleted file mode 100644
index 7f4b065..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q04_sensor.mrql
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-select (r)
-from r in source(xml, "sample_xml/hcn_sensors.xml", {"data"})
-where text(r.date) = "1976-07-04T00:00:00.000"
- and text(r.dataType) = "TMAX"
-;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q04_station.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q04_station.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q04_station.mrql
deleted file mode 100644
index 53013f5..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q04_station.mrql
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count(
- select (r)
- from r in source(xml, "sample_xml/hcn_sensors.xml", {"data"}),
- t in source(xml, "sample_xml/hcn_stations.xml", {"station"}),
- l in t.locationLabels
- where text(l.displayName) = "WASHINGTON"
-)
-;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q05.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q05.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q05.mrql
deleted file mode 100644
index 6dcd0b7..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q05.mrql
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-min(
- select (toInt(text(r.value)))
- from r in source(xml, "sample_xml/hcn_sensors.xml", {"data"}),
- t in source(xml, "sample_xml/hcn_stations.xml", {"station"}),
- l in t.locationLabels
- where text(t.id) = text(r.station)
- and toInt(substring(text(r.date), 0, 4)) = 2001
- and text(r.dataType) = "TMIN"
- and text(l.id) = "FIPS:US"
-) / 10
-;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q05_sensor.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q05_sensor.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q05_sensor.mrql
deleted file mode 100644
index c481632..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q05_sensor.mrql
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count(
- select (toInt(text(r.value)))
- from r in source(xml, "sample_xml/hcn_sensors.xml", {"data"})
- where toInt(substring(text(r.date), 0, 4)) = 2001
- and text(r.dataType) = "TMIN"
-)
-;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q05_station.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q05_station.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q05_station.mrql
deleted file mode 100644
index a040b7c..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q05_station.mrql
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count(
- select (t)
- from t in source(xml, "sample_xml/hcn_stations.xml", {"station"}),
- l in t.locationLabels
- where text(l.id) = "FIPS:US"
-)
-;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q06.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q06.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q06.mrql
deleted file mode 100644
index a68ab4a..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q06.mrql
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-select (n, d, v)
-from r in source(xml, "sample_xml/hcn_sensors.xml", {"data"}),
- d in r.date,
- v in r.value,
- t in source(xml, "sample_xml/hcn_stations.xml", {"station"}),
- n in t.displayName,
- l in t.locationLabels
-where text(t.id) = text(r.station)
- and toInt(substring(text(d), 0, 4)) = 2000
- and text(r.dataType) = "TMAX"
- and text(l.displayName) = "WASHINGTON"
-;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q06_sensor.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q06_sensor.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q06_sensor.mrql
deleted file mode 100644
index e5e26cd..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q06_sensor.mrql
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count(
- select (r.date, r.value)
- from r in source(xml, "sample_xml/hcn_sensors.xml", {"data"})
- where toInt(substring(text(r.date), 0, 4)) = 2000
- and text(r.dataType) = "TMAX"
-)
-;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q06_station.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q06_station.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q06_station.mrql
deleted file mode 100644
index 99aaed6..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q06_station.mrql
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count(
- select (t.displayName)
- from t in source(xml, "sample_xml/hcn_stations.xml", {"station"}),
- l in t.locationLabels
- where text(l.displayName) = "WASHINGTON"
-)
-;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07.mrql
deleted file mode 100644
index e6d680c..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07.mrql
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-avg(
- select (toInt(text(rtmax.value))-toInt(text(rtmin.value)))
- from rtmax in source(xml, "sample_xml/hcn_sensors.xml", {"data"}),
- rtmin in source(xml, "sample_xml/hcn_sensors.xml", {"data"})
- where text(rtmax.date) = text(rtmin.date)
- and text(rtmax.station) = text(rtmin.station)
- and text(rtmax.dataType) = "TMAX"
- and text(rtmin.dataType) = "TMIN"
-) / 10
-;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07_join_count.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07_join_count.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07_join_count.mrql
deleted file mode 100644
index 5d83f85..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07_join_count.mrql
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count( /* number of joined (TMAX, TMIN) reading pairs */
- select (toInt(text(rtmax.value))-toInt(text(rtmin.value)))
- from rtmax in source(xml, "sample_xml/hcn_sensors.xml", {"data"}),
- rtmin in source(xml, "sample_xml/hcn_sensors.xml", {"data"})
- where text(rtmax.date) = text(rtmin.date) /* self-join on date ... */
- and text(rtmax.station) = text(rtmin.station) /* ... and station */
- and text(rtmax.dataType) = "TMAX"
- and text(rtmin.dataType) = "TMIN"
-)
-;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07_tmax.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07_tmax.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07_tmax.mrql
deleted file mode 100644
index 579c855..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07_tmax.mrql
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count( /* number of TMAX readings in the sensor data */
- select (rtmax)
- from rtmax in source(xml, "sample_xml/hcn_sensors.xml", {"data"})
- where text(rtmax.dataType) = "TMAX"
-)
-;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07_tmin.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07_tmin.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07_tmin.mrql
deleted file mode 100644
index bf5f423..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_hcn/q07_tmin.mrql
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-count( /* number of TMIN readings in the sensor data */
- select (rtmin)
- from rtmin in source(xml, "sample_xml/hcn_sensors.xml", {"data"})
- where text(rtmin.dataType) = "TMIN"
-)
-;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q00.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q00.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q00.mrql
deleted file mode 100644
index d4bd10b..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q00.mrql
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-select (r)
-from r in source(xml, "sample_xml/nano_sensors.xml", {"data"})
-where text(r.station) = "GHCND:AS000000003"
- and toInt(substring(text(r.date), 0, 4)) >= 2000
- and toInt(substring(text(r.date), 5, 7)) = 3
- and toInt(substring(text(r.date), 8, 10)) = 3
-;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q01.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q01.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q01.mrql
deleted file mode 100644
index 8f100df..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q01.mrql
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-select (r)
-from r in source(xml, "sample_xml/nano_sensors.xml", {"data"})
-where text(r.dataType) = "AWND"
- and toFloat(text(r.value)) > 491.744
-;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q02.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q02.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q02.mrql
deleted file mode 100644
index 3bba05f..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q02.mrql
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-sum(
- select (toFloat(text(r.value)))
- from r in source(xml, "sample_xml/nano_sensors.xml", {"data"})
- where text(r.station) = "GHCND:US000000002"
- and toInt(substring(text(r.date), 0, 4)) = 2002
- and text(r.dataType) = "PRCP"
-) / 10
-;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q03.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q03.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q03.mrql
deleted file mode 100644
index a6f9afe..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q03.mrql
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-max(
- select (toFloat(text(r.value)))
- from r in source(xml, "sample_xml/nano_sensors.xml", {"data"})
- where text(r.dataType) = "TMAX"
-) / 10
-;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q04.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q04.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q04.mrql
deleted file mode 100644
index 4d24016..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q04.mrql
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-select (sensors)
-from sensors in source(xml, "sample_xml/nano_sensors.xml", {"data"}),
- stations in source(xml, "sample_xml/nano_stations.xml", {"station"}),
- l in stations.locationLabels
-where text(stations.id) = text(sensors.station)
- and text(sensors.date) = "2002-02-02T00:00:00.000"
- and text(l.displayName) = "State 1"
-;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q05.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q05.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q05.mrql
deleted file mode 100644
index 0fdb641..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q05.mrql
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-min(
- select (toFloat(text(sensors.value)))
- from sensors in source(xml, "sample_xml/nano_sensors.xml", {"data"}),
- stations in source(xml, "sample_xml/nano_stations.xml", {"station"}),
- l in stations.locationLabels
- where text(stations.id) = text(sensors.station)
- and toInt(substring(text(sensors.date), 0, 4)) = 2001
- and text(sensors.dataType) = "TMIN"
- and text(l.id) = "FIPS:US"
-) / 10
-;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q06.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q06.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q06.mrql
deleted file mode 100644
index c44f70c..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q06.mrql
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-select (n, d, v)
-from sensors in source(xml, "sample_xml/nano_sensors.xml", {"data"}),
- d in sensors.date,
- v in sensors.value,
- stations in source(xml, "sample_xml/nano_stations.xml", {"station"}),
- n in stations.displayName,
- l in stations.locationLabels
-where text(stations.id) = text(sensors.station)
- and toInt(substring(text(d), 0, 4)) = 2002
- and text(sensors.dataType) = "TMAX"
-;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q07.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q07.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q07.mrql
deleted file mode 100644
index 9046181..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_test/q07.mrql
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-avg(
- select (toFloat(text(rtmax.value))-toFloat(text(rtmin.value)))
- from rtmax in source(xml, "sample_xml/nano_sensors.xml", {"data"}),
- rtmin in source(xml, "sample_xml/nano_sensors.xml", {"data"})
- where text(rtmax.date) = text(rtmin.date)
- and text(rtmax.station) = text(rtmin.station)
- and text(rtmax.dataType) = "TMAX"
- and text(rtmin.dataType) = "TMIN"
-) / 10
-;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04.xq
deleted file mode 100644
index 8f513ce..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q04.xq
+++ /dev/null
@@ -1,30 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License. :)
-
-(: XQuery Join Query :)
-(: Find all the weather readings for Washington state for a specific day :)
-(: 1976/7/4. :)
-let $sensor_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
-for $r in collection($sensor_collection)/root/dataCollection/data
-
-let $station_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/stations/?select=*.xml;recurse=yes"
-for $s in collection($station_collection)/root/stationCollection/station
-
-where $s/id eq $r/station
- and (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "WASHINGTON"))
- and xs:dateTime(fn:data($r/date)) eq xs:dateTime("1976-07-04T00:00:00.000")
-return $r
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05.xq
deleted file mode 100644
index 5f452c0..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q05.xq
+++ /dev/null
@@ -1,33 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License. :)
-
-(: XQuery Join Aggregate Query :)
-(: Find the lowest recorded temperature (TMIN) in the United States for :)
-(: 2001. :)
-fn:min(
- let $sensor_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
- for $r in collection($sensor_collection)/root/dataCollection/data
-
- let $station_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/stations/?select=*.xml;recurse=yes"
- for $s in collection($station_collection)/root/stationCollection/station
-
- where $s/id eq $r/station
- and (some $x in $s/locationLabels satisfies ($x/type eq "CNTRY" and $x/id eq "FIPS:US"))
- and $r/dataType eq "TMIN"
- and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2001
- return $r/value
-) div 10
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06.xq
deleted file mode 100644
index 2c02bc7..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/saxon/q06.xq
+++ /dev/null
@@ -1,30 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License. :)
-
-(: XQuery Join Query :)
-(: Find the highest recorded temperature (TMAX) for each station for each :)
-(: day over the year 2000. :)
-let $sensor_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/sensors/?select=*.xml;recurse=yes"
-for $r in collection($sensor_collection)/root/dataCollection/data
-
-let $station_collection := "../../../../../../../weather_data/dataset-tiny-local/data_links/local_speed_up/d0_p1_i0/stations/?select=*.xml;recurse=yes"
-for $s in collection($station_collection)/root/stationCollection/station
-
-where $s/id eq $r/station
- and $r/dataType eq "TMAX"
- and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2000
-return ($s/displayName, $r/date, $r/value)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/no_result.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/no_result.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/no_result.xq
deleted file mode 100644
index c1363e3..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/no_result.xq
+++ /dev/null
@@ -1,24 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License. :)
-
-(:
-Query used to have VXQuery only parse all files without producing results.
-:)
-let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-for $r in collection($sensor_collection)/dataCollection/data
-where fn:false()
-return $r
http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q00.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q00.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q00.xq
deleted file mode 100644
index 5006a21..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q00.xq
+++ /dev/null
@@ -1,31 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License. :)
-(:
-XQuery Filter Query
--------------------
-See historical data for Key West International Airport, FL (USW00012836)
-station by selecting the weather readings for December 25 over the last
-10 years.
-:)
-let $collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-for $r in collection($collection)/dataCollection/data
-let $datetime := xs:dateTime(fn:data($r/date))
-where $r/station eq "GHCND:USW00012836"
- and fn:year-from-dateTime($datetime) ge 2003
- and fn:month-from-dateTime($datetime) eq 12
- and fn:day-from-dateTime($datetime) eq 25
-return $r
\ No newline at end of file
[10/14] git commit: Getting MRQL scripts ready for prime time.
Posted by pr...@apache.org.
Getting MRQL scripts ready for prime time.
Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/eefadb25
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/eefadb25
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/eefadb25
Branch: refs/heads/master
Commit: eefadb254a1a30a191d01778fa62c2d7d9582593
Parents: 17bedfa
Author: Preston Carman <pr...@apache.org>
Authored: Wed Oct 8 15:11:40 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Wed Oct 8 15:11:40 2014 -0700
----------------------------------------------------------------------
.../noaa-ghcn-daily/other_systems/mrql/q06.mrql | 2 +-
.../other_systems/mrql_scripts/clear.sh | 9 ++++
.../mrql_scripts/run_group_test.sh | 55 ++++++++++++++++++++
.../mrql_scripts/run_mrql_tests.sh | 5 +-
.../noaa-ghcn-daily/scripts/run_mrql_tests.sh | 42 ---------------
5 files changed, 69 insertions(+), 44 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/vxquery/blob/eefadb25/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06.mrql
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06.mrql
index 583a5b9..a50dfe2 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06.mrql
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q06.mrql
@@ -19,7 +19,7 @@ from sensors in source(xml, args[0], {"data"}),
d in sensors.date,
v in sensors.value,
stations in source(xml, args[1], {"station"}),
- n in stations.displayName,
+ n in stations.displayName
where text(stations.id) = text(sensors.station)
and toInt(substring(text(d), 0, 4)) = 2000
and text(sensors.dataType) = "TMAX"
http://git-wip-us.apache.org/repos/asf/vxquery/blob/eefadb25/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/clear.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/clear.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/clear.sh
index da7cabe..b775de2 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/clear.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/clear.sh
@@ -16,4 +16,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+
+# Hadoop data reset
hadoop namenode -format
+
+# Remove data
+rm -rf disk1/hadoop/data
+rm -rf disk2/hadoop/data
+rm -rf disk1/hadoop/tmp
+rm -rf disk1/hadoop/logs
+
http://git-wip-us.apache.org/repos/asf/vxquery/blob/eefadb25/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
new file mode 100755
index 0000000..c34ec95
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+NODES=2
+REPEAT=1
+
+# Start Hadoop
+sh saved/hadoop/hadoop-1.2.1/bin/start-all.sh
+
+# Prepare hadoop file system
+hadoop fs -mkdir all
+hadoop fs -mkdir all/sensors
+hadoop fs -mkdir all/stations
+
+
+# Upload test data, one sensor and one station block per node (seq advances
+# the index; a while loop without an increment would never terminate).
+for n in $(seq 0 $((NODES - 1)))
+do
+ # Add each sensor block
+ cp saved/backups/mr/all_sensors_${n}.xml.gz disk1/hadoop/upload/
+ gunzip disk1/hadoop/upload/all_sensors_${n}.xml.gz
+ hadoop fs -copyFromLocal disk1/hadoop/upload/all_sensors_${n}.xml all/sensors
+ rm -f disk1/hadoop/upload/all_sensors_${n}.xml
+
+ # Add each station block
+ cp saved/backups/mr/all_stations_${n}.xml.gz disk1/hadoop/upload/
+ gunzip disk1/hadoop/upload/all_stations_${n}.xml.gz
+ hadoop fs -copyFromLocal disk1/hadoop/upload/all_stations_${n}.xml all/stations
+ rm -f disk1/hadoop/upload/all_stations_${n}.xml
+done
+
+
+# Start test
+sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/ ${NODES} ${REPEAT}
+
+
+# Stop Hadoop
+sh saved/hadoop/hadoop-1.2.1/bin/stop-all.sh
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/eefadb25/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
index 10ab4d9..1e512e1 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh
@@ -25,11 +25,15 @@ REPEAT=${3}
DATASET="all"
+# Make log folder
+mkdir -p ~/disk1/weather_data/mrql/query_logs/${NODES}nodes/
+
for j in $(find ${1} -name '*q??.mrql')
do
date
echo "Running MRQL query: ${j}"
- time for i in {1..${REPEAT}}; do ~/mrql/incubator-mrql/bin/mrql -dist -nodes ${NODES} ${j} ${DATASET}/sensors.xml ${DATASET}/stations.xml >> ~/disk1/weather_data/mrql/query_logs/$(basename "${j}").log 2>&1; done;
+ # Brace expansion happens before variable expansion, so {1..${REPEAT}} would run only once; use seq to honor REPEAT.
+ time for i in $(seq 1 ${REPEAT}); do ~/mrql/incubator-mrql/bin/mrql -dist -nodes ${NODES} ${j} ${DATASET}/sensors/ ${DATASET}/stations/ >> ~/disk1/weather_data/mrql/query_logs/${NODES}nodes/$(basename "${j}").log 2>&1; done;
done
http://git-wip-us.apache.org/repos/asf/vxquery/blob/eefadb25/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
deleted file mode 100755
index a6788be..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-export JAVA_HOME=/home/ecarm002/java/jdk1.6.0_45
-REPEAT=${1}
-DATASET="hcn"
-
-for n in `seq 0 7`
-#for n in 0
-do
- date
- echo "Running q0${n} on ${DATASET} for MRQL."
- time for i in {1..${REPEAT}}; do ~/mrql/incubator-mrql/bin/mrql -dist -nodes 5 ~/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_${DATASET}/q0${n}.mrql >> weather_data/mrql/query_logs/${DATASET}/q0${n}.mrql.log 2>&1; done;
-done
-
-if which programname >/dev/null;
-then
- echo "Sending out e-mail notification."
- SUBJECT="MRQL Tests Finished (${DATASET})"
- EMAIL="ecarm002@ucr.edu"
- /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
- Completed all MRQL tests on ${DATASET}.
- EOM
-else
- echo "No mail command to use."
-fi;
\ No newline at end of file
[05/14] copy of all changes in exrt benchmark queries that is
copyright free.
Posted by pr...@apache.org.
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_sensor.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_sensor.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_sensor.xq
new file mode 100644
index 0000000..15b5160
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_sensor.xq
@@ -0,0 +1,29 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+
+(:
+XQuery Join Query
+-------------------
+Count all the weather sensor readings on 1976-07-04.
+:)
+count(
+ let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+ for $r in collection($sensor_collection)/dataCollection/data
+ let $date := xs:date(fn:substring(xs:string(fn:data($r/date)), 0, 11))
+ where $date eq xs:date("1976-07-04")
+ return $r
+)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_station.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_station.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_station.xq
new file mode 100644
index 0000000..d21fe37
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_station.xq
@@ -0,0 +1,28 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+
+(:
+XQuery Join Query
+-------------------
+Count all the weather stations for Washington state.
+:)
+count(
+ let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
+ for $s in collection($station_collection)/stationCollection/station
+ where (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "WASHINGTON"))
+ return $s
+)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq
new file mode 100644
index 0000000..c95f3f5
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq
@@ -0,0 +1,33 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+
+(: XQuery Join Aggregate Query :)
+(: Find the lowest recorded temperature (TMIN) in the United States for :)
+(: 2001. :)
+fn:min(
+ let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
+ for $s in collection($station_collection)/stationCollection/station
+
+ let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+ for $r in collection($sensor_collection)/dataCollection/data
+
+ where $s/id eq $r/station
+ and (some $x in $s/locationLabels satisfies ($x/type eq "CNTRY" and $x/id eq "FIPS:US"))
+ and $r/dataType eq "TMIN"
+ and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2001
+ return $r/value
+) div 10
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_join.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_join.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_join.xq
new file mode 100644
index 0000000..76e3458
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_join.xq
@@ -0,0 +1,35 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+
+(:
+XQuery Join Aggregate Query
+-------------------
+Count the TMIN sensor readings joined to United States stations for 2001.
+:)
+fn:count(
+ let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
+ for $s in collection($station_collection)/stationCollection/station
+
+ let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+ for $r in collection($sensor_collection)/dataCollection/data
+
+ where $s/id eq $r/station
+ and (some $x in $s/locationLabels satisfies ($x/type eq "CNTRY" and $x/id eq "FIPS:US"))
+ and $r/dataType eq "TMIN"
+ and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2001
+ return $r
+)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_sensor.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_sensor.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_sensor.xq
new file mode 100644
index 0000000..3b1046b
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_sensor.xq
@@ -0,0 +1,31 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+
+(:
+XQuery Join Aggregate Query
+-------------------
+Count all sensor readings for TMIN in 2001.
+:)
+count(
+ let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+ for $r in collection($sensor_collection)/dataCollection/data
+
+ let $date := xs:date(fn:substring(xs:string(fn:data($r/date)), 0, 11))
+ where $r/dataType eq "TMIN"
+ and fn:year-from-date($date) eq 2001
+ return $r/value
+)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_station.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_station.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_station.xq
new file mode 100644
index 0000000..7c2a7ef
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_station.xq
@@ -0,0 +1,28 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+
+(:
+XQuery Join Aggregate Query
+-------------------
+Count all stations in the United States.
+:)
+count(
+ let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
+ for $s in collection($station_collection)/stationCollection/station
+ where (some $x in $s/locationLabels satisfies ($x/type eq "CNTRY" and $x/id eq "FIPS:US"))
+ return $s
+)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06.xq
new file mode 100644
index 0000000..5c8ed54
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06.xq
@@ -0,0 +1,30 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+
+(: XQuery Join Query :)
+(: Find the highest recorded temperature (TMAX) for each station for each :)
+(: day over the year 2000. :)
+let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
+for $s in collection($station_collection)/stationCollection/station
+
+let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+for $r in collection($sensor_collection)/dataCollection/data
+
+where $s/id eq $r/station
+ and $r/dataType eq "TMAX"
+ and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2000
+return ($s/displayName, $r/date, $r/value)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_join.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_join.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_join.xq
new file mode 100644
index 0000000..bad6406
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_join.xq
@@ -0,0 +1,34 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+
+(:
+XQuery Join Query
+-------------------
+Count the TMAX sensor readings joined to their stations for the year 2000.
+:)
+fn:count(
+ let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
+ for $s in collection($station_collection)/stationCollection/station
+
+ let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+ for $r in collection($sensor_collection)/dataCollection/data
+
+ where $s/id eq $r/station
+ and $r/dataType eq "TMAX"
+ and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2000
+ return $r
+)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_sensor.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_sensor.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_sensor.xq
new file mode 100644
index 0000000..54d81c6
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_sensor.xq
@@ -0,0 +1,29 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+
+(:
+XQuery Join Query
+-------------------
+Count max temperature (TMAX) readings for the year 2000.
+:)
+count(
+ let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+ for $r in collection($sensor_collection)/dataCollection/data
+ where $r/dataType eq "TMAX"
+ and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2000
+ return $r
+)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_station.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_station.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_station.xq
new file mode 100644
index 0000000..c94dc78
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_station.xq
@@ -0,0 +1,27 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+
+(:
+XQuery Join Query
+-------------------
+Count all the stations.
+:)
+count(
+ let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
+ for $s in collection($station_collection)/stationCollection/station
+ return $s
+)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07.xq
new file mode 100644
index 0000000..5b1f2ac
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07.xq
@@ -0,0 +1,33 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+
+(: XQuery Self Join Query :)
+(: Self join with all stations finding the difference in min and max :)
+(: temperature and get the average. :)
+fn:avg(
+ let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+ for $r_min in collection($sensor_collection_min)/dataCollection/data
+
+ let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+ for $r_max in collection($sensor_collection_max)/dataCollection/data
+
+ where $r_min/station eq $r_max/station
+ and $r_min/date eq $r_max/date
+ and $r_min/dataType eq "TMIN"
+ and $r_max/dataType eq "TMAX"
+ return $r_max/value - $r_min/value
+) div 10
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_join.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_join.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_join.xq
new file mode 100644
index 0000000..0ddada0
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_join.xq
@@ -0,0 +1,35 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+
+(:
+XQuery Join Query
+-------------------
+Count the TMIN/TMAX reading pairs that share the same station and date.
+:)
+fn:count(
+ let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+ for $r_min in collection($sensor_collection_min)/dataCollection/data
+
+ let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+ for $r_max in collection($sensor_collection_max)/dataCollection/data
+
+ where $r_min/station eq $r_max/station
+ and $r_min/date eq $r_max/date
+ and $r_min/dataType eq "TMIN"
+ and $r_max/dataType eq "TMAX"
+ return $r_max
+)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_tmax.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_tmax.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_tmax.xq
new file mode 100644
index 0000000..0b5511f
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_tmax.xq
@@ -0,0 +1,28 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+
+(:
+XQuery Join Query
+-------------------
+Count all the records for TMAX.
+:)
+count(
+ let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+ for $r_max in collection($sensor_collection_max)/dataCollection/data
+ where $r_max/dataType eq "TMAX"
+ return $r_max
+)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_tmin.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_tmin.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_tmin.xq
new file mode 100644
index 0000000..fda029a
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_tmin.xq
@@ -0,0 +1,28 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+
+(:
+XQuery Join Query
+-------------------
+Count all the records for TMIN.
+:)
+count(
+ let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+ for $r_min in collection($sensor_collection_min)/dataCollection/data
+ where $r_min/dataType eq "TMIN"
+ return $r_min
+)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/README.md
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/README.md b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/README.md
new file mode 100644
index 0000000..58bea51
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/README.md
@@ -0,0 +1,51 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+Weather Data Conversion To XML
+=====================
+
+# Introduction
+
+The NOAA has hosted DAILY GLOBAL HISTORICAL CLIMATOLOGY NETWORK (GHCN-DAILY)
+.dat files. Weather.gov has an RSS/XML feed that gives current weather sensor
+readings. Using the RSS feed as a template, the GHCN-DAILY historical
+information is used to generate past RSS feed XML documents. The process allows
+testing on a large set of information without having to continually monitor
+the weather.gov site for all the weather details for years.
+
+# Detailed Description
+
+Detailed GHCN-DAILY information:
+<http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt>
+
+The process takes a save folder for the data. The folder contains several
+folders:
+
+ - all_xml_files (The generated xml files for a given package)
+ - downloads (All files taken from the NOAA HTTP site)
+ - dataset-[name] (all files related to a single dataset)
+
+
+# Example commands
+
+Building
+
+
+Partitioning
+python weather_cli.py -x weather_example.xml
+
+Linking
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties
new file mode 100644
index 0000000..2fb0af0
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties
@@ -0,0 +1 @@
+java.util.logging.ConsoleHandler.level=OFF
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
new file mode 100755
index 0000000..88339bd
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Run all the queries and save a log.
+# First argument: Supply the folder which houses all the queries (recursive).
+# Second argument: adds options to the VXQuery CLI.
+# Third argument (optional): only run the queries whose path matches this value.
+#
+# run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/
+# run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "-client-net-ip-address 169.235.27.138"
+# run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "" q03
+#
+REPEAT=5
+IGNORE=2
+FRAME_SIZE=$((8*1024))
+BUFFER_SIZE=$((32*1024*1024))
+JOIN_HASH_SIZE=-1
+
+if [ -z "${1}" ]
+then
+ echo "Please supply a directory for query files to be found."
+ exit
+fi
+
+export JAVA_OPTS="$JAVA_OPTS -server -Xmx8G -XX:+HeapDumpOnOutOfMemoryError -Djava.util.logging.config.file=./vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties"
+
+for j in $(find ${1} -name '*q??.xq')
+do
+ if [ -z "${3}" ] || [[ "${j}" =~ "${3}" ]]
+ then
+ date
+ echo "Running query: ${j}"
+ log_file="$(basename ${j}).$(date +%Y%m%d%H%M).log"
+ log_base_path=$(dirname ${j/queries/query_logs})
+ mkdir -p ${log_base_path}
+ time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${2} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -join-hash-size ${JOIN_HASH_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1
+ echo "\nBuffer Size: ${BUFFER_SIZE}" >> ${log_base_path}/${log_file}
+ echo "\nFrame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file}
+ echo "\nJoin Hash Size: ${JOIN_HASH_SIZE}" >> ${log_base_path}/${log_file}
+ fi;
+done
+
+if which programname >/dev/null;
+then
+ echo "Sending out e-mail notification."
+ SUBJECT="Benchmark Tests Finished"
+ EMAIL="ecarm002@ucr.edu"
+ /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
+ Completed all tests in folder ${1}.
+ EOM
+else
+ echo "No mail command to use."
+fi;
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
new file mode 100755
index 0000000..98ab04b
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
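+# Run all the queries for one cluster size and save a log.
+# First argument: Supply the folder which houses all the queries (recursive).
+# Second argument: the number of nodes; selects the <n>nodes.xml cluster
+#                  configuration and the queries under a "<n>nodes" folder.
+# Third argument: adds options to the VXQuery CLI.
+# Fourth argument (optional): only run the queries whose path matches this value.
+#
+# run_benchmark_cluster.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ 2
+# run_benchmark_cluster.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ 2 "-client-net-ip-address 169.235.27.138"
+# run_benchmark_cluster.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ 2 "" q03
+#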
+# Cluster configuration folder name and the settings handed to the VXQuery CLI.
+CLUSTER="uci"
+REPEAT=5
+FRAME_SIZE=$((8*1024))
+BUFFER_SIZE=$((32*1024*1024))
+#JOIN_HASH_SIZE=$((256*1024*1024))
+JOIN_HASH_SIZE=-1
+
+if [ -z "${1}" ]
+then
+    echo "Please supply a directory for query files to be found."
+    # Report the usage error through the exit status as well.
+    exit 1
+fi
+
+if [ -z "${2}" ]
+then
+    echo "Please supply the number of nodes (start at 0)."
+    exit 1
+fi
+
+# Run queries for the specified number of nodes.
+echo "Starting ${2} cluster nodes"
+python vxquery-server/src/main/resources/scripts/cluster_cli.py -c "vxquery-server/src/main/resources/conf/${CLUSTER}/${2}nodes.xml" -a start
+
+# wait for cluster to finish setting up
+sleep 5
+
+export JAVA_OPTS="$JAVA_OPTS -server -Xmx8G -XX:+HeapDumpOnOutOfMemoryError -Djava.util.logging.config.file=./vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties"
+
+# Query paths are split on whitespace, so paths containing blanks are not supported.
+for j in $(find "${1}" -name '*q??.xq')
+do
+    # Only work with the queries generated for this number of nodes.
+    if [[ "${j}" =~ "${2}nodes" ]]
+    then
+        # Only run for specified queries.
+        if [ -z "${4}" ] || [[ "${j}" =~ "${4}" ]]
+        then
+            date
+            echo "Running query: ${j}"
+            log_file="$(basename "${j}").$(date +%Y%m%d%H%M).log"
+            # Logs mirror the query tree, with "queries" replaced by "query_logs".
+            log_base_path=$(dirname "${j/queries/query_logs}")
+            mkdir -p "${log_base_path}"
+            # ${3} stays unquoted on purpose: it carries several CLI options that must be word split.
+            time sh ./vxquery-cli/target/appassembler/bin/vxq "${j}" ${3} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -join-hash-size ${JOIN_HASH_SIZE} -repeatexec ${REPEAT} > "${log_base_path}/${log_file}" 2>&1
+            # printf is used because bash's echo does not interpret "\n" without -e.
+            printf '\nBuffer Size: %s\n' "${BUFFER_SIZE}" >> "${log_base_path}/${log_file}"
+            printf '\nFrame Size: %s\n' "${FRAME_SIZE}" >> "${log_base_path}/${log_file}"
+            printf '\nJoin Hash Size: %s\n' "${JOIN_HASH_SIZE}" >> "${log_base_path}/${log_file}"
+        fi
+    fi
+done
+
+# Stop cluster.
+python vxquery-server/src/main/resources/scripts/cluster_cli.py -c "vxquery-server/src/main/resources/conf/${CLUSTER}/${2}nodes.xml" -a stop
+
+# Send the notification only when a mail command is actually installed.
+if command -v mail >/dev/null 2>&1
+then
+    echo "Sending out e-mail notification."
+    SUBJECT="Benchmark Cluster Tests Finished"
+    EMAIL="ecarm002@ucr.edu"
+    echo "Completed all tests in folder ${1} for a ${2} node cluster using ${HOSTNAME}." | mail -s "${SUBJECT}" "${EMAIL}"
+else
+    echo "No mail command to use."
+fi
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh
new file mode 100755
index 0000000..58976b7
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Runs run_benchmark_cluster.sh for a predefined group of node counts, tests,
+# partitions and core counts.
+# First argument: IP address passed to the CLI as -client-net-ip-address.
+# Second argument: base weather folder that contains the ${DATASET} folder.
+DATASET="dataset-hcn-d2"
+cluster_ip=${1}
+base_weather_folder=${2}
+
+if [ -z "${cluster_ip}" ] || [ -z "${base_weather_folder}" ]
+then
+    echo "Please supply the cluster IP address and the base weather folder."
+    exit 1
+fi
+
+for n in 7 6 5 3 4 2 1 0
+do
+    #for t in "batch_scale_out" "speed_up"
+    for t in "batch_scale_out"
+    #for t in "speed_up"
+    do
+        for p in 2
+        do
+            for c in 4
+            do
+                echo " ==== node ${n} test ${t} partition ${p} cores ${c} ===="
+                # The cluster script relies on bash features ([[ =~ ]], ${var/a/b}),
+                # so it is started with bash rather than sh.
+                bash vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh "${base_weather_folder}/${DATASET}/queries/${t}/${n}nodes/d2_p${p}/" ${n} "-client-net-ip-address ${cluster_ip} -available-processors ${c}"
+            done
+        done
+    done
+done
+
+# Send the notification only when a mail command is actually installed.
+if command -v mail >/dev/null 2>&1
+then
+    echo "Sending out e-mail notification."
+    SUBJECT="Benchmark Group Tests Finished"
+    EMAIL="ecarm002@ucr.edu"
+    echo "Completed all tests in the predefined group for ${DATASET}." | mail -s "${SUBJECT}" "${EMAIL}"
+else
+    echo "No mail command to use."
+fi
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
new file mode 100755
index 0000000..a6788be
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export JAVA_HOME=/home/ecarm002/java/jdk1.6.0_45
+REPEAT=${1}
+DATASET="hcn"
+
+for n in `seq 0 7`
+#for n in 0
+do
+ date
+ echo "Running q0${n} on ${DATASET} for MRQL."
+ time for i in {1..${REPEAT}}; do ~/mrql/incubator-mrql/bin/mrql -dist -nodes 5 ~/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_${DATASET}/q0${n}.mrql >> weather_data/mrql/query_logs/${DATASET}/q0${n}.mrql.log 2>&1; done;
+done
+
+if which programname >/dev/null;
+then
+ echo "Sending out e-mail notification."
+ SUBJECT="MRQL Tests Finished (${DATASET})"
+ EMAIL="ecarm002@ucr.edu"
+ /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
+ Completed all MRQL tests on ${DATASET}.
+ EOM
+else
+ echo "No mail command to use."
+fi;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
new file mode 100644
index 0000000..4f81f86
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
@@ -0,0 +1,377 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os.path
+import linecache
+import distutils.core
+import fileinput
+import socket
+
+from weather_config import *
+from weather_data_files import *
+
+# Weather data files created to manage the conversion process.
+# Allows partition and picking up where you left off.
+#
+# benchmark_name/
+# data/
+# queries/
+# logs/
+class WeatherBenchmark:
+ # Prints partition schemes, builds the symbolic-link folders that point at the
+ # partitioned data, and copies the query files with their collection path rewritten.
+ # NOTE(review): leading indentation was lost in this copy of the file; the
+ # comments below describe only what the statements themselves show.
+
+ # Folder name, appended to a base path, that holds the symbolic links.
+ DATA_LINKS_FOLDER = "data_links/"
+ LARGE_FILE_ROOT_TAG = WeatherDataFiles.LARGE_FILE_ROOT_TAG
+ # Collection path prefix searched for (and replaced) in the copied query files.
+ QUERY_REPLACEMENT_KEY = "/tmp/1.0_partition_ghcnd_all_xml/"
+ # Folder the master query files are copied from (relative path).
+ QUERY_MASTER_FOLDER = "../queries/"
+ QUERY_FILE_LIST = [
+ "q00.xq",
+ "q01.xq",
+ "q02.xq",
+ "q03.xq",
+ "q04.xq",
+ "q05.xq",
+ "q06.xq",
+ "q07.xq"
+ ]
+ QUERY_UTILITY_LIST = [
+ "no_result.xq",
+ "sensor_count.xq",
+ "station_count.xq",
+ "q04_sensor.xq",
+ "q04_station.xq",
+ "q05_sensor.xq",
+ "q05_station.xq",
+ "q06_sensor.xq",
+ "q06_station.xq",
+ "q07_tmin.xq",
+ "q07_tmax.xq",
+ ]
+ # Test names handled by the local and by the cluster code paths.
+ BENCHMARK_LOCAL_TESTS = ["local_speed_up", "local_batch_scale_out"]
+ BENCHMARK_CLUSTER_TESTS = ["speed_up", "batch_scale_out"]
+ # Collection sub-folders that get links and query path replacements.
+ QUERY_COLLECTIONS = ["sensors", "stations"]
+
+ # Joins several collection paths into one replacement string.
+ SEPERATOR = "|"
+
+ # Stores the configuration used by every method:
+ #   base_paths - sequence of path strings (reported as "Disks" when printing)
+ #   partitions - collection of partition counts (iterated, tested with "in")
+ #   dataset    - object providing get_tests() and get_partition_type()
+ #   nodes      - sequence of objects providing get_node_name()
+ def __init__(self, base_paths, partitions, dataset, nodes):
+ self.base_paths = base_paths
+ self.partitions = partitions
+ self.dataset = dataset
+ self.nodes = nodes
+
+ # Prints the partition scheme of every test in the dataset.
+ # Does nothing without base paths; exits the process on an unknown test name.
+ def print_partition_scheme(self):
+ if (len(self.base_paths) == 0):
+ return
+ for test in self.dataset.get_tests():
+ if test in self.BENCHMARK_LOCAL_TESTS:
+ self.print_local_partition_schemes(test)
+ elif test in self.BENCHMARK_CLUSTER_TESTS:
+ self.print_cluster_partition_schemes(test)
+ else:
+ print "Unknown test."
+ exit()
+
+ # Prints the local scheme for each partition count; the node id is always 0.
+ def print_local_partition_schemes(self, test):
+ node_index = 0
+ virtual_disk_partitions = get_local_virtual_disk_partitions(self.partitions)
+ for p in self.partitions:
+ scheme = self.get_local_partition_scheme(test, p)
+ self.print_partition_schemes(virtual_disk_partitions, scheme, test, p, node_index)
+
+ # Prints the cluster scheme for each partition count, using the index of the
+ # current host in self.nodes (-1 when the host is not listed).
+ def print_cluster_partition_schemes(self, test):
+ node_index = self.get_current_node_index()
+ virtual_disk_partitions = get_cluster_virtual_disk_partitions(self.nodes, self.partitions)
+ for p in self.partitions:
+ scheme = self.get_cluster_partition_scheme(test, p)
+ self.print_partition_schemes(virtual_disk_partitions, scheme, test, p, node_index)
+
+ # Prints a summary header followed by one formatted line per scheme row.
+ # Anything that is not a non-empty container (including None) is reported as empty.
+ def print_partition_schemes(self, virtual_partitions, scheme, test, partitions, node_id):
+ print
+ print "---------------- Partition Scheme --------------------"
+ print " Test: " + test
+ print " Virtual Partitions: " + str(virtual_partitions)
+ print " Disks: " + str(len(self.base_paths))
+ print " Partitions: " + str(partitions)
+ print " Node Id: " + str(node_id)
+
+ if isinstance(scheme, (tuple, list, dict, set)) and len(scheme) > 0:
+ # Column width is taken from the data path of the first row plus padding.
+ folder_length = len(scheme[0][3]) + 5
+ row_format = "{:>5} {:>5} {:>5} {:<" + str(folder_length) + "} {:<" + str(folder_length) + "}"
+ HEADER = ("Disk", "Index", "Link", "Data Path", "Link Path")
+ print row_format.format(*HEADER)
+ for row in scheme:
+ print row_format.format(*row)
+ print
+ else:
+ print " Scheme is EMPTY."
+
+ # Returns a list of [data_disk, data_index, link_index, data_path, link_path]
+ # rows that pair link folders with data folders on the same disk.
+ #   local_speed_up        - data indexes in [offset, offset + group_size)
+ #   local_batch_scale_out - data index equal to the link index
+ def get_local_partition_scheme(self, test, partition):
+ scheme = []
+ virtual_disk_partitions = get_local_virtual_disk_partitions(self.partitions)
+ data_schemes = get_disk_partition_scheme(0, virtual_disk_partitions, self.base_paths)
+ link_base_schemes = get_disk_partition_scheme(0, partition, self.base_paths, self.DATA_LINKS_FOLDER + test)
+
+ # Match link paths to real data paths.
+ # "/" on two ints is integer division under Python 2 (this file uses print statements).
+ group_size = len(data_schemes) / len(link_base_schemes)
+ for d in range(len(self.base_paths)):
+ offset = 0
+ for link_node, link_disk, link_virtual, link_index, link_path in link_base_schemes:
+ if d == link_disk:
+ # Only consider a single disk at a time.
+ for data_node, data_disk, data_virtual, data_index, data_path in data_schemes:
+ if test == "local_speed_up" and data_disk == link_disk \
+ and offset <= data_index and data_index < offset + group_size:
+ scheme.append([data_disk, data_index, link_index, data_path, link_path])
+ elif test == "local_batch_scale_out" and data_disk == link_disk \
+ and data_index == link_index:
+ scheme.append([data_disk, data_index, link_index, data_path, link_path])
+ offset += group_size
+ return scheme
+
+ # Returns rows of [link_disk, data_index, link_index, data_path, link_path]
+ # for the current host, or None (after printing a message) when the host is
+ # not in self.nodes or the test name is unknown.
+ # NOTE(review): callers that iterate the result do not check for None.
+ def get_cluster_partition_scheme(self, test, partition):
+ node_index = self.get_current_node_index()
+ if node_index == -1:
+ print "Unknown host."
+ return
+
+ scheme = []
+ virtual_disk_partitions = get_cluster_virtual_disk_partitions(self.nodes, self.partitions)
+ data_schemes = get_disk_partition_scheme(node_index, virtual_disk_partitions, self.base_paths)
+ link_base_schemes = get_cluster_link_scheme(len(self.nodes), partition, self.base_paths, self.DATA_LINKS_FOLDER + test)
+
+ # Match link paths to real data paths.
+ for link_node, link_disk, link_virtual, link_index, link_path in link_base_schemes:
+ # Prep
+ if test == "speed_up":
+ group_size = virtual_disk_partitions / (link_node + 1) / partition
+ elif test == "batch_scale_out":
+ group_size = virtual_disk_partitions / len(self.nodes) / partition
+ else:
+ print "Unknown test."
+ return
+
+ node_offset = group_size * node_index * partition
+ node_offset += group_size * link_index
+ # Link entries whose node number is below this host's index get no data rows.
+ has_data = True
+ if link_node < node_index:
+ has_data = False
+
+ # Make links
+ # "date_node" (sic) is never read.
+ for date_node, data_disk, data_virtual, data_index, data_path in data_schemes:
+ if has_data and data_disk == link_disk \
+ and node_offset <= data_index and data_index < node_offset + group_size:
+ scheme.append([link_disk, data_index, link_index, data_path, link_path])
+ # A row with index -1 and an empty data path: add_collection_links_for creates
+ # the collection folder for it but skips the symlink because the index is negative.
+ scheme.append([link_disk, -1, link_index, "", link_path])
+ return scheme
+
+ # Builds the link folders for every test in the dataset.
+ # With reset, the links folder under the first base path is removed first.
+ # NOTE(review): shutil is not imported by this module directly; presumably it
+ # arrives through one of the star imports -- confirm.
+ def build_data_links(self, reset):
+ if (len(self.base_paths) == 0):
+ return
+ if reset:
+ shutil.rmtree(self.base_paths[0] + self.DATA_LINKS_FOLDER)
+ for test in self.dataset.get_tests():
+ if test in self.BENCHMARK_LOCAL_TESTS:
+ for i in self.partitions:
+ scheme = self.get_local_partition_scheme(test, i)
+ self.build_data_links_scheme(scheme)
+ # The extra "zero partition" scheme is only built for several base paths
+ # when a partition count of 1 is configured.
+ if 1 in self.partitions and len(self.base_paths) > 1:
+ scheme = self.build_data_links_local_zero_partition(test)
+ self.build_data_links_scheme(scheme)
+ elif test in self.BENCHMARK_CLUSTER_TESTS:
+ for i in self.partitions:
+ scheme = self.get_cluster_partition_scheme(test, i)
+ self.build_data_links_scheme(scheme)
+ if 1 in self.partitions and len(self.base_paths) > 1:
+ scheme = self.build_data_links_cluster_zero_partition(test)
+ self.build_data_links_scheme(scheme)
+ else:
+ print "Unknown test."
+ exit()
+
+ def build_data_links_scheme(self, scheme):
+ '''Build all the data links based on the scheme information.'''
+ # Only the data path, link path and data index of each row are used.
+ for (data_disk, data_index, partition, data_path, link_path) in scheme:
+ self.add_collection_links_for(data_path, link_path, data_index)
+
+ def build_data_links_cluster_zero_partition(self, test):
+ '''Build a scheme for all data in one symbolically linked folder. (0 partition)'''
+ # Each one-partition link path becomes the "data" side of a row whose link
+ # side is the zero-partition path of the same node; the disk number is the row index.
+ scheme = []
+ link_base_schemes = get_cluster_link_scheme(len(self.nodes), 1, self.base_paths, self.DATA_LINKS_FOLDER + test)
+ for link_node, link_disk, link_virtual, link_index, link_path in link_base_schemes:
+ new_link_path = self.get_zero_partition_path(link_node, self.DATA_LINKS_FOLDER + test + "/" + str(link_node) + "nodes")
+ scheme.append([0, link_disk, 0, link_path, new_link_path])
+ return scheme
+
+ def build_data_links_local_zero_partition(self, test):
+ '''Build a scheme for all data in one symbolically linked folder. (0 partition)'''
+ # For "local_batch_scale_out" entries are skipped once index is above 0.
+ scheme = []
+ index = 0
+ link_base_schemes = get_disk_partition_scheme(0, 1, self.base_paths, self.DATA_LINKS_FOLDER + test)
+ for link_node, link_disk, link_virtual, link_index, link_path in link_base_schemes:
+ if test == "local_batch_scale_out" and index > 0:
+ continue
+ new_link_path = self.get_zero_partition_path(link_node, self.DATA_LINKS_FOLDER + test)
+ scheme.append([0, index, 0, link_path, new_link_path])
+ index += 1
+ return scheme
+
+ def get_zero_partition_path(self, node, key):
+ '''Return a partition path for the zero partition.'''
+ # Takes the one-partition path on the first base path and renames "p1" to "p0".
+ # NOTE(review): str.replace changes every "p1" in the path, including any
+ # occurrence inside the base path itself.
+ base_path = self.base_paths[0]
+ new_link_path = get_disk_partition_scheme(node, 1, [base_path], key)[0][PARTITION_INDEX_PATH]
+ return new_link_path.replace("p1", "p0")
+
+ # Returns the position in self.nodes of the first node whose name is a prefix
+ # of this machine's host name, or -1 when no node matches.
+ def get_current_node_index(self):
+ found = False
+ node_index = 0
+ for machine in self.nodes:
+ if socket.gethostname().startswith(machine.get_node_name()):
+ found = True
+ break
+ node_index += 1
+
+ if found:
+ return node_index
+ else:
+ return -1
+
+ # For each collection: creates link_path/<collection>/ when missing and, for a
+ # non-negative index, (re)creates the symlink "index<index>" in that folder
+ # pointing at real_path/<collection>/.
+ def add_collection_links_for(self, real_path, link_path, index):
+ for collection in self.QUERY_COLLECTIONS:
+ collection_path = link_path + collection + "/"
+ collection_index = collection_path + "index" + str(index)
+ if not os.path.isdir(collection_path):
+ os.makedirs(collection_path)
+ if index >= 0:
+ if os.path.islink(collection_index):
+ os.unlink(collection_index)
+ os.symlink(real_path + collection + "/", collection_index)
+
+ # Copies the query files for every test in the dataset; exits the process on
+ # an unknown test name.
+ def copy_query_files(self, reset):
+ for test in self.dataset.get_tests():
+ if test in self.BENCHMARK_LOCAL_TESTS:
+ self.copy_local_query_files(test, reset)
+ elif test in self.BENCHMARK_CLUSTER_TESTS:
+ self.copy_cluster_query_files(test, reset)
+ else:
+ print "Unknown test."
+ exit()
+
+ def copy_cluster_query_files(self, test, reset):
+ '''Determine the data_link path for cluster query files and copy with
+ new location for collection.'''
+ # Zero-partition queries are only produced for several base paths when a
+ # partition count of 1 is configured (same condition as in build_data_links).
+ if 1 in self.partitions and len(self.base_paths) > 1:
+ for n in range(len(self.nodes)):
+ query_path = get_cluster_query_path(self.base_paths, test, 0, n)
+ prepare_path(query_path, reset)
+
+ # Copy query files.
+ new_link_path = self.get_zero_partition_path(n, self.DATA_LINKS_FOLDER + test + "/" + str(n) + "nodes")
+ self.copy_and_replace_query(query_path, [new_link_path])
+ for n in range(len(self.nodes)):
+ for p in self.partitions:
+ query_path = get_cluster_query_path(self.base_paths, test, p, n)
+ prepare_path(query_path, reset)
+
+ # Copy query files.
+ partition_paths = get_disk_partition_paths(n, p, self.base_paths, self.DATA_LINKS_FOLDER + test + "/" + str(n) + "nodes")
+ self.copy_and_replace_query(query_path, partition_paths)
+
+ def copy_local_query_files(self, test, reset):
+ '''Determine the data_link path for local query files and copy with
+ new location for collection.'''
+ # Zero-partition queries are only produced for several base paths when a
+ # partition count of 1 is configured (same condition as in build_data_links).
+ if 1 in self.partitions and len(self.base_paths) > 1:
+ query_path = get_local_query_path(self.base_paths, test, 0)
+ prepare_path(query_path, reset)
+
+ # Copy query files.
+ new_link_path = self.get_zero_partition_path(0, self.DATA_LINKS_FOLDER + test)
+ self.copy_and_replace_query(query_path, [new_link_path])
+ for p in self.partitions:
+ query_path = get_local_query_path(self.base_paths, test, p)
+ prepare_path(query_path, reset)
+
+ # Copy query files.
+ partition_paths = get_disk_partition_paths(0, p, self.base_paths, self.DATA_LINKS_FOLDER + test)
+ self.copy_and_replace_query(query_path, partition_paths)
+
+ def copy_and_replace_query(self, query_path, replacement_list):
+ '''Copy the query files over to the query_path and replace the path
+ for the where the collection data is located.'''
+ # NOTE(review): shutil and sys are not imported by this module directly;
+ # presumably they arrive through one of the star imports -- confirm.
+ for query_file in self.QUERY_FILE_LIST + self.QUERY_UTILITY_LIST:
+ shutil.copyfile(self.QUERY_MASTER_FOLDER + query_file, query_path + query_file)
+
+ # Make a search replace for each collection.
+ for collection in self.QUERY_COLLECTIONS:
+ replacement_list_with_type = []
+ for replace in replacement_list:
+ replacement_list_with_type.append(replace + collection)
+
+ replace_string = self.SEPERATOR.join(replacement_list_with_type)
+ # The second argument (True) turns on in-place editing, so what is written
+ # to sys.stdout inside the loop replaces the content of the file.
+ for line in fileinput.input(query_path + query_file, True):
+ sys.stdout.write(line.replace(self.QUERY_REPLACEMENT_KEY + collection, replace_string))
+
+ # Make a search replace for partition type.
+ if self.dataset.get_partition_type() == "large_files":
+ for line in fileinput.input(query_path + query_file, True):
+ sys.stdout.write(line.replace("/stationCollection", "/" + self.LARGE_FILE_ROOT_TAG + "/stationCollection"))
+ for line in fileinput.input(query_path + query_file, True):
+ sys.stdout.write(line.replace("/dataCollection", "/" + self.LARGE_FILE_ROOT_TAG + "/dataCollection"))
+
+ # Returns the virtual partition count for the first test of the dataset: every
+ # branch of the loop body returns or exits. Without any test a message is
+ # printed and the result is None.
+ def get_number_of_slices_per_disk(self):
+ if len(self.dataset.get_tests()) == 0:
+ print "No test has been defined in config file."
+ else:
+ for test in self.dataset.get_tests():
+ if test in self.BENCHMARK_LOCAL_TESTS:
+ return get_local_virtual_disk_partitions(self.partitions)
+ elif test in self.BENCHMARK_CLUSTER_TESTS:
+ return get_cluster_virtual_disk_partitions(self.nodes, self.partitions)
+ else:
+ print "Unknown test."
+ exit()
+
+# Concatenates the disk partition schemes of node numbers 0 .. nodes-1, each
+# built with the key "<key>/<n>nodes". "nodes" is a count, not a list.
+def get_cluster_link_scheme(nodes, partition, base_paths, key="partitions"):
+ link_paths = []
+ for n in range(0, nodes):
+ new_link_path = get_disk_partition_scheme(n, partition, base_paths, key + "/" + str(n) + "nodes")
+ link_paths.extend(new_link_path)
+ return link_paths
+
+# Returns "<first base path>queries/<test>/d<number of base paths>_p<partition>/".
+def get_local_query_path(base_paths, test, partition):
+ return base_paths[0] + "queries/" + test + "/" + get_local_query_folder(len(base_paths), partition) + "/"
+
+# Returns the folder name "d<disks>_p<partitions>".
+def get_local_query_folder(disks, partitions):
+ return "d" + str(disks) + "_p" + str(partitions)
+
+# Returns "<first base path>queries/<test>/<nodes>nodes/d<number of base paths>_p<partition>/".
+# "nodes" is a number that is turned into text, not a list.
+def get_cluster_query_path(base_paths, test, partition, nodes):
+ return base_paths[0] + "queries/" + test + "/" + str(nodes) + "nodes/" + get_local_query_folder(len(base_paths), partition) + "/"
+
+# Returns the local virtual partition count multiplied by the value that
+# calculate_partitions yields for the node counts 1 .. len(nodes).
+def get_cluster_virtual_disk_partitions(nodes, partitions):
+ vp = get_local_virtual_disk_partitions(partitions)
+ vn = calculate_partitions(range(1, len(nodes)+1, 1))
+ return vp * vn
+
+# Returns calculate_partitions(partitions); a named alias for the local case.
+def get_local_virtual_disk_partitions(partitions):
+ return calculate_partitions(partitions)
+
+# Returns a number that every entry of the given sequence divides evenly.
+# Starting from 1, an entry that already divides the running value leaves it
+# unchanged, an entry that is a multiple of the running value replaces it, and
+# any other entry is multiplied in.
+# The result is not always the least common multiple: [2, 3, 4] gives 24, not 12.
+# NOTE(review): the parameter name shadows the builtin "list" inside this function.
+def calculate_partitions(list):
+ x = 1
+ for i in list:
+ if x % i != 0:
+ if i % x == 0:
+ x = i
+ else:
+ x *= i
+ return x
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
new file mode 100644
index 0000000..eeae25c
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
@@ -0,0 +1,236 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import sys, getopt
+
+# Custom modules.
+from weather_data_files import *
+from weather_download_files import *
+from weather_convert_to_xml import *
+from weather_config import *
+from weather_benchmark import *
+
+DEBUG_OUTPUT = False
+
+#
+# Weather conversion for GHCN-DAILY files to xml.
+#
+# http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt
+#
def main(argv):
    """Run the weather benchmark build sections selected on the command line.

    Parses ``argv`` with getopt, loads the XML config named by ``-x`` and
    then runs every build section enabled by ``-l``. When ``-l`` is omitted
    the section is "all", which runs everything except the inventory
    section; that one only runs when requested explicitly.

    Args:
        argv: Command-line arguments without the program name.

    Exits through ``sys.exit``: status 2 when getopt rejects the options,
    and the default status after ``-h`` or when an option value or a
    required setting is invalid.
    """
    # This file's own import line only names sys and getopt, so import os
    # here instead of relying on it leaking in through the star imports.
    import os

    global DEBUG_OUTPUT

    append = False
    max_records = 0
    process_file_name = ""
    reset = False
    section = "all"
    token = ""
    update = False
    xml_config_path = ""

    # NOTE: print is always called with one parenthesised string so the
    # statements behave the same as the original Python 2 print statements.
    try:
        # "append" is registered so the --append form handled below is
        # actually accepted by getopt.
        opts, args = getopt.getopt(
            argv,
            "af:hl:m:ruvw:x:",
            ["append", "file=", "locality=", "max_station_files=", "web_service=", "xml_config="])
    except getopt.GetoptError:
        print('The file options for weather_cli.py were not correctly specified.')
        print('To see a full list of options try:')
        print(' $ python weather_cli.py -h')
        sys.exit(2)

    localities = ("download", "progress_file", "sensor_build", "station_build", "partition",
                  "partition_scheme", "test_links", "queries", "inventory", "statistics")
    help_lines = (
        'Converting weather daily files to xml options:',
        ' -a Append the results to the progress file.',
        ' -f (str) The file name of a specific station to process.',
        ' * Helpful when testing a single stations XML file output.',
        ' -l (str) Select the locality of the scripts execution (download, progress_file, sensor_build, station_build, partition, partition_scheme, test_links, queries, inventory, statistics).',
        ' -m (int) Limits the number of files created for each station.',
        ' * Helpful when testing to make sure all elements are supported for each station.',
        ' Alternate form: --max_station_files=(int)',
        ' -r Reset the build process. (For one section or all sections depending on other parameters.)',
        ' -u Recalculate the file count and data size for each data source file.',
        ' -v Extra debug information.',
        ' -w (str) Downloads the station XML file form the web service.',
        ' -x (str) XML config file for weather data.',
    )

    for opt, arg in opts:
        if opt == '-h':
            for line in help_lines:
                print(line)
            sys.exit()
        elif opt in ('-a', "--append"):
            append = True
        elif opt in ('-f', "--file"):
            # The station file must already exist.
            if os.path.exists(arg):
                process_file_name = arg
            else:
                print('Error: Argument must be a file name for --file (-f).')
                sys.exit()
        elif opt in ('-l', "--locality"):
            if arg in localities:
                section = arg
            else:
                print('Error: Argument must be a string for --locality (-l) and a valid locality.')
                sys.exit()
        elif opt in ('-m', "--max_station_files"):
            if arg.isdigit():
                max_records = int(arg)
            else:
                print('Error: Argument must be an integer for --max_station_files (-m).')
                sys.exit()
        elif opt == '-r':
            reset = True
        elif opt == '-u':
            update = True
        elif opt == '-v':
            DEBUG_OUTPUT = True
        elif opt in ('-w', "--web_service"):
            # The web service token only has to be non-empty.
            if arg != "":
                token = arg
            else:
                print('Error: Argument must be a string --web_service (-w).')
                sys.exit()
        elif opt in ('-x', "--xml_config"):
            # The config file must already exist.
            if os.path.exists(arg):
                xml_config_path = arg
            else:
                print('Error: Argument must be a xml file for --xml_config (-x).')
                sys.exit()

    # Required fields to run the script.
    if xml_config_path == "" or not os.path.exists(xml_config_path):
        print('Error: The xml config option must be supplied: --xml_config (-x).')
        sys.exit()
    config = WeatherConfig(xml_config_path)

    # The save directory named in the config file must exist as well.
    if config.get_save_path() == "" or not os.path.exists(config.get_save_path()):
        print('Error: The save directory option must be supplied in the config file.')
        sys.exit()

    # Set up downloads folder.
    download_path = config.get_save_path() + "/downloads"
    if section in ("all", "download"):
        print('Processing the download section.')
        download = WeatherDownloadFiles(download_path)
        download.download_ghcnd_files(reset)
        download.download_mshr_files(reset)

        # Unzip the required file.
        download.unzip_ghcnd_package(config.get_package(), reset)
        download.unzip_mshr_files(reset)

    # Create some basic paths for save files and references.
    ghcnd_data_dly_path = download_path + '/' + config.get_package() + '/' + config.get_package()
    xml_data_save_path = config.get_save_path() + '/all_xml_files/'

    # Make sure the xml folder is available.
    if not os.path.isdir(xml_data_save_path):
        os.makedirs(xml_data_save_path)

    # Set up the XML build objects.
    convert = WeatherWebServiceMonthlyXMLFile(download_path, xml_data_save_path, DEBUG_OUTPUT)
    progress_file = xml_data_save_path + "_data_progress.csv"
    data = WeatherDataFiles(ghcnd_data_dly_path, progress_file)
    if section in ("all", "progress_file"):
        print('Processing the progress_file section.')
        options = []
        if append:
            options.append('append')
        if update:
            options.append('recalculate')
        if reset:
            options.append('reset')
        data.build_progress_file(options, convert)

    if section in ("all", "sensor_build"):
        print('Processing the sensor_build section.')
        if process_file_name != "":
            # Process a single file.
            if os.path.exists(process_file_name):
                (file_count, data_size) = convert.process_sensor_file(process_file_name, max_records, 4)
                data.update_file_sensor_status(process_file_name, WeatherDataFiles.DATA_FILE_GENERATED, file_count, data_size)
            else:
                data.update_file_sensor_status(process_file_name, WeatherDataFiles.DATA_FILE_MISSING)
        else:
            # Process every file listed by the progress data.
            data.reset()
            data.set_type("sensor")
            data.set_data_reset(reset)
            for file_name in data:
                file_path = ghcnd_data_dly_path + '/' + file_name
                if os.path.exists(file_path):
                    (file_count, data_size) = convert.process_sensor_file(file_path, max_records, 4)
                    data.update_file_sensor_status(file_name, WeatherDataFiles.DATA_FILE_GENERATED, file_count, data_size)
                else:
                    data.update_file_sensor_status(file_name, WeatherDataFiles.DATA_FILE_MISSING)

    if section in ("all", "station_build"):
        print('Processing the station_build section.')
        data.reset()
        data.set_type("station")
        data.set_data_reset(reset)
        if token != "":
            convert.set_token(token)
        for file_name in data:
            file_path = ghcnd_data_dly_path + '/' + file_name
            if os.path.exists(file_path):
                return_status = convert.process_station_file(file_path)
                status = data.get_station_status(return_status)
                data.update_file_station_status(file_name, status)
            else:
                data.update_file_station_status(file_name, WeatherDataFiles.DATA_FILE_MISSING)

    for dataset in config.get_dataset_list():
        # Set up the setting for each dataset.
        dataset_folder = "/dataset-" + dataset.get_name()
        progress_file = config.get_save_path() + dataset_folder + "/_data_progress.csv"
        data = WeatherDataFiles(ghcnd_data_dly_path, progress_file)

        base_paths = []
        for paths in dataset.get_save_paths():
            base_paths.append(paths + dataset_folder + "/")
        benchmark = WeatherBenchmark(base_paths, dataset.get_partitions(), dataset, config.get_node_machine_list())

        if section in ("all", "partition", "partition_scheme"):
            slices = benchmark.get_number_of_slices_per_disk()
            print('Processing the partition section (' + dataset.get_name() + ':d' + str(len(base_paths)) + ':s' + str(slices) + ').')
            data.reset()
            if section == "partition_scheme":
                benchmark.print_partition_scheme()
            elif dataset.get_partition_type() == "large_files":
                data.build_to_n_partition_files(xml_data_save_path, slices, base_paths, reset)
            else:
                data.copy_to_n_partitions(xml_data_save_path, slices, base_paths, reset)

        if section in ("all", "test_links"):
            # TODO determine current node
            print('Processing the test links section (' + dataset.get_name() + ').')
            benchmark.print_partition_scheme()
            benchmark.build_data_links(reset)

        if section in ("all", "queries"):
            print('Processing the queries section (' + dataset.get_name() + ').')
            benchmark.copy_query_files(reset)

    # Equality, not `in ("inventory")`: the latter is a substring test
    # against a plain string, not a tuple membership test.
    if section == "inventory":
        print('Processing the inventory section.')
        convert.process_inventory_file()
+
+# if section in ("statistics"):
+# print 'Processing the statistics section.'
+# data.print_progress_file_stats(convert)
+
+if __name__ == "__main__":
+ main(sys.argv[1:])
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py
new file mode 100644
index 0000000..80607b8
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py
@@ -0,0 +1,134 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from xml.dom.minidom import parse
+
+class WeatherConfig:
+ def __init__(self, config_xml_file):
+ self.config_xml_file = config_xml_file
+
+ self.config = parse(self.config_xml_file)
+
+ def get_save_path(self):
+ return self.get_text(self.config.getElementsByTagName("save_path")[0])
+
+ def get_package(self):
+ return self.get_text(self.config.getElementsByTagName("package")[0])
+
+ def get_node_machine_list(self):
+ nodes = []
+ for node in self.config.getElementsByTagName("node"):
+ id = self.get_node_name(node)
+ ip = self.get_node_ip(node)
+ nodes.append(Machine(id, ip))
+ return nodes
+
+ def get_dataset_list(self):
+ nodes = []
+ for node in self.config.getElementsByTagName("dataset"):
+ name = self.get_dataset_name(node)
+ save_paths = self.get_dataset_save_paths(node)
+ partition_type = self.get_dataset_partition_type(node)
+ partitions = self.get_dataset_partitions(node)
+ tests = self.get_dataset_tests(node)
+ nodes.append(Dataset(name, save_paths, partition_type, partitions, tests))
+ return nodes
+
+
+ # --------------------------------------------------------------------------
+ # Node Specific Functions
+ # --------------------------------------------------------------------------
+ def get_node_ip(self, node):
+ return self.get_text(node.getElementsByTagName("cluster_ip")[0])
+
+ def get_node_name(self, node):
+ return self.get_text(node.getElementsByTagName("id")[0])
+
+
+ # --------------------------------------------------------------------------
+ # Dataset Specific Functions
+ # --------------------------------------------------------------------------
+ def get_dataset_name(self, node):
+ return self.get_text(node.getElementsByTagName("name")[0])
+
+ def get_dataset_save_paths(self, node):
+ paths = []
+ for item in node.getElementsByTagName("save_path"):
+ paths.append(self.get_text(item))
+ return paths
+
+ def get_dataset_partition_type(self, node):
+ return self.get_text(node.getElementsByTagName("partition_type")[0])
+
+ def get_dataset_partitions(self, node):
+ paths = []
+ for item in node.getElementsByTagName("partitions_per_path"):
+ paths.append(int(self.get_text(item)))
+ return paths
+
+ def get_dataset_tests(self, node):
+ tests = []
+ for item in node.getElementsByTagName("test"):
+ tests.append(self.get_text(item))
+ return tests
+
+ def get_text(self, xml_node):
+ rc = []
+ for node in xml_node.childNodes:
+ if node.nodeType == node.TEXT_NODE:
+ rc.append(node.data)
+ return ''.join(rc)
+
class Machine:
    """A cluster node described by its id and its IP address."""

    def __init__(self, id, ip):
        # Both values are stored exactly as given.
        self.ip = ip
        self.id = id

    def get_node_name(self):
        """Return the id the machine was created with."""
        return self.id

    def get_node_ip(self):
        """Return the IP the machine was created with."""
        return self.ip

    def __repr__(self):
        """Render the machine as ``<id>(<ip>)``."""
        return "".join([self.id, "(", self.ip, ")"])
+
class Dataset:
    """Settings of one benchmark dataset, stored as plain attributes."""

    def __init__(self, name, save_paths, partition_type, partitions, tests):
        # Every argument is stored unchanged under the attribute of the same name.
        self.name = name
        self.save_paths = save_paths
        self.partition_type = partition_type
        self.partitions = partitions
        self.tests = tests

    def get_name(self):
        """Return the dataset name."""
        return self.name

    def get_save_paths(self):
        """Return the save paths given at construction."""
        return self.save_paths

    def get_partitions(self):
        """Return the partitions value given at construction."""
        return self.partitions

    def get_partition_type(self):
        """Return the partition type given at construction."""
        return self.partition_type

    def get_tests(self):
        """Return the tests given at construction."""
        return self.tests

    def __repr__(self):
        """Render the dataset as ``<name>:<save_paths>:<partitions>``."""
        return ":".join([self.name, str(self.save_paths), str(self.partitions)])
+
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
new file mode 100644
index 0000000..04fff52
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
# Base URL used to get all the required files.
BASE_DOWNLOAD_URL = 'http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/'

# List of required files for a build.
FILE_NAMES = [
    'ghcnd-countries.txt',
    'ghcnd-inventory.txt',
    'ghcnd-states.txt',
    'ghcnd-stations.txt',
    'ghcnd-version.txt',
    'ghcnd_all.tar.gz',
    'ghcnd_gsn.tar.gz',
    'ghcnd_hcn.tar.gz',
    'readme.txt',
    'status.txt',
]

# Every field description below is a list [name, start, end, type].
# These constants are the positions inside such a list.
FIELD_INDEX_NAME = 0
FIELD_INDEX_START = 1
FIELD_INDEX_END = 2
FIELD_INDEX_TYPE = 3

# Positions of the leading fields inside DLY_FIELDS.
DLY_FIELD_ID = 0
DLY_FIELD_YEAR = 1
DLY_FIELD_MONTH = 2
DLY_FIELD_ELEMENT = 3

# The per-day fields start at index 4 of DLY_FIELDS and come four per day.
DLY_FIELD_DAY_OFFSET = 4
DLY_FIELD_DAY_FIELDS = 4

# Fields that open every row of a daily (.dly) record.
DLY_FIELDS = [
    ['ID', 1, 11, 'Character'],
    ['YEAR', 12, 15, 'Integer'],
    ['MONTH', 16, 17, 'Integer'],
    ['ELEMENT', 18, 21, 'Character'],
]

# Days in each row: 31 groups of eight columns starting at column 22, each
# holding a five-column value followed by three one-column flags.
# (`i` and `start` stay module-level names, exactly as before.)
for i in range(1, 32):
    start = 22 + ((i - 1) * 8)
    DLY_FIELDS.extend([
        ['VALUE' + str(i), (start + 0), (start + 4), 'Integer'],
        ['MFLAG' + str(i), (start + 5), (start + 5), 'Character'],
        ['QFLAG' + str(i), (start + 6), (start + 6), 'Character'],
        ['SFLAG' + str(i), (start + 7), (start + 7), 'Character'],
    ])

# Row layout of the stations file, keyed by field name.
STATIONS_FIELDS = {
    'ID': ['ID', 1, 11, 'Character'],
    'LATITUDE': ['LATITUDE', 13, 20, 'Real'],
    'LONGITUDE': ['LONGITUDE', 22, 30, 'Real'],
    'ELEVATION': ['ELEVATION', 32, 37, 'Real'],
    'STATE': ['STATE', 39, 40, 'Character'],
    'NAME': ['NAME', 42, 71, 'Character'],
    'GSNFLAG': ['GSNFLAG', 73, 75, 'Character'],
    'HCNFLAG': ['HCNFLAG', 77, 79, 'Character'],
    'WMOID': ['WMOID', 81, 85, 'Character'],
}

# Row layout of the countries file, keyed by field name.
COUNTRIES_FIELDS = {
    'CODE': ['CODE', 1, 2, 'Character'],
    'NAME': ['NAME', 4, 50, 'Character'],
}

# Row layout of the states file, keyed by field name.
STATES_FIELDS = {
    'CODE': ['CODE', 1, 2, 'Character'],
    'NAME': ['NAME', 4, 50, 'Character'],
}

# Row layout of the inventory file, keyed by field name.
INVENTORY_FIELDS = {
    'ID': ['ID', 1, 11, 'Character'],
    'LATITUDE': ['LATITUDE', 13, 20, 'Real'],
    'LONGITUDE': ['LONGITUDE', 22, 30, 'Real'],
    'ELEMENT': ['ELEMENT', 32, 35, 'Character'],
    'FIRSTYEAR': ['FIRSTYEAR', 37, 40, 'Integer'],
    'LASTYEAR': ['LASTYEAR', 42, 45, 'Integer'],
}
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
new file mode 100644
index 0000000..7b1434f
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
# List of required files for a build.
MSHR_URLS = [
    'ftp://ftp.ncdc.noaa.gov/pub/data/homr/docs/MSHR_Enhanced_Table.txt',
    'http://www.ncdc.noaa.gov/homr/file/mshr_enhanced.txt.zip',
]

# Every field description is a list [name, start, end, type].
# These constants are the positions inside such a list.
MSHR_FIELD_INDEX_NAME = 0
MSHR_FIELD_INDEX_START = 1
MSHR_FIELD_INDEX_END = 2
MSHR_FIELD_INDEX_TYPE = 3

# Row layout of the MSHR enhanced table, keyed by field name. Each row of
# the table below is (name, start, end, type); the name doubles as the key.
MSHR_FIELDS = dict(
    (name, [name, first, last, picture])
    for name, first, last, picture in (
        ('SOURCE_ID', 1, 20, 'X(20)'),
        ('SOURCE', 22, 31, 'X(10)'),
        ('BEGIN_DATE', 33, 40, 'YYYYMMDD'),
        ('END_DATE', 42, 49, 'YYYYMMDD'),
        ('STATION_STATUS', 51, 70, 'X(20)'),
        ('NCDCSTN_ID', 72, 91, 'X(20)'),
        ('ICAO_ID', 93, 112, 'X(20)'),
        ('WBAN_ID', 114, 133, 'X(20)'),
        ('FAA_ID', 135, 154, 'X(20)'),
        ('NWSLI_ID', 156, 175, 'X(20)'),
        ('WMO_ID', 177, 196, 'X(20)'),
        ('COOP_ID', 198, 217, 'X(20)'),
        ('TRANSMITTAL_ID', 219, 238, 'X(20)'),
        ('GHCND_ID', 240, 259, 'X(20)'),
        ('NAME_PRINCIPAL', 261, 360, 'X(100)'),
        ('NAME_PRINCIPAL_SHORT', 362, 391, 'X(30)'),
        ('NAME_COOP', 393, 492, 'X(100)'),
        ('NAME_COOP_SHORT', 494, 523, 'X(30)'),
        ('NAME_PUBLICATION', 525, 624, 'X(100)'),
        ('NAME_ALIAS', 626, 725, 'X(100)'),
        ('NWS_CLIM_DIV', 727, 736, 'X(10)'),
        ('NWS_CLIM_DIV_NAME', 738, 777, 'X(40)'),
        ('STATE_PROV', 779, 788, 'X(10)'),
        ('COUNTY', 790, 839, 'X(50)'),
        ('NWS_ST_CODE', 841, 842, 'X(2)'),
        ('FIPS_COUNTRY_CODE', 844, 845, 'X(2)'),
        ('FIPS_COUNTRY_NAME', 847, 946, 'X(100)'),
        ('NWS_REGION', 948, 977, 'X(30)'),
        ('NWS_WFO', 979, 988, 'X(10)'),
        ('ELEV_GROUND', 990, 1029, 'X(40)'),
        ('ELEV_GROUND_UNIT', 1031, 1050, 'X(20)'),
        ('ELEV_BAROM', 1052, 1091, 'X(40)'),
        ('ELEV_BAROM_UNIT', 1093, 1112, 'X(20)'),
        ('ELEV_AIR', 1114, 1153, 'X(40)'),
        ('ELEV_AIR_UNIT', 1155, 1174, 'X(20)'),
        ('ELEV_ZERODAT', 1176, 1215, 'X(40)'),
        ('ELEV_ZERODAT_UNIT', 1217, 1236, 'X(20)'),
        ('ELEV_UNK', 1238, 1277, 'X(40)'),
        ('ELEV_UNK_UNIT', 1279, 1298, 'X(20)'),
        ('LAT_DEC', 1300, 1319, 'X(20)'),
        ('LON_DEC', 1321, 1340, 'X(20)'),
        ('LAT_LON_PRECISION', 1342, 1351, 'X(10)'),
        ('RELOCATION', 1353, 1414, 'X(62)'),
        ('UTC_OFFSET', 1416, 1431, '9(16)'),
        # The trailing space in this type string is kept as it was.
        ('OBS_ENV', 1433, 1472, 'X(40) '),
        ('PLATFORM', 1474, 1573, 'X(100)'),
    )
)
[12/14] git commit: More tweaking to get the MRQL benchmark to work
efficiently.
Posted by pr...@apache.org.
More tweaking to get the MRQL benchmark to work efficiently.
Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/7f06298f
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/7f06298f
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/7f06298f
Branch: refs/heads/master
Commit: 7f06298fa350ca8b0a81bb9ffb78aa29b26368c1
Parents: 31b3f4d
Author: Preston Carman <pr...@apache.org>
Authored: Thu Oct 9 15:29:16 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Thu Oct 9 15:29:16 2014 -0700
----------------------------------------------------------------------
.../other_systems/mrql_scripts/README.md | 23 ++++++++++++++++++++
.../other_systems/mrql_scripts/clear.sh | 4 ----
.../mrql_scripts/load_node_file.sh | 17 ++++++++-------
.../mrql_scripts/run_group_test.sh | 22 +++++++++++++------
.../other_systems/mrql_scripts/start.sh | 20 -----------------
.../other_systems/mrql_scripts/stop.sh | 20 -----------------
6 files changed, 47 insertions(+), 59 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/vxquery/blob/7f06298f/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/README.md
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/README.md b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/README.md
new file mode 100644
index 0000000..53a7ecf
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/README.md
@@ -0,0 +1,23 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+
+clear.sh
+
+hadoop namenode -format
+
+run_group_test.sh
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/7f06298f/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/clear.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/clear.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/clear.sh
index b775de2..9a912b2 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/clear.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/clear.sh
@@ -17,12 +17,8 @@
# limitations under the License.
#
-# Hadoop data reset
-hadoop namenode -format
-
# Remove data
rm -rf disk1/hadoop/data
rm -rf disk2/hadoop/data
rm -rf disk1/hadoop/tmp
rm -rf disk1/hadoop/logs
-
http://git-wip-us.apache.org/repos/asf/vxquery/blob/7f06298f/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
index 048274f..ead0902 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh
@@ -23,15 +23,16 @@ then
exit
fi
+echo "Loading node ${1} data file in to cluster."
# Add each sensor block
-cp saved/backups/mr/all_sensors_${1}.xml.gz disk1/hadoop/upload/
-gunzip disk1/hadoop/upload/all_sensors_${1}.xml.gz
-hadoop fs -copyFromLocal disk1/hadoop/upload/all_sensors_${1}.xml all/sensors
-rm -f disk1/hadoop/upload/all_sensors_${1}.xml
+cp saved/backups/mr/all_sensors_${1}.xml.gz disk1/hadoop/
+gunzip disk1/hadoop/all_sensors_${1}.xml.gz
+hadoop fs -copyFromLocal disk1/hadoop/all_sensors_${1}.xml all/sensors
+rm -f disk1/hadoop/all_sensors_${1}.xml
# Add each station block
-cp saved/backups/mr/all_stations_${1}.xml.gz disk1/hadoop/upload/
-gunzip disk1/hadoop/upload/all_stations_${1}.xml.gz
-hadoop fs -copyFromLocal disk1/hadoop/upload/all_stations_${1}.xml all/stations
-rm -f disk1/hadoop/upload/all_stations_${1}.xml
+cp saved/backups/mr/all_stations_${1}.xml.gz disk1/hadoop/
+gunzip disk1/hadoop/all_stations_${1}.xml.gz
+hadoop fs -copyFromLocal disk1/hadoop/all_stations_${1}.xml all/stations
+rm -f disk1/hadoop/all_stations_${1}.xml
http://git-wip-us.apache.org/repos/asf/vxquery/blob/7f06298f/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
index 60dc255..f42a451 100755
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
@@ -17,28 +17,36 @@
# limitations under the License.
#
-NODES=2
+if [ -z "${1}" ]
+then
+ echo "Please enter the number of nodes."
+ exit
+fi
+
+NODES=${1}
REPEAT=1
# Start Hadoop
sh saved/hadoop/hadoop-1.2.1/bin/start-all.sh
+sleep 10
+
# Prepare hadoop file system
hadoop fs -mkdir all
+hadoop fs -ls
hadoop fs -mkdir all/sensors
hadoop fs -mkdir all/stations
+hadoop fs -ls all
# Upload test data
-n=0
-while [ ${n} -lt ${NODES} ];
+COUNTER=0
+while [ ${COUNTER} -lt ${NODES} ];
do
- sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh ${n} &
+ sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh ${COUNTER}
+ let COUNTER=COUNTER+1
done
-# After all files have been uploaded, continue.
-wait
-
# Start test
sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/ ${NODES} ${REPEAT}
http://git-wip-us.apache.org/repos/asf/vxquery/blob/7f06298f/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/start.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/start.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/start.sh
deleted file mode 100755
index a1766c9..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/start.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-bin/start-all.sh
http://git-wip-us.apache.org/repos/asf/vxquery/blob/7f06298f/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/stop.sh
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/stop.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/stop.sh
deleted file mode 100755
index e49d818..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/stop.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-bin/stop-all.sh
[14/14] git commit: Merge branch 'prestonc/september_update'
Posted by pr...@apache.org.
Merge branch 'prestonc/september_update'
Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/e97888ed
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/e97888ed
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/e97888ed
Branch: refs/heads/master
Commit: e97888ed881f8adc749e1e695c9700cc9d4c797f
Parents: 72fd5c6 9e0133a
Author: Preston Carman <pr...@apache.org>
Authored: Tue Oct 21 12:35:39 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Tue Oct 21 12:35:39 2014 -0700
----------------------------------------------------------------------
.../noaa-ghcn-daily/conf/weather_example.xml | 35 +++++++
.../conf/weather_example_cluster.xml | 58 ++++++++++++
.../noaa-ghcn-daily/other_systems/mrql/q00.mrql | 23 +++++
.../noaa-ghcn-daily/other_systems/mrql/q01.mrql | 21 +++++
.../noaa-ghcn-daily/other_systems/mrql/q02.mrql | 24 +++++
.../noaa-ghcn-daily/other_systems/mrql/q03.mrql | 22 +++++
.../noaa-ghcn-daily/other_systems/mrql/q04.mrql | 24 +++++
.../other_systems/mrql/q04_count_sensor.mrql | 21 +++++
.../other_systems/mrql/q04_count_station.mrql | 23 +++++
.../noaa-ghcn-daily/other_systems/mrql/q05.mrql | 27 ++++++
.../other_systems/mrql/q05_count_sensor.mrql | 23 +++++
.../other_systems/mrql/q05_count_station.mrql | 23 +++++
.../noaa-ghcn-daily/other_systems/mrql/q06.mrql | 26 ++++++
.../other_systems/mrql/q06_count_sensor.mrql | 23 +++++
.../other_systems/mrql/q06_count_station.mrql | 23 +++++
.../noaa-ghcn-daily/other_systems/mrql/q07.mrql | 26 ++++++
.../other_systems/mrql/q07_count_join.mrql | 26 ++++++
.../other_systems/mrql/q07_count_tmax.mrql | 22 +++++
.../other_systems/mrql/q07_count_tmin.mrql | 22 +++++
.../other_systems/mrql_gsn/q00.mrql | 23 -----
.../other_systems/mrql_gsn/q01.mrql | 21 -----
.../other_systems/mrql_gsn/q02.mrql | 24 -----
.../other_systems/mrql_gsn/q03.mrql | 22 -----
.../other_systems/mrql_gsn/q04.mrql | 24 -----
.../other_systems/mrql_gsn/q04_sensor.mrql | 21 -----
.../other_systems/mrql_gsn/q04_station.mrql | 24 -----
.../other_systems/mrql_gsn/q05.mrql | 27 ------
.../other_systems/mrql_gsn/q05_sensor.mrql | 23 -----
.../other_systems/mrql_gsn/q05_station.mrql | 23 -----
.../other_systems/mrql_gsn/q06.mrql | 26 ------
.../other_systems/mrql_gsn/q06_sensor.mrql | 23 -----
.../other_systems/mrql_gsn/q06_station.mrql | 23 -----
.../other_systems/mrql_gsn/q07.mrql | 26 ------
.../other_systems/mrql_gsn/q07_join_count.mrql | 26 ------
.../other_systems/mrql_gsn/q07_tmax.mrql | 22 -----
.../other_systems/mrql_gsn/q07_tmin.mrql | 22 -----
.../other_systems/mrql_hcn/q00.mrql | 23 -----
.../other_systems/mrql_hcn/q01.mrql | 21 -----
.../other_systems/mrql_hcn/q02.mrql | 24 -----
.../other_systems/mrql_hcn/q03.mrql | 22 -----
.../other_systems/mrql_hcn/q04.mrql | 24 -----
.../other_systems/mrql_hcn/q04_sensor.mrql | 21 -----
.../other_systems/mrql_hcn/q04_station.mrql | 24 -----
.../other_systems/mrql_hcn/q05.mrql | 27 ------
.../other_systems/mrql_hcn/q05_sensor.mrql | 23 -----
.../other_systems/mrql_hcn/q05_station.mrql | 23 -----
.../other_systems/mrql_hcn/q06.mrql | 28 ------
.../other_systems/mrql_hcn/q06_sensor.mrql | 23 -----
.../other_systems/mrql_hcn/q06_station.mrql | 23 -----
.../other_systems/mrql_hcn/q07.mrql | 26 ------
.../other_systems/mrql_hcn/q07_join_count.mrql | 26 ------
.../other_systems/mrql_hcn/q07_tmax.mrql | 22 -----
.../other_systems/mrql_hcn/q07_tmin.mrql | 22 -----
.../other_systems/mrql_scripts/README.md | 23 +++++
.../other_systems/mrql_scripts/clear.sh | 24 +++++
.../mrql_scripts/load_node_file.sh | 47 ++++++++++
.../mrql_scripts/run_group_test.sh | 65 +++++++++++++
.../mrql_scripts/run_mrql_tests.sh | 49 ++++++++++
.../other_systems/mrql_test/q00.mrql | 23 -----
.../other_systems/mrql_test/q01.mrql | 21 -----
.../other_systems/mrql_test/q02.mrql | 24 -----
.../other_systems/mrql_test/q03.mrql | 22 -----
.../other_systems/mrql_test/q04.mrql | 24 -----
.../other_systems/mrql_test/q05.mrql | 27 ------
.../other_systems/mrql_test/q06.mrql | 27 ------
.../other_systems/mrql_test/q07.mrql | 26 ------
.../other_systems/saxon/count_sensor.xq | 7 ++
.../other_systems/saxon/count_station.xq | 7 ++
.../noaa-ghcn-daily/other_systems/saxon/q00.xq | 15 +++
.../noaa-ghcn-daily/other_systems/saxon/q01.xq | 8 ++
.../noaa-ghcn-daily/other_systems/saxon/q02.xq | 14 +++
.../noaa-ghcn-daily/other_systems/saxon/q03.xq | 8 ++
.../other_systems/saxon/q04_count_sensor.xq | 10 ++
.../other_systems/saxon/q04_count_station.xq | 8 ++
.../other_systems/saxon/q05_count_sensor.xq | 11 +++
.../other_systems/saxon/q05_count_station.xq | 8 ++
.../other_systems/saxon/q06_count_sensor.xq | 8 ++
.../other_systems/saxon/q06_count_station.xq | 5 +
.../noaa-ghcn-daily/other_systems/saxon/q07.xq | 15 +++
.../other_systems/saxon/q07_count_tmax.xq | 9 ++
.../other_systems/saxon/q07_count_tmin.xq | 9 ++
.../saxon_scripts/run_saxon_tests.sh | 44 +++++++++
.../noaa-ghcn-daily/queries/count_sensor.xq | 27 ++++++
.../noaa-ghcn-daily/queries/count_station.xq | 27 ++++++
.../resources/noaa-ghcn-daily/queries/q04.xq | 8 +-
.../noaa-ghcn-daily/queries/q04_count_join.xq | 34 +++++++
.../noaa-ghcn-daily/queries/q04_count_sensor.xq | 29 ++++++
.../queries/q04_count_station.xq | 28 ++++++
.../noaa-ghcn-daily/queries/q04_sensor.xq | 27 ------
.../noaa-ghcn-daily/queries/q04_station.xq | 25 -----
.../noaa-ghcn-daily/queries/q05_count_join.xq | 35 +++++++
.../noaa-ghcn-daily/queries/q05_count_sensor.xq | 31 +++++++
.../queries/q05_count_station.xq | 28 ++++++
.../noaa-ghcn-daily/queries/q05_sensor.xq | 28 ------
.../noaa-ghcn-daily/queries/q05_station.xq | 25 -----
.../noaa-ghcn-daily/queries/q06_count_join.xq | 34 +++++++
.../noaa-ghcn-daily/queries/q06_count_sensor.xq | 29 ++++++
.../queries/q06_count_station.xq | 27 ++++++
.../noaa-ghcn-daily/queries/q06_sensor.xq | 27 ------
.../noaa-ghcn-daily/queries/q06_station.xq | 24 -----
.../noaa-ghcn-daily/queries/q07_count_join.xq | 35 +++++++
.../noaa-ghcn-daily/queries/q07_count_tmax.xq | 28 ++++++
.../noaa-ghcn-daily/queries/q07_count_tmin.xq | 28 ++++++
.../noaa-ghcn-daily/queries/q07_tmax.xq | 26 ------
.../noaa-ghcn-daily/queries/q07_tmin.xq | 26 ------
.../noaa-ghcn-daily/queries/sensor_count.xq | 24 -----
.../noaa-ghcn-daily/queries/station_count.xq | 24 -----
.../scripts/benchmark_logging.properties | 1 +
.../noaa-ghcn-daily/scripts/run_benchmark.sh | 3 +-
.../noaa-ghcn-daily/scripts/run_mrql_tests.sh | 42 ---------
.../scripts/weather_benchmark.py | 10 +-
.../scripts/weather_data_files.py | 14 +--
.../src/main/resources/util/diff_xml_files.py | 97 ++++++++++++++++++++
.../src/main/resources/util/list_xml_files.py | 72 +++++++++++++++
.../src/main/resources/util/log_top.sh | 35 -------
.../src/main/resources/util/merge_xml_files.py | 2 +-
.../java/org/apache/vxquery/cli/VXQuery.java | 1 +
.../RemoveUnusedSortDistinctNodesRule.java | 19 +---
.../rules/util/CardinalityRuleToolbox.java | 13 ---
.../rewriter/rules/util/OperatorToolbox.java | 72 ---------------
.../vxquery/functions/builtin-functions.xml | 1 +
.../xmlquery/query/XMLQueryCompiler.java | 2 +-
vxquery-server/pom.xml | 4 +
.../vxquery/cli/VXQueryClusterShutdown.java | 76 +++++++++++++++
.../src/main/resources/conf/cluster_example.xml | 12 +--
.../src/main/resources/conf/local.xml | 18 ++--
.../main/resources/scripts/cluster_actions.py | 16 +++-
.../src/main/resources/scripts/cluster_cli.py | 6 +-
.../resources/scripts/cluster_information.py | 27 ++++--
.../src/main/resources/scripts/startcc.sh | 6 +-
.../src/main/resources/scripts/startnc.sh | 4 +-
.../src/main/resources/scripts/stopcc.sh | 3 +-
.../src/main/resources/scripts/stopcluster.sh | 48 ++++++++++
.../src/main/resources/scripts/stopnc.sh | 2 +-
134 files changed, 1681 insertions(+), 1491 deletions(-)
----------------------------------------------------------------------
[08/14] git commit: Removed debug output.
Posted by pr...@apache.org.
Removed debug output.
Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/0e666fc5
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/0e666fc5
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/0e666fc5
Branch: refs/heads/master
Commit: 0e666fc5dea4268d0fa8e2ce2d9d310159ebd162
Parents: 44d07d9
Author: Preston Carman <pr...@apache.org>
Authored: Mon Oct 6 15:40:00 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Mon Oct 6 15:40:00 2014 -0700
----------------------------------------------------------------------
vxquery-server/src/main/resources/scripts/stopcluster.sh | 1 -
1 file changed, 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/vxquery/blob/0e666fc5/vxquery-server/src/main/resources/scripts/stopcluster.sh
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/scripts/stopcluster.sh b/vxquery-server/src/main/resources/scripts/stopcluster.sh
index 238da7f..5172a2e 100755
--- a/vxquery-server/src/main/resources/scripts/stopcluster.sh
+++ b/vxquery-server/src/main/resources/scripts/stopcluster.sh
@@ -45,5 +45,4 @@ then
fi
# Launch hyracks cc script without topology
-echo "${VXQUERY_HOME}/vxquery-server/target/appassembler/bin/vxqueryshutdown ${CC_OPTIONS} &> ${CCLOGS_DIR}/shutdown_$(date +%Y%m%d%H%M).log &"
${VXQUERY_HOME}/vxquery-server/target/appassembler/bin/vxqueryshutdown ${CC_OPTIONS} &> ${CCLOGS_DIR}/shutdown_$(date +%Y%m%d%H%M).log &