You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@vxquery.apache.org by pr...@apache.org on 2014/10/21 21:36:05 UTC
[04/14] A copy of all changes in the EXRT benchmark queries that are
copyright-free.
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
new file mode 100644
index 0000000..5db090a
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
@@ -0,0 +1,554 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import textwrap
+from datetime import date
+import os
+from collections import OrderedDict
+
+# Custom modules.
+from weather_config_ghcnd import *
+from weather_config_mshr import *
+from weather_download_files import *
+
class WeatherConvertToXML:
    """Base converter from NOAA GHCN-Daily (.dly) records to XML files.

    Field layout constants (DLY_FIELDS, STATIONS_FIELDS, MSHR_FIELDS,
    INVENTORY_FIELDS, FIELD_INDEX_*) come from the weather_config_* modules
    imported at the top of this file.  Subclasses override
    process_station_data() and process_one_month_sensor_set() to define the
    concrete XML layout.
    """

    # State/territory display names keyed by postal code.  Built from a list
    # of pairs (NOT a dict literal) so the insertion order is deterministic:
    # under Python 2 a dict literal is unordered, which made the positional
    # pseudo-FIPS id derived in default_xml_web_service_station() arbitrary
    # from run to run.
    STATES = OrderedDict([
        ('AK', 'Alaska'),
        ('AL', 'Alabama'),
        ('AR', 'Arkansas'),
        ('AS', 'American Samoa'),
        ('AZ', 'Arizona'),
        ('CA', 'California'),
        ('CO', 'Colorado'),
        ('CT', 'Connecticut'),
        ('DC', 'District of Columbia'),
        ('DE', 'Delaware'),
        ('FL', 'Florida'),
        ('GA', 'Georgia'),
        ('GU', 'Guam'),
        ('HI', 'Hawaii'),
        ('IA', 'Iowa'),
        ('ID', 'Idaho'),
        ('IL', 'Illinois'),
        ('IN', 'Indiana'),
        ('KS', 'Kansas'),
        ('KY', 'Kentucky'),
        ('LA', 'Louisiana'),
        ('MA', 'Massachusetts'),
        ('MD', 'Maryland'),
        ('ME', 'Maine'),
        ('MI', 'Michigan'),
        ('MN', 'Minnesota'),
        ('MO', 'Missouri'),
        ('MP', 'Northern Mariana Islands'),
        ('MS', 'Mississippi'),
        ('MT', 'Montana'),
        ('NA', 'National'),
        ('NC', 'North Carolina'),
        ('ND', 'North Dakota'),
        ('NE', 'Nebraska'),
        ('NH', 'New Hampshire'),
        ('NJ', 'New Jersey'),
        ('NM', 'New Mexico'),
        ('NV', 'Nevada'),
        ('NY', 'New York'),
        ('OH', 'Ohio'),
        ('OK', 'Oklahoma'),
        ('OR', 'Oregon'),
        ('PA', 'Pennsylvania'),
        ('PR', 'Puerto Rico'),
        ('RI', 'Rhode Island'),
        ('SC', 'South Carolina'),
        ('SD', 'South Dakota'),
        ('TN', 'Tennessee'),
        ('TX', 'Texas'),
        ('UT', 'Utah'),
        ('VA', 'Virginia'),
        ('VI', 'Virgin Islands'),
        ('VT', 'Vermont'),
        ('WA', 'Washington'),
        ('WI', 'Wisconsin'),
        ('WV', 'West Virginia'),
        ('WY', 'Wyoming')
    ])

    MONTHS = [
        "January",
        "February",
        "March",
        "April",
        "May",
        "June",
        "July",
        "August",
        "September",
        "October",
        "November",
        "December"
    ]

    # Web service token; empty means "generate locally, never download".
    token = ""

    def __init__(self, base_path, save_path, debug_output):
        """Remember output options and locate the support files under base_path."""
        self.save_path = save_path
        self.debug_output = debug_output

        # Extra support files.
        self.ghcnd_countries = base_path + '/ghcnd-countries.txt'
        self.ghcnd_inventory = base_path + '/ghcnd-inventory.txt'
        self.ghcnd_states = base_path + '/ghcnd-states.txt'
        self.ghcnd_stations = base_path + '/ghcnd-stations.txt'

        # MSHR support files.
        self.mshr_stations = base_path + '/mshr_enhanced_201402.txt'

    def set_token(self, token):
        self.token = token

    def get_field_from_definition(self, row, field_definition):
        """Slice row using a (start, end) column definition (1-based start)."""
        return row[(field_definition[FIELD_INDEX_START] - 1):field_definition[FIELD_INDEX_END]]

    def get_field(self, fields_array, row, index):
        """Slice row using the index'th column definition in fields_array."""
        return row[(fields_array[index][FIELD_INDEX_START] - 1):fields_array[index][FIELD_INDEX_END]]

    def get_dly_field(self, row, index):
        """Extract one field from a GHCN-Daily (.dly) fixed-width row."""
        return self.get_field(DLY_FIELDS, row, index)

    def print_row_files(self, row):
        """Debug helper: dump every DLY field of one row to stdout."""
        for field in DLY_FIELDS:
            print(str(field[FIELD_INDEX_NAME]) + " = '" + row[(field[FIELD_INDEX_START] - 1):field[FIELD_INDEX_END]] + "'")

    def save_file(self, filename, contents):
        """Write contents to filename and return the file name."""
        # Renamed the handle from `file` to avoid shadowing the builtin.
        out = open(filename, 'w')
        out.write(contents)
        out.close()
        return filename

    def get_folder_size(self, folder_name):
        """Return the total size in bytes of all files below folder_name."""
        total_size = 0
        for dirpath, dirnames, filenames in os.walk(folder_name):
            for f in filenames:
                fp = os.path.join(dirpath, f)
                total_size += os.path.getsize(fp)
        return total_size

    def process_one_month_sensor_set(self, records, page):
        """Hook for subclasses: write one month of readings; return files written."""
        # Default
        return 0

    def process_station_data(self, row):
        """Hook for subclasses: write one station record; return files written."""
        # Default
        return 0

    def get_base_folder(self, station_id, data_type="sensors"):
        return build_base_save_folder(self.save_path, station_id, data_type)

    def process_inventory_file(self):
        """Summarize ghcnd-inventory.txt into <save_path>/inventory.csv."""
        print("Processing inventory file")
        file_stream = open(self.ghcnd_inventory, 'r')

        csv_header = ['ID', 'SENSORS', 'SENSORS_COUNT', 'MAX_YEARS', 'TOTAL_YEARS_FOR_ALL_SENSORS']
        # NOTE(review): the first line is consumed before the loop and never
        # summarized -- presumably a header skip; confirm the inventory file
        # actually begins with a header row.
        row = file_stream.readline()
        csv_inventory = {}
        for row in file_stream:
            id = self.get_field_from_definition(row, INVENTORY_FIELDS['ID'])
            sensor_id = self.get_field_from_definition(row, INVENTORY_FIELDS['ELEMENT'])
            start = int(self.get_field_from_definition(row, INVENTORY_FIELDS['FIRSTYEAR']))
            end = int(self.get_field_from_definition(row, INVENTORY_FIELDS['LASTYEAR']))
            if id in csv_inventory:
                new_count = str(int(csv_inventory[id][2]) + 1)
                new_max = str(max(int(csv_inventory[id][3]), (end - start)))
                new_total = str(int(csv_inventory[id][3]) + end - start)
                csv_inventory[id] = [id, (csv_inventory[id][1] + "," + sensor_id), new_count, new_max, new_total]
            else:
                csv_inventory[id] = [id, sensor_id, str(1), str(end - start), str(end - start)]
        # Close the handle instead of leaking it.
        file_stream.close()

        path = self.save_path + "/inventory.csv"
        self.save_csv_file(path, csv_inventory, csv_header)

    def save_csv_file(self, path, csv_inventory, header):
        """Write a pipe-separated CSV built from the header and row lists."""
        csv_content = "|".join(header) + "\n"
        for row_id in csv_inventory:
            csv_content += "|".join(csv_inventory[row_id]) + "\n"
        self.save_file(path, csv_content)


    def process_station_file(self, file_name):
        """Convert the first row of a .dly file into one station record."""
        print("Processing station file: " + file_name)
        file_stream = open(file_name, 'r')

        row = file_stream.readline()
        file_stream.close()
        return self.process_station_data(row)

    def process_sensor_file(self, file_name, max_files, sensor_max=99):
        """Convert a .dly file into per-month XML files.

        Rows are grouped by (year, month); when more than sensor_max rows
        fall in one month the group is split across numbered pages.
        Returns a (files_written, bytes_on_disk) tuple.
        """
        print("Processing sensor file: " + file_name)
        file_stream = open(file_name, 'r')

        month_last = 0
        year_last = 0
        records = []
        page = 0
        sensor_count = 0

        file_count = 0
        for row in file_stream:
            month = self.get_dly_field(row, DLY_FIELD_MONTH)
            year = self.get_dly_field(row, DLY_FIELD_YEAR)

            if (month_last != 0 and year_last != 0) and (sensor_count >= sensor_max or month != month_last or year != year_last):
                # process set
                file_count += self.process_one_month_sensor_set(records, page)
                records = []
                if sensor_count >= sensor_max and month == month_last and year == year_last:
                    # start a new page.
                    page += 1
                else:
                    # start over.
                    page = 0
                    sensor_count = 0

            records.append(row)
            sensor_count += 1
            if max_files != 0 and file_count >= max_files:
                # Stop creating more files after the max is reached.
                break

            month_last = month
            year_last = year
        # Close the handle instead of leaking it.
        file_stream.close()

        # NOTE(review): the final month still buffered in `records` is not
        # flushed here, and an empty input file would raise IndexError below
        # -- confirm both are acceptable for this data set.
        station_id = self.get_dly_field(records[0], DLY_FIELD_ID)
        data_size = self.get_folder_size(self.get_base_folder(station_id) + "/" + station_id)
        print("Created " + str(file_count) + " XML files for a data size of " + str(data_size) + ".")

        return (file_count, data_size)

    def convert_c2f(self, c):
        """Convert degrees Celsius to Fahrenheit.

        Uses a float constant: under Python 2 the previous expression
        (9 / 5 * c) truncated 9/5 to 1 and silently returned c + 32.
        """
        return (9.0 / 5 * c) + 32

    def default_xml_web_service_start(self):
        """Return the XML declaration line."""
        field_xml = ""
        field_xml += "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n"
        return field_xml

    def default_xml_data_start(self, total_records):
        """Open the <dataCollection> wrapper for total_records readings."""
        field_xml = ""
        field_xml += "<dataCollection pageCount=\"1\" totalCount=\"" + str(total_records) + "\">\n"
        return field_xml

    def default_xml_station_start(self):
        """Open the <stationCollection> wrapper (single-station page)."""
        # Removed a dead "" assignment that was immediately overwritten.
        return "<stationCollection pageSize=\"100\" pageCount=\"1\" totalCount=\"1\">\n"

    def default_xml_field_date(self, report_date, indent=2):
        """Format report_date as a <date> element with a midnight timestamp."""
        field_xml = ""
        field_xml += self.get_indent_space(indent) + "<date>" + str(report_date.year) + "-" + str(report_date.month).zfill(2) + "-" + str(report_date.day).zfill(2) + "T00:00:00.000</date>\n"
        return field_xml

    def default_xml_mshr_station_additional(self, station_id):
        """The web service station data is generate from the MSHR data supplemented with GHCN-Daily."""
        station_mshr_row = ""
        stations_mshr_file = open(self.mshr_stations, 'r')
        for line in stations_mshr_file:
            if station_id == self.get_field_from_definition(line, MSHR_FIELDS['GHCND_ID']).strip():
                station_mshr_row = line
                break
        # Close the handle instead of leaking it.
        stations_mshr_file.close()

        if station_mshr_row == "":
            return ""

        additional_xml = ""

        county = self.get_field_from_definition(station_mshr_row, MSHR_FIELDS['COUNTY']).strip()
        if county != "":
            additional_xml += self.default_xml_location_labels("CNTY", "FIPS:-9999", county)

        country_code = self.get_field_from_definition(station_mshr_row, MSHR_FIELDS['FIPS_COUNTRY_CODE']).strip()
        country_name = self.get_field_from_definition(station_mshr_row, MSHR_FIELDS['FIPS_COUNTRY_NAME']).strip()
        if country_code != "" and country_name != "":
            additional_xml += self.default_xml_location_labels("CNTRY", "FIPS:" + country_code, country_name)

        return additional_xml

    def default_xml_location_labels(self, type, id, display_name):
        """Build one <locationLabels> element (type/id/displayName)."""
        label_xml = ""
        label_xml += self.default_xml_start_tag("locationLabels", 2)
        label_xml += self.default_xml_element("type", type, 3)
        label_xml += self.default_xml_element("id", id, 3)
        label_xml += self.default_xml_element("displayName", display_name, 3)
        label_xml += self.default_xml_end_tag("locationLabels", 2)
        return label_xml


    def default_xml_web_service_station(self, station_id):
        """The web service station data is generate from available historical sources."""
        station_ghcnd_row = ""
        stations_ghcnd_file = open(self.ghcnd_stations, 'r')
        for line in stations_ghcnd_file:
            if station_id == self.get_field_from_definition(line, STATIONS_FIELDS['ID']):
                station_ghcnd_row = line
                break
        # Close the handle instead of leaking it.
        stations_ghcnd_file.close()

        xml_station = ""
        xml_station += self.default_xml_start_tag("station", 1)

        xml_station += self.default_xml_element("id", "GHCND:" + station_id, 2)
        xml_station += self.default_xml_element("displayName", self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['NAME']).strip(), 2)
        xml_station += self.default_xml_element("latitude", self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['LATITUDE']).strip(), 2)
        xml_station += self.default_xml_element("longitude", self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['LONGITUDE']).strip(), 2)

        elevation = self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['ELEVATION']).strip()
        if elevation != "-999.9":
            xml_station += self.default_xml_element("elevation", elevation, 2)

        state_code = self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['STATE']).strip()
        if state_code != "" and state_code in self.STATES:
            # list(dict) works on both Python 2 and 3; dict.keys().index()
            # fails on Python 3 views.  The "FIPS" id is only the positional
            # index in STATES, not a real FIPS code.
            xml_station += self.default_xml_location_labels("ST", "FIPS:" + str(list(self.STATES).index(state_code)), self.STATES[state_code])

        # Add the MSHR data to the station generated information.
        xml_station += self.default_xml_mshr_station_additional(station_id)

        xml_station += self.default_xml_end_tag("station", 1)
        return xml_station

    def default_xml_day_reading_as_field(self, row, day):
        """Build per-sensor XML for one day, or "" when the value is missing."""
        day_index = DLY_FIELD_DAY_OFFSET + ((day - 1) * DLY_FIELD_DAY_FIELDS)
        value = self.get_dly_field(row, day_index)
        if value == "-9999":
            return ""

        field_xml = ""
        field_id = self.get_dly_field(row, DLY_FIELD_ELEMENT)
        if field_id in ("MDTN", "MDTX", "MNPN", "MXPN", "TMAX", "TMIN", "TOBS",):
            # Add both the celcius and fahrenheit temperatures.
            celcius = float(value) / 10
            field_xml += " <" + field_id + "_c>" + str(celcius) + "</" + field_id + "_c>\n"
            fahrenheit = self.convert_c2f(celcius)
            field_xml += " <" + field_id + "_f>" + str(fahrenheit) + "</" + field_id + "_f>\n"
        elif field_id in ("AWND", "EVAP", "PRCP", "THIC", "WESD", "WESF", "WSF1", "WSF2", "WSF5", "WSFG", "WSFI", "WSFM",):
            # Field values that are in tenths.
            converted_value = float(value) / 10
            field_xml += " <" + field_id + ">" + str(converted_value) + "</" + field_id + ">\n"
        elif field_id in ("ACMC", "ACMH", "ACSC", "ACSH", "PSUN",):
            # Fields is a percentage.
            field_xml += " <" + field_id + ">" + value.strip() + "</" + field_id + ">\n"
        elif field_id in ("FMTM", "PGTM",):
            # Fields is a time value HHMM.
            field_xml += " <" + field_id + ">" + value.strip() + "</" + field_id + ">\n"
        elif field_id in ("DAEV", "DAPR", "DASF", "DATN", "DATX", "DAWM", "DWPR", "FRGB", "FRGT", "FRTH", "GAHT", "MDSF", "MDWM", "MDEV", "MDPR", "SNOW", "SNWD", "TSUN", "WDF1", "WDF2", "WDF5", "WDFG", "WDFI", "WDFM", "WDMV",):
            # Fields with no alternation needed.
            field_xml += " <" + field_id + ">" + value.strip() + "</" + field_id + ">\n"
        else:
            field_xml += " <unknown>" + field_id + "</unknown>\n"

        # print field_xml
        return field_xml

    def default_xml_day_reading(self, row, day, indent=2):
        """Build the web-service style reading for one day, or "" if missing."""
        day_index = DLY_FIELD_DAY_OFFSET + ((day - 1) * DLY_FIELD_DAY_FIELDS)
        value = self.get_dly_field(row, day_index)
        mflag = self.get_dly_field(row, day_index + 1)
        qflag = self.get_dly_field(row, day_index + 2)
        sflag = self.get_dly_field(row, day_index + 3)

        if value == "-9999":
            return ""

        indent_space = self.get_indent_space(indent)
        field_id = self.get_dly_field(row, DLY_FIELD_ELEMENT)
        station_id = "GHCND:" + self.get_dly_field(row, DLY_FIELD_ID)

        field_xml = ""
        field_xml += indent_space + "<dataType>" + field_id + "</dataType>\n"
        field_xml += indent_space + "<station>" + station_id + "</station>\n"
        field_xml += indent_space + "<value>" + value.strip() + "</value>\n"
        field_xml += indent_space + "<attributes>\n"
        field_xml += indent_space + indent_space + "<attribute>" + mflag.strip() + "</attribute>\n"
        field_xml += indent_space + indent_space + "<attribute>" + qflag.strip() + "</attribute>\n"
        field_xml += indent_space + indent_space + "<attribute>" + sflag.strip() + "</attribute>\n"
        field_xml += indent_space + indent_space + "<attribute></attribute>\n"
        field_xml += indent_space + "</attributes>\n"

        # print field_xml
        return field_xml

    def default_xml_end(self):
        """Return the closing root tag for an observation document."""
        return textwrap.dedent("""\
            </ghcnd_observation>""")

    def default_xml_data_end(self):
        return self.default_xml_end_tag("dataCollection", 0)

    def default_xml_station_end(self):
        return self.default_xml_end_tag("stationCollection", 0)

    def default_xml_element(self, tag, data, indent=1):
        """Return an indented <tag>data</tag> line."""
        return self.get_indent_space(indent) + "<" + tag + ">" + data + "</" + tag + ">\n"

    def default_xml_start_tag(self, tag, indent=1):
        return self.get_indent_space(indent) + "<" + tag + ">\n"

    def default_xml_end_tag(self, tag, indent=1):
        return self.get_indent_space(indent) + "</" + tag + ">\n"

    def get_indent_space(self, indent):
        """Four spaces per indent level."""
        return (" " * (4 * indent))
+
+
class WeatherWebServiceMonthlyXMLFile(WeatherConvertToXML):
    """The web service class details how to create files similar to the NOAA web service."""

    # Once one download attempt fails, stop trying for subsequent stations.
    skip_downloading = False

    # Station data
    def process_station_data(self, row):
        """Adds a single station record file either from downloading the data or generating a similar record."""
        station_id = self.get_dly_field(row, DLY_FIELD_ID)
        download = 0
        # Bug fix: compare string contents with !=, not object identity
        # ("is not"), which only worked through CPython string interning.
        if self.token != "" and not self.skip_downloading:
            download = self.download_station_data(station_id, self.token, True)
            if download == 0:
                self.skip_downloading = True

        # If not downloaded, generate.
        if download != 0:
            return download
        else:
            # Information for each daily file.
            station_xml_file = self.default_xml_web_service_start()
            station_xml_file += self.default_xml_station_start()
            station_xml_file += self.default_xml_web_service_station(station_id)
            station_xml_file += self.default_xml_station_end()

            # Remove white space.
            station_xml_file = station_xml_file.replace("\n", "")
            station_xml_file = station_xml_file.replace(self.get_indent_space(1), "")

            # Make sure the station folder is available.
            ghcnd_xml_station_path = self.get_base_folder(station_id, "stations")
            if not os.path.isdir(ghcnd_xml_station_path):
                os.makedirs(ghcnd_xml_station_path)

            # Save XML string to disk.
            save_file_name = ghcnd_xml_station_path + station_id + ".xml"
            save_file_name = self.save_file(save_file_name, station_xml_file)

            if save_file_name != "":
                if self.debug_output:
                    print("Wrote file: " + save_file_name)
                return 1
            else:
                return 0

    # Station data
    def download_station_data(self, station_id, token, reset=False):
        """Downloads the station data from the web service.

        Returns 2 on success, 0 on failure.  Sleeps briefly to stay under
        the service's request rate limit.
        """
        import time
        time.sleep(2)
        # Make sure the station folder is available.
        ghcnd_xml_station_path = self.get_base_folder(station_id, "stations")
        if not os.path.isdir(ghcnd_xml_station_path):
            os.makedirs(ghcnd_xml_station_path)

        # Build download URL.
        # NOTE(review): urllib is not imported in this module directly --
        # presumably supplied by the wildcard import of weather_download_files;
        # confirm.
        url = "http://www.ncdc.noaa.gov/cdo-services/services/datasets/GHCND/stations/GHCND:" + station_id + ".xml?token=" + token
        url_file = urllib.urlopen(url)
        station_xml_file = ""
        while (True):
            line = url_file.readline()
            if not line:
                break
            station_xml_file += line
        # Close the network handle instead of leaking it.
        url_file.close()

        if station_xml_file.find("<cdoError>") != -1:
            if self.debug_output:
                print("Error in station download")
            return 0

        # Save XML string to disk.
        save_file_name = ghcnd_xml_station_path + station_id + ".xml"
        save_file_name = self.save_file(save_file_name, station_xml_file)

        if save_file_name != "":
            if self.debug_output:
                print("Wrote file: " + save_file_name)
            return 2
        else:
            return 0

    # Sensor data
    def process_one_month_sensor_set(self, records, page):
        """Generates records for a station using the web service xml layout.

        Returns 1 when an XML file was written, 0 when the month held no
        usable readings.
        """
        found_data = False
        year = int(self.get_dly_field(records[0], DLY_FIELD_YEAR))
        month = int(self.get_dly_field(records[0], DLY_FIELD_MONTH))

        station_id = self.get_dly_field(records[0], DLY_FIELD_ID)

        # Information for each daily file.
        count = 0
        daily_xml_file = ""

        for day in range(1, 32):
            try:
                # TODO find out what is a valid python date range? 1889?
                # Attempt to see if this is valid date.
                report_date = date(year, month, day)

                for record in records:
                    record_xml_snip = self.default_xml_day_reading(record, report_date.day)
                    # Bug fix: string comparison with !=, not "is not".
                    if record_xml_snip != "":
                        daily_xml_file += self.default_xml_start_tag("data")
                        daily_xml_file += self.default_xml_field_date(report_date)
                        daily_xml_file += record_xml_snip
                        daily_xml_file += self.default_xml_end_tag("data")
                        found_data = True
                        count += 1

            except ValueError:
                # Skip days that do not exist in this month (e.g. Feb 30).
                pass

        daily_xml_file = self.default_xml_web_service_start() + self.default_xml_data_start(count) + daily_xml_file + self.default_xml_data_end()
        daily_xml_file = daily_xml_file.replace("\n", "")
        daily_xml_file = daily_xml_file.replace(self.get_indent_space(1), "")

        if not found_data:
            return 0

        # Make sure the station folder is available.
        # NOTE(review): report_date is the last valid date from the loop above;
        # it is always bound because day 1 exists in every month.
        ghcnd_xml_station_path = self.get_base_folder(station_id) + "/" + station_id + "/" + str(report_date.year) + "/"
        if not os.path.isdir(ghcnd_xml_station_path):
            os.makedirs(ghcnd_xml_station_path)

        # Save XML string to disk.
        save_file_name = ghcnd_xml_station_path + build_sensor_save_filename(station_id, report_date, page)
        save_file_name = self.save_file(save_file_name, daily_xml_file)

        if save_file_name != "":
            if self.debug_output:
                print("Wrote file: " + save_file_name)
            return 1
        else:
            return 0
+
def build_base_save_folder(save_path, station_id, data_type="sensors"):
    """Return the save folder for a station's files of the given data type.

    Stations are bucketed by the first three characters of their id so no
    single directory grows too large.
    """
    return "%s%s/%s/" % (save_path, data_type, station_id[:3])
+
def build_sensor_save_filename(station_id, report_date, page):
    """Return the file name for one month of sensor data.

    Format: <station>_<YYYYMM>_<page>.xml, zero-padding year and month.
    """
    year_month = str(report_date.year).zfill(4) + str(report_date.month).zfill(2)
    return "%s_%s_%s.xml" % (station_id, year_month, str(page))
+
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
new file mode 100644
index 0000000..c8b0fa5
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
@@ -0,0 +1,406 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import glob
+import os.path
+import linecache
+import distutils.core
+
+from weather_convert_to_xml import *
+from collections import OrderedDict
+
+# Weather data files created to manage the conversion process.
+# Allows partition and picking up where you left off.
class WeatherDataFiles:
    """Manages the set of GHCN-Daily (.dly) files plus a CSV progress file
    so a long conversion run can be partitioned and resumed."""

    # Root element used when many records are concatenated into one file.
    LARGE_FILE_ROOT_TAG = "root"

    # Column indexes within one progress-CSV row.
    INDEX_DATA_FILE_NAME = 0
    INDEX_DATA_SENSORS_STATUS = 1
    INDEX_DATA_STATION_STATUS = 2
    INDEX_DATA_FILE_COUNT = 3
    INDEX_DATA_FOLDER_DATA = 4

    DATA_FILE_START_INDEX = 0
    DATA_FILE_EXTENSION = ".dly"
    # Status values recorded in the progress CSV.
    DATA_FILE_MISSING = "missing"
    DATA_FILE_INITIAL = "initialized"
    DATA_FILE_DOWNLOADED = "downloaded"
    DATA_FILE_GENERATED = "generated"
    SEPERATOR = ","  # (sic) field separator used in the progress CSV

    # Record type yielded by iteration; presumably consumed by next() -- the
    # iterator body is defined further down this file.
    type = "sensor"
    data_reset = False
+
+ def __init__(self, base_path, progress_file_name="/tmp/_weather_data.csv"):
+ self.base_path = base_path
+
+ self.progress_file_name = progress_file_name
+
+ self.current = self.DATA_FILE_START_INDEX
+ self.progress_data = []
+
+ def get_file_list_iterator(self):
+ """Return the list of files one at a time."""
+ return glob.iglob(self.base_path + "/*" + self.DATA_FILE_EXTENSION)
+
+ # Save Functions
    def build_progress_file(self, options, convert):
        """Create or refresh the progress CSV.

        options: collection tested for 'reset', 'append', 'recalculate'.
        convert: converter providing get_base_folder(station_id).
        """
        if not os.path.isfile(self.progress_file_name) or 'reset' in options:
            # Build a new file.
            file = open(self.progress_file_name, 'w')
            contents = self.get_default_progress_file_csv()
            file.write(contents)
            file.close()
        elif 'append' in options or 'recalculate' in options:
            self.open_progress_data()
            row_count = len(self.progress_data)
            for row in range(0, row_count):
                row_contents = self.progress_data[row].rsplit(self.SEPERATOR)
                file_name = row_contents[self.INDEX_DATA_FILE_NAME]
                # NOTE(review): file_name comes from progress_data itself, so
                # get_file_row(file_name) should never be < 0 here; the
                # 'append' branch looks like it was meant to scan the .dly
                # file list instead -- confirm intent.
                if self.get_file_row(file_name) < 0 and 'append' in options:
                    self.progress_data.append(self.get_progress_csv_row(file_name, self.DATA_FILE_INITIAL, self.DATA_FILE_INITIAL))
                elif 'recalculate' in options:
                    # The folder is hard coded
                    station_id = os.path.basename(file_name).split('.')[0]
                    folder_name = convert.get_base_folder(station_id)
                    if os.path.exists(folder_name):
                        # Recount files/bytes on disk, keeping the statuses.
                        row_contents = self.progress_data[row].rsplit(self.SEPERATOR)
                        sensor_status = row_contents[self.INDEX_DATA_SENSORS_STATUS]
                        station_status = row_contents[self.INDEX_DATA_STATION_STATUS]
                        file_count = self.get_file_count(folder_name)
                        data_size = self.get_folder_size(folder_name)
                        self.progress_data[row] = self.get_progress_csv_row(file_name, sensor_status, station_status, file_count, data_size)
                    else:
                        # Nothing on disk: back to the initialized state.
                        self.progress_data[row] = self.get_progress_csv_row(file_name, self.DATA_FILE_INITIAL, self.DATA_FILE_INITIAL)
        # Save file
        self.close_progress_data(True)
        self.reset()
+
    def copy_to_n_partitions(self, save_path, partitions, base_paths, reset):
        """Once the initial data has been generated, the data can be copied into a set number of partitions. """
        # NOTE(review): get_disk_partition_paths, prepare_path and shutil are
        # not defined/imported in this module -- presumably provided by the
        # wildcard import of weather_convert_to_xml's dependencies; confirm
        # shutil is actually importable at runtime.
        if (len(base_paths) == 0):
            return

        # Initialize the partition paths.
        partition_paths = get_disk_partition_paths(0, partitions, base_paths)
        for path in partition_paths:
            # Make sure the xml folder is available.
            prepare_path(path, reset)

        import fnmatch
        import os

        # copy stations and sensors into each partition
        current_sensor_partition = 0
        current_station_partition = 0
        self.open_progress_data()
        row_count = len(self.progress_data)
        for row in range(0, row_count):
            row_contents = self.progress_data[row].rsplit(self.SEPERATOR)
            file_name = row_contents[self.INDEX_DATA_FILE_NAME]
            station_id = os.path.basename(file_name).split('.')[0]

            # Copy sensor files
            type = "sensors"
            file_path = build_base_save_folder(save_path, station_id, type) + station_id
            for root, dirnames, filenames in os.walk(file_path):
                for filename in fnmatch.filter(filenames, '*.xml'):
                    xml_path = os.path.join(root, filename)
                    new_file_base = build_base_save_folder(partition_paths[current_sensor_partition], station_id, type) + station_id
                    if not os.path.isdir(new_file_base):
                        os.makedirs(new_file_base)
                    shutil.copyfile(xml_path, new_file_base + "/" + filename)
            # Round-robin: all of one station's sensor files land in the same
            # partition; the partition advances per station, not per file.
            current_sensor_partition += 1
            if current_sensor_partition >= len(partition_paths):
                current_sensor_partition = 0

            # Copy station files
            type = "stations"
            file_path = build_base_save_folder(save_path, station_id, type) + station_id + ".xml"
            new_file_base = build_base_save_folder(partition_paths[current_station_partition], station_id, type)
            new_file_path = new_file_base + station_id + ".xml"
            if os.path.isfile(file_path):
                if not os.path.isdir(new_file_base):
                    os.makedirs(new_file_base)
                shutil.copyfile(file_path, new_file_path)
            current_station_partition += 1
            if current_station_partition >= len(partition_paths):
                current_station_partition = 0
+
    def build_to_n_partition_files(self, save_path, partitions, base_paths, reset):
        """Once the initial data has been generated, the data can be divided into partitions
        and stored in single files.
        """
        if (len(base_paths) == 0):
            return

        XML_START = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>"

        # NOTE(review): get_disk_partition_paths, prepare_path and
        # file_get_contents are not defined/imported in this module --
        # presumably supplied via wildcard imports; confirm.
        partition_paths = get_disk_partition_paths(0, partitions, base_paths)

        import fnmatch
        import os

        for path in partition_paths:
            prepare_path(path, reset)

        # Initialize the partition paths.
        types = ["sensors", "stations"]
        for type in types:
            partition_files = []
            for path in partition_paths:
                # Make sure the xml folder is available.
                prepare_path(path + type + "/", False)
                partition_files.append(open(path + type + "/partition.xml", 'w'))
                partition_files[-1].write(XML_START + "<" + self.LARGE_FILE_ROOT_TAG + ">\n")

            # copy into each partition
            current_partition = 0
            self.open_progress_data()
            row_count = len(self.progress_data)
            for row in range(0, row_count):
                row_contents = self.progress_data[row].rsplit(self.SEPERATOR)
                file_name = row_contents[self.INDEX_DATA_FILE_NAME]
                station_id = os.path.basename(file_name).split('.')[0]

                # Copy files
                if type == "sensors":
                    file_path = build_base_save_folder(save_path, station_id, type) + station_id
                    for root, dirnames, filenames in os.walk(file_path):
                        for filename in fnmatch.filter(filenames, '*.xml'):
                            xml_path = os.path.join(root, filename)
                            # Strip the per-file XML declaration so the
                            # concatenated output has only one.
                            xml_data = file_get_contents(xml_path).replace(XML_START, "") + "\n"
                            partition_files[current_partition].write(xml_data)
                            current_partition += 1
                            if current_partition >= len(partition_files):
                                current_partition = 0
                elif type == "stations":
                    file_path = build_base_save_folder(save_path, station_id, type) + station_id + ".xml"
                    # NOTE(review): `root` here is a stale leftover from the
                    # sensors pass's os.walk (and unbound if that never ran);
                    # os.path.join(root, file_path) only works by accident
                    # when file_path is absolute -- confirm and fix upstream.
                    xml_path = os.path.join(root, file_path)
                    xml_data = file_get_contents(xml_path).replace(XML_START, "") + "\n"
                    partition_files[current_partition].write(xml_data)
                    current_partition += 1
                    if current_partition >= len(partition_paths):
                        current_partition = 0

            for row in range(0, len(partition_paths)):
                partition_files[row].write("</" + self.LARGE_FILE_ROOT_TAG + ">\n")
                partition_files[row].close()
+
+ def get_file_row(self, file_name):
+ for i in range(0, len(self.progress_data)):
+ if self.progress_data[i].startswith(file_name):
+ return i
+ return -1
+
+ def get_default_progress_file_csv(self):
+ contents = ""
+ for path in self.get_file_list_iterator():
+ file_name = os.path.basename(path)
+ contents += self.get_progress_csv_row(file_name, self.DATA_FILE_INITIAL, self.DATA_FILE_INITIAL)
+ return contents
+
+ def print_progress_file_stats(self, convert):
+ sensor_count_missing = 0
+ sensor_count = 0
+ file_count = 0
+ data_size = 0
+
+ sensor_count_actual = 0
+ file_count_actual = 0
+ data_size_actual = 0
+
+ station_count_missing = 0
+ station_count_generated = 0
+ station_count_downloaded = 0
+
+ self.open_progress_data()
+ row_count = len(self.progress_data)
+ for row in range(0, row_count):
+ row_contents = self.progress_data[row].rsplit(self.SEPERATOR)
+ if int(row_contents[self.INDEX_DATA_FILE_COUNT]) != -1 and int(row_contents[self.INDEX_DATA_FOLDER_DATA]) != -1:
+ sensor_count += 1
+ file_count += int(row_contents[self.INDEX_DATA_FILE_COUNT])
+ data_size += int(row_contents[self.INDEX_DATA_FOLDER_DATA])
+ else:
+ sensor_count_missing += 1
+
+ if row_contents[self.INDEX_DATA_STATION_STATUS] == "generated":
+ station_count_generated += 1
+ if row_contents[self.INDEX_DATA_STATION_STATUS] == "downloaded":
+ station_count_downloaded += 1
+ else:
+ station_count_missing += 1
+
+ file_name = row_contents[self.INDEX_DATA_FILE_NAME]
+ station_id = os.path.basename(file_name).split('.')[0]
+ folder_name = convert.get_base_folder(station_id)
+ if os.path.exists(folder_name):
+ sensor_count_actual += 1
+ file_count_actual += self.get_file_count(folder_name)
+ data_size_actual += self.get_folder_size(folder_name)
+
+
+ print "Progress File:\t" + self.progress_file_name + "\n"
+
+ print "CSV DETAILS OF PROCESSED SENSORS"
+ print "Number of stations:\t" + "{:,}".format(sensor_count)
+ print "Number of files:\t" + "{:,}".format(file_count)
+ print "Data size:\t\t" + "{:,}".format(data_size) + " Bytes\n"
+
+ print "CSV DETAILS OF unPROCESSED SENSORS"
+ print "Number of stations:\t" + "{:,}".format(sensor_count_missing) + "\n"
+
+ print "CSV DETAILS OF PROCESSED STATIONS"
+ print "Generated:\t\t" + "{:,}".format(station_count_generated)
+ print "Downloaded:\t\t" + "{:,}".format(station_count_downloaded)
+ print "Missing:\t\t" + "{:,}".format(station_count_missing) + "\n"
+
+ print "FOLDER DETAILS"
+ print "Number of stations:\t" + "{:,}".format(sensor_count_actual)
+ print "Number of files:\t" + "{:,}".format(file_count_actual)
+ print "Data size:\t\t" + "{:,}".format(data_size_actual) + " Bytes\n"
+
+
+ def get_progress_csv_row(self, file_name, sensors_status, station_status, file_count=-1, data_size=-1):
+ return file_name + self.SEPERATOR + sensors_status + self.SEPERATOR + station_status + self.SEPERATOR + str(file_count) + self.SEPERATOR + str(data_size) + "\n"
+
def update_file_sensor_status(self, file_name, sensors_status, file_count=-1, data_size=-1):
    """Rewrite the progress row for *file_name* with new sensor fields, keeping its station status."""
    for idx, row in enumerate(self.progress_data):
        if row.startswith(file_name):
            # Carry the existing station status over unchanged.
            station_status = row.rsplit(self.SEPERATOR)[self.INDEX_DATA_STATION_STATUS]
            self.progress_data[idx] = self.get_progress_csv_row(file_name, sensors_status, station_status, file_count, data_size)
            break

    # Persist the modification immediately.
    self.close_progress_data(True)
+
def update_file_station_status(self, file_name, station_status):
    """Rewrite the progress row for *file_name* with a new station status, keeping the other fields."""
    for idx, row in enumerate(self.progress_data):
        if row.startswith(file_name):
            columns = row.rsplit(self.SEPERATOR)
            sensors_status = columns[self.INDEX_DATA_SENSORS_STATUS]
            file_count = int(columns[self.INDEX_DATA_FILE_COUNT])
            data_size = int(columns[self.INDEX_DATA_FOLDER_DATA])
            self.progress_data[idx] = self.get_progress_csv_row(file_name, sensors_status, station_status, file_count, data_size)
            break

    # Persist the modification immediately.
    self.close_progress_data(True)
+
def get_file_count(self, folder_name):
    """Count every file under *folder_name*, recursively."""
    return sum(len(file_names) for _path, _dirs, file_names in os.walk(folder_name))
+
def get_folder_size(self, folder_name):
    """Total size in bytes of every file under *folder_name*, recursively."""
    total_size = 0
    for dir_path, _dirs, file_names in os.walk(folder_name):
        total_size += sum(os.path.getsize(os.path.join(dir_path, name)) for name in file_names)
    return total_size
+
def get_station_status(self, return_value):
    """Map a station download return code to its progress-file status label.

    2 -> downloaded, 1 -> generated, anything else -> missing.
    """
    status_by_code = {
        2: self.DATA_FILE_DOWNLOADED,
        1: self.DATA_FILE_GENERATED,
    }
    return status_by_code.get(return_value, self.DATA_FILE_MISSING)
+
+
def open_progress_data(self):
    """Load every line of the progress file into the in-memory cache."""
    with open(self.progress_file_name, 'r') as progress_file:
        self.progress_data = progress_file.readlines()
+
def close_progress_data(self, force=False):
    """Write the cached progress rows back to disk; an empty cache is skipped unless forced."""
    if not self.progress_data and not force:
        return
    with open(self.progress_file_name, 'w') as progress_file:
        progress_file.writelines(self.progress_data)
+
+
def reset(self):
    """Flush pending rows, rewind the iterator to the first data row, and reload from disk."""
    # Persist anything still cached before re-reading the file.
    self.close_progress_data()
    self.current = self.DATA_FILE_START_INDEX
    self.open_progress_data()
+
def set_type(self, type):
    """Select which progress column drives iteration (next() checks "sensor" / "station")."""
    self.type = type
+
def set_data_reset(self, data_reset):
    """When true, iteration yields every row regardless of its processed status."""
    self.data_reset = data_reset
+
+
+ # Iterator Functions
def __iter__(self):
    """The object is its own iterator; iteration state lives in self.current."""
    return self
+
def next(self):
    """Return the file name of the next row still needing work.

    For type "sensor" a row qualifies when its sensors status is not yet
    "generated"; for type "station" when its station status is not yet
    "downloaded". With data_reset set, every row of the matching type
    qualifies. Raises StopIteration when no further row qualifies.
    """
    while self.current < len(self.progress_data):
        columns = self.progress_data[self.current].rsplit(self.SEPERATOR)
        self.current += 1
        matched = False
        if self.type == "sensor":
            matched = self.data_reset or columns[self.INDEX_DATA_SENSORS_STATUS].strip() != self.DATA_FILE_GENERATED
        elif self.type == "station":
            matched = self.data_reset or columns[self.INDEX_DATA_STATION_STATUS].strip() != self.DATA_FILE_DOWNLOADED
        if matched:
            return columns[self.INDEX_DATA_FILE_NAME]
    raise StopIteration
+
+
# Index of each field within a partition scheme tuple
# (see get_disk_partition_scheme below).
PARTITION_INDEX_NODE = 0     # node identifier
PARTITION_INDEX_DISK = 1     # disk number on the node
PARTITION_INDEX_VIRTUAL = 2  # virtual partition count
PARTITION_INDEX = 3          # partition index
PARTITION_INDEX_PATH = 4     # filesystem path of the partition
PARTITION_HEADER = ("Node", "Disk", "Virtual", "Index", "Path")
+
def get_disk_partition_paths(node_id, partitions, base_paths, key="partitions"):
    """Return only the path column from the full disk partition scheme."""
    return [scheme[PARTITION_INDEX_PATH]
            for scheme in get_disk_partition_scheme(node_id, partitions, base_paths, key)]
+
def get_disk_partition_scheme(node_id, virtual_disk_partitions, base_paths, key="partitions"):
    """Build one (node, disk, virtual, index, path) tuple per partition/disk combination."""
    partition_scheme = []
    for index in range(virtual_disk_partitions):
        for disk, base_path in enumerate(base_paths):
            new_partition_path = base_path + key + "/" + get_partition_folder(disk, virtual_disk_partitions, index) + "/"
            partition_scheme.append((node_id, disk, virtual_disk_partitions, index, new_partition_path))
    return partition_scheme
+
def get_partition_folder(disks, partitions, index):
    """Folder name encoding the disk number, partition count and partition index."""
    return "d{0}_p{1}_i{2}".format(disks, partitions, index)
+
def prepare_path(path, reset):
    """Ensure the directory exists; when *reset* is true, recreate it empty."""
    if reset and os.path.isdir(path):
        shutil.rmtree(path)

    if not os.path.isdir(path):
        os.makedirs(path)
+
def file_get_contents(filename):
    """Return the entire contents of *filename* as one string."""
    with open(filename, 'r') as handle:
        return handle.read()
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
new file mode 100644
index 0000000..fb59b50
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import sys
+import os.path
+import shutil
+import tarfile
+import urllib
+import zipfile
+
+# Custom modules.
+from weather_config_ghcnd import *
+from weather_config_mshr import *
+
class WeatherDownloadFiles:
    """Downloads and unpacks the GHCN-Daily and MSHR source archives into one folder."""

    def __init__(self, save_path):
        """Remember the download folder, creating it (and parents) when absent."""
        self.save_path = save_path

        if not os.path.isdir(save_path):
            os.makedirs(save_path)


    def download_ghcnd_files(self, reset=False):
        """Download the complete GHCN-Daily file list."""
        for file_name in FILE_NAMES:
            url = BASE_DOWNLOAD_URL + file_name
            self.download_file(url, reset)

    def download_mshr_files(self, reset=False):
        """Download every configured MSHR URL."""
        for url in MSHR_URLS:
            self.download_file(url, reset)

    def download_file(self, url, reset=False):
        """Download the file into the save folder, unless it already exists (or reset)."""
        file_name = self.save_path + "/" + url.split('/')[-1]

        if not os.path.isfile(file_name) or reset:
            print("Downloading: " + url)
            urllib.urlretrieve(url, file_name, report_download_status)
            print("")

    def unzip_ghcnd_package(self, package, reset=False):
        """Extract <package>.tar.gz next to itself, unless the target folder exists (or reset)."""
        file_name = self.save_path + "/" + package + ".tar.gz"
        unzipped_path = self.save_path + "/" + package

        if os.path.isdir(unzipped_path) and reset:
            shutil.rmtree(unzipped_path)

        if not os.path.isdir(unzipped_path):
            print("Unzipping: " + file_name)
            # Close the archive deterministically; the original leaked the handle.
            tar_file = tarfile.open(file_name, 'r:gz')
            try:
                tar_file.extractall(unzipped_path)
            finally:
                tar_file.close()

    def unzip_mshr_files(self, reset=False):
        """Extract every downloaded MSHR zip archive into the save folder."""
        for url in MSHR_URLS:
            if url.endswith('.zip'):
                file_name = self.save_path + "/" + url.split('/')[-1]
                print("Unzipping: " + file_name)
                with zipfile.ZipFile(file_name, 'r') as myzip:
                    myzip.extractall(self.save_path)
+
def report_download_status(count, block, size):
    """urlretrieve report hook: redraw a fixed-width progress bar in place on stdout."""
    line_size = 50
    # Backspace over the previously drawn bar before redrawing it.
    sys.stdout.write("\b" * line_size)
    fraction = float(count) * block / size
    sys.stdout.write(get_report_line(fraction, line_size))
+
def get_report_line(percentage, line_size):
    """Render *percentage* as a bar of '=' (done) and '-' (remaining), *line_size* chars wide."""
    marks = ["=" if float(pos) / line_size < percentage else "-" for pos in range(line_size)]
    return "".join(marks)
+
def download_file_save_as(url, new_file_name, reset=False):
    """Fetch *url* into *new_file_name*, unless that file already exists (or reset)."""
    if os.path.isfile(new_file_name) and not reset:
        return
    print("Downloading: " + url)
    urllib.urlretrieve(url, new_file_name, report_download_status)
    print("")
+
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/util/README.md
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/README.md b/vxquery-benchmark/src/main/resources/util/README.md
new file mode 100644
index 0000000..8e2a204
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/util/README.md
@@ -0,0 +1,28 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+Utilities for Benchmark Operations
+=====================
+
+# Introduction
+
+Helpful scripts and configuration documents for working with the benchmarks.
+
+## Saxon Collection
+
+To test the data with other XQuery processors, the saxon script helps with
+creating a collection.xml file.
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/util/build_saxon_collection_xml.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/build_saxon_collection_xml.py b/vxquery-benchmark/src/main/resources/util/build_saxon_collection_xml.py
new file mode 100644
index 0000000..02f39ee
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/util/build_saxon_collection_xml.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import getopt, glob, os, sys
+
def main(argv):
    """Build a Saxon collection.xml listing every XML file under a folder.

    Accepts -f/--folder <path>; writes collection.xml into the current directory.
    """
    xml_folder = ""

    # Parse the command line options.
    try:
        opts, args = getopt.getopt(argv, "f:h", ["folder="])
    except getopt.GetoptError:
        print('The file options for build_saxon_collection_xml.py were not correctly specified.')
        print('To see a full list of options try:')
        print('  $ python build_saxon_collection_xml.py -h')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('Options:')
            print('    -f        The base folder to create collection XML file.')
            sys.exit()
        elif opt in ('-f', "--folder"):
            # Check that the folder exists.
            if os.path.exists(arg):
                xml_folder = arg
            else:
                print('Error: Argument must be a folder name for --folder (-f).')
                sys.exit()

    # Required fields to run the script.
    if xml_folder == "" or not os.path.exists(xml_folder):
        print('Error: The folder path option must be supplied: --folder (-f).')
        sys.exit()

    # Find all XML files, searching up to four directory levels deep.
    collection_xml = "<collection>"
    for depth in range(1, 5):
        search_pattern = xml_folder + ('/*' * depth) + '.xml'
        for file_path in glob.iglob(search_pattern):
            # href is the path relative to the base folder.
            collection_xml += '<doc href="' + file_path.replace(xml_folder, '') + '"/>'
    collection_xml += "</collection>"

    # Write the collection file; 'with' guarantees the handle is closed
    # (the original used a bare open() that shadowed the builtin and leaked).
    with open('collection.xml', 'w') as collection_file:
        collection_file.write(collection_xml)

if __name__ == "__main__":
    main(sys.argv[1:])
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/util/diff_xml_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/diff_xml_files.py b/vxquery-benchmark/src/main/resources/util/diff_xml_files.py
new file mode 100644
index 0000000..8ad2e30
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/util/diff_xml_files.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import getopt, glob, os, sys
+
def main(argv):
    """Compare the line sets of two files and report what each is missing.

    Accepts --f1 <file> and --f2 <file>; prints the symmetric difference and a summary.
    """
    f1 = ""
    f2 = ""

    # Parse the command line options.
    try:
        opts, args = getopt.getopt(argv, "h", ["f1=", "f2="])
    except getopt.GetoptError:
        # The original message named build_saxon_collection_xml.py (copy-paste error).
        print('The file options for diff_xml_files.py were not correctly specified.')
        print('To see a full list of options try:')
        print('  $ python diff_xml_files.py -h')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('Options:')
            print('    --f1      The first file to compare.')
            print('    --f2      The second file to compare.')
            sys.exit()
        # The original used `opt in ('--f1')`, which is a substring test on a plain
        # string (not tuple membership); use equality instead.
        elif opt == '--f1':
            if os.path.exists(arg):
                f1 = arg
            else:
                print('Error: Argument must be a file name for --f1.')
                sys.exit()
        elif opt == '--f2':
            if os.path.exists(arg):
                f2 = arg
            else:
                print('Error: Argument must be a file name for --f2.')
                sys.exit()

    # Required fields to run the script.
    if f1 == "" or not os.path.exists(f1):
        print('Error: The file path option must be supplied: --f1.')
        sys.exit()
    if f2 == "" or not os.path.exists(f2):
        print('Error: The file path option must be supplied: --f2.')
        sys.exit()

    with open(f1) as f:
        content_f1 = f.readlines()
        set_f1 = set(content_f1)

    with open(f2) as f:
        content_f2 = f.readlines()
        set_f2 = set(content_f2)

    missing_in_f1 = set_f2.difference(set_f1)
    missing_in_f2 = set_f1.difference(set_f2)
    found_in_both = set_f1.intersection(set_f2)

    print("")
    print("Missing files in " + f1)
    for f1_name in missing_in_f1:
        print("    + " + f1_name.strip())

    print("")
    print("Missing files in " + f2)
    for f2_name in missing_in_f2:
        print("    + " + f2_name.strip())

    offset = 40
    print("")
    print("XML Summary")
    print((" - Found in both:").ljust(offset) + str(len(found_in_both)))
    # "diff set vs list" is the number of duplicate lines within each file.
    print((" - " + f1 + " diff set vs list:").ljust(offset) + str(len(content_f1) - len(set_f1)))
    print((" - " + f2 + " diff set vs list:").ljust(offset) + str(len(content_f2) - len(set_f2)))
    print((" - " + f1 + " missing:").ljust(offset) + str(len(missing_in_f1)))
    print((" - " + f2 + " missing:").ljust(offset) + str(len(missing_in_f2)))


if __name__ == "__main__":
    main(sys.argv[1:])
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/util/find_averages_in_logs.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/find_averages_in_logs.py b/vxquery-benchmark/src/main/resources/util/find_averages_in_logs.py
new file mode 100644
index 0000000..1cd7939
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/util/find_averages_in_logs.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import fnmatch
+import getopt
+import glob
+import os
+import sys
+import csv
+
# Marker line in the logs that carries the timing we harvest.
SEARCH_STRING = 'Average execution time:'

def find_files(directory, pattern):
    """Yield (folder, file name) pairs for every file under *directory* matching *pattern*."""
    for folder, _subfolders, file_names in os.walk(directory):
        for file_name in file_names:
            if fnmatch.fnmatch(file_name, pattern):
                yield (folder, file_name)
+
+
def main(argv):
    """Harvest 'Average execution time' entries from *.log files into a CSV.

    Options: -f/--folder log folder, -s/--save_file output CSV,
    -t/--data_type (parsed but currently unused).
    """
    log_folder = ""
    save_file = ""
    data_type = ""

    # Parse the command line options.
    try:
        opts, args = getopt.getopt(argv, "f:hs:t:", ["folder=", "save_file=", "data_type="])
    except getopt.GetoptError:
        # The original message named list_xml_files.py (copy-paste error).
        print('The file options for find_averages_in_logs.py were not correctly specified.')
        print('To see a full list of options try:')
        print('  $ python find_averages_in_logs.py -h')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('Options:')
            print('    -f        The base folder to search for log files.')
            print('    -s        The save file.')
            sys.exit()
        elif opt in ('-f', "--folder"):
            # Check that the folder exists.
            if os.path.exists(arg):
                log_folder = arg
            else:
                print('Error: Argument must be a folder name for --folder (-f).')
                sys.exit()
        elif opt in ('-s', "--save_file"):
            save_file = arg
        elif opt in ('-t', "--data_type"):
            data_type = arg

    # Required fields to run the script.
    if log_folder == "" or not os.path.exists(log_folder):
        print('Error: The folder path option must be supplied: --folder (-f).')
        sys.exit()
    if save_file == "":
        # The original message said "folder path" here by mistake.
        print('Error: The save file option must be supplied: --save_file (-s).')
        sys.exit()

    with open(save_file, 'w') as outfile:
        csvfile = csv.writer(outfile)
        for path, filename in find_files(log_folder, '*.log'):
            with open(path + "/" + filename) as infile:
                # Folder path relative to the log root, used as leading CSV columns.
                folders = path.replace(log_folder, "")
                for line in infile:
                    if line.startswith(SEARCH_STRING):
                        time_split = line.split(" ")
                        name_split = filename.split(".")
                        folder_split = folders.split("/")

                        # Row layout: folder parts, query name, timing value, run label.
                        row = folder_split
                        row.append(name_split[0])
                        row.append(time_split[3])
                        row.append(name_split[2])
                        csvfile.writerow(row)


if __name__ == "__main__":
    main(sys.argv[1:])
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/util/list_xml_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/list_xml_files.py b/vxquery-benchmark/src/main/resources/util/list_xml_files.py
new file mode 100644
index 0000000..750a95e
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/util/list_xml_files.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import fnmatch
+import getopt
+import glob
+import os
+import sys
+
def find_files(directory, pattern):
    """Yield (folder, file name) pairs for every file under *directory* matching *pattern*."""
    for folder, _subfolders, file_names in os.walk(directory):
        for file_name in file_names:
            if fnmatch.fnmatch(file_name, pattern):
                yield (folder, file_name)
+
+
def main(argv):
    """Write the base name of every XML file under a folder to list_xml.csv.

    Same as bash: find $FOLDER -type f -name "*.xml" -exec basename {} \; > list_xml.csv
    """
    xml_folder = ""

    # Parse the command line options.
    try:
        opts, args = getopt.getopt(argv, "f:h", ["folder="])
    except getopt.GetoptError:
        print('The file options for list_xml_files.py were not correctly specified.')
        print('To see a full list of options try:')
        print('  $ python list_xml_files.py -h')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('Options:')
            print('    -f        The base folder to build XML file list.')
            sys.exit()
        elif opt in ('-f', "--folder"):
            # Check that the folder exists.
            if os.path.exists(arg):
                xml_folder = arg
            else:
                print('Error: Argument must be a folder name for --folder (-f).')
                sys.exit()

    # Required fields to run the script.
    if xml_folder == "" or not os.path.exists(xml_folder):
        print('Error: The folder path option must be supplied: --folder (-f).')
        sys.exit()

    # Collect one base name per line (join avoids quadratic string +=).
    lines = [filename + "\n" for _path, filename in find_files(xml_folder, '*.xml')]

    # Write the list; 'with' guarantees the handle is closed
    # (the original used a bare open() that shadowed the builtin and leaked).
    with open('list_xml.csv', 'w') as list_file:
        list_file.write("".join(lines))

if __name__ == "__main__":
    main(sys.argv[1:])
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/util/merge_xml_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/merge_xml_files.py b/vxquery-benchmark/src/main/resources/util/merge_xml_files.py
new file mode 100644
index 0000000..2df026b
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/util/merge_xml_files.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import fnmatch
+import getopt
+import glob
+import os
+import sys
+
# Root wrapper lines shared by the per-station documents and the merged output.
XML_PREFIX = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?><root>' + "\n"
XML_SUFFIX = '</root>' + "\n"

def find_files(directory, pattern):
    """Yield (folder, file name) pairs under *directory* matching *pattern* (symlinks followed)."""
    for folder, _subfolders, file_names in os.walk(directory, followlinks=True):
        for file_name in file_names:
            if fnmatch.fnmatch(file_name, pattern):
                yield (folder, file_name)
+
+
def main(argv):
    """Merge matching XML documents under a folder into one rooted output file.

    Options: -f/--folder source folder, -s/--save_file merged output file,
    -t/--data_type substring a file's path must contain to be included.
    """
    xml_folder = ""
    save_file = ""
    data_type = ""

    # Parse the command line options.
    try:
        opts, args = getopt.getopt(argv, "f:hs:t:", ["folder=", "save_file=", "data_type="])
    except getopt.GetoptError:
        # The original message named list_xml_files.py (copy-paste error).
        print('The file options for merge_xml_files.py were not correctly specified.')
        print('To see a full list of options try:')
        print('  $ python merge_xml_files.py -f /path/to/folder -s new.xml -t sensors')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('Options:')
            print('    -f        The base folder to build XML file list.')
            print('    -s        The save file.')
            sys.exit()
        elif opt in ('-f', "--folder"):
            # Check that the folder exists.
            if os.path.exists(arg):
                xml_folder = arg
            else:
                print('Error: Argument must be a folder name for --folder (-f).')
                sys.exit()
        elif opt in ('-s', "--save_file"):
            save_file = arg
        elif opt in ('-t', "--data_type"):
            data_type = arg

    # Required fields to run the script.
    if xml_folder == "" or not os.path.exists(xml_folder):
        print('Error: The folder path option must be supplied: --folder (-f).')
        sys.exit()
    if save_file == "":
        print('Error: The save file option must be supplied: --save_file (-s).')
        sys.exit()

    with open(save_file, 'w') as outfile:
        outfile.write(XML_PREFIX)
        for path, filename in find_files(xml_folder, '*.xml'):
            # Only merge documents found in a path matching the requested data type.
            if data_type in path:
                with open(path + "/" + filename) as infile:
                    for line in infile:
                        # Skip each document's own root wrapper lines.
                        if line != XML_PREFIX and line != XML_SUFFIX:
                            outfile.write(line)
        outfile.write(XML_SUFFIX)

if __name__ == "__main__":
    main(sys.argv[1:])
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/util/vxquery_functions.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/vxquery_functions.xq b/vxquery-benchmark/src/main/resources/util/vxquery_functions.xq
new file mode 100644
index 0000000..d0621eb
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/util/vxquery_functions.xq
@@ -0,0 +1,27 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+
(: XQuery Function List :)
(: VXQuery function list in csv with arguments and return types :)
(: Reads the builtin-functions.xml catalog and emits one
   "name,space-separated-param-types,return-type" record per function,
   with records joined by '|'. :)
let $list := "../../../../../vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml"
let $r :=
    for $f in fn:doc($list)/functions/function
    let $pl :=
        for $p in $f/param
        return $p/@type
    return fn:string-join(($f/@name, fn:string-join($pl, ' '), $f/return/@type), ',')
return fn:string-join($r , '|')
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/util/vxquery_operators.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/vxquery_operators.xq b/vxquery-benchmark/src/main/resources/util/vxquery_operators.xq
new file mode 100644
index 0000000..f485807
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/util/vxquery_operators.xq
@@ -0,0 +1,27 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License. :)
+
(: XQuery Function List :)
(: VXQuery function list in csv with arguments and return types :)
(: Reads the builtin-operators.xml catalog and emits one
   "name,space-separated-param-types,return-type" record per operator,
   with records joined by '|'. :)
let $list := "../../../../../vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-operators.xml"
let $r :=
    for $f in fn:doc($list)/operators/operator
    let $pl :=
        for $p in $f/param
        return $p/@type
    return fn:string-join(($f/@name, fn:string-join($pl, ' '), $f/return/@type), ',')
return fn:string-join($r , '|')
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java
----------------------------------------------------------------------
diff --git a/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java b/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java
index 8451bd5..c0ca612 100644
--- a/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java
+++ b/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java
@@ -375,6 +375,7 @@ public class VXQuery {
ccConfig.clientNetPort = 39000;
ccConfig.clusterNetIpAddress = "127.0.0.1";
ccConfig.clusterNetPort = 39001;
+ ccConfig.httpPort = 39002;
ccConfig.profileDumpPeriod = 10000;
cc = new ClusterControllerService(ccConfig);
cc.start();
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/pom.xml
----------------------------------------------------------------------
diff --git a/vxquery-server/pom.xml b/vxquery-server/pom.xml
index ef8f348..6c99712 100644
--- a/vxquery-server/pom.xml
+++ b/vxquery-server/pom.xml
@@ -47,6 +47,10 @@
<configuration>
<programs>
<program>
+ <mainClass>org.apache.vxquery.cli.VXQueryClusterShutdown</mainClass>
+ <name>vxqueryshutdown</name>
+ </program>
+ <program>
<mainClass>edu.uci.ics.hyracks.control.cc.CCDriver</mainClass>
<name>vxquerycc</name>
</program>
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/cluster_actions.py
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/scripts/cluster_actions.py b/vxquery-server/src/main/resources/scripts/cluster_actions.py
index a7cda17..deeee33 100644
--- a/vxquery-server/src/main/resources/scripts/cluster_actions.py
+++ b/vxquery-server/src/main/resources/scripts/cluster_actions.py
@@ -62,6 +62,10 @@ class ClusterActions:
time.sleep(5)
self.start_all_ncs()
+ def stop_cluster(self):
+ machine = self.ci.get_master_node_machine()
+ self.stop_cc_and_all_ncs(machine)
+
def stop(self):
self.stop_all_ncs()
time.sleep(2)
@@ -109,16 +113,22 @@ class ClusterActions:
def start_cc(self, machine):
print "Start Cluster Controller."
- print " " + machine.get_id() + " " + machine.get_ip() + ":" + machine.get_port()
- command = "./vxquery-server/target/appassembler/bin/startcc.sh " + machine.get_ip() + " \"" + machine.get_port() + "\" \"" + machine.get_java_opts() + "\""
+ print " " + machine.get_id() + " " + machine.get_client_ip() + ":" + machine.get_client_port()
+ command = "./vxquery-server/target/appassembler/bin/startcc.sh " + machine.get_client_ip() + " \"" + machine.get_client_port() + "\" \"" + machine.get_java_opts() + "\""
self.run_remote_command(machine.get_username(), machine.get_id(), command)
def start_nc(self, machine, cc):
print "Start Node Controller."
print " " + machine.get_id() + " " + machine.get_ip()
- command = "./vxquery-server/target/appassembler/bin/startnc.sh " + machine.get_id() + " " + machine.get_ip() + " " + cc.get_ip() + " \"" + cc.get_port() + "\" \"" + machine.get_java_opts() + "\""
+ command = "./vxquery-server/target/appassembler/bin/startnc.sh " + machine.get_id() + " " + machine.get_ip() + " " + cc.get_client_ip() + " \"" + cc.get_client_port() + "\" \"" + machine.get_java_opts() + "\""
self.run_remote_command(machine.get_username(), machine.get_id(), command)
+ def stop_cc_and_all_ncs(self, machine):
+ print "Stop Cluster and Node Controllers."
+ print " " + machine.get_id() + " " + machine.get_client_ip() + ":" + machine.get_client_port()
+ command = "./vxquery-server/target/appassembler/bin/stopcluster.sh " + machine.get_client_ip() + " \"" + machine.get_client_port() + "\" \"" + machine.get_java_opts() + "\""
+ self.run_remote_command(machine.get_username(), machine.get_id(), command)
+
def stop_cc(self, machine):
print "Stop Cluster Controller."
print " " + machine.get_id() + " " + machine.get_ip()
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/cluster_cli.py
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/scripts/cluster_cli.py b/vxquery-server/src/main/resources/scripts/cluster_cli.py
index 089ad08..bd5efa6 100644
--- a/vxquery-server/src/main/resources/scripts/cluster_cli.py
+++ b/vxquery-server/src/main/resources/scripts/cluster_cli.py
@@ -39,10 +39,10 @@ def main(argv):
sys.exit()
elif opt in ('-a', "--action"):
# check if file exists.
- if arg in ('deploy', 'start', 'stop'):
+ if arg in ('deploy', 'start', 'stop', 'kill'):
action = arg
else:
- print 'Error: Argument must be a string ("deploy", "start", or "stop") for --action (-a).'
+ print 'Error: Argument must be a string ("deploy", "start", "stop", or "kill") for --action (-a).'
sys.exit()
elif opt in ('-c', "--cluster"):
# check if file exists.
@@ -72,6 +72,8 @@ def main(argv):
if action == 'start':
cluster.start()
elif action == 'stop':
+ cluster.stop_cluster()
+ elif action == 'kill':
cluster.stop()
elif action == 'deploy':
if deploy_path != "":
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/cluster_information.py
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/scripts/cluster_information.py b/vxquery-server/src/main/resources/scripts/cluster_information.py
index 677204b..94b231d 100644
--- a/vxquery-server/src/main/resources/scripts/cluster_information.py
+++ b/vxquery-server/src/main/resources/scripts/cluster_information.py
@@ -32,12 +32,13 @@ class ClusterInformation:
master_node = self.config.getElementsByTagName("master_node")[0]
id = NodeXmlReader.get_cluster_id(master_node)
ip = NodeXmlReader.get_cluster_ip(master_node)
- port = NodeXmlReader.get_cluster_port(master_node)
+ client_ip = NodeXmlReader.get_client_ip(master_node)
+ client_port = NodeXmlReader.get_client_port(master_node)
java_opts = NodeXmlReader.get_java_opts(master_node)
if java_opts is "":
java_opts = self.get_java_opts()
username = self.get_username()
- return Machine(id, ip, username, port, java_opts)
+ return Machine(id, ip, username, client_ip, client_port, java_opts)
def get_node_machine_list(self):
nodes = []
@@ -48,7 +49,7 @@ class ClusterInformation:
java_opts = NodeXmlReader.get_java_opts(node)
if java_opts is "":
java_opts = self.get_java_opts()
- nodes.append(Machine(id, ip, username, "", java_opts))
+ nodes.append(Machine(id, ip, username, "", "", java_opts))
return nodes
class NodeXmlReader(object):
@@ -64,8 +65,12 @@ class NodeXmlReader(object):
return get_tag_text(node, "cluster_ip")
@staticmethod
- def get_cluster_port(node):
- return get_tag_text(node, "cluster_port")
+ def get_client_ip(node):
+ return get_tag_text(node, "client_ip")
+
+ @staticmethod
+ def get_client_port(node):
+ return get_tag_text(node, "client_port")
@staticmethod
def get_java_opts(node):
@@ -90,11 +95,12 @@ class Machine:
log_path = ""
port = ""
- def __init__(self, id, ip, username, port="", java_opts=""):
+ def __init__(self, id, ip, username, client_ip="", client_port="", java_opts=""):
self.id = id
self.ip = ip
self.username = username
- self.port = port
+ self.client_ip = client_ip
+ self.client_port = client_port
self.java_opts = java_opts
def get_id(self):
@@ -106,8 +112,11 @@ class Machine:
def get_java_opts(self):
return self.java_opts
- def get_port(self):
- return self.port
+ def get_client_ip(self):
+ return self.client_ip
+
+ def get_client_port(self):
+ return self.client_port
def get_username(self):
return self.username
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/startcc.sh
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/scripts/startcc.sh b/vxquery-server/src/main/resources/scripts/startcc.sh
index 002055c..dd1e5be 100755
--- a/vxquery-server/src/main/resources/scripts/startcc.sh
+++ b/vxquery-server/src/main/resources/scripts/startcc.sh
@@ -23,7 +23,7 @@ CCHOST=$1
CCPORT=$2
J_OPTS=$3
-#Export JAVA_HOME
+# Export JAVA_HOME
export JAVA_HOME=${JAVA_HOME}
# java opts added parameters
@@ -43,8 +43,8 @@ mkdir -p ${CCLOGS_DIR}
CC_OPTIONS=" -client-net-ip-address ${CCHOST} -cluster-net-ip-address ${CCHOST} "
if [ ! -z "${CCPORT}" ]
then
- CC_OPTIONS=" ${CC_OPTIONS} -cluster-net-port ${CCPORT} "
+ CC_OPTIONS=" ${CC_OPTIONS} -client-net-port ${CCPORT} "
fi
-#Launch hyracks cc script without toplogy
+# Launch hyracks cc script without topology
${VXQUERY_HOME}/vxquery-server/target/appassembler/bin/vxquerycc ${CC_OPTIONS} &> ${CCLOGS_DIR}/cc_$(date +%Y%m%d%H%M).log &
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/startnc.sh
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/scripts/startnc.sh b/vxquery-server/src/main/resources/scripts/startnc.sh
index c2bda3c..260512e 100755
--- a/vxquery-server/src/main/resources/scripts/startnc.sh
+++ b/vxquery-server/src/main/resources/scripts/startnc.sh
@@ -25,7 +25,7 @@ CCHOST=$3
CCPORT=$4
J_OPTS=$5
-#Set JAVA_HOME
+# Set JAVA_HOME
export JAVA_HOME=$JAVA_HOME
# java opts added parameters
@@ -49,5 +49,5 @@ then
fi
-#Launch hyracks nc
+# Launch hyracks nc
${VXQUERY_HOME}/vxquery-server/target/appassembler/bin/vxquerync ${NC_OPTIONS} &> ${NCLOGS_DIR}/nc_$(date +%Y%m%d%H%M).log &
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/stopcc.sh
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/scripts/stopcc.sh b/vxquery-server/src/main/resources/scripts/stopcc.sh
index 3290ec6..f2b6883 100755
--- a/vxquery-server/src/main/resources/scripts/stopcc.sh
+++ b/vxquery-server/src/main/resources/scripts/stopcc.sh
@@ -21,8 +21,7 @@ hostname
USER=$1
-#Kill process
-#Kill process
+# Kill process
PID=`ps -ef|grep ${USER}|grep java|grep 'Dapp.name=vxquerycc'|awk '{print $2}'`
if [ "$PID" == "" ]; then
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/stopcluster.sh
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/scripts/stopcluster.sh b/vxquery-server/src/main/resources/scripts/stopcluster.sh
new file mode 100755
index 0000000..238da7f
--- /dev/null
+++ b/vxquery-server/src/main/resources/scripts/stopcluster.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+CCHOST=$1
+CCPORT=$2
+J_OPTS=$3
+
+# Export JAVA_HOME
+export JAVA_HOME=${JAVA_HOME}
+
+# java opts added parameters
+if [ ! -z "${J_OPTS}" ]
+then
+ JAVA_OPTS="${JAVA_OPTS} ${J_OPTS}"
+ export JAVA_OPTS
+fi
+
+VXQUERY_HOME=`pwd`
+CCLOGS_DIR=${VXQUERY_HOME}/logs
+
+# logs dir
+mkdir -p ${CCLOGS_DIR}
+
+# Set up the options for the cc.
+CC_OPTIONS=" -client-net-ip-address ${CCHOST} "
+if [ ! -z "${CCPORT}" ]
+then
+ CC_OPTIONS=" ${CC_OPTIONS} -client-net-port ${CCPORT} "
+fi
+
+# Launch the hyracks shutdown client against the cluster controller
+echo "${VXQUERY_HOME}/vxquery-server/target/appassembler/bin/vxqueryshutdown ${CC_OPTIONS} &> ${CCLOGS_DIR}/shutdown_$(date +%Y%m%d%H%M).log &"
+${VXQUERY_HOME}/vxquery-server/target/appassembler/bin/vxqueryshutdown ${CC_OPTIONS} &> ${CCLOGS_DIR}/shutdown_$(date +%Y%m%d%H%M).log &
http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-server/src/main/resources/scripts/stopnc.sh
----------------------------------------------------------------------
diff --git a/vxquery-server/src/main/resources/scripts/stopnc.sh b/vxquery-server/src/main/resources/scripts/stopnc.sh
index 56ffc66..8f29de5 100755
--- a/vxquery-server/src/main/resources/scripts/stopnc.sh
+++ b/vxquery-server/src/main/resources/scripts/stopnc.sh
@@ -21,7 +21,7 @@ hostname
USER=$1
-#Kill process
+# Kill process
PID=`ps -ef|grep ${USER}|grep java|grep 'Dapp.name=vxquerync'|awk '{print $2}'`
if [ "$PID" == "" ]; then