You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@vxquery.apache.org by pr...@apache.org on 2014/03/12 21:14:44 UTC

[1/5] git commit: Updated the generated station xml to mirror web service.

Repository: incubator-vxquery
Updated Branches:
  refs/heads/prestonc/benchmarks_staging 1e7880caf -> eaed030b6


Updated the generated station xml to mirror web service.


Project: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/commit/9456d4ad
Tree: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/tree/9456d4ad
Diff: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/diff/9456d4ad

Branch: refs/heads/prestonc/benchmarks_staging
Commit: 9456d4ad6074b0c1b60a2105395a84a12885fd80
Parents: 1e7880c
Author: Preston Carman <pr...@apache.org>
Authored: Wed Mar 5 21:56:13 2014 -0800
Committer: Preston Carman <pr...@apache.org>
Committed: Wed Mar 5 21:56:13 2014 -0800

----------------------------------------------------------------------
 .../noaa-ghcn-daily/scripts/weather_cli.py      |   6 +-
 .../scripts/weather_config_ghcnd.py             |  96 ++++++++++
 .../scripts/weather_config_mshr.py              |  78 ++++++++
 .../scripts/weather_convert_to_xml.py           | 183 +++++++++++++++++--
 .../scripts/weather_data_files.py               |   2 +-
 .../scripts/weather_dly_config.py               |  96 ----------
 .../scripts/weather_download_files.py           |  33 +++-
 7 files changed, 374 insertions(+), 120 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/9456d4ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
index 103c0d1..8d18607 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
@@ -127,10 +127,12 @@ def main(argv):
     if section in ("all", "download"):
         print 'Processing the download section.'
         download = WeatherDownloadFiles(download_path)
-        download.download_all_files(reset)
+        download.download_ghcnd_files(reset)
+        download.download_mshr_files(reset)
 
         # Unzip the required file.
-        download.unzip_package(config.get_package(), reset)
+        download.unzip_ghcnd_package(config.get_package(), reset)
+        download.unzip_mshr_files(reset)
 
 
     # Create some basic paths for save files and references.

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/9456d4ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
new file mode 100644
index 0000000..801e748
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Base URL used to get all the required files.
+BASE_DOWNLOAD_URL = 'http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/'
+
+# List of required files for a build.
+FILE_NAMES = []
+FILE_NAMES.append('ghcnd-countries.txt')
+FILE_NAMES.append('ghcnd-inventory.txt')
+FILE_NAMES.append('ghcnd-states.txt')
+FILE_NAMES.append('ghcnd-stations.txt')
+FILE_NAMES.append('ghcnd-version.txt')
+FILE_NAMES.append('ghcnd_all.tar.gz')
+FILE_NAMES.append('ghcnd_gsn.tar.gz')
+FILE_NAMES.append('ghcnd_hcn.tar.gz')
+FILE_NAMES.append('readme.txt')
+FILE_NAMES.append('status.txt')
+
+# Store the row details here.
+
+# Index values of each field details.
+FIELD_INDEX_NAME = 0
+FIELD_INDEX_START = 1
+FIELD_INDEX_END = 2
+FIELD_INDEX_TYPE = 3
+
+DLY_FIELD_ID = 0
+DLY_FIELD_YEAR = 1
+DLY_FIELD_MONTH = 2
+DLY_FIELD_ELEMENT = 3
+
+DLY_FIELD_DAY_OFFSET = 4
+DLY_FIELD_DAY_FIELDS = 4
+
+DLY_FIELDS = []
+
+# Details about the row.
+DLY_FIELDS.append(['ID', 1, 11, 'Character'])
+DLY_FIELDS.append(['YEAR', 12, 15, 'Integer'])
+DLY_FIELDS.append(['MONTH', 16, 17, 'Integer'])
+DLY_FIELDS.append(['ELEMENT', 18, 21, 'Character'])
+
+# Days in each row.
+for i in range(1, 32):
+    start = 22 + ((i - 1) * 8)
+    DLY_FIELDS.append(['VALUE' + str(i), (start + 0), (start + 4), 'Integer'])
+    DLY_FIELDS.append(['MFLAG' + str(i), (start + 5), (start + 5), 'Character'])
+    DLY_FIELDS.append(['QFLAG' + str(i), (start + 6), (start + 6), 'Character'])
+    DLY_FIELDS.append(['SFLAG' + str(i), (start + 7), (start + 7), 'Character'])
+
+# Details about the row.
+STATIONS_FIELDS = {}
+STATIONS_FIELDS['ID'] = ['ID', 1, 11, 'Character']
+STATIONS_FIELDS['LATITUDE'] = ['LATITUDE', 13, 20, 'Real']
+STATIONS_FIELDS['LONGITUDE'] = ['LONGITUDE', 22, 30, 'Real']
+STATIONS_FIELDS['ELEVATION'] = ['ELEVATION', 32, 37, 'Real']
+STATIONS_FIELDS['STATE'] = ['STATE', 39, 40, 'Character']
+STATIONS_FIELDS['NAME'] = ['NAME', 42, 71, 'Character']
+STATIONS_FIELDS['GSNFLAG'] = ['GSNFLAG', 73, 75, 'Character']
+STATIONS_FIELDS['HCNFLAG'] = ['HCNFLAG', 77, 79, 'Character']
+STATIONS_FIELDS['WMOID'] = ['WMOID', 81, 85, 'Character']
+
+# Details about the row.
+COUNTRIES_FIELDS = {}
+COUNTRIES_FIELDS['CODE'] = ['CODE', 1, 2, 'Character']
+COUNTRIES_FIELDS['NAME'] = ['NAME', 4, 50, 'Character']
+
+# Details about the row.
+STATES_FIELDS = {}
+STATES_FIELDS['CODE'] = ['CODE', 1, 2, 'Character']
+STATES_FIELDS['NAME'] = ['NAME', 4, 50, 'Character']
+
+# Details about the row.
+INVENTORY_FIELDS = []
+INVENTORY_FIELDS.append(['ID', 1, 11, 'Character'])
+INVENTORY_FIELDS.append(['LATITUDE', 13, 20, 'Real'])
+INVENTORY_FIELDS.append(['LONGITUDE', 22, 30, 'Real'])
+INVENTORY_FIELDS.append(['ELEMENT', 32, 35, 'Character'])
+INVENTORY_FIELDS.append(['FIRSTYEAR', 37, 40, 'Integer'])
+INVENTORY_FIELDS.append(['LASTYEAR', 42, 45, 'Integer'])
+

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/9456d4ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
new file mode 100644
index 0000000..7b1434f
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# List of required files for a build.
+MSHR_URLS = []
+MSHR_URLS.append('ftp://ftp.ncdc.noaa.gov/pub/data/homr/docs/MSHR_Enhanced_Table.txt')
+MSHR_URLS.append('http://www.ncdc.noaa.gov/homr/file/mshr_enhanced.txt.zip')
+
+# Index values of each field details.
+MSHR_FIELD_INDEX_NAME = 0
+MSHR_FIELD_INDEX_START = 1
+MSHR_FIELD_INDEX_END = 2
+MSHR_FIELD_INDEX_TYPE = 3
+
+# Store the row details here.
+MSHR_FIELDS = {}
+
+# Details about the row.
+MSHR_FIELDS['SOURCE_ID'] = ['SOURCE_ID', 1, 20, 'X(20)']
+MSHR_FIELDS['SOURCE'] = ['SOURCE', 22, 31, 'X(10)']
+MSHR_FIELDS['BEGIN_DATE'] = ['BEGIN_DATE', 33, 40, 'YYYYMMDD']
+MSHR_FIELDS['END_DATE'] = ['END_DATE', 42, 49, 'YYYYMMDD']
+MSHR_FIELDS['STATION_STATUS'] = ['STATION_STATUS', 51, 70, 'X(20)']
+MSHR_FIELDS['NCDCSTN_ID'] = ['NCDCSTN_ID', 72, 91, 'X(20)']
+MSHR_FIELDS['ICAO_ID'] = ['ICAO_ID', 93, 112, 'X(20)']
+MSHR_FIELDS['WBAN_ID'] = ['WBAN_ID', 114, 133, 'X(20)']
+MSHR_FIELDS['FAA_ID'] = ['FAA_ID', 135, 154, 'X(20)']
+MSHR_FIELDS['NWSLI_ID'] = ['NWSLI_ID', 156, 175, 'X(20)']
+MSHR_FIELDS['WMO_ID'] = ['WMO_ID', 177, 196, 'X(20)']
+MSHR_FIELDS['COOP_ID'] = ['COOP_ID', 198, 217, 'X(20)']
+MSHR_FIELDS['TRANSMITTAL_ID'] = ['TRANSMITTAL_ID', 219, 238, 'X(20)']
+MSHR_FIELDS['GHCND_ID'] = ['GHCND_ID', 240, 259, 'X(20)']
+MSHR_FIELDS['NAME_PRINCIPAL'] = ['NAME_PRINCIPAL', 261, 360, 'X(100)']
+MSHR_FIELDS['NAME_PRINCIPAL_SHORT'] = ['NAME_PRINCIPAL_SHORT', 362, 391, 'X(30)']
+MSHR_FIELDS['NAME_COOP'] = ['NAME_COOP', 393, 492, 'X(100)']
+MSHR_FIELDS['NAME_COOP_SHORT'] = ['NAME_COOP_SHORT', 494, 523, 'X(30)']
+MSHR_FIELDS['NAME_PUBLICATION'] = ['NAME_PUBLICATION', 525, 624, 'X(100)']
+MSHR_FIELDS['NAME_ALIAS'] = ['NAME_ALIAS', 626, 725, 'X(100)']
+MSHR_FIELDS['NWS_CLIM_DIV'] = ['NWS_CLIM_DIV', 727, 736, 'X(10)']
+MSHR_FIELDS['NWS_CLIM_DIV_NAME'] = ['NWS_CLIM_DIV_NAME', 738, 777, 'X(40)']
+MSHR_FIELDS['STATE_PROV'] = ['STATE_PROV', 779, 788, 'X(10)']
+MSHR_FIELDS['COUNTY'] = ['COUNTY', 790, 839, 'X(50)']
+MSHR_FIELDS['NWS_ST_CODE'] = ['NWS_ST_CODE', 841, 842, 'X(2)']
+MSHR_FIELDS['FIPS_COUNTRY_CODE'] = ['FIPS_COUNTRY_CODE', 844, 845, 'X(2)']
+MSHR_FIELDS['FIPS_COUNTRY_NAME'] = ['FIPS_COUNTRY_NAME', 847, 946, 'X(100)']
+MSHR_FIELDS['NWS_REGION'] = ['NWS_REGION', 948, 977, 'X(30)']
+MSHR_FIELDS['NWS_WFO'] = ['NWS_WFO', 979, 988, 'X(10)']
+MSHR_FIELDS['ELEV_GROUND'] = ['ELEV_GROUND', 990, 1029, 'X(40)']
+MSHR_FIELDS['ELEV_GROUND_UNIT'] = ['ELEV_GROUND_UNIT', 1031, 1050, 'X(20)']
+MSHR_FIELDS['ELEV_BAROM'] = ['ELEV_BAROM', 1052, 1091, 'X(40)']
+MSHR_FIELDS['ELEV_BAROM_UNIT'] = ['ELEV_BAROM_UNIT', 1093, 1112, 'X(20)']
+MSHR_FIELDS['ELEV_AIR'] = ['ELEV_AIR', 1114, 1153, 'X(40)']
+MSHR_FIELDS['ELEV_AIR_UNIT'] = ['ELEV_AIR_UNIT', 1155, 1174, 'X(20)']
+MSHR_FIELDS['ELEV_ZERODAT'] = ['ELEV_ZERODAT', 1176, 1215, 'X(40)']
+MSHR_FIELDS['ELEV_ZERODAT_UNIT'] = ['ELEV_ZERODAT_UNIT', 1217, 1236, 'X(20)']
+MSHR_FIELDS['ELEV_UNK'] = ['ELEV_UNK', 1238, 1277, 'X(40)']
+MSHR_FIELDS['ELEV_UNK_UNIT'] = ['ELEV_UNK_UNIT', 1279, 1298, 'X(20)']
+MSHR_FIELDS['LAT_DEC'] = ['LAT_DEC', 1300, 1319, 'X(20)']
+MSHR_FIELDS['LON_DEC'] = ['LON_DEC', 1321, 1340, 'X(20)']
+MSHR_FIELDS['LAT_LON_PRECISION'] = ['LAT_LON_PRECISION', 1342, 1351, 'X(10)']
+MSHR_FIELDS['RELOCATION'] = ['RELOCATION', 1353, 1414, 'X(62)']
+MSHR_FIELDS['UTC_OFFSET'] = ['UTC_OFFSET', 1416, 1431, '9(16)']
+MSHR_FIELDS['OBS_ENV'] = ['OBS_ENV', 1433, 1472, 'X(40) ']
+MSHR_FIELDS['PLATFORM'] = ['PLATFORM', 1474, 1573, 'X(100)']

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/9456d4ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
index 36aff16..1aee4a7 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
@@ -18,13 +18,75 @@ import textwrap
 from datetime import date
 import os
 import gzip
+from collections import OrderedDict
 
 # Custom modules.
-from weather_dly_config import *
+from weather_config_ghcnd import *
+from weather_config_mshr import *
 from weather_download_files import *
 
 class WeatherConvertToXML:
     
+    STATES = OrderedDict({
+        'AK': 'Alaska',
+        'AL': 'Alabama',
+        'AR': 'Arkansas',
+        'AS': 'American Samoa',
+        'AZ': 'Arizona',
+        'CA': 'California',
+        'CO': 'Colorado',
+        'CT': 'Connecticut',
+        'DC': 'District of Columbia',
+        'DE': 'Delaware',
+        'FL': 'Florida',
+        'GA': 'Georgia',
+        'GU': 'Guam',
+        'HI': 'Hawaii',
+        'IA': 'Iowa',
+        'ID': 'Idaho',
+        'IL': 'Illinois',
+        'IN': 'Indiana',
+        'KS': 'Kansas',
+        'KY': 'Kentucky',
+        'LA': 'Louisiana',
+        'MA': 'Massachusetts',
+        'MD': 'Maryland',
+        'ME': 'Maine',
+        'MI': 'Michigan',
+        'MN': 'Minnesota',
+        'MO': 'Missouri',
+        'MP': 'Northern Mariana Islands',
+        'MS': 'Mississippi',
+        'MT': 'Montana',
+        'NA': 'National',
+        'NC': 'North Carolina',
+        'ND': 'North Dakota',
+        'NE': 'Nebraska',
+        'NH': 'New Hampshire',
+        'NJ': 'New Jersey',
+        'NM': 'New Mexico',
+        'NV': 'Nevada',
+        'NY': 'New York',
+        'OH': 'Ohio',
+        'OK': 'Oklahoma',
+        'OR': 'Oregon',
+        'PA': 'Pennsylvania',
+        'PR': 'Puerto Rico',
+        'RI': 'Rhode Island',
+        'SC': 'South Carolina',
+        'SD': 'South Dakota',
+        'TN': 'Tennessee',
+        'TX': 'Texas',
+        'UT': 'Utah',
+        'VA': 'Virginia',
+        'VI': 'Virgin Islands',
+        'VT': 'Vermont',
+        'WA': 'Washington',
+        'WI': 'Wisconsin',
+        'WV': 'West Virginia',
+        'WY': 'Wyoming'
+    })
+    
     MONTHS = [
         "January",
         "February",
@@ -51,6 +113,9 @@ class WeatherConvertToXML:
         self.ghcnd_countries = base_path + '/ghcnd-countries.txt'
         self.ghcnd_states = base_path + '/ghcnd-states.txt'
         self.ghcnd_stations = base_path + '/ghcnd-stations.txt'
+
+        # MSHR support files.
+        self.mshr_stations = base_path + '/mshr_enhanced_201402.txt'
         
     def set_token(self, token):
         self.token = token
@@ -109,7 +174,7 @@ class WeatherConvertToXML:
         row = file_stream.readline()
         return self.process_station_data(row)
 
-    def process_sensor_file(self, file_name, max_files, sensor_max = 99):
+    def process_sensor_file(self, file_name, max_files, sensor_max=99):
         print "Processing sensor file: " + file_name
         file_stream = open(file_name, 'r')
     
@@ -164,17 +229,30 @@ class WeatherConvertToXML:
                 <credit_URL>http://www.ncdc.noaa.gov/</credit_URL>
             """)
     
-    def default_xml_web_service_start(self, total_records):
+    def default_xml_web_service_start(self):
         field_xml = ""
         field_xml += "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n"
+        return field_xml
+    
+    def default_xml_data_start(self, total_records):
+        field_xml = ""
         field_xml += "<dataCollection pageCount=\"1\" totalCount=\"" + str(total_records) + "\">\n"
         return field_xml
     
+    def default_xml_station_start(self):
+        field_xml = ""
+        field_xml = "<stationCollection pageSize=\"100\" pageCount=\"1\" totalCount=\"1\">\n"
+        return field_xml
+    
     def default_xml_field_date(self, report_date, indent=2):
         field_xml = ""
         field_xml += self.get_indent_space(indent) + "<date>" + str(report_date.year) + "-" + str(report_date.month).zfill(2) + "-" + str(report_date.day).zfill(2) + "T00:00:00.000</date>\n"
         return field_xml
     
+    def get_date_from_field(self, row, field):
+        report_date = self.get_field_from_definition(row, field)
+        return str(report_date.year) + "-" + str(report_date.month).zfill(2) + "-" + str(report_date.day).zfill(2)
+    
     def default_xml_field_date_iso8601(self, report_date):
         field_xml = ""
         field_xml += "    <observation_date>" + self.MONTHS[report_date.month - 1] + " " + str(report_date.day) + ", " + str(report_date.year) + "</observation_date>\n"
@@ -241,6 +319,72 @@ class WeatherConvertToXML:
     
         return field_xml
     
+    def default_xml_mshr_station_additional(self, station_id):
+        """The web service station data is generated from the MSHR data supplemented with GHCN-Daily."""
+        station_mshr_row = ""
+        stations_mshr_file = open(self.mshr_stations, 'r')
+        for line in stations_mshr_file:
+            if station_id == self.get_field_from_definition(line, MSHR_FIELDS['GHCND_ID']).strip():
+                station_mshr_row = line
+                break
+        
+        if station_mshr_row == "":
+            return ""
+
+        additional_xml = ""
+
+        county = self.get_field_from_definition(station_mshr_row, MSHR_FIELDS['COUNTY']).strip()
+        if county != "":
+            additional_xml += self.default_xml_location_labels("CNTY", "FIPS:-9999", county)
+            
+        country_code = self.get_field_from_definition(station_mshr_row, MSHR_FIELDS['FIPS_COUNTRY_CODE']).strip()
+        country_name = self.get_field_from_definition(station_mshr_row, MSHR_FIELDS['FIPS_COUNTRY_NAME']).strip()
+        if country_code != "" and country_name != "":
+            additional_xml += self.default_xml_location_labels("CNTRY", "FIPS:"+country_code, country_name)
+        
+        return additional_xml
+
+    def default_xml_location_labels(self, type, id, display_name):
+        label_xml = ""
+        label_xml += self.default_xml_start_tag("locationLabels", 2)
+        label_xml += self.default_xml_element("type", type, 3)
+        label_xml += self.default_xml_element("id", id, 3)
+        label_xml += self.default_xml_element("displayName", display_name, 3)
+        label_xml += self.default_xml_end_tag("locationLabels", 2)
+        return label_xml
+        
+
+    def default_xml_web_service_station(self, station_id):
+        """The web service station data is generated from available historical sources."""
+        station_ghcnd_row = ""
+        stations_ghcnd_file = open(self.ghcnd_stations, 'r')
+        for line in stations_ghcnd_file:
+            if station_id == self.get_field_from_definition(line, STATIONS_FIELDS['ID']):
+                station_ghcnd_row = line
+                break
+    
+        xml_station = ""
+        xml_station += self.default_xml_start_tag("station", 1)
+        
+        xml_station += self.default_xml_element("id", "GHCND:" + station_id, 2)
+        xml_station += self.default_xml_element("displayName", self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['NAME']).strip(), 2)
+        xml_station += self.default_xml_element("latitude", self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['LATITUDE']).strip(), 2)
+        xml_station += self.default_xml_element("longitude", self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['LONGITUDE']).strip(), 2)
+        
+        elevation = self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['ELEVATION']).strip()
+        if elevation != "-999.9":
+            xml_station += self.default_xml_element("elevation", elevation, 2)
+        
+        state_code = self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['STATE']).strip()
+        if state_code != "":
+            xml_station += self.default_xml_location_labels("ST", "FIPS:" + str(self.STATES.keys().index(state_code)), self.STATES[state_code])
+        
+        # Add the MSHR data to the station generated information.
+        xml_station += self.default_xml_mshr_station_additional(station_id)
+            
+        xml_station += self.default_xml_end_tag("station", 1)
+        return xml_station
+        
     def default_xml_day_reading_as_field(self, row, day):
         day_index = DLY_FIELD_DAY_OFFSET + ((day - 1) * DLY_FIELD_DAY_FIELDS)
         value = self.get_dly_field(row, day_index);
@@ -306,8 +450,14 @@ class WeatherConvertToXML:
         return textwrap.dedent("""\
             </ghcnd_observation>""")
 
-    def default_xml_web_service_end(self):
-        return "</dataCollection>"
+    def default_xml_data_end(self):
+        return self.default_xml_end_tag("dataCollection", 0)
+
+    def default_xml_station_end(self):
+        return self.default_xml_end_tag("stationCollection", 0)
+
+    def default_xml_element(self, tag, data, indent=1):
+        return self.get_indent_space(indent) + "<" + tag + ">" + data + "</" + tag + ">\n"
 
     def default_xml_start_tag(self, tag, indent=1):
         return self.get_indent_space(indent) + "<" + tag + ">\n"
@@ -434,9 +584,11 @@ class WeatherMonthlyXMLFile(WeatherConvertToXML):
             return 0
 
 class WeatherWebServiceMonthlyXMLFile(WeatherConvertToXML):
+    """The web service class details how to create files similar to the NOAA web service."""
     skip_downloading = False
     # Station data
     def process_station_data(self, row):
+        """Adds a single station record file either from downloading the data or generating a similar record."""
         station_id = self.get_dly_field(row, DLY_FIELD_ID)
         download = 0
         if self.token is not "" and not self.skip_downloading:
@@ -444,15 +596,20 @@ class WeatherWebServiceMonthlyXMLFile(WeatherConvertToXML):
             if download == 0:
                 self.skip_downloading = True
         
-        # If not downloaded generate.
+        # If not downloaded, generate.
         if download != 0:
             return download
         else:
             # Information for each daily file.
-            station_xml_file = self.default_xml_start()
-            station_xml_file += self.default_xml_field_station(station_id)
-            station_xml_file += self.default_xml_end()
+            station_xml_file = self.default_xml_web_service_start()
+            station_xml_file += self.default_xml_station_start()
+            station_xml_file += self.default_xml_web_service_station(station_id)
+            station_xml_file += self.default_xml_station_end()
             
+            # Remove white space.
+            station_xml_file = station_xml_file.replace("\n", "");
+            station_xml_file = station_xml_file.replace(self.get_indent_space(1), "");
+
             # Make sure the station folder is available.
             ghcnd_xml_station_path = self.get_base_folder(station_id, "stations")
             if not os.path.isdir(ghcnd_xml_station_path):
@@ -470,9 +627,10 @@ class WeatherWebServiceMonthlyXMLFile(WeatherConvertToXML):
                 return 0
 
     # Station data
-    def download_station_data(self, station_id, token, reset = False):
+    def download_station_data(self, station_id, token, reset=False):
+        """Downloads the station data from the web service."""
         import time
-        time.sleep(10)
+        time.sleep(2)
         # Make sure the station folder is available.
         ghcnd_xml_station_path = self.get_base_folder(station_id, "stations")
         if not os.path.isdir(ghcnd_xml_station_path):
@@ -506,6 +664,7 @@ class WeatherWebServiceMonthlyXMLFile(WeatherConvertToXML):
 
     # Sensor data
     def process_one_month_sensor_set(self, records, page):
+        """Generates records for a station using the web service xml layout."""
         found_data = False        
         year = int(self.get_dly_field(records[0], DLY_FIELD_YEAR))
         month = int(self.get_dly_field(records[0], DLY_FIELD_MONTH))
@@ -535,7 +694,7 @@ class WeatherWebServiceMonthlyXMLFile(WeatherConvertToXML):
             except ValueError:
                 pass
 
-        daily_xml_file = self.default_xml_web_service_start(count) + daily_xml_file + self.default_xml_web_service_end()
+        daily_xml_file = self.default_xml_web_service_start() + self.default_xml_data_start(count) + daily_xml_file + self.default_xml_data_end()
         daily_xml_file = daily_xml_file.replace("\n", "");
         daily_xml_file = daily_xml_file.replace(self.get_indent_space(1), "");
 

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/9456d4ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
index 8e26e99..da2afcc 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
@@ -92,8 +92,8 @@ class WeatherDataFiles:
             self.close_progress_data(True)
         self.reset()
         
-    # Once the initial data has been generated, the data can be copied into a set number of partitions. 
     def copy_to_n_partitions(self, save_path, partitions, base_paths=[]):
+        """Once the initial data has been generated, the data can be copied into a set number of partitions. """
         if (len(base_paths) == 0):
             return
         

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/9456d4ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_dly_config.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_dly_config.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_dly_config.py
deleted file mode 100644
index 801e748..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_dly_config.py
+++ /dev/null
@@ -1,96 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Base URL used to get all the required files.
-BASE_DOWNLOAD_URL = 'http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/'
-
-# List of required files for a build.
-FILE_NAMES = []
-FILE_NAMES.append('ghcnd-countries.txt')
-FILE_NAMES.append('ghcnd-inventory.txt')
-FILE_NAMES.append('ghcnd-states.txt')
-FILE_NAMES.append('ghcnd-stations.txt')
-FILE_NAMES.append('ghcnd-version.txt')
-FILE_NAMES.append('ghcnd_all.tar.gz')
-FILE_NAMES.append('ghcnd_gsn.tar.gz')
-FILE_NAMES.append('ghcnd_hcn.tar.gz')
-FILE_NAMES.append('readme.txt')
-FILE_NAMES.append('status.txt')
-
-# Store the row details here.
-
-# Index values of each field details.
-FIELD_INDEX_NAME = 0
-FIELD_INDEX_START = 1
-FIELD_INDEX_END = 2
-FIELD_INDEX_TYPE = 3
-
-DLY_FIELD_ID = 0
-DLY_FIELD_YEAR = 1
-DLY_FIELD_MONTH = 2
-DLY_FIELD_ELEMENT = 3
-
-DLY_FIELD_DAY_OFFSET = 4
-DLY_FIELD_DAY_FIELDS = 4
-
-DLY_FIELDS = []
-
-# Details about the row.
-DLY_FIELDS.append(['ID', 1, 11, 'Character'])
-DLY_FIELDS.append(['YEAR', 12, 15, 'Integer'])
-DLY_FIELDS.append(['MONTH', 16, 17, 'Integer'])
-DLY_FIELDS.append(['ELEMENT', 18, 21, 'Character'])
-
-# Days in each row.
-for i in range(1, 32):
-    start = 22 + ((i - 1) * 8)
-    DLY_FIELDS.append(['VALUE' + str(i), (start + 0), (start + 4), 'Integer'])
-    DLY_FIELDS.append(['MFLAG' + str(i), (start + 5), (start + 5), 'Character'])
-    DLY_FIELDS.append(['QFLAG' + str(i), (start + 6), (start + 6), 'Character'])
-    DLY_FIELDS.append(['SFLAG' + str(i), (start + 7), (start + 7), 'Character'])
-
-# Details about the row.
-STATIONS_FIELDS = {}
-STATIONS_FIELDS['ID'] = ['ID', 1, 11, 'Character']
-STATIONS_FIELDS['LATITUDE'] = ['LATITUDE', 13, 20, 'Real']
-STATIONS_FIELDS['LONGITUDE'] = ['LONGITUDE', 22, 30, 'Real']
-STATIONS_FIELDS['ELEVATION'] = ['ELEVATION', 32, 37, 'Real']
-STATIONS_FIELDS['STATE'] = ['STATE', 39, 40, 'Character']
-STATIONS_FIELDS['NAME'] = ['NAME', 42, 71, 'Character']
-STATIONS_FIELDS['GSNFLAG'] = ['GSNFLAG', 73, 75, 'Character']
-STATIONS_FIELDS['HCNFLAG'] = ['HCNFLAG', 77, 79, 'Character']
-STATIONS_FIELDS['WMOID'] = ['WMOID', 81, 85, 'Character']
-
-# Details about the row.
-COUNTRIES_FIELDS = {}
-COUNTRIES_FIELDS['CODE'] = ['CODE', 1, 2, 'Character']
-COUNTRIES_FIELDS['NAME'] = ['NAME', 4, 50, 'Character']
-
-# Details about the row.
-STATES_FIELDS = {}
-STATES_FIELDS['CODE'] = ['CODE', 1, 2, 'Character']
-STATES_FIELDS['NAME'] = ['NAME', 4, 50, 'Character']
-
-# Details about the row.
-INVENTORY_FIELDS = []
-INVENTORY_FIELDS.append(['ID', 1, 11, 'Character'])
-INVENTORY_FIELDS.append(['LATITUDE', 13, 20, 'Real'])
-INVENTORY_FIELDS.append(['LONGITUDE', 22, 30, 'Real'])
-INVENTORY_FIELDS.append(['ELEMENT', 32, 35, 'Character'])
-INVENTORY_FIELDS.append(['FIRSTYEAR', 37, 40, 'Integer'])
-INVENTORY_FIELDS.append(['LASTYEAR', 42, 45, 'Integer'])
-

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/9456d4ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
index 87adb11..fb59b50 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
@@ -19,9 +19,11 @@ import os.path
 import shutil
 import tarfile
 import urllib
+import zipfile
 
 # Custom modules.
-from weather_dly_config import *
+from weather_config_ghcnd import *
+from weather_config_mshr import *
 
 class WeatherDownloadFiles:
 
@@ -32,14 +34,18 @@ class WeatherDownloadFiles:
             os.makedirs(save_path)
 
 
-    # Download the complete list
-    def download_all_files(self, reset=False):
+    def download_ghcnd_files(self, reset=False):
+        """Download the complete list."""
         for file_name in FILE_NAMES:
             url = BASE_DOWNLOAD_URL + file_name
             self.download_file(url, reset)
 
-    # Download the file, unless it exists.
+    def download_mshr_files(self, reset=False):
+        for url in MSHR_URLS:
+            self.download_file(url, reset)
+
     def download_file(self, url, reset=False):
+        """Download the file, unless it exists."""
         file_name = self.save_path + "/" + url.split('/')[-1]
 
         if not os.path.isfile(file_name) or reset:
@@ -47,8 +53,8 @@ class WeatherDownloadFiles:
             urllib.urlretrieve(url, file_name, report_download_status)
             print
 
-    # Unzip the package file, unless it exists.
-    def unzip_package(self, package, reset=False):
+    def unzip_ghcnd_package(self, package, reset=False):
+        """Unzip the package file, unless it exists."""
         file_name = self.save_path + "/" + package + ".tar.gz"
         unzipped_path = self.save_path + "/" + package
         
@@ -60,16 +66,25 @@ class WeatherDownloadFiles:
             tar_file = tarfile.open(file_name, 'r:gz')
             tar_file.extractall(unzipped_path)
  
-# Report download status.
+    def unzip_mshr_files(self, reset=False):
+        """Unzip every downloaded MSHR .zip archive into the save path."""
+        for url in MSHR_URLS:
+            if url.endswith('.zip'):
+                file_name = self.save_path + "/" + url.split('/')[-1]
+                print "Unzipping: " + file_name
+                with zipfile.ZipFile(file_name, 'r') as myzip:
+                    myzip.extractall(self.save_path)
+ 
 def report_download_status(count, block, size):
+    """Report download status."""
     line_size = 50
     erase = "\b" * line_size
     sys.stdout.write(erase)
     report = get_report_line((float(count) * block / size), line_size)
     sys.stdout.write(report)
 
-# Creates a string to be used in reporting the percentage done.
 def get_report_line(percentage, line_size):
+    """Creates a string to be used in reporting the percentage done."""
     report = ""
     for i in range(0, line_size):
         if (float(i) / line_size < percentage):
@@ -78,8 +93,8 @@ def get_report_line(percentage, line_size):
             report += "-"
     return report
             
-# Download the file, unless it exists.
 def download_file_save_as(url, new_file_name, reset=False):
+    """Download the file, unless it exists."""
     if not os.path.isfile(new_file_name) or reset:
         print "Downloading: " + url
         urllib.urlretrieve(url, new_file_name, report_download_status)


[5/5] git commit: Improve search for functions based on things I learned from join.

Posted by pr...@apache.org.
Improve search for functions based on things I learned from join.


Project: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/commit/eaed030b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/tree/eaed030b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/diff/eaed030b

Branch: refs/heads/prestonc/benchmarks_staging
Commit: eaed030b6808c02f45192746c1d753cdc13ad1fe
Parents: ca8b460
Author: Preston Carman <pr...@apache.org>
Authored: Wed Mar 12 13:12:27 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Wed Mar 12 13:12:27 2014 -0700

----------------------------------------------------------------------
 .../rewriter/rules/util/ExpressionToolbox.java   | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/eaed030b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/ExpressionToolbox.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/ExpressionToolbox.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/ExpressionToolbox.java
index 78ceeb7..d674f76 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/ExpressionToolbox.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/ExpressionToolbox.java
@@ -18,8 +18,11 @@ package org.apache.vxquery.compiler.rewriter.rules.util;
 
 import java.util.List;
 
+import javax.xml.namespace.QName;
+
 import org.apache.commons.lang3.mutable.Mutable;
 import org.apache.vxquery.compiler.algebricks.VXQueryConstantValue;
+import org.apache.vxquery.context.StaticContext;
 import org.apache.vxquery.context.StaticContextImpl;
 import org.apache.vxquery.datamodel.accessors.TaggedValuePointable;
 import org.apache.vxquery.functions.BuiltinFunctions;
@@ -78,7 +81,8 @@ public class ExpressionToolbox {
         return null;
     }
 
-    public static void findVariableExpressions(Mutable<ILogicalExpression> mutableLe, List<Mutable<ILogicalExpression>> finds) {
+    public static void findVariableExpressions(Mutable<ILogicalExpression> mutableLe,
+            List<Mutable<ILogicalExpression>> finds) {
         ILogicalExpression le = mutableLe.getValue();
         if (le.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
             finds.add(mutableLe);
@@ -144,14 +148,15 @@ public class ExpressionToolbox {
         }
     }
 
-    public static Function getBuiltIn(Mutable<ILogicalExpression> mutableLe) {
+    public static Function getBuiltIn(Mutable<ILogicalExpression> mutableLe, StaticContext rootContext) {
         ILogicalExpression le = mutableLe.getValue();
         if (le.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
             AbstractFunctionCallExpression afce = (AbstractFunctionCallExpression) le;
-            for (Function function : BuiltinFunctions.FUNCTION_COLLECTION) {
-                if (function.getFunctionIdentifier().equals(afce.getFunctionIdentifier())) {
-                    return function;
-                }
+            FunctionIdentifier fid = afce.getFunctionIdentifier();
+            QName functionName = new QName(fid.getNamespace(), fid.getName());
+            Function found = rootContext.lookupFunction(functionName, fid.getArity());
+            if (found != null) {
+                return found;
             }
             for (Function function : BuiltinOperators.OPERATOR_COLLECTION) {
                 if (function.getFunctionIdentifier().equals(afce.getFunctionIdentifier())) {
@@ -182,7 +187,7 @@ public class ExpressionToolbox {
         return pTypeCode.getInteger();
     }
 
-    public static SequenceType getTypeExpressionTypeArgument(Mutable<ILogicalExpression> searchM, StaticContextImpl dCtx) {
+    public static SequenceType getTypeExpressionTypeArgument(Mutable<ILogicalExpression> searchM, StaticContext dCtx) {
         int typeId = getTypeExpressionTypeArgument(searchM);
         if (typeId > 0) {
             return dCtx.lookupSequenceType(typeId);


[2/5] git commit: Added a reset option to partitions and queries.

Posted by pr...@apache.org.
Added a reset option to partitions and queries.

The new option removes all existing data before the file copies are run, giving the data a clean start.


Project: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/commit/2bc25a10
Tree: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/tree/2bc25a10
Diff: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/diff/2bc25a10

Branch: refs/heads/prestonc/benchmarks_staging
Commit: 2bc25a10517d80c2d9277b2be8ab73e6557ad1c2
Parents: 9456d4a
Author: Preston Carman <pr...@apache.org>
Authored: Thu Mar 6 20:04:52 2014 -0800
Committer: Preston Carman <pr...@apache.org>
Committed: Thu Mar 6 20:04:52 2014 -0800

----------------------------------------------------------------------
 .../scripts/weather_benchmark.py                | 23 +++++++++-----------
 .../noaa-ghcn-daily/scripts/weather_cli.py      |  4 ++--
 .../scripts/weather_convert_to_xml.py           |  2 +-
 .../scripts/weather_data_files.py               | 14 ++++++++----
 4 files changed, 23 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/2bc25a10/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
index 8a032c2..66f85d6 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
@@ -68,7 +68,9 @@ class WeatherBenchmark:
             offset = 0
             group_size = len(data_paths) / len(link_base_paths)
             for link_index, link_path in enumerate(link_base_paths):
-                for data_index, data_path in  enumerate(data_paths):
+                if os.path.isdir(link_path):
+                    shutil.rmtree(link_path)
+                for data_index, data_path in enumerate(data_paths):
                     if offset <= data_index and data_index < offset + group_size:
                         self.add_collection_links_for(data_path, link_path, data_index)
                 offset += group_size
@@ -134,38 +136,34 @@ class WeatherBenchmark:
             if index >= 0:
                 os.symlink(real_path + collection + "/", collection_path + "index" + str(index))
             
-    def copy_query_files(self):
+    def copy_query_files(self, reset):
         for test in self.dataset.get_tests():
             if test in self.BENCHMARK_LOCAL_TESTS:
-                self.copy_local_query_files(test)
+                self.copy_local_query_files(test, reset)
             elif test in self.BENCHMARK_CLUSTER_TESTS:
-                self.copy_cluster_query_files(test)
+                self.copy_cluster_query_files(test, reset)
             else:
                 print "Unknown test."
                 exit()
             
-    def copy_cluster_query_files(self, test):
+    def copy_cluster_query_files(self, test, reset):
         '''Determine the data_link path for cluster query files and copy with
         new location for collection.'''
         partitions = self.dataset.get_partitions()[0]
         for i in range(len(self.nodes)):
             query_path = get_cluster_query_path(self.base_paths, test, i)
-        
-            if not os.path.isdir(query_path):
-                os.makedirs(query_path)
+            prepare_path(query_path, reset)
         
             # Copy query files.
             partition_paths = get_cluster_link_paths_for_node(i, self.base_paths, "data_links/" + test)
             self.copy_and_replace_query(query_path, partition_paths)
 
-    def copy_local_query_files(self, test):
+    def copy_local_query_files(self, test, reset):
         '''Determine the data_link path for local query files and copy with
         new location for collection.'''
         for i in self.partitions:
             query_path = get_local_query_path(self.base_paths, test, i)
-        
-            if not os.path.isdir(query_path):
-                os.makedirs(query_path)
+            prepare_path(query_path, reset)
     
             # Copy query files.
             partition_paths = get_partition_paths(i, self.base_paths, "data_links/" + test)
@@ -200,7 +198,6 @@ class WeatherBenchmark:
                     print "Unknown test."
                     exit()
 
-
 def get_cluster_link_paths(nodes, base_paths, key="partitions"):        
     link_paths = []
     for i in range(0, nodes):

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/2bc25a10/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
index 8d18607..a1a1aa2 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
@@ -211,7 +211,7 @@ def main(argv):
             slices = benchmark.get_number_of_slices()
             print 'Processing the partition section (' + dataset.get_name() + ':d' + str(len(base_paths)) + ':s' + str(slices) + ').'
             data.reset()
-            data.copy_to_n_partitions(xml_data_save_path, slices, base_paths)
+            data.copy_to_n_partitions(xml_data_save_path, slices, base_paths, reset)
     
         if section in ("all", "test_links"):
             # TODO determine current node 
@@ -220,7 +220,7 @@ def main(argv):
 
         if section in ("all", "queries"):
             print 'Processing the queries section (' + dataset.get_name() + ').'
-            benchmark.copy_query_files()
+            benchmark.copy_query_files(reset)
     
 #     if section in ("statistics"):
 #         print 'Processing the statistics section.'

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/2bc25a10/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
index 1aee4a7..a7adce5 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
@@ -376,7 +376,7 @@ class WeatherConvertToXML:
             xml_station += self.default_xml_element("elevation", elevation, 2)
         
         state_code = self.get_field_from_definition(station_ghcnd_row, STATIONS_FIELDS['STATE']).strip()
-        if state_code != "":
+        if state_code != "" and state_code in self.STATES:
             xml_station += self.default_xml_location_labels("ST", "FIPS:" + str(self.STATES.keys().index(state_code)), self.STATES[state_code])
         
         # Add the MSHR data to the station generated information.

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/2bc25a10/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
index da2afcc..a9b4ecc 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
@@ -92,7 +92,7 @@ class WeatherDataFiles:
             self.close_progress_data(True)
         self.reset()
         
-    def copy_to_n_partitions(self, save_path, partitions, base_paths=[]):
+    def copy_to_n_partitions(self, save_path, partitions, base_paths, reset):
         """Once the initial data has been generated, the data can be copied into a set number of partitions. """
         if (len(base_paths) == 0):
             return
@@ -103,8 +103,7 @@ class WeatherDataFiles:
         for path in partition_paths:
             partition_sizes.append(0)
             # Make sure the xml folder is available.
-            if not os.path.isdir(path):
-                os.makedirs(path)
+            prepare_path(path, reset)
 
         # copy stations and sensors into each partition
         current_partition = 0
@@ -125,7 +124,6 @@ class WeatherDataFiles:
             if os.path.isdir(file_path):
                 distutils.dir_util.copy_tree(file_path, new_file_path)
             partition_sizes[current_partition] += size
-
         
             # Copy station files
             type = "stations"
@@ -319,6 +317,7 @@ class WeatherDataFiles:
                 break
         return columns[self.INDEX_DATA_FILE_NAME]
     
+    
 def get_partition_paths(partitions, base_paths, key="partitions"):        
     partition_paths = []
     for i in range(0, partitions):
@@ -330,4 +329,11 @@ def get_partition_paths(partitions, base_paths, key="partitions"):
 def get_partition_folder(disks, partitions, index):        
     return "d" + str(disks) + "_p" + str(partitions) + "_i" + str(index)
 
+def prepare_path(path, reset):
+    """Ensures the directory is available. If reset, then it's a brand new directory."""
+    if os.path.isdir(path) and reset:
+        shutil.rmtree(path)
+                
+    if not os.path.isdir(path):
+        os.makedirs(path)
 


[4/5] git commit: Query update to work with generated data on smaller data sizes.

Posted by pr...@apache.org.
Query update to work with generated data on smaller data sizes.


Project: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/commit/ca8b4604
Tree: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/tree/ca8b4604
Diff: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/diff/ca8b4604

Branch: refs/heads/prestonc/benchmarks_staging
Commit: ca8b4604e499d28de70b311a74d96b9f601c8ad2
Parents: 3e53bf3
Author: Preston Carman <pr...@apache.org>
Authored: Wed Mar 12 13:11:25 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Wed Mar 12 13:11:25 2014 -0700

----------------------------------------------------------------------
 .../src/main/resources/noaa-ghcn-daily/queries/q04.xq   | 12 ++++++------
 .../src/main/resources/noaa-ghcn-daily/queries/q05.xq   | 10 +++++-----
 2 files changed, 11 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/ca8b4604/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04.xq
index c20c973..174a9f7 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04.xq
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04.xq
@@ -1,14 +1,14 @@
 (: XQuery Join Query :)
-(: Find all the weather readings for Los Angeles county for a specific day    :)
+(: Find all the weather readings for King county for a specific day    :)
 (: 1976/7/4.                                                                  :)
-let $collection1 := "/tmp/1.0_partition_ghcnd_all_xml/stations"
-for $s in collection($collection1)/stationCollection/station
+let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
+for $s in collection($station_collection)/stationCollection/station
 
-let $collection2 := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-for $r in collection($collection2)/dataCollection/data
+let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+for $r in collection($sensor_collection)/dataCollection/data
     
 let $date := xs:date(fn:substring(xs:string(fn:data($r/date)), 0, 11))
 where $s/id eq $r/station 
-    and (some $x in $s/locationLabels satisfies ($x/type eq "CNTY" and $x/displayName eq "Los Angeles County, CA"))
+    and (some $x in $s/locationLabels satisfies ($x/type eq "CNTY" and fn:contains(fn:upper-case(fn:data($x/displayName)), "KING")))
     and $date eq xs:date("1976-07-04")
 return $r
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/ca8b4604/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq
index 3348d04..23bbaf4 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq
@@ -2,15 +2,15 @@
 (: Find the lowest recorded temperature (TMIN) in the state of Oregon for     :)
 (: 2001.                                                                      :)
 fn:min(
-    let $collection1 := "/tmp/1.0_partition_ghcnd_all_xml/stations"
-    for $s in collection($collection1)/stationCollection/station
+    let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
+    for $s in collection($station_collection)/stationCollection/station
     
-    let $collection2 := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-    for $r in collection($collection2)/dataCollection/data
+    let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r in collection($sensor_collection)/dataCollection/data
     
     let $date := xs:date(fn:substring(xs:string(fn:data($r/date)), 0, 11))
     where $s/id eq $r/station
-        and (some $x in $s/locationLabels satisfies ($x/type eq "ST" and $x/displayName eq "Oregon"))
+        and (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "OREGON"))
         and $r/dataType eq "TMIN" 
         and fn:year-from-date($date) eq 2001
     return $r/value


[3/5] git commit: Enabling hash based join.

Posted by pr...@apache.org.
Enabling hash based join.

The changes involve two new rules that allow switching between Algebricks Builtin Identifiers and VXQuery equivalent expressions. One more rewrite rule was included to move expressions out of the inner join to allow for hash join to be enabled on variables. Other changes connect these rules to allow for complete hash join implementation.


Project: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/commit/3e53bf31
Tree: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/tree/3e53bf31
Diff: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/diff/3e53bf31

Branch: refs/heads/prestonc/benchmarks_staging
Commit: 3e53bf310baba222623f02b679d2b4ab35bf3c43
Parents: 2bc25a1
Author: Preston Carman <pr...@apache.org>
Authored: Wed Mar 12 13:10:34 2014 -0700
Committer: Preston Carman <pr...@apache.org>
Committed: Wed Mar 12 13:10:34 2014 -0700

----------------------------------------------------------------------
 .../VXQueryComparatorFactoryProvider.java       |  36 +++++-
 .../compiler/rewriter/RewriteRuleset.java       |  38 ++++--
 .../ConvertFromAlgebricksExpressionsRule.java   |  94 ++++++++++++++
 .../ConvertToAlgebricksExpressionsRule.java     | 101 +++++++++++++++
 .../rules/PushFunctionsOntoEqJoinBranches.java  | 122 +++++++++++++++++++
 .../RemoveRedundantBooleanExpressionsRule.java  |  69 +++++++++++
 .../rewriter/rules/util/ExpressionToolbox.java  |  16 ++-
 .../metadata/VXQueryMetadataProvider.java       |  12 +-
 .../bool/FnBooleanScalarEvaluatorFactory.java   |   2 +-
 .../runtime/functions/util/FunctionHelper.java  |   1 +
 ...VXQueryBinaryHashFunctionFamilyProvider.java |  36 ++++++
 .../xmlquery/query/XMLQueryCompiler.java        |   6 +-
 12 files changed, 512 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/3e53bf31/vxquery-core/src/main/java/org/apache/vxquery/compiler/algebricks/VXQueryComparatorFactoryProvider.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/algebricks/VXQueryComparatorFactoryProvider.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/algebricks/VXQueryComparatorFactoryProvider.java
index 5a0e2df..b473d69 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/algebricks/VXQueryComparatorFactoryProvider.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/algebricks/VXQueryComparatorFactoryProvider.java
@@ -16,7 +16,9 @@
  */
 package org.apache.vxquery.compiler.algebricks;
 
+import org.apache.vxquery.datamodel.accessors.SequencePointable;
 import org.apache.vxquery.datamodel.accessors.TaggedValuePointable;
+import org.apache.vxquery.datamodel.values.ValueTag;
 
 import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
 import edu.uci.ics.hyracks.algebricks.data.IBinaryComparatorFactoryProvider;
@@ -43,12 +45,44 @@ public class VXQueryComparatorFactoryProvider implements IBinaryComparatorFactor
         public IBinaryComparator createBinaryComparator() {
             final TaggedValuePointable tvp1 = (TaggedValuePointable) TaggedValuePointable.FACTORY.createPointable();
             final TaggedValuePointable tvp2 = (TaggedValuePointable) TaggedValuePointable.FACTORY.createPointable();
+            final SequencePointable sp1 = (SequencePointable) SequencePointable.FACTORY.createPointable();
+            final SequencePointable sp2 = (SequencePointable) SequencePointable.FACTORY.createPointable();
             return new IBinaryComparator() {
                 @Override
                 public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
                     tvp1.set(b1, s1, l1);
                     tvp2.set(b2, s2, l2);
-                    return 0;
+                    if (tvp1.getTag() != tvp2.getTag()) {
+                        return tvp1.getTag() - tvp2.getTag();
+                    }
+                    // Empty sequences do not match. Supports hash based join.
+                    switch (tvp1.getTag()) {
+                        case ValueTag.SEQUENCE_TAG:
+                            tvp1.getValue(sp1);
+                            if (sp1.getEntryCount() == 0) {
+                                return -1;
+                            }
+
+                            switch (tvp2.getTag()) {
+                                case ValueTag.SEQUENCE_TAG:
+                                    tvp2.getValue(sp2);
+                                    if (sp2.getEntryCount() == 0) {
+                                        return 1;
+                                    }
+                                    break;
+                                default:
+                            }
+
+                            break;
+                        default:
+                    }
+                    // Do a binary compare between byte arrays.
+                    for (int i = 0; i < b1.length && i < b2.length; i++) {
+                        if (b1[i] != b2[i]) {
+                            return b1[i] - b2[i];
+                        }
+                    }
+                    return b1.length - b2.length;
                 }
             };
         }

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/3e53bf31/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/RewriteRuleset.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/RewriteRuleset.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/RewriteRuleset.java
index 16c44b8..ab0d535 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/RewriteRuleset.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/RewriteRuleset.java
@@ -20,6 +20,8 @@ import java.util.LinkedList;
 import java.util.List;
 
 import org.apache.vxquery.compiler.rewriter.rules.ConsolidateAssignAggregateRule;
+import org.apache.vxquery.compiler.rewriter.rules.ConvertFromAlgebricksExpressionsRule;
+import org.apache.vxquery.compiler.rewriter.rules.ConvertToAlgebricksExpressionsRule;
 import org.apache.vxquery.compiler.rewriter.rules.InlineNestedVariablesRule;
 import org.apache.vxquery.compiler.rewriter.rules.PushChildIntoDataScanRule;
 import org.apache.vxquery.compiler.rewriter.rules.ConsolidateUnnestsRule;
@@ -30,7 +32,9 @@ import org.apache.vxquery.compiler.rewriter.rules.EliminateUnnestAggregateSequen
 import org.apache.vxquery.compiler.rewriter.rules.EliminateUnnestAggregateSubplanRule;
 import org.apache.vxquery.compiler.rewriter.rules.IntroduceCollectionRule;
 import org.apache.vxquery.compiler.rewriter.rules.IntroduceTwoStepAggregateRule;
+import org.apache.vxquery.compiler.rewriter.rules.PushFunctionsOntoEqJoinBranches;
 import org.apache.vxquery.compiler.rewriter.rules.PushMapOperatorDownThroughProductRule;
+import org.apache.vxquery.compiler.rewriter.rules.RemoveRedundantBooleanExpressionsRule;
 import org.apache.vxquery.compiler.rewriter.rules.RemoveRedundantCastExpressionsRule;
 import org.apache.vxquery.compiler.rewriter.rules.RemoveRedundantDataExpressionsRule;
 import org.apache.vxquery.compiler.rewriter.rules.RemoveRedundantPromoteExpressionsRule;
@@ -147,14 +151,32 @@ public class RewriteRuleset {
     }
 
     /**
+     * Remove expressions known to be redundant.
+     */
+    public final static List<IAlgebraicRewriteRule> buildRedundantExpressionNormalizationRuleCollection() {
+        List<IAlgebraicRewriteRule> normalization = new LinkedList<IAlgebraicRewriteRule>();
+        normalization.add(new InlineNestedVariablesRule());
+        normalization.add(new RemoveRedundantTreatExpressionsRule());
+        normalization.add(new RemoveRedundantDataExpressionsRule());
+        normalization.add(new RemoveRedundantPromoteExpressionsRule());
+        normalization.add(new RemoveRedundantCastExpressionsRule());
+        normalization.add(new ConvertToAlgebricksExpressionsRule());
+        normalization.add(new RemoveRedundantBooleanExpressionsRule());
+        // Clean up
+        normalization.add(new RemoveRedundantVariablesRule());
+        normalization.add(new RemoveUnusedAssignAndAggregateRule());
+        return normalization;
+    }
+
+    /**
      * When a nested data sources exist, convert the plan to use the join operator.
      */
     public final static List<IAlgebraicRewriteRule> buildNestedDataSourceRuleCollection() {
         List<IAlgebraicRewriteRule> xquery = new LinkedList<IAlgebraicRewriteRule>();
+        xquery.add(new BreakSelectIntoConjunctsRule());
         xquery.add(new SimpleUnnestToProductRule());
         xquery.add(new PushMapOperatorDownThroughProductRule());
         xquery.add(new PushSubplanWithAggregateDownThroughProductRule());
-        xquery.add(new InlineNestedVariablesRule());
         xquery.add(new PushSelectDownRule());
         xquery.add(new PushSelectIntoJoinRule());
         // Clean up
@@ -163,18 +185,6 @@ public class RewriteRuleset {
         return xquery;
     }
 
-    public final static List<IAlgebraicRewriteRule> buildRedundantExpressionNormalizationRuleCollection() {
-        List<IAlgebraicRewriteRule> normalization = new LinkedList<IAlgebraicRewriteRule>();
-        normalization.add(new RemoveRedundantTreatExpressionsRule());
-        normalization.add(new RemoveRedundantDataExpressionsRule());
-        normalization.add(new RemoveRedundantPromoteExpressionsRule());
-        normalization.add(new RemoveRedundantCastExpressionsRule());
-        // Clean up
-        normalization.add(new RemoveRedundantVariablesRule());
-        normalization.add(new RemoveUnusedAssignAndAggregateRule());
-        return normalization;
-    }
-
     public final static List<IAlgebraicRewriteRule> buildTypeInferenceRuleCollection() {
         List<IAlgebraicRewriteRule> typeInfer = new LinkedList<IAlgebraicRewriteRule>();
         typeInfer.add(new InferTypesRule());
@@ -239,6 +249,7 @@ public class RewriteRuleset {
     public final static List<IAlgebraicRewriteRule> buildPhysicalRewritesAllLevelsRuleCollection() {
         List<IAlgebraicRewriteRule> physicalPlanRewrites = new LinkedList<IAlgebraicRewriteRule>();
         physicalPlanRewrites.add(new PullSelectOutOfEqJoin());
+        physicalPlanRewrites.add(new PushFunctionsOntoEqJoinBranches());
         physicalPlanRewrites.add(new SetAlgebricksPhysicalOperatorsRule());
         physicalPlanRewrites.add(new EnforceStructuralPropertiesRule());
         physicalPlanRewrites.add(new PushProjectDownRule());
@@ -254,6 +265,7 @@ public class RewriteRuleset {
 
     public final static List<IAlgebraicRewriteRule> prepareForJobGenRuleCollection() {
         List<IAlgebraicRewriteRule> prepareForJobGenRewrites = new LinkedList<IAlgebraicRewriteRule>();
+        prepareForJobGenRewrites.add(new ConvertFromAlgebricksExpressionsRule());
         prepareForJobGenRewrites.add(new IsolateHyracksOperatorsRule(
                 HeuristicOptimizer.hyraxOperatorsBelowWhichJobGenIsDisabled));
         prepareForJobGenRewrites.add(new ExtractCommonOperatorsRule());

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/3e53bf31/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/ConvertFromAlgebricksExpressionsRule.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/ConvertFromAlgebricksExpressionsRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/ConvertFromAlgebricksExpressionsRule.java
new file mode 100644
index 0000000..482f05d
--- /dev/null
+++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/ConvertFromAlgebricksExpressionsRule.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.vxquery.compiler.rewriter.rules;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.vxquery.compiler.rewriter.rules.util.ExpressionToolbox;
+import org.apache.vxquery.compiler.rewriter.rules.util.OperatorToolbox;
+import org.apache.vxquery.functions.BuiltinFunctions;
+import org.apache.vxquery.functions.BuiltinOperators;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+
+/**
+ * Rewrites calls to the Algebricks builtin boolean and comparison functions so that they use the VXQuery function
+ * info registered for them in {@link #ALGEBRICKS_MAP}. Every converted call is additionally wrapped in
+ * {@code BuiltinFunctions.FN_BOOLEAN_1}.
+ */
+public class ConvertFromAlgebricksExpressionsRule implements IAlgebraicRewriteRule {
+    // Scratch list reused by every search; it is cleared before each use.
+    final List<Mutable<ILogicalExpression>> functionList = new ArrayList<Mutable<ILogicalExpression>>();
+
+    // Algebricks function identifier -> VXQuery function info that replaces it.
+    final Map<FunctionIdentifier, IFunctionInfo> ALGEBRICKS_MAP = new HashMap<FunctionIdentifier, IFunctionInfo>();
+
+    public ConvertFromAlgebricksExpressionsRule() {
+        ALGEBRICKS_MAP.put(AlgebricksBuiltinFunctions.AND, BuiltinOperators.AND);
+        ALGEBRICKS_MAP.put(AlgebricksBuiltinFunctions.OR, BuiltinOperators.OR);
+        ALGEBRICKS_MAP.put(AlgebricksBuiltinFunctions.NOT, BuiltinFunctions.FN_NOT_1);
+        ALGEBRICKS_MAP.put(AlgebricksBuiltinFunctions.EQ, BuiltinOperators.VALUE_EQ);
+        ALGEBRICKS_MAP.put(AlgebricksBuiltinFunctions.NEQ, BuiltinOperators.VALUE_NE);
+        ALGEBRICKS_MAP.put(AlgebricksBuiltinFunctions.LT, BuiltinOperators.VALUE_LT);
+        ALGEBRICKS_MAP.put(AlgebricksBuiltinFunctions.LE, BuiltinOperators.VALUE_LE);
+        ALGEBRICKS_MAP.put(AlgebricksBuiltinFunctions.GT, BuiltinOperators.VALUE_GT);
+        ALGEBRICKS_MAP.put(AlgebricksBuiltinFunctions.GE, BuiltinOperators.VALUE_GE);
+    }
+
+    /**
+     * Does nothing; all rewriting happens in {@link #rewritePost}.
+     */
+    @Override
+    public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
+        return false;
+    }
+
+    /**
+     * Converts every expression returned by {@code OperatorToolbox.getExpressions} for the operator.
+     *
+     * @return true when at least one function call was converted.
+     */
+    @Override
+    public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
+            throws AlgebricksException {
+        boolean changed = false;
+        for (Mutable<ILogicalExpression> expressionRef : OperatorToolbox.getExpressions(opRef)) {
+            // Non-short-circuit on purpose: every expression must be processed.
+            changed |= processExpression(opRef, expressionRef);
+        }
+        return changed;
+    }
+
+    /**
+     * Converts, one mapped identifier at a time, every matching function call found in {@code search}.
+     *
+     * @return true when at least one function call was converted.
+     */
+    private boolean processExpression(Mutable<ILogicalOperator> opRef, Mutable<ILogicalExpression> search) {
+        boolean changed = false;
+        for (Map.Entry<FunctionIdentifier, IFunctionInfo> mapping : ALGEBRICKS_MAP.entrySet()) {
+            functionList.clear();
+            ExpressionToolbox.findAllFunctionExpressions(search, mapping.getKey(), functionList);
+            for (Mutable<ILogicalExpression> matchRef : functionList) {
+                AbstractFunctionCallExpression call = (AbstractFunctionCallExpression) matchRef.getValue();
+                call.setFunctionInfo(mapping.getValue());
+                // Add boolean function before vxquery expression.
+                Mutable<ILogicalExpression> wrappedArg = new MutableObject<ILogicalExpression>(call);
+                matchRef.setValue(new ScalarFunctionCallExpression(BuiltinFunctions.FN_BOOLEAN_1, wrappedArg));
+                changed = true;
+            }
+        }
+        return changed;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/3e53bf31/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/ConvertToAlgebricksExpressionsRule.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/ConvertToAlgebricksExpressionsRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/ConvertToAlgebricksExpressionsRule.java
new file mode 100644
index 0000000..ebe265e
--- /dev/null
+++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/ConvertToAlgebricksExpressionsRule.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.vxquery.compiler.rewriter.rules;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.vxquery.compiler.rewriter.rules.util.ExpressionToolbox;
+import org.apache.vxquery.compiler.rewriter.rules.util.OperatorToolbox;
+import org.apache.vxquery.functions.BuiltinFunctions;
+import org.apache.vxquery.functions.BuiltinOperators;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+
+/**
+ * Finds calls to {@code BuiltinFunctions.FN_BOOLEAN_1} whose first argument is a call to one of the VXQuery functions
+ * listed in {@link #ALGEBRICKS_MAP}. The argument is switched to the mapped Algebricks function and the surrounding
+ * boolean call is replaced by that argument.
+ */
+public class ConvertToAlgebricksExpressionsRule implements IAlgebraicRewriteRule {
+    // Scratch list reused by every search; it is cleared before each use.
+    final List<Mutable<ILogicalExpression>> functionList = new ArrayList<Mutable<ILogicalExpression>>();
+
+    // VXQuery function identifier -> Algebricks function identifier that replaces it.
+    final Map<FunctionIdentifier, FunctionIdentifier> ALGEBRICKS_MAP = new HashMap<FunctionIdentifier, FunctionIdentifier>();
+
+    public ConvertToAlgebricksExpressionsRule() {
+        ALGEBRICKS_MAP.put(BuiltinOperators.AND.getFunctionIdentifier(), AlgebricksBuiltinFunctions.AND);
+        ALGEBRICKS_MAP.put(BuiltinOperators.OR.getFunctionIdentifier(), AlgebricksBuiltinFunctions.OR);
+        ALGEBRICKS_MAP.put(BuiltinFunctions.FN_NOT_1.getFunctionIdentifier(), AlgebricksBuiltinFunctions.NOT);
+        ALGEBRICKS_MAP.put(BuiltinOperators.VALUE_EQ.getFunctionIdentifier(), AlgebricksBuiltinFunctions.EQ);
+        ALGEBRICKS_MAP.put(BuiltinOperators.VALUE_NE.getFunctionIdentifier(), AlgebricksBuiltinFunctions.NEQ);
+        ALGEBRICKS_MAP.put(BuiltinOperators.VALUE_LT.getFunctionIdentifier(), AlgebricksBuiltinFunctions.LT);
+        ALGEBRICKS_MAP.put(BuiltinOperators.VALUE_LE.getFunctionIdentifier(), AlgebricksBuiltinFunctions.LE);
+        ALGEBRICKS_MAP.put(BuiltinOperators.VALUE_GT.getFunctionIdentifier(), AlgebricksBuiltinFunctions.GT);
+        ALGEBRICKS_MAP.put(BuiltinOperators.VALUE_GE.getFunctionIdentifier(), AlgebricksBuiltinFunctions.GE);
+    }
+
+    /**
+     * Does nothing; all rewriting happens in {@link #rewritePost}.
+     */
+    @Override
+    public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
+        return false;
+    }
+
+    /**
+     * Converts every expression returned by {@code OperatorToolbox.getExpressions} for the operator.
+     *
+     * @return true when at least one boolean call was replaced.
+     */
+    @Override
+    public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
+            throws AlgebricksException {
+        boolean changed = false;
+        for (Mutable<ILogicalExpression> expressionRef : OperatorToolbox.getExpressions(opRef)) {
+            // Non-short-circuit on purpose: every expression must be processed.
+            changed |= processExpression(opRef, expressionRef, context);
+        }
+        return changed;
+    }
+
+    /**
+     * Replaces each convertible boolean call found in {@code search} by its (converted) first argument.
+     *
+     * @param context supplies the metadata provider used to look up the Algebricks function info.
+     * @return true when at least one boolean call was replaced.
+     */
+    private boolean processExpression(Mutable<ILogicalOperator> opRef, Mutable<ILogicalExpression> search,
+            IOptimizationContext context) {
+        boolean changed = false;
+        functionList.clear();
+        ExpressionToolbox.findAllFunctionExpressions(search, BuiltinFunctions.FN_BOOLEAN_1.getFunctionIdentifier(),
+                functionList);
+        for (Mutable<ILogicalExpression> booleanRef : functionList) {
+            AbstractFunctionCallExpression booleanCall = (AbstractFunctionCallExpression) booleanRef.getValue();
+            ILogicalExpression inner = booleanCall.getArguments().get(0).getValue();
+            if (inner.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
+                AbstractFunctionCallExpression innerCall = (AbstractFunctionCallExpression) inner;
+                // The map never stores null values, so a null result means "not a mapped function".
+                FunctionIdentifier algebricksFid = ALGEBRICKS_MAP.get(innerCall.getFunctionIdentifier());
+                if (algebricksFid != null) {
+                    innerCall.setFunctionInfo(context.getMetadataProvider().lookupFunction(algebricksFid));
+                    booleanRef.setValue(inner);
+                    changed = true;
+                }
+            }
+        }
+        return changed;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/3e53bf31/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushFunctionsOntoEqJoinBranches.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushFunctionsOntoEqJoinBranches.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushFunctionsOntoEqJoinBranches.java
new file mode 100644
index 0000000..96b36a5
--- /dev/null
+++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushFunctionsOntoEqJoinBranches.java
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.vxquery.compiler.rewriter.rules;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.vxquery.compiler.rewriter.rules.util.ExpressionToolbox;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+
+/**
+ * Pushes function-call arguments of the equality predicates in an inner join condition onto the join branch that
+ * produces all of the variables they use. Each pushed argument is computed by a new assign operator placed on top of
+ * that branch and is replaced in the join condition by a reference to the assigned variable.
+ */
+public class PushFunctionsOntoEqJoinBranches implements IAlgebraicRewriteRule {
+
+    /**
+     * Does nothing; all rewriting happens in {@link #rewritePost}.
+     */
+    @Override
+    public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
+        return false;
+    }
+
+    /**
+     * Rewrites an inner join whose condition is an Algebricks AND or EQ function call.
+     *
+     * @param opRef
+     *            operator being visited; anything other than an inner join is left untouched.
+     * @param context
+     *            used to allocate the variables of the new assign operators.
+     * @return true when at least one argument was pushed onto a branch.
+     */
+    @Override
+    public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
+            throws AlgebricksException {
+        AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
+
+        if (op.getOperatorTag() != LogicalOperatorTag.INNERJOIN) {
+            return false;
+        }
+        AbstractBinaryJoinOperator join = (AbstractBinaryJoinOperator) op;
+
+        ILogicalExpression expr = join.getCondition().getValue();
+        if (expr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
+            return false;
+        }
+        AbstractFunctionCallExpression fexp = (AbstractFunctionCallExpression) expr;
+        FunctionIdentifier fi = fexp.getFunctionIdentifier();
+        if (!(fi.equals(AlgebricksBuiltinFunctions.AND) || fi.equals(AlgebricksBuiltinFunctions.EQ))) {
+            return false;
+        }
+        boolean modified = false;
+        List<Mutable<ILogicalExpression>> functionList = new ArrayList<Mutable<ILogicalExpression>>();
+        List<Mutable<ILogicalExpression>> variableList = new ArrayList<Mutable<ILogicalExpression>>();
+        ExpressionToolbox.findAllFunctionExpressions(join.getCondition(), AlgebricksBuiltinFunctions.EQ, functionList);
+        Collection<LogicalVariable> producedVariables = new ArrayList<LogicalVariable>();
+        for (Mutable<ILogicalExpression> searchM : functionList) {
+            ILogicalExpression search = searchM.getValue();
+            if (search.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
+                continue;
+            }
+            AbstractFunctionCallExpression searchExp = (AbstractFunctionCallExpression) search;
+            // Go through all argument for EQ.
+            for (Mutable<ILogicalExpression> expressionM : searchExp.getArguments()) {
+                // Only function calls are pushed. Pushing a plain variable reference would add a redundant assign
+                // and replace it with yet another variable reference produced by the branch, so the rule would
+                // report a modification every time it runs and never reach a fixpoint.
+                if (expressionM.getValue().getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
+                    continue;
+                }
+                // Push on to branch when possible.
+                for (Mutable<ILogicalOperator> branch : join.getInputs()) {
+                    producedVariables.clear();
+                    getProducedVariablesInDescendantsAndSelf(branch.getValue(), producedVariables);
+                    variableList.clear();
+                    ExpressionToolbox.findVariableExpressions(expressionM, variableList);
+                    boolean found = true;
+                    for (Mutable<ILogicalExpression> searchVariableM : variableList) {
+                        VariableReferenceExpression vre = (VariableReferenceExpression) searchVariableM.getValue();
+                        if (!producedVariables.contains(vre.getVariableReference())) {
+                            // One variable from outside the branch is enough to rule the branch out.
+                            found = false;
+                            break;
+                        }
+                    }
+                    if (found) {
+                        // push down
+                        LogicalVariable assignVariable = context.newVar();
+                        AssignOperator aOp = new AssignOperator(assignVariable, new MutableObject<ILogicalExpression>(expressionM.getValue()));
+                        aOp.getInputs().add(new MutableObject<ILogicalOperator>(branch.getValue()));
+                        branch.setValue(aOp);
+                        aOp.recomputeSchema();
+
+                        expressionM.setValue(new VariableReferenceExpression(assignVariable));
+                        modified = true;
+                        // The argument is now a variable reference; there is nothing left to push.
+                        break;
+                    }
+                }
+            }
+        }
+        return modified;
+    }
+
+    /**
+     * Adds to {@code vars} the variables produced by {@code op} and, recursively, by all of its inputs.
+     *
+     * @param op
+     *            root of the operator subtree to inspect.
+     * @param vars
+     *            collection that receives the produced variables; it is not cleared first.
+     */
+    public static void getProducedVariablesInDescendantsAndSelf(ILogicalOperator op, Collection<LogicalVariable> vars)
+            throws AlgebricksException {
+        // DFS traversal
+        VariableUtilities.getProducedVariables(op, vars);
+        for (Mutable<ILogicalOperator> c : op.getInputs()) {
+            getProducedVariablesInDescendantsAndSelf(c.getValue(), vars);
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/3e53bf31/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveRedundantBooleanExpressionsRule.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveRedundantBooleanExpressionsRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveRedundantBooleanExpressionsRule.java
new file mode 100644
index 0000000..ba129c0
--- /dev/null
+++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveRedundantBooleanExpressionsRule.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.vxquery.compiler.rewriter.rules;
+
+import org.apache.vxquery.functions.BuiltinFunctions;
+import org.apache.vxquery.runtime.functions.type.SequenceTypeMatcher;
+import org.apache.vxquery.types.BuiltinTypeRegistry;
+import org.apache.vxquery.types.Quantifier;
+import org.apache.vxquery.types.SequenceType;
+
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+
+/**
+ * The rule searches for uses of the XQuery boolean function. When the
+ * expression's input is already a boolean value, the boolean function is removed.
+ * 
+ * <pre>
+ * Before
+ * 
+ *   plan__parent
+ *   %OPERATOR( $v1 : boolean( \@input_expression ) )
+ *   plan__child
+ *   
+ *   Where \@input_expression is known to be a boolean value.
+ *   
+ * After 
+ * 
+ *   plan__parent
+ *   %OPERATOR( $v1 : \@input_expression )
+ *   plan__child
+ * </pre>
+ * 
+ * @author prestonc
+ */
+
+public class RemoveRedundantBooleanExpressionsRule extends AbstractRemoveRedundantTypeExpressionsRule {
+    final SequenceTypeMatcher stm = new SequenceTypeMatcher();
+
+    protected FunctionIdentifier getSearchFunction() {
+        return BuiltinFunctions.FN_BOOLEAN_1.getFunctionIdentifier();
+    }
+
+    @Override
+    public boolean hasTypeArgument() {
+        return false;
+    }
+
+    public boolean matchesAllInstancesOf(SequenceType sTypeArg, SequenceType sTypeOutput) {
+        stm.setSequenceType(SequenceType.create(BuiltinTypeRegistry.XS_BOOLEAN, Quantifier.QUANT_ONE));
+        if (sTypeOutput != null && stm.matchesAllInstances(sTypeOutput)) {
+            return true;
+        }
+        return false;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/3e53bf31/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/ExpressionToolbox.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/ExpressionToolbox.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/ExpressionToolbox.java
index dc9e0d3..78ceeb7 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/ExpressionToolbox.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/ExpressionToolbox.java
@@ -78,6 +78,18 @@ public class ExpressionToolbox {
         return null;
     }
 
+    /**
+     * Appends to {@code finds} every variable expression found in {@code mutableLe}, descending through the
+     * arguments of function calls. Expressions of any other kind are ignored.
+     */
+    public static void findVariableExpressions(Mutable<ILogicalExpression> mutableLe, List<Mutable<ILogicalExpression>> finds) {
+        ILogicalExpression root = mutableLe.getValue();
+        if (root.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
+            finds.add(mutableLe);
+            return;
+        }
+        if (root.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
+            return;
+        }
+        for (Mutable<ILogicalExpression> child : ((AbstractFunctionCallExpression) root).getArguments()) {
+            findVariableExpressions(child, finds);
+        }
+    }
+
     public static Mutable<ILogicalExpression> findLastFunctionExpression(Mutable<ILogicalExpression> mutableLe) {
         ILogicalExpression le = mutableLe.getValue();
         if (le.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
@@ -180,12 +192,12 @@ public class ExpressionToolbox {
     }
 
     public static SequenceType getOutputSequenceType(Mutable<ILogicalOperator> opRef,
-            Mutable<ILogicalExpression> argFirstM, StaticContextImpl dCtx) {
+            Mutable<ILogicalExpression> argFirstM, StaticContext dCtx) {
         ILogicalExpression argFirstLe = argFirstM.getValue();
         switch (argFirstLe.getExpressionTag()) {
             case FUNCTION_CALL:
                 // Only process defined functions.
-                Function function = ExpressionToolbox.getBuiltIn(argFirstM);
+                Function function = ExpressionToolbox.getBuiltIn(argFirstM, dCtx);
                 if (function == null) {
                     return null;
                 } else if (function.getFunctionIdentifier().equals(BuiltinOperators.CAST.getFunctionIdentifier())) {

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/3e53bf31/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryMetadataProvider.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryMetadataProvider.java b/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryMetadataProvider.java
index cd323d9..71963be 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryMetadataProvider.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryMetadataProvider.java
@@ -146,8 +146,16 @@ public class VXQueryMetadataProvider implements IMetadataProvider<String, String
     }
 
     @Override
-    public IFunctionInfo lookupFunction(FunctionIdentifier fid) {
-        return null;
+    public IFunctionInfo lookupFunction(final FunctionIdentifier fid) {
+        return new IFunctionInfo() {
+            @Override
+            public FunctionIdentifier getFunctionIdentifier() {
+                return fid;
+            }
+            public boolean isFunctional() {
+                return true;
+            }
+        };
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/3e53bf31/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/bool/FnBooleanScalarEvaluatorFactory.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/bool/FnBooleanScalarEvaluatorFactory.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/bool/FnBooleanScalarEvaluatorFactory.java
index a6a1d0b..b9c8c6f 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/bool/FnBooleanScalarEvaluatorFactory.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/bool/FnBooleanScalarEvaluatorFactory.java
@@ -53,7 +53,7 @@ public class FnBooleanScalarEvaluatorFactory extends AbstractTaggedValueArgument
     }
 
     private static class FnBooleanScalarEvaluator extends AbstractTaggedValueArgumentScalarEvaluator {
-        final SequencePointable seqp = new SequencePointable();
+        final SequencePointable seqp = (SequencePointable) SequencePointable.FACTORY.createPointable();
         final LongPointable lp = (LongPointable) LongPointable.FACTORY.createPointable();
         final IntegerPointable ip = (IntegerPointable) IntegerPointable.FACTORY.createPointable();
         final ShortPointable sp = (ShortPointable) ShortPointable.FACTORY.createPointable();

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/3e53bf31/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java
index cd8d632..a7d7186 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java
@@ -1200,6 +1200,7 @@ public class FunctionHelper {
 
     public static boolean isDerivedFromString(int tid) {
         switch (tid) {
+            case ValueTag.XS_UNTYPED_ATOMIC_TAG:
             case ValueTag.XS_STRING_TAG:
             case ValueTag.XS_NORMALIZED_STRING_TAG:
             case ValueTag.XS_TOKEN_TAG:

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/3e53bf31/vxquery-core/src/main/java/org/apache/vxquery/runtime/provider/VXQueryBinaryHashFunctionFamilyProvider.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/provider/VXQueryBinaryHashFunctionFamilyProvider.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/provider/VXQueryBinaryHashFunctionFamilyProvider.java
new file mode 100644
index 0000000..b73cc75
--- /dev/null
+++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/provider/VXQueryBinaryHashFunctionFamilyProvider.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.vxquery.runtime.provider;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.data.IBinaryHashFunctionFamilyProvider;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFamily;
+import edu.uci.ics.hyracks.data.std.accessors.MurmurHash3BinaryHashFunctionFamily;
+
+/**
+ * Hash function family provider that returns the MurmurHash3 family for every type.
+ */
+public class VXQueryBinaryHashFunctionFamilyProvider implements IBinaryHashFunctionFamilyProvider {
+
+    // Shared instance; the constructor is private, so this is the only way to obtain the provider.
+    public static final VXQueryBinaryHashFunctionFamilyProvider INSTANCE = new VXQueryBinaryHashFunctionFamilyProvider();
+
+    private VXQueryBinaryHashFunctionFamilyProvider() {
+    }
+
+    /**
+     * @param type
+     *            ignored; the same family is returned whatever the type is.
+     * @return {@code MurmurHash3BinaryHashFunctionFamily.INSTANCE}
+     */
+    @Override
+    public IBinaryHashFunctionFamily getBinaryHashFunctionFamily(Object type) throws AlgebricksException {
+        return MurmurHash3BinaryHashFunctionFamily.INSTANCE;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/3e53bf31/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java b/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java
index 8b10efc..80ee135 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java
@@ -33,6 +33,7 @@ import org.apache.vxquery.exceptions.ErrorCode;
 import org.apache.vxquery.exceptions.SystemException;
 import org.apache.vxquery.metadata.VXQueryMetadataProvider;
 import org.apache.vxquery.runtime.provider.VXQueryBinaryHashFunctionFactoryProvider;
+import org.apache.vxquery.runtime.provider.VXQueryBinaryHashFunctionFamilyProvider;
 import org.apache.vxquery.types.BuiltinTypeRegistry;
 import org.apache.vxquery.types.Quantifier;
 import org.apache.vxquery.types.SequenceType;
@@ -115,6 +116,7 @@ public class XMLQueryCompiler {
             }
         });
         builder.setHashFunctionFactoryProvider(VXQueryBinaryHashFunctionFactoryProvider.INSTANCE);
+        builder.setHashFunctionFamilyProvider(VXQueryBinaryHashFunctionFamilyProvider.INSTANCE);
         builder.setTypeTraitProvider(new ITypeTraitProvider() {
             @Override
             public ITypeTraits getTypeTrait(Object type) {
@@ -196,9 +198,9 @@ public class XMLQueryCompiler {
         defaultLogicalRewrites.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(priorityCtrl,
                 RewriteRuleset.buildXQueryNormalizationRuleCollection()));
         defaultLogicalRewrites.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqCtrlFullDfs,
-                RewriteRuleset.buildNestedDataSourceRuleCollection()));
-        defaultLogicalRewrites.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqCtrlFullDfs,
                 RewriteRuleset.buildRedundantExpressionNormalizationRuleCollection()));
+        defaultLogicalRewrites.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(priorityCtrl,
+                RewriteRuleset.buildNestedDataSourceRuleCollection()));
         defaultLogicalRewrites.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqOnceCtrl,
                 RewriteRuleset.buildTypeInferenceRuleCollection()));
         defaultLogicalRewrites.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqCtrlFullDfs,