You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@climate.apache.org by bo...@apache.org on 2013/08/02 01:41:26 UTC

svn commit: r1509471 - /incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py

Author: boustani
Date: Thu Aug  1 23:41:26 2013
New Revision: 1509471

URL: http://svn.apache.org/r1509471
Log:
first version of rcmed.py under data_source folder

Added:
    incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py

Added: incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py?rev=1509471&view=auto
==============================================================================
--- incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py (added)
+++ incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py Thu Aug  1 23:41:26 2013
@@ -0,0 +1,451 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+'''
+Classes:
+    RCMED - A class for retrieving data from Regional Climate Model Evaluation Database (JPL).
+'''
+
+import urllib, urllib2
+import re
+import json
+import numpy as np
+import numpy.ma as ma
+from datetime import datetime
+import calendar
+from dataset import Dataset
+
+
+URL = 'http://rcmes.jpl.nasa.gov/query-api/query.php?'
+
+
+'''
+def normalizeDatetimes(datetimes, time_step):
+    """
+    Input::
+        datetimes - list of datetime objects that need to be normalized
+        time_step - string of value ('daily' | 'monthly')
+    Output::
+        normalDatetimes - list of datetime objects that have been normalized
+    
+    Normalization Rules::
+        Daily data will be forced to an hour value of 00:00:00
+        Monthly data will be forced to the first of the month at midnight 
+    """
+    normalDatetimes = []
+    if time_step.lower() == 'monthly':
+        for inputDatetime in datetimes:
+            if inputDatetime.day != 1:
+                # Clean the inputDatetime
+                inputDatetimeString = inputDatetime.strftime('%Y%m%d')
+                normalInputDatetimeString = inputDatetimeString[:6] + '01'
+                inputDatetime = datetime.datetime.strptime(normalInputDatetimeString, '%Y%m%d')
+
+            normalDatetimes.append(inputDatetime)
+
+    elif time_step.lower() == 'daily':
+        for inputDatetime in datetimes:
+            if inputDatetime.hour != 0 or inputDatetime.minute != 0 or inputDatetime.second != 0:
+                datetimeString = inputDatetime.strftime('%Y%m%d%H%M%S')
+                normalDatetimeString = datetimeString[:8] + '000000'
+                inputDatetime = datetime.datetime.strptime(normalDatetimeString, '%Y%m%d%H%M%S')
+            
+            normalDatetimes.append(inputDatetime)
+
+
+    return normalDatetimes
+'''
+
+'''
+def _expand_date(start_time, end_time, time_step):
+    if time_step.lower() == 'monthly':
+        if start_time.day != 1:
+            # Clean the startTime
+            startTimeString = start_time.strftime('%Y%m%d')
+            normalInputDatetimeString = startTimeString[:6] + '01'
+            start_time = datetime.strptime(normalInputDatetimeString, '%Y%m%d')
+            ##TODO: Change the 3 lines above with this line:
+            ##start_time = datetime(start_time.year, start_time.month, 1)
+
+        
+        lastDayOfMonth = calendar.monthrange(end_time.year, end_time.month)[1]
+        if end_time.day != lastDayOfMonth:
+            # Clean the endTime
+            endTimeString = end_time.strftime('%Y%m%d')
+            endTimeString = endTimeString[:6] + str(lastDayOfMonth)
+            end_time = datetime.strptime(endTimeString, '%Y%m%d')
+            ##TODO: Change the 3 lines above with this line:
+            ##end_time = datetime(end_time.year, end_time.month, lastDayOfMonth)
+
+    elif time_step.lower() == 'daily':
+        if start_time.hour != 0 or start_time.minute != 0 or start_time.second != 0:
+            datetimeString = start_time.strftime('%Y%m%d%H%M%S')
+            normalDatetimeString = datetimeString[:8] + '000000'
+            start_time = datetime.strptime(normalDatetimeString, '%Y%m%d%H%M%S')
+            ##TODO: Change the 3 lines above with this line:
+            ##start_time = datetime(start_time.year, start_time.month, start_time.day, 00, 00, 00)
+        
+        endTimeString = end_time.strftime('%Y%m%d%H%M%S')
+        endTimeString = endTimeString[:8] + '235959'
+        end_time = datetime.strptime(endTimeString, '%Y%m%d%H%M%S')
+        ##TODO: Change the 3 lines above with this line:
+        ##end_time = datetime(end_time.year, end_time.month, end_time.day, 23, 59, 59)
+
+    return start_time, end_time
+'''
+
+'''
+def _reshape_arrays(lats, lons, levels, values, unique_lats_count, unique_lons_count, unique_levels_count, unique_times_count):
+
+    # Reshape arrays
+    lats = lats.reshape(unique_times_count, unique_lats_count, unique_lons_count, unique_levels_count)
+    lons = lons.reshape(unique_times_count, unique_lats_count, unique_lons_count, unique_levels_count)
+    levels = np.array(levels).reshape(unique_times_count, unique_lats_count, unique_lons_count, unique_levels_count)
+    values = values.reshape(unique_times_count, unique_lats_count, unique_lons_count, unique_levels_count)
+
+    # Flatten dimension if only single level
+    if unique_levels_count == 1:
+        values = values[:, :, :, 0]
+        lats = lats[0, :, :, 0]
+        lons = lons[0, :, :, 0]
+    
+    return lats, lons, levels, values
+'''
+
+'''
+def _reorder_data(lats, lons, times, values):
+
+    # Re-order values in values array such that when reshaped everywhere is where it should be
+    #  (as DB doesn't necessarily return everything in order)
+    order = np.lexsort((lons, lats, times))
+    counter = 0
+    sorted_values = np.zeros_like(values)
+    sorted_lats = np.zeros_like(lats)
+    sorted_lons = np.zeros_like(lons)
+    for i in order:
+        sorted_values[counter] = values[i]
+        sorted_lats[counter] = lats[i]
+        sorted_lons[counter] = lons[i]
+        counter += 1
+    
+    return sorted_lats, sorted_lons, sorted_values
+'''
+
+'''
+def _calculate_len(unique_lat, unique_lon, unique_level, unique_time):
+
+    unique_lats_count = len(unique_lat)
+    unique_lons_count = len(unique_lon)
+    unique_levels_count = len(unique_level)
+    unique_times_count = len(unique_time)
+    
+    return unique_lats_count, unique_lons_count, unique_levels_count, unique_times_count
+'''
+
+
def parameters_metadata():
    '''Return metadata describing the available RCMED parameters.

    NOTE(review): unimplemented placeholder — currently a no-op that
    returns None. Presumably intended to query the RCMED info endpoint
    for the parameter catalogue; confirm before relying on it.
    '''
    pass
+
+
+def _make_mask_array(values):  
+    '''
+    '''
+
+    # Created masked array to deal with missing values
+    #  -these make functions like values.mean(), values.max() etc ignore missing values
+    mdi = -9999  # TODO: extract this value from the DB retrieval metadata
+    values = ma.masked_array(values, mask=(values == mdi))
+    
+    return values
+
+
+def _reshape_values(values, unique_values):
+    '''Reshape values into 4D array.
+    
+    :param values: Raw values data
+    :type values: numpy array
+    :param unique_values: Tuple of unique latitudes, longitudes, levels and times data.
+    :type unique_values: Tuple 
+    
+    :returns: Reshaped values data
+    :rtype: Numpy array
+    '''
+
+    lats_len = len(unique_values[0])
+    lons_len = len(unique_values[1])
+    levels_len = len(unique_values[2])
+    times_len = len(unique_values[3])
+    
+    values = values.reshape(levels_len, times_len, lats_len, lons_len)
+    
+    return values
+
+
+def _calculate_time(unique_times, time_step):
+    '''Convert each time to the datetime object.
+    
+    :param unique_times: Unique time data
+    :type unique_times: String
+    :param time_step: Time step
+    :type time_step: String
+    
+    :returns: Unique datetime objects of time data
+    :rtype: List
+    '''
+    
+    time_format = "%Y-%m-%d %H:%M:%S"
+    unique_times = [datetime.strptime(time, time_format) for time in unique_times]
+    #There is no need to sort time.
+    #This function may required still in RCMES
+    #unique_times.sort()
+    #This function should be moved to the data_process.
+    #unique_times = normalizeDatetimes(unique_times, time_step)
+    
+    return unique_times
+
+
+def _make_unique(lats, lons, levels, times):
+    '''Find the unique values of input data.
+    
+    :param lats: lats
+    :type lats: Numpy array
+    :param lons: lons
+    :type lons: Numpy array
+    :param levels: levels
+    :type levels: Numpy array
+    :param times: times
+    :type times: Numpy array
+    
+    :returns: Unique numpy arrays of latitudes, longitudes, levels and times
+    :rtype: Tuple
+    '''
+
+    unique_lats = np.unique(lats)
+    unique_lons = np.unique(lons)
+    unique_levels = np.unique(levels)
+    unique_times = np.unique(times)
+    
+    return (unique_lats, unique_lons, unique_levels, unique_times)
+
+
def _get_data(url):
    '''Retrieve data from the database.

    :param url: url to query from database
    :type url: String

    :returns: Latitudes, longitudes, levels, times and values data
    :rtype: (list, list, list, list, list)
    '''

    # Close the HTTP response even if read() fails (the original leaked it).
    response = urllib2.urlopen(url)
    try:
        data_string = response.read()
    finally:
        response.close()

    # The payload proper starts after the "data: \r\n" marker.
    index_of_data = re.search('data: \r\n', data_string)
    rows = data_string[index_of_data.end():].split('\r\n')

    lats = []
    lons = []
    levels = []
    values = []
    times = []

    # Each row is "lat,lon,level,time,value"; the payload ends with a
    # trailing blank line, so skip empty rows instead of assuming exactly
    # one at the end.
    for row in rows:
        if not row:
            continue
        fields = row.split(',')
        lats.append(np.float32(fields[0]))
        lons.append(np.float32(fields[1]))
        levels.append(np.float32(fields[2]))
        times.append(fields[3])
        values.append(np.float32(fields[4]))

    return lats, lons, levels, times, values
+
+
+def _beginning_of_date(time, time_step):
+    '''Calculate the beginning of given time, based on time step.
+    
+    :param time: Given time
+    :type time: Datetime
+    :param time_step: Time step (monthly or daily)
+    :type time_step: String
+    
+    :returns: Beginning of given time
+    :rtype: Datetime
+    '''
+
+    if time_step.lower() == 'monthly':
+        if time.day != 1:
+            start_time_string = time.strftime('%Y%m%d')
+            start_time_string = start_time_string[:6] + '01'
+            time = datetime.strptime(start_time_string, '%Y%m%d')
+            ##TODO: Change the 3 lines above with this line:
+            ##time = datetime(time.year, time.month, 1)
+    elif time_step.lower() == 'daily':
+        if time.hour != 0 or time.minute != 0 or time.second != 0:
+            start_time_string = time.strftime('%Y%m%d%H%M%S')
+            start_time_string = start_time_string[:8] + '000000'
+            time = datetime.strptime(start_time_string, '%Y%m%d%H%M%S')
+            ##TODO: Change the 3 lines above with this line:
+            ##time = datetime(time.year, time.month, time.day, 00, 00, 00)
+    
+    return time
+
+
+def _end_of_date(time, time_step):
+    '''Calculate the end of given time, based on time step.
+    
+    :param time: Given time
+    :type time: Datetime
+    :param time_step: Time step (monthly or daily)
+    :type time_step: String
+    
+    :returns: End of given time
+    :rtype: Datetime
+    '''
+
+    last_day_of_month = calendar.monthrange(time.year, time.month)[1]
+    if time.day != last_day_of_month:
+        end_time_string = time.strftime('%Y%m%d')
+        end_time_string = end_time_string[:6] + str(last_day_of_month)
+        time = datetime.strptime(end_time_string, '%Y%m%d')
+        ##TODO: Change the 3 lines above with this line:
+        ##time = datetime(time.year, time.month, lastDayOfMonth)
+    elif time_step.lower() == 'daily':
+        end_time_string = time.strftime('%Y%m%d%H%M%S')
+        end_time_string = end_time_string[:8] + '235959'
+        time = datetime.strptime(end_time_string, '%Y%m%d%H%M%S')
+        ##TODO: Change the 3 lines above with this line:
+        ##time = datetime(time.year, time.month, end_time.day, 23, 59, 59)
+
+    return time
+
+
+
def _generate_query_url(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon, start_time, end_time, time_step):
    '''Generate the url to query from database.

    :param dataset_id: Dataset id.
    :type dataset_id: Integer
    :param parameter_id: Parameter id
    :type parameter_id: Integer
    :param min_lat: Minimum latitude
    :type min_lat: Float
    :param max_lat: Maximum latitude
    :type max_lat: Float
    :param min_lon: Minimum longitude
    :type min_lon: Float
    :param max_lon: Maximum longitude
    :type max_lon: Float
    :param start_time: Start time
    :type start_time: Datetime
    :param end_time: End time
    :type end_time: Datetime
    :param time_step: Time step
    :type time_step: String

    :returns: url to query from database
    :rtype: String
    '''

    # Expand the requested window to whole time-step boundaries before
    # formatting for the query string.
    begin = _beginning_of_date(start_time, time_step)
    end = _end_of_date(end_time, time_step)

    query = [('datasetId', dataset_id),
             ('parameterId', parameter_id),
             ('latMin', min_lat),
             ('latMax', max_lat),
             ('lonMin', min_lon),
             ('lonMax', max_lon),
             ('timeStart', begin.strftime("%Y%m%dT%H%MZ")),
             ('timeEnd', end.strftime("%Y%m%dT%H%MZ"))]

    return URL + urllib.urlencode(query)
+
+
+
def _get_parameter_info(dataset_id, parameter_id):
    '''General information for given parameter id.

    :param dataset_id: Dataset id.
    :type dataset_id: Integer
    :param parameter_id: Parameter id
    :type parameter_id: Integer

    :returns: Database name, time step, realm, instrument, start_date, end_date and unit for given parameter
    :rtype: (string, string, string, string, string, string, string)
    '''

    query = [('datasetId', dataset_id), ('parameterId', parameter_id)]
    url = URL + urllib.urlencode(query) + "&info=yes"

    # Close the HTTP response even if read() fails (the original leaked it).
    response = urllib2.urlopen(url)
    try:
        data_string = response.read()
    finally:
        response.close()

    info = json.loads(data_string)

    return (info["database"], info["timestep"], info["realm"],
            info["instrument"], info["start_date"], info["end_date"],
            info["units"])
+
+
+
def parameter_dataset(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon, start_time, end_time):
    '''Get data from one database(parameter).

    :param dataset_id: Dataset id.
    :type dataset_id: Integer
    :param parameter_id: Parameter id
    :type parameter_id: Integer
    :param min_lat: Minimum latitude
    :type min_lat: Float
    :param max_lat: Maximum latitude
    :type max_lat: Float
    :param min_lon: Minimum longitude
    :type min_lon: Float
    :param max_lon: Maximum longitude
    :type max_lon: Float
    :param start_time: Start time
    :type start_time: Datetime
    :param end_time: End time
    :type end_time: Datetime

    :returns: Dataset object
    :rtype: Object
    '''

    # Look up the parameter's metadata; index 0 is the database name
    # (used as the dataset name) and index 1 is its time step.
    parameter_info = _get_parameter_info(dataset_id, parameter_id)
    parameter_name = parameter_info[0]
    time_step = parameter_info[1]

    url = _generate_query_url(dataset_id, parameter_id,
                              min_lat, max_lat, min_lon, max_lon,
                              start_time, end_time, time_step)
    lats, lons, levels, times, values = _get_data(url)

    # unique_data holds (unique_lats, unique_lons, unique_levels, unique_times).
    unique_data = _make_unique(np.array(lats), np.array(lons), levels, np.array(times))
    unique_times = _calculate_time(unique_data[3], time_step)
    reshaped_values = _reshape_values(np.array(values), unique_data)
    masked_values = _make_mask_array(reshaped_values)

    return Dataset(unique_data[0], unique_data[1], unique_times, masked_values, parameter_name)
\ No newline at end of file