You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sdap.apache.org by le...@apache.org on 2017/10/27 22:41:50 UTC
[13/15] incubator-sdap-edge git commit: SDAP-1 Import all code under
the SDAP SGA
http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/datasetatomresponse.py
----------------------------------------------------------------------
diff --git a/src/main/python/libraries/edge/opensearch/datasetatomresponse.py b/src/main/python/libraries/edge/opensearch/datasetatomresponse.py
new file mode 100644
index 0000000..dc11a93
--- /dev/null
+++ b/src/main/python/libraries/edge/opensearch/datasetatomresponse.py
@@ -0,0 +1,85 @@
+import datetime
+import urllib
+
+from edge.opensearch.atomresponsebysolr import AtomResponseBySolr
+from edge.dateutility import DateUtility
+
class DatasetAtomResponse(AtomResponseBySolr):
    """Atom feed response for dataset-level OpenSearch results.

    Turns each Solr dataset document into an Atom entry carrying metadata
    links (PO.DAAC, ISO-19115, GCMD), an optional granule-search link,
    data-access links, and georss/time coverage elements.
    """

    def __init__(self, portalUrl, host, url, datasets):
        # portalUrl: base URL of the dataset portal ("Dataset Information" links)
        # host: stored for use elsewhere; not read in this class
        # url: service base URL used to construct search and metadata links
        # datasets: container of dataset short names that have granules indexed
        super(DatasetAtomResponse, self).__init__()
        self.portalUrl = portalUrl
        self.host = host
        self.url = url
        self.datasets = datasets

    def _populateChannel(self, solrResponse):
        # Feed-level link advertising the granule OpenSearch descriptor document.
        self.variables.append({'name': 'link', 'attribute': {'href': self.url+self.searchBasePath+'podaac-granule-osd.xml', 'rel': 'search', 'type': 'application/opensearchdescription+xml' }})

    def _populateItem(self, solrResponse, doc, item):
        # Identify the dataset by persistent id when available, otherwise by
        # short name; idTuple is reused as a query parameter in links below.
        persistentId = doc['Dataset-PersistentId'][0]
        idTuple = ('datasetId', persistentId)
        if persistentId == '':
            idTuple = ('shortName', doc['Dataset-ShortName'][0])
        item.append({'name': 'title', 'value': doc['Dataset-LongName'][0]})
        item.append({'name': 'content', 'value': doc['Dataset-Description'][0]})

        # Metadata links: full PO.DAAC record, ISO-19115 and GCMD translations.
        item.append({'name': 'link', 'attribute': {'href': self.url + self.searchBasePath + 'dataset?' + urllib.urlencode(dict([idTuple, ('full', 'true')])), 'rel': 'enclosure', 'type': 'application/atom+xml', 'title': 'PO.DAAC Metadata' }})
        item.append({'name': 'link', 'attribute': {'href': self.url + self.metadataBasePath + 'dataset?' + urllib.urlencode(dict([idTuple, ('format', 'iso')])), 'rel': 'enclosure', 'type': 'text/xml', 'title': 'ISO-19115 Metadata' }})
        item.append({'name': 'link', 'attribute': {'href': self.url + self.metadataBasePath + 'dataset?' + urllib.urlencode(dict([idTuple, ('format', 'gcmd')])), 'rel': 'enclosure', 'type': 'text/xml', 'title': 'GCMD Metadata' }})

        #Only generate granule search link if dataset has granules
        if (doc['Dataset-ShortName'][0] in self.datasets):
            # Carry over only the spatial/temporal parameters of the current search.
            supportedGranuleParams = dict([(key,value) for key,value in self.parameters.iteritems() if key in ['bbox', 'startTime', 'endTime']])
            if persistentId == '':
                supportedGranuleParams['shortName'] = doc['Dataset-ShortName'][0]
            else:
                supportedGranuleParams['datasetId'] = persistentId
            item.append({'name': 'link', 'attribute': {'href': self.url + self.searchBasePath + 'granule?' + urllib.urlencode(supportedGranuleParams), 'rel': 'search', 'type': 'application/atom+xml', 'title': 'Granule Search' }})

        if 'Dataset-ImageUrl' in doc and doc['Dataset-ImageUrl'][0] != '':
            item.append({'name': 'link', 'attribute': {'href': doc['Dataset-ImageUrl'][0], 'rel': 'enclosure', 'type': 'image/jpg', 'title': 'Thumbnail' }})

        # Data-access links; LOCAL locations take precedence over REMOTE ones.
        if 'DatasetLocationPolicy-Type' in doc and 'DatasetLocationPolicy-BasePath' in doc:
            url = dict(zip(doc['DatasetLocationPolicy-Type'], doc['DatasetLocationPolicy-BasePath']))
            if 'LOCAL-OPENDAP' in url:
                item.append({'name': 'link', 'attribute': {'href': url['LOCAL-OPENDAP'], 'rel': 'enclosure', 'type': 'text/html', 'title': 'OPeNDAP URL' }})
            elif 'REMOTE-OPENDAP' in url:
                item.append({'name': 'link', 'attribute': {'href': url['REMOTE-OPENDAP'], 'rel': 'enclosure', 'type': 'text/html', 'title': 'OPeNDAP URL' }})
            if 'LOCAL-FTP' in url:
                item.append({'name': 'link', 'attribute': {'href': url['LOCAL-FTP'], 'rel': 'enclosure', 'type': 'text/plain', 'title': 'FTP URL' }})
            elif 'REMOTE-FTP' in url:
                item.append({'name': 'link', 'attribute': {'href': url['REMOTE-FTP'], 'rel': 'enclosure', 'type': 'text/plain', 'title': 'FTP URL' }})
        # NOTE(review): assumes DatasetPolicy-* fields are always present in
        # the Solr doc (direct indexing, no 'in doc' guard) — confirm schema.
        if doc['DatasetPolicy-ViewOnline'][0] == 'Y' and doc['DatasetPolicy-AccessType-Full'][0] in ['OPEN', 'PREVIEW', 'SIMULATED', 'REMOTE']:
            portalUrl = self.portalUrl+'/'+doc['Dataset-ShortName'][0]
            item.append({'name': 'link', 'attribute': {'href': portalUrl, 'rel': 'enclosure', 'type': 'text/html', 'title': 'Dataset Information' }})
        # Entry update time: last metadata revision, or "now" when unknown.
        updated = None
        if 'DatasetMetaHistory-LastRevisionDateLong' in doc and doc['DatasetMetaHistory-LastRevisionDateLong'][0] != '':
            updated = DateUtility.convertTimeLongToIso(doc['DatasetMetaHistory-LastRevisionDateLong'][0])
        else:
            updated = datetime.datetime.utcnow().isoformat()+'Z'

        item.append({'name': 'updated', 'value': updated})
        item.append({'name': 'id', 'value': persistentId})
        item.append({'namespace': 'podaac', 'name': 'datasetId', 'value': doc['Dataset-PersistentId'][0]})
        item.append({'namespace': 'podaac', 'name': 'shortName', 'value': doc['Dataset-ShortName'][0]})

        # Spatial coverage as a georss/gml envelope: lower corner is
        # (west, south), upper corner is (east, north).
        if doc['DatasetCoverage-WestLon'][0] != '' and doc['DatasetCoverage-SouthLat'][0] != '' and doc['DatasetCoverage-EastLon'][0] != '' and doc['DatasetCoverage-NorthLat'][0] != '':
            item.append({'namespace': 'georss', 'name': 'where', 'value': {'namespace': 'gml', 'name': 'Envelope', 'value': [{'namespace': 'gml', 'name': 'lowerCorner', 'value': ' '.join([doc['DatasetCoverage-WestLon'][0], doc['DatasetCoverage-SouthLat'][0]]) }, {'namespace': 'gml', 'name': 'upperCorner', 'value': ' '.join([doc['DatasetCoverage-EastLon'][0], doc['DatasetCoverage-NorthLat'][0]])}]}})

        if 'DatasetCoverage-StartTimeLong' in doc and doc['DatasetCoverage-StartTimeLong'][0] != '':
            item.append({'namespace': 'time', 'name': 'start', 'value': DateUtility.convertTimeLongToIso(doc['DatasetCoverage-StartTimeLong'][0])})

        if 'DatasetCoverage-StopTimeLong' in doc and doc['DatasetCoverage-StopTimeLong'][0] != '':
            item.append({'namespace': 'time', 'name': 'end', 'value': DateUtility.convertTimeLongToIso(doc['DatasetCoverage-StopTimeLong'][0])})

        # full=true: inline every location-policy URL plus all multi-valued
        # PO.DAAC metadata fields into the entry.
        if 'full' in self.parameters and self.parameters['full']:
            if 'DatasetLocationPolicy-Type' in doc and 'DatasetLocationPolicy-BasePath' in doc:
                for i, x in enumerate(doc['DatasetLocationPolicy-Type']):
                    item.append({'namespace': 'podaac', 'name': self._camelCaseStripHyphen(x.title()), 'value': doc['DatasetLocationPolicy-BasePath'][i]})
                # Consumed here; remove so generic metadata emission skips them.
                del doc['DatasetLocationPolicy-Type']
                del doc['DatasetLocationPolicy-BasePath']

            multiValuedElementsKeys = ('DatasetRegion-', 'DatasetCharacter-', 'DatasetCitation-', 'DatasetContact-Contact-', 'DatasetDatetime-',
                                       'DatasetInteger-', 'DatasetParameter-', 'DatasetProject-', 'DatasetReal-', 'DatasetResource-',
                                       'DatasetSoftware-', 'DatasetSource-', 'DatasetVersion-', 'Collection-')
            self._populateItemWithPodaacMetadata(doc, item, multiValuedElementsKeys)
http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/datasetgcmdresponse.py
----------------------------------------------------------------------
diff --git a/src/main/python/libraries/edge/opensearch/datasetgcmdresponse.py b/src/main/python/libraries/edge/opensearch/datasetgcmdresponse.py
new file mode 100644
index 0000000..002bdc9
--- /dev/null
+++ b/src/main/python/libraries/edge/opensearch/datasetgcmdresponse.py
@@ -0,0 +1,11 @@
+from edge.opensearch.gcmdresponsebysolr import GcmdResponseBySolr
+
class DatasetGcmdResponse(GcmdResponseBySolr):
    """GCMD-format dataset metadata response.

    All rendering is inherited from GcmdResponseBySolr; this subclass adds
    no channel- or item-level elements of its own.
    """

    def __init__(self, configuration):
        """Delegate construction straight to the base class."""
        super(DatasetGcmdResponse, self).__init__(configuration)

    def _populateChannel(self, solrResponse):
        """Intentionally a no-op: no extra channel elements for GCMD output."""

    def _populateItem(self, solrResponse, doc, item):
        """Intentionally a no-op: no extra item elements for GCMD output."""
http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/datasetgranulewriter.py
----------------------------------------------------------------------
diff --git a/src/main/python/libraries/edge/opensearch/datasetgranulewriter.py b/src/main/python/libraries/edge/opensearch/datasetgranulewriter.py
new file mode 100644
index 0000000..f9c62a1
--- /dev/null
+++ b/src/main/python/libraries/edge/opensearch/datasetgranulewriter.py
@@ -0,0 +1,233 @@
+from types import *
+import logging
+import urllib
+import urlparse
+import httplib
+from xml.dom.minidom import Document
+import json
+import xml.sax.saxutils
+import datetime
+import codecs
+
+from edge.opensearch.responsewriter import ResponseWriter
+from edge.dateutility import DateUtility
+from edge.httputility import HttpUtility
+from edge.spatialsearch import SpatialSearch
+import re
+
class DatasetGranuleWriter(ResponseWriter):
    """Async request writer for granule searches scoped to one dataset.

    Queries the granule Solr core first; when granules are found it fetches
    the owning dataset's Solr document, then renders both through
    _generateOpenSearchResponse (supplied by subclasses).
    """

    def __init__(self, configFilePath, requiredParams = None):
        super(DatasetGranuleWriter, self).__init__(configFilePath, requiredParams)
        # Raw body of the granule Solr response, kept so the later
        # dataset-response callback can combine the two results.
        self.solrGranuleResponse = None

    def get(self, requestHandler):
        """Parse request arguments and kick off the asynchronous Solr query."""
        super(DatasetGranuleWriter, self).get(requestHandler)
        #logging.debug('uri: '+str(requestHandler.request.headers))

        # Paging arguments; the bare excepts keep missing/invalid arguments
        # falling back to defaults (get_argument raises when absent).
        startIndex = 0
        try:
            startIndex = requestHandler.get_argument('startIndex')
        except:
            pass

        entriesPerPage = self._configuration.getint('solr', 'entriesPerPage')
        try:
            entriesPerPage = requestHandler.get_argument('itemsPerPage')
            #cap entries per page at 400
            if (int(entriesPerPage) > 400):
                entriesPerPage = 400
        except:
            pass

        #pretty = True
        try:
            if requestHandler.get_argument('pretty').lower() == 'false':
                self.pretty = False
        except:
            pass

        # Copy only the supported search parameters into self.variables.
        parameters = ['startTime', 'endTime', 'keyword', 'granuleName', 'datasetId', 'shortName', 'bbox', 'sortBy']
        #variables = {}
        for parameter in parameters:
            try:
                value = requestHandler.get_argument(parameter)
                self.variables[parameter] = value
            except:
                pass

        # Keyword search runs against a lower-cased Solr field; strip user
        # wildcards and case-fold to match.
        if 'keyword' in self.variables:
            self.variables['keyword'] = self.variables['keyword'].replace('*', '')
            self.variables['keyword'] = self.variables['keyword'].lower()
        """
        if 'bbox' in variables:
            points = variables['bbox'].split(',')
            if len(points) == 4:
                spatialSearch = SpatialSearch(
                    self._configuration.get('service', 'database')
                )
                spatialResult = spatialSearch.searchGranules(
                    int(startIndex),
                    int(entriesPerPage),
                    float(points[0]),
                    float(points[1]),
                    float(points[2]),
                    float(points[3])
                )
                if len(spatialResult[0]) > 0:
                    variables['granuleIds'] = spatialResult[0]
                    variables['granuleIdsFound'] = spatialResult[1]

            del variables['bbox']
        """
        try:
            self._getSolrResponse(startIndex, entriesPerPage, self.variables)
            """
            solrJson = json.loads(solrResponse)
            if len(solrJson['response']['docs']) >= 1:
                dataset = solrJson['response']['docs'][0]['Dataset-ShortName'][0];
                logging.debug('Getting solr response for dataset ' + dataset)
                solrDatasetResponse = self._getSingleSolrDatasetResponse({'shortName' : dataset})
            """
        except:
            logging.exception('Failed to get solr response.')
        """
        if 'granuleIdsFound' in variables:
            #solrJson = json.loads(solrResponse)
            numFound = solrJson['response']['numFound']
            solrJson['response']['numFound'] = int(variables['granuleIdsFound'])
            solrJson['response']['start'] = int(startIndex)
            solrJson['responseHeader']['params']['rows'] = numFound
            solrResponse = json.dumps(solrJson)

        searchText = ''
        if 'keyword' in variables:
            searchText = variables['keyword']
        try:
            openSearchResponse = self._generateOpenSearchResponse(
                solrResponse,
                solrDatasetResponse,
                searchText,
                self._configuration.get('service', 'url')+requestHandler.request.uri,
                pretty
            )
            requestHandler.set_header("Content-Type", "application/xml")
            requestHandler.write(openSearchResponse)
        except Exception as exception:
            logging.exception(exception)
            requestHandler.set_status(404)
            requestHandler.write('ERROR - ' + str(exception))
        """

    def _getSolrResponse(self, startIndex, entriesPerPage, variables):
        # Build the query and issue it asynchronously; the granule callback
        # continues the response pipeline.
        query = self._constructSolrQuery(startIndex, entriesPerPage, variables)
        url = self._configuration.get('solr', 'granuleUrl')

        httpUtility = HttpUtility()
        httpUtility.getResponse(url+'/select/?'+query, self._onSolrGranuleResponse)

    def _constructSolrQuery(self, startIndex, entriesPerPage, variables):
        """Translate request parameters into a raw Solr query string.

        The URL is hand-built: '%20' stands in for spaces and Lucene range
        syntax is written literally, so values must already be URL-safe.
        """
        #set default sort order
        sort='Granule-StartTimeLong+desc'
        queries = []
        for key, value in variables.iteritems():
            #query = ''
            if key == 'startTime':
                # Granules whose start time is on/after the requested start.
                startTime = DateUtility.convertISOToUTCTimestamp(value)
                if startTime is not None:
                    query = 'Granule-StartTimeLong:'
                    query += '['+str(startTime)+'%20TO%20*]'
                    queries.append(query)
            elif key == 'endTime':
                # Granules whose start time is on/before the requested end.
                stopTime = DateUtility.convertISOToUTCTimestamp(value)
                if stopTime is not None:
                    query = 'Granule-StartTimeLong:'
                    query += '[*%20TO%20'+str(stopTime)+']'
                    queries.append(query)
            elif key == 'keyword':
                newValue = urllib.quote(value)

                query = 'SearchableText-LowerCased:('+newValue+')'
                queries.append(query)
            elif key == 'datasetId':
                query = 'Dataset-PersistentId:'+self._urlEncodeSolrQueryValue(value)
                queries.append(query)
            elif key == 'shortName':
                query = 'Dataset-ShortName-Full:'+self._urlEncodeSolrQueryValue(value)
                queries.append(query)
            elif key == 'granuleName':
                query = 'Granule-Name-Full:'+self._urlEncodeSolrQueryValue(value)
                queries.append(query)
            elif key == 'granuleIds':
                granuleIds = []
                for granuleId in value:
                    granuleIds.append(str(granuleId))
                query = 'Granule-Id:('+'+OR+'.join(granuleIds)+')'
                queries.append(query)

                # Explicit granule ids already define the result page.
                startIndex = 0
            elif key == 'sortBy':
                sortByMapping = {'timeAsc': 'Granule-StartTimeLong+asc', 'archiveTimeDesc': 'Granule-ArchiveTimeLong+desc'}
                if value in sortByMapping.keys():
                    sort = sortByMapping[value]
            elif key == 'archiveTime':
                # NOTE(review): 'archiveTime' is not among the request
                # parameters accepted in get(); presumably set
                # programmatically by a subclass — confirm.
                query = 'Granule-ArchiveTimeLong:['+str(value)+'%20TO%20*]'
                queries.append(query)
            #if query != '':
            #    queries.append('%2B'+query)

        if len(queries) == 0:
            queries.append('*')

        # Restrict results to publicly accessible, online granules.
        query = 'q='+'+AND+'.join(queries)+'&fq=Granule-AccessType:(OPEN+OR+PREVIEW+OR+SIMULATED+OR+REMOTE)+AND+Granule-Status:ONLINE&version=2.2&start='+str(startIndex)+'&rows='+str(entriesPerPage)+'&indent=on&wt=json&sort='+sort
        logging.debug('solr query: '+query)

        return query

    def _readTemplate(self, path):
        # Read a UTF-8 template file into a string. ('file' shadows the
        # builtin; kept as-is.)
        file = codecs.open(path, encoding='utf-8')
        data = file.read()
        file.close()

        return data

    def _generateOpenSearchResponse(self, solrGranuleResponse, solrDatasetResponse, pretty):
        # Hook for subclasses: render the combined granule+dataset response.
        pass

    def _onSolrGranuleResponse(self, response):
        """Granule-query callback: chain the dataset lookup or finish early."""
        if response.error:
            self._handleException(str(response.error))
        else:
            self.solrGranuleResponse = response.body
            solrJson = json.loads(response.body)
            if len(solrJson['response']['docs']) >= 1:
                # All returned granules belong to one dataset; look it up by
                # the first doc's short name.
                dataset = solrJson['response']['docs'][0]['Dataset-ShortName'][0];
                logging.debug('Getting solr response for dataset ' + dataset)
                self._getSingleSolrDatasetResponse({'shortName' : dataset}, self._onSolrDatasetResponse)
            else:
                # No granules found: emit the (empty) response immediately.
                try:
                    openSearchResponse = self._generateOpenSearchResponse(
                        None,
                        None,
                        self.pretty
                    )
                    self.requestHandler.set_header("Content-Type", "application/xml")
                    self.requestHandler.write(openSearchResponse)
                    self.requestHandler.finish()
                except BaseException as exception:
                    self._handleException(str(exception))

    def _onSolrDatasetResponse(self, response):
        """Dataset-query callback: render and write the final response."""
        if response.error:
            self._handleException(str(response.error))
        else:
            try:
                openSearchResponse = self._generateOpenSearchResponse(
                    self.solrGranuleResponse,
                    response.body,
                    self.pretty
                )
                self.requestHandler.set_header("Content-Type", "application/xml")
                self.requestHandler.write(openSearchResponse)
                self.requestHandler.finish()
            except BaseException as exception:
                self._handleException(str(exception))
http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/datasetisoresponse.py
----------------------------------------------------------------------
diff --git a/src/main/python/libraries/edge/opensearch/datasetisoresponse.py b/src/main/python/libraries/edge/opensearch/datasetisoresponse.py
new file mode 100644
index 0000000..823d24a
--- /dev/null
+++ b/src/main/python/libraries/edge/opensearch/datasetisoresponse.py
@@ -0,0 +1,11 @@
+from edge.opensearch.isoresponsebysolr import IsoResponseBySolr
+
class DatasetIsoResponse(IsoResponseBySolr):
    """ISO-19115-format dataset metadata response.

    All rendering is inherited from IsoResponseBySolr; this subclass adds
    no channel- or item-level elements of its own.
    """

    def __init__(self):
        """Delegate construction straight to the base class."""
        super(DatasetIsoResponse, self).__init__()

    def _populateChannel(self, solrResponse):
        """Intentionally a no-op: no extra channel elements for ISO output."""

    def _populateItem(self, solrResponse, doc, item):
        """Intentionally a no-op: no extra item elements for ISO output."""
http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/datasetresponsebysolr.py
----------------------------------------------------------------------
diff --git a/src/main/python/libraries/edge/opensearch/datasetresponsebysolr.py b/src/main/python/libraries/edge/opensearch/datasetresponsebysolr.py
new file mode 100644
index 0000000..53e89ae
--- /dev/null
+++ b/src/main/python/libraries/edge/opensearch/datasetresponsebysolr.py
@@ -0,0 +1,14 @@
+from edge.opensearch.responsebysolr import ResponseBySolr
+
class DatasetResponseBySolr(ResponseBySolr):
    """Minimal per-dataset response: title, description and a portal link."""

    def __init__(self, portalUrl):
        """Store the portal base URL used to build per-dataset links."""
        super(DatasetResponseBySolr, self).__init__()
        self.portalUrl = portalUrl

    def _populateChannel(self, solrResponse):
        """No channel-level elements are needed for this response."""
        pass

    def _populateItem(self, solrResponse, doc, item):
        """Append title, description and portal link for one Solr document."""
        elements = [
            ('title', doc['Dataset-LongName'][0]),
            ('description', doc['Dataset-Description'][0]),
            ('link', self.portalUrl + '/' + doc['Dataset-ShortName'][0]),
        ]
        for name, value in elements:
            item.append({'name': name, 'value': value})
http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/datasetrssresponse.py
----------------------------------------------------------------------
diff --git a/src/main/python/libraries/edge/opensearch/datasetrssresponse.py b/src/main/python/libraries/edge/opensearch/datasetrssresponse.py
new file mode 100644
index 0000000..e9194bc
--- /dev/null
+++ b/src/main/python/libraries/edge/opensearch/datasetrssresponse.py
@@ -0,0 +1,85 @@
import datetime
import urllib

from edge.opensearch.rssresponsebysolr import RssResponseBySolr
from edge.dateutility import DateUtility
+
class DatasetRssResponse(RssResponseBySolr):
    """RSS feed response for dataset-level OpenSearch results.

    Mirrors DatasetAtomResponse but emits RSS elements: metadata and access
    URLs become <enclosure> items, the entry timestamp is <pubDate> and the
    identifier is <guid>.
    """

    def __init__(self, portalUrl, url, datasets):
        # portalUrl: base URL of the dataset portal (item links)
        # url: service base URL used to construct search and metadata links
        # datasets: container of dataset short names that have granules indexed
        super(DatasetRssResponse, self).__init__()
        self.portalUrl = portalUrl
        self.url = url
        self.datasets = datasets

    def _populateChannel(self, solrResponse):
        # Channel-level atom:link advertising the granule OpenSearch descriptor.
        self.variables.append({'namespace': 'atom', 'name': 'link', 'attribute': {'href': self.url+self.searchBasePath+'podaac-granule-osd.xml', 'rel': 'search', 'type': 'application/opensearchdescription+xml' }})

    def _populateItem(self, solrResponse, doc, item):
        # Identify the dataset by persistent id when available, otherwise by
        # short name; idTuple is reused as a query parameter in links below.
        persistentId = doc['Dataset-PersistentId'][0]
        idTuple = ('datasetId', persistentId)
        if persistentId == '':
            idTuple = ('shortName', doc['Dataset-ShortName'][0])
        # Item link points at the portal page only when the dataset is viewable.
        portalUrl = ""
        if doc['DatasetPolicy-ViewOnline'][0] == 'Y' and doc['DatasetPolicy-AccessType-Full'][0] in ['OPEN', 'PREVIEW', 'SIMULATED', 'REMOTE']:
            portalUrl = self.portalUrl+'/'+doc['Dataset-ShortName'][0]
            item.append({'name': 'enclosure', 'attribute': {'url': portalUrl, 'type': 'text/html', 'length': '0'}})
        item.append({'name': 'title', 'value': doc['Dataset-LongName'][0]})
        item.append({'name': 'description', 'value': doc['Dataset-Description'][0]})
        item.append({'name': 'link', 'value': portalUrl})

        # Metadata links: full PO.DAAC record, ISO-19115 and GCMD translations.
        item.append({'name': 'enclosure', 'attribute': {'url': self.url + self.searchBasePath + 'dataset?' + urllib.urlencode(dict([idTuple, ('full', 'true'), ('format', 'rss')])), 'type': 'application/rss+xml', 'length': '0'}})
        item.append({'name': 'enclosure', 'attribute': {'url': self.url + self.metadataBasePath + 'dataset?' + urllib.urlencode(dict([idTuple, ('format', 'iso')])), 'type': 'text/xml', 'length': '0'}})
        item.append({'name': 'enclosure', 'attribute': {'url': self.url + self.metadataBasePath + 'dataset?' + urllib.urlencode(dict([idTuple, ('format', 'gcmd')])), 'type': 'text/xml', 'length': '0'}})

        #Only generate granule search link if dataset has granules
        if (doc['Dataset-ShortName'][0] in self.datasets):
            # Carry over only the spatial/temporal/format parameters of the
            # current search.
            supportedGranuleParams = dict([(key,value) for key,value in self.parameters.iteritems() if key in ['bbox', 'startTime', 'endTime', 'format']])
            if persistentId == '':
                supportedGranuleParams['shortName'] = doc['Dataset-ShortName'][0]
            else:
                supportedGranuleParams['datasetId'] = persistentId
            item.append({'name': 'enclosure', 'attribute': {'url': self.url + self.searchBasePath + 'granule?' + urllib.urlencode(supportedGranuleParams), 'type': 'application/rss+xml', 'length': '0'}})

        if 'Dataset-ImageUrl' in doc and doc['Dataset-ImageUrl'][0] != '':
            item.append({'name': 'enclosure', 'attribute': {'url': doc['Dataset-ImageUrl'][0], 'type': 'image/jpg', 'length': '0'}})

        # Data-access links; LOCAL locations take precedence over REMOTE ones.
        if 'DatasetLocationPolicy-Type' in doc and 'DatasetLocationPolicy-BasePath' in doc:
            url = dict(zip(doc['DatasetLocationPolicy-Type'], doc['DatasetLocationPolicy-BasePath']))
            if 'LOCAL-OPENDAP' in url:
                item.append({'name': 'enclosure', 'attribute': {'url': url['LOCAL-OPENDAP'], 'type': 'text/html', 'length': '0'}})
            elif 'REMOTE-OPENDAP' in url:
                item.append({'name': 'enclosure', 'attribute': {'url': url['REMOTE-OPENDAP'], 'type': 'text/html', 'length': '0'}})
            if 'LOCAL-FTP' in url:
                item.append({'name': 'enclosure', 'attribute': {'url': url['LOCAL-FTP'], 'type': 'text/plain', 'length': '0'}})
            elif 'REMOTE-FTP' in url:
                item.append({'name': 'enclosure', 'attribute': {'url': url['REMOTE-FTP'], 'type': 'text/plain', 'length': '0'}})

        # Publication time: last metadata revision, or "now" when unknown.
        updated = None
        if 'DatasetMetaHistory-LastRevisionDateLong' in doc and doc['DatasetMetaHistory-LastRevisionDateLong'][0] != '':
            updated = DateUtility.convertTimeLongToIso(doc['DatasetMetaHistory-LastRevisionDateLong'][0])
        else:
            # BUGFIX: this module used 'datetime' without importing it (only
            # urllib was imported), raising NameError whenever a dataset had
            # no revision date; 'import datetime' added at module top.
            updated = datetime.datetime.utcnow().isoformat()+'Z'

        item.append({'name': 'pubDate', 'value': updated})
        item.append({'name': 'guid', 'value': persistentId})
        item.append({'namespace': 'podaac', 'name': 'datasetId', 'value': doc['Dataset-PersistentId'][0]})
        item.append({'namespace': 'podaac', 'name': 'shortName', 'value': doc['Dataset-ShortName'][0]})

        # Spatial coverage as a georss/gml envelope: lower corner is
        # (west, south), upper corner is (east, north).
        if doc['DatasetCoverage-WestLon'][0] != '' and doc['DatasetCoverage-SouthLat'][0] != '' and doc['DatasetCoverage-EastLon'][0] != '' and doc['DatasetCoverage-NorthLat'][0] != '':
            item.append({'namespace': 'georss', 'name': 'where', 'value': {'namespace': 'gml', 'name': 'Envelope', 'value': [{'namespace': 'gml', 'name': 'lowerCorner', 'value': ' '.join([doc['DatasetCoverage-WestLon'][0], doc['DatasetCoverage-SouthLat'][0]]) }, {'namespace': 'gml', 'name': 'upperCorner', 'value': ' '.join([doc['DatasetCoverage-EastLon'][0], doc['DatasetCoverage-NorthLat'][0]])}]}})

        if 'DatasetCoverage-StartTimeLong' in doc and doc['DatasetCoverage-StartTimeLong'][0] != '':
            item.append({'namespace': 'time', 'name': 'start', 'value': DateUtility.convertTimeLongToIso(doc['DatasetCoverage-StartTimeLong'][0])})

        if 'DatasetCoverage-StopTimeLong' in doc and doc['DatasetCoverage-StopTimeLong'][0] != '':
            item.append({'namespace': 'time', 'name': 'end', 'value': DateUtility.convertTimeLongToIso(doc['DatasetCoverage-StopTimeLong'][0])})

        # full=true: inline every location-policy URL plus all multi-valued
        # PO.DAAC metadata fields into the item.
        if 'full' in self.parameters and self.parameters['full']:
            if 'DatasetLocationPolicy-Type' in doc and 'DatasetLocationPolicy-BasePath' in doc:
                for i, x in enumerate(doc['DatasetLocationPolicy-Type']):
                    item.append({'namespace': 'podaac', 'name': self._camelCaseStripHyphen(x.title()), 'value': doc['DatasetLocationPolicy-BasePath'][i]})
                # Consumed here; remove so generic metadata emission skips them.
                del doc['DatasetLocationPolicy-Type']
                del doc['DatasetLocationPolicy-BasePath']

            multiValuedElementsKeys = ('DatasetRegion-', 'DatasetCharacter-', 'DatasetCitation-', 'DatasetContact-Contact-', 'DatasetDatetime-',
                                       'DatasetInteger-', 'DatasetParameter-', 'DatasetProject-', 'DatasetReal-', 'DatasetResource-',
                                       'DatasetSoftware-', 'DatasetSource-', 'DatasetVersion-', 'Collection-')
            self._populateItemWithPodaacMetadata(doc, item, multiValuedElementsKeys)
http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/datasetwriter.py
----------------------------------------------------------------------
diff --git a/src/main/python/libraries/edge/opensearch/datasetwriter.py b/src/main/python/libraries/edge/opensearch/datasetwriter.py
new file mode 100644
index 0000000..3ec56cb
--- /dev/null
+++ b/src/main/python/libraries/edge/opensearch/datasetwriter.py
@@ -0,0 +1,192 @@
+from types import *
+import json
+import logging
+import urllib
+
+import requestresponder
+from edge.dateutility import DateUtility
+from edge.httputility import HttpUtility
+from edge.opensearch.responsewriter import ResponseWriter
+import re
+
class DatasetWriter(ResponseWriter):
    """Async request writer for dataset-level OpenSearch searches.

    For OpenSearch ('search') requests it first queries the granule core's
    Dataset-ShortName facet to learn which datasets have granules, then runs
    the dataset query itself.
    """

    def __init__(self, configFilePath):
        super(DatasetWriter, self).__init__(configFilePath)
        # Short names of datasets that have granules; populated by the
        # facet-query callback below.
        self.datasets = []

    def get(self, requestHandler):
        """Parse request arguments and kick off the asynchronous Solr query."""
        super(DatasetWriter, self).get(requestHandler)
        #searchParameters = {}
        #logging.debug('uri: '+str(requestHandler.request.headers))

        # Paging arguments; the bare excepts keep missing/invalid arguments
        # falling back to defaults (get_argument raises when absent).
        startIndex = 0
        try:
            startIndex = requestHandler.get_argument('startIndex')
        except:
            pass

        entriesPerPage = self._configuration.getint('solr', 'entriesPerPage')
        try:
            entriesPerPage = requestHandler.get_argument('itemsPerPage')
            #cap entries per page at 400
            if (int(entriesPerPage) > 400):
                entriesPerPage = 400
            self.searchParameters['itemsPerPage'] = entriesPerPage
        except:
            pass

        #pretty = True
        try:
            if requestHandler.get_argument('pretty').lower() == 'false':
                self.pretty = False
                self.searchParameters['pretty'] = 'false'
        except:
            pass

        try:
            if requestHandler.get_argument('full').lower() == 'true':
                self.searchParameters['full'] = 'true'
        except:
            pass

        try:
            self.searchParameters['format'] = requestHandler.get_argument('format')
        except:
            pass

        # Copy supported search parameters into both the Solr-query variables
        # and the echoed searchParameters.
        # NOTE(review): 'allowNone' is accepted here but never handled in
        # _constructSolrQuery — confirm whether a subclass consumes it.
        parameters = ['startTime', 'endTime', 'keyword', 'datasetId', 'shortName', 'instrument', 'satellite', 'fileFormat', 'status', 'processLevel', 'sortBy', 'bbox', 'allowNone']
        #variables = {}
        for parameter in parameters:
            try:
                value = requestHandler.get_argument(parameter)
                self.variables[parameter] = value
                self.searchParameters[parameter] = value
            except:
                pass

        # Keyword search runs against a lower-cased Solr field; strip user
        # wildcards and case-fold to match.
        if 'keyword' in self.variables:
            self.variables['keyword'] = self.variables['keyword'].replace('*', '')
            self.variables['keyword'] = self.variables['keyword'].lower()
        """
        else:
            variables['keyword'] = '""'
        """
        #If generating OpenSearch response, need to make additional call to solr
        #to determine which datasets have granules
        try:
            if 'search' in requestHandler.request.path:
                callback = self._getSolrHasGranuleResponseCallback(startIndex, entriesPerPage)
                self._getSolrHasGranuleResponse(callback)
            else:
                self._getSolrResponse(startIndex, entriesPerPage, self.variables)
        except:
            logging.exception('Failed to get solr response.')
        """
        searchText = ''
        if 'keyword' in variables:
            searchText = variables['keyword']
        openSearchResponse = self._generateOpenSearchResponse(
            solrResponse,
            searchText,
            self._configuration.get('service', 'url') + requestHandler.request.path,
            searchParameters,
            pretty
        )

        requestHandler.set_header("Content-Type", "application/xml")
        #requestHandler.set_header("Content-Type", "application/rss+xml")
        #requestHandler.write(solrResponse)
        requestHandler.write(openSearchResponse)
        """

    def _getSolrResponse(self, startIndex, entriesPerPage, variables):
        # Build the query and issue it asynchronously against the dataset core.
        query = self._constructSolrQuery(startIndex, entriesPerPage, variables)
        url = self._configuration.get('solr', 'datasetUrl')

        httpUtility = HttpUtility()
        httpUtility.getResponse(url+'/select/?'+query, self._onSolrResponse)

    def _constructSolrQuery(self, startIndex, entriesPerPage, variables):
        """Translate request parameters into a raw Solr query string.

        The URL is hand-built: '%20' stands in for spaces and Lucene range
        syntax is written literally, so values must already be URL-safe.
        """
        queries = []
        sort = None
        filterQuery = None
        for key, value in variables.iteritems():
            #query = ''
            if key == 'startTime':
                # Temporal overlap: dataset coverage must END on/after the
                # requested start time.
                startTime = DateUtility.convertISOToUTCTimestamp(value)
                if startTime is not None:
                    query = 'DatasetCoverage-StopTimeLong-Long:'
                    query += '['+str(startTime)+'%20TO%20*]'
                    queries.append(query)
            elif key == 'endTime':
                # Temporal overlap: dataset coverage must START on/before the
                # requested end time.
                stopTime = DateUtility.convertISOToUTCTimestamp(value)
                if stopTime is not None:
                    query = 'DatasetCoverage-StartTimeLong-Long:'
                    query += '[*%20TO%20'+str(stopTime)+']'
                    queries.append(query)
            elif key == 'keyword':
                newValue = urllib.quote(value)

                query = 'SearchableText-LowerCased:('+newValue+')'
                queries.append(query)
            elif key == 'datasetId':
                query = 'Dataset-PersistentId:'+self._urlEncodeSolrQueryValue(value)
                queries.append(query)
            elif key == 'shortName':
                query = 'Dataset-ShortName-Full:'+self._urlEncodeSolrQueryValue(value)
                queries.append(query)
            elif key == 'satellite':
                query = 'DatasetSource-Source-ShortName-Full:'+self._urlEncodeSolrQueryValue(value)
                queries.append(query)
            elif key == 'instrument':
                query = 'DatasetSource-Sensor-ShortName-Full:'+self._urlEncodeSolrQueryValue(value)
                queries.append(query)
            elif key == 'fileFormat':
                query = 'DatasetPolicy-DataFormat-LowerCased:'+self._urlEncodeSolrQueryValue(value)
                queries.append(query)
            elif key == 'status':
                query = 'DatasetPolicy-AccessType-LowerCased:'+self._urlEncodeSolrQueryValue(value)
                queries.append(query)
            elif key == 'processLevel':
                # NOTE(review): unlike the other fields, this value is not
                # URL-encoded before being embedded — confirm it is safe.
                query = 'Dataset-ProcessingLevel-LowerCased:'+value
                queries.append(query)
            elif key == 'sortBy':
                sortByMapping = {'timeDesc': 'DatasetCoverage-StartTimeLong-Long+desc', 'timeAsc': 'DatasetCoverage-StartTimeLong-Long+asc',
                                 'popularityDesc': 'Dataset-AllTimePopularity+desc', 'popularityAsc': 'Dataset-AllTimePopularity+asc'}
                if value in sortByMapping.keys():
                    sort = sortByMapping[value]
            elif key == 'bbox':
                # Spatial constraint handled as a Solr filter query.
                filterQuery = self._constructBoundingBoxQuery(value)

            #if query != '':
            #    queries.append('%2B'+query)

        if len(queries) == 0:
            queries.append('*')

        # Restrict results to publicly accessible, viewable datasets.
        query = 'q='+'+AND+'.join(queries)+'&fq=DatasetPolicy-AccessType-Full:(OPEN+OR+PREVIEW+OR+SIMULATED+OR+REMOTE)+AND+DatasetPolicy-ViewOnline:Y&version=2.2&start='+str(startIndex)+'&rows='+str(entriesPerPage)+'&indent=on&wt=json'
        if sort is not None:
            query += '&sort=' + sort
        if filterQuery is not None:
            query += '&' + filterQuery
        logging.debug('solr query: '+query)

        return query

    def _getSolrHasGranuleResponse(self, callback):
        # Facet the granule core on dataset short name to learn which
        # datasets actually have granules (rows=0: facet counts only).
        url = self._configuration.get('solr', 'granuleUrl')

        httpUtility = HttpUtility()
        return httpUtility.getResponse(url+'/select?q=*:*&facet=true&facet.field=Dataset-ShortName-Full&facet.limit=-1&rows=0&indent=on&wt=json&version=2.2', callback)

    def _getSolrHasGranuleResponseCallback(self, startIndex, entriesPerPage):
        """Build a callback that records which datasets have granules, then
        continues with the dataset search proper."""
        def onSolrHasGranuleResponse(response):
            try:
                solrJson = json.loads(response.body)
                logging.debug("Got response for dataset facet")
                # The facet list alternates value,count,value,count,...; keep
                # the values (even indices).
                # NOTE(review): no facet.mincount is set, so zero-count terms
                # may be included — confirm against the Solr config.
                datasetCounter = solrJson['facet_counts']['facet_fields']['Dataset-ShortName-Full']
                self.datasets = [datasetCounter[i] for i in range(len(datasetCounter)) if i % 2 == 0]
                self._getSolrResponse(startIndex, entriesPerPage, self.variables)
            except:
                logging.exception('Failed to get solr response.')
        return onSolrHasGranuleResponse
http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/fgdcresponse.py
----------------------------------------------------------------------
diff --git a/src/main/python/libraries/edge/opensearch/fgdcresponse.py b/src/main/python/libraries/edge/opensearch/fgdcresponse.py
new file mode 100644
index 0000000..c8738ce
--- /dev/null
+++ b/src/main/python/libraries/edge/opensearch/fgdcresponse.py
@@ -0,0 +1,56 @@
+import logging
+
+from jinja2 import Environment, Template
+import re
+import xml.etree.ElementTree
+
+from edge.opensearch.response import Response
+
class FgdcResponse(Response):
    """Base class for rendering FGDC XML metadata via a Jinja2 template.

    Subclasses fill self.variables and call generate(); setTemplate() must be
    called before generate() or self.template is undefined.
    """

    def __init__(self):
        # NOTE(review): does not call Response.__init__; base-class state is
        # not initialized here -- confirm that is intentional.
        self.namespaces = {}
        self.env = Environment()
        self.env.trim_blocks = True
        self.env.autoescape = True
        # Template context passed to Jinja2 at render time.
        self.variables = {}

    def setTemplate(self, template):
        # Collapse whitespace-only gaps between adjacent tags so the rendered
        # document carries no stray newlines between elements.
        self.template = self.env.from_string(template.replace('>\n<', '><'))

    def addNamespace(self, name, uri):
        # Register a prefix -> URI mapping used when serializing pretty output.
        self.namespaces[name] = uri

    def removeNamespace(self, name):
        # Raises KeyError if the namespace was never registered.
        del self.namespaces[name]

    def generate(self, pretty=False, xmlDeclaration=""):
        """Render the template; optionally re-parse and pretty-print the XML.

        xmlDeclaration is prepended verbatim to pretty output (used by
        subclasses to add the FGDC DOCTYPE). Non-pretty output is returned
        exactly as rendered.
        """
        logging.debug('FgdcResponse.generate is called.')
        fgdcStr = self.template.render(self.variables).encode('utf-8')
        if fgdcStr != "" and pretty:
            tree = xml.etree.ElementTree.fromstring(fgdcStr)
            self._indent(tree)

            for namespace in self.namespaces.keys():
                xml.etree.ElementTree.register_namespace(namespace, self.namespaces[namespace])

            return xmlDeclaration + xml.etree.ElementTree.tostring(tree, encoding='utf-8')
        else:
            return fgdcStr

    # Pretty-print helper from http://effbot.org/zone/element-lib.htm#prettyprint
    def _indent(self, elem, level=0):
        """Recursively insert newline/indent text so tostring() emits indented XML.

        The inner loop deliberately rebinds *elem* (effbot's original idiom) so
        the post-loop fixup applies to the last child.
        """
        i = "\n" + level * " "
        if len(elem):
            if not elem.text or not elem.text.strip():
                elem.text = i + " "
            if not elem.tail or not elem.tail.strip():
                elem.tail = i
            for elem in elem:
                self._indent(elem, level + 1)
            if not elem.tail or not elem.tail.strip():
                elem.tail = i
        else:
            if level and (not elem.tail or not elem.tail.strip()):
                elem.tail = i
+
http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/fgdcresponsebysolr.py
----------------------------------------------------------------------
diff --git a/src/main/python/libraries/edge/opensearch/fgdcresponsebysolr.py b/src/main/python/libraries/edge/opensearch/fgdcresponsebysolr.py
new file mode 100644
index 0000000..562dc08
--- /dev/null
+++ b/src/main/python/libraries/edge/opensearch/fgdcresponsebysolr.py
@@ -0,0 +1,141 @@
+import json
+import logging
+
+from edge.opensearch.fgdcresponse import FgdcResponse
+from datetime import datetime
+
class FgdcResponseBySolr(FgdcResponse):
    """Builds an FGDC (fgdc-std-001-1998) document for one dataset and its granules.

    Both inputs to generate() are raw Solr JSON response bodies; template
    variables are accumulated in self.variables (inherited from FgdcResponse).
    """

    def __init__(self):
        super(FgdcResponseBySolr, self).__init__()

    def generate(self, solrDatasetResponse, solrGranuleResponse = None, pretty=False):
        """Populate template variables from the Solr responses and render the FGDC XML.

        The XML declaration plus the FGDC DOCTYPE are prepended when
        pretty-printing. Raises Exception when no dataset or no granules are found.
        """
        self._populate(solrDatasetResponse, solrGranuleResponse)
        return super(FgdcResponseBySolr, self).generate(pretty, "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n<!DOCTYPE metadata SYSTEM \"http://www.fgdc.gov/metadata/fgdc-std-001-1998.dtd\">\n")

    def _populate(self, solrDatasetResponse, solrGranuleResponse = None):
        # Dataset portion: exactly one dataset document is required.
        if solrDatasetResponse is not None:
            solrJson = json.loads(solrDatasetResponse)

            logging.debug('dataset count: '+str(len(solrJson['response']['docs'])))

            if len(solrJson['response']['docs']) == 1:
                # ok now populate variables!
                doc = solrJson['response']['docs'][0]

                self.variables['doc'] = doc

                # Round spatial extent to 3 decimal places
                doc['DatasetCoverage-WestLon'][0] = '%.3f' % round(float(doc['DatasetCoverage-WestLon'][0]), 3)
                doc['DatasetCoverage-EastLon'][0] = '%.3f' % round(float(doc['DatasetCoverage-EastLon'][0]), 3)
                doc['DatasetCoverage-NorthLat'][0] = '%.3f' % round(float(doc['DatasetCoverage-NorthLat'][0]), 3)
                doc['DatasetCoverage-SouthLat'][0] = '%.3f' % round(float(doc['DatasetCoverage-SouthLat'][0]), 3)

                # Based on Dataset-ProcessingLevel the resolution is derived
                # differently. For level 2 / 2P products we use:
                #
                #   ACROSS_TRACK_RESOLUTION (meters)
                #   ALONG_TRACK_RESOLUTION  (meters)
                #
                # and convert meters to decimal degrees with
                #
                #   1 degree = 111.16 km = 111160.0 meters
                if (doc['Dataset-ProcessingLevel'][0] == '2' or doc['Dataset-ProcessingLevel'][0] == '2P'):
                    self.variables['Dataset_LatitudeResolution'] = '%.17f' % round(float(doc['Dataset-AlongTrackResolution'][0]) / 111160.0, 17)
                    self.variables['Dataset_LongitudeResolution'] = '%.17f' % round(float(doc['Dataset-AcrossTrackResolution'][0]) / 111160.0, 17)
                # For level 3 / 4 products the resolutions are already stored in
                # degrees as LATITUDE_RESOLUTION / LONGITUDE_RESOLUTION.
                elif (doc['Dataset-ProcessingLevel'][0] == '3' or doc['Dataset-ProcessingLevel'][0] == '4'):
                    self.variables['Dataset_LatitudeResolution'] = doc['Dataset-LatitudeResolution'][0]
                    self.variables['Dataset_LongitudeResolution'] = doc['Dataset-LongitudeResolution'][0]

                # Format dates (epoch milliseconds -> formatted strings); skipped
                # silently when the fields are absent or malformed.
                try:
                    self.variables['DatasetCitation_ReleaseDateTime'] = self._convertTimeLongToString(doc['DatasetCitation-ReleaseDateLong'][0])
                    self.variables['DatasetCitation_ReleaseDate'] = datetime.utcfromtimestamp(float(doc['DatasetCitation-ReleaseDateLong'][0]) / 1000).strftime('%Y%m%d')
                    self.variables['DatasetCitation_ReleaseTime'] = datetime.utcfromtimestamp(float(doc['DatasetCitation-ReleaseDateLong'][0]) / 1000).strftime('%H%M%S')+'Z'
                    self.variables['DatasetCoverage_StartTime'] = self._convertTimeLongToString(doc['DatasetCoverage-StartTimeLong'][0])
                except:
                    pass

                # Create list of unique dataset sensor indices (the last index
                # wins for duplicate short names).
                self.variables['UniqueDatasetSensor'] = {}
                for i, x in enumerate(doc['DatasetSource-Sensor-ShortName']):
                    self.variables['UniqueDatasetSensor'][x] = i
                self.variables['UniqueDatasetSensor'] = self.variables['UniqueDatasetSensor'].values()

                # Create list of unique dataset source indices.
                self.variables['UniqueDatasetSource'] = {}
                for i, x in enumerate(doc['DatasetSource-Source-ShortName']):
                    self.variables['UniqueDatasetSource'][x] = i
                self.variables['UniqueDatasetSource'] = self.variables['UniqueDatasetSource'].values()

                # Create dictionary for dataset_resource (type -> path)
                self.variables['DatasetResource'] = dict(zip(doc['DatasetResource-Type'], doc['DatasetResource-Path']))

                # Get index of dataset Technical Contact (-1 when absent)
                self.variables['TechnicalContactIndex'] = -1
                for i, x in enumerate(doc['DatasetContact-Contact-Role']):
                    if (x.upper() == 'TECHNICAL CONTACT'):
                        logging.debug('tech contact is ' + str(i))
                        self.variables['TechnicalContactIndex'] = i
                        break;

                # Guarantee the provider resource path key exists for the template.
                if 'Dataset-Provider-ProviderResource-Path' not in doc:
                    doc['Dataset-Provider-ProviderResource-Path'] = ['']
            else:
                raise Exception('No dataset found')
        else:
            raise Exception('No dataset found')

        # Granule portion: at least one granule is required and every granule
        # must carry a bounding box.
        if solrGranuleResponse is not None:
            solrGranuleJson = json.loads(solrGranuleResponse)

            logging.debug('granule count: '+str(len(solrGranuleJson['response']['docs'])))
            if (len(solrGranuleJson['response']['docs']) == 0):
                raise Exception('No granules found')

            for doc in solrGranuleJson['response']['docs']:
                self._populateItem(solrGranuleResponse, doc, None)

                doc['Granule-StartTimeLong'][0] = self._convertTimeLongToString(doc['Granule-StartTimeLong'][0])
                doc['Granule-StopTimeLong'][0] = self._convertTimeLongToString(doc['Granule-StopTimeLong'][0])

                # Create dictionary for bounding box extent
                '''
                if ('GranuleReal-Value' in doc and 'GranuleReal-DatasetElement-Element-ShortName' in doc):
                    # Round real value to 3 decimal places
                    doc['GranuleReal-Value'] = ['%.3f' % round(float(value), 3) for value in doc['GranuleReal-Value']]
                    doc['GranuleBoundingBox'] = dict(zip(doc['GranuleReal-DatasetElement-Element-ShortName'], doc['GranuleReal-Value']))
                '''
                if 'GranuleSpatial-NorthLat' in doc and 'GranuleSpatial-EastLon' in doc and 'GranuleSpatial-SouthLat' in doc and 'GranuleSpatial-WestLon' in doc:
                    doc['GranuleBoundingBox'] = dict([('southernmostLatitude', '%.3f' % round(float(doc['GranuleSpatial-SouthLat'][0]), 3)),
                                                      ('northernmostLatitude', '%.3f' % round(float(doc['GranuleSpatial-NorthLat'][0]), 3)),
                                                      ('westernmostLongitude', '%.3f' % round(float(doc['GranuleSpatial-WestLon'][0]), 3)),
                                                      ('easternmostLongitude', '%.3f' % round(float(doc['GranuleSpatial-EastLon'][0]), 3))])
                else:
                    # Encountered a granule with no bounding box so raise an exception
                    raise Exception('granule ' + doc['Granule-Name'][0] + ' has no bounding box')
            self.variables['granules'] = solrGranuleJson['response']['docs']
        else:
            raise Exception('No granules found')

    def _populateChannel(self, solrResponse):
        # No channel-level content for FGDC output.
        pass

    def _populateItem(self, solrResponse, doc, item):
        # Hook for subclasses to decorate each granule document.
        pass

    def _convertTimeLongToString(self, time):
        """Convert epoch milliseconds to 'YYYYMMDDTHHMMSSZ'; empty string on bad input."""
        isoTime = ''
        try:
            isoTime = datetime.utcfromtimestamp(float(time) / 1000).strftime('%Y%m%dT%H%M%SZ')
        except ValueError:
            pass
        return isoTime
http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/gcmdresponsebysolr.py
----------------------------------------------------------------------
diff --git a/src/main/python/libraries/edge/opensearch/gcmdresponsebysolr.py b/src/main/python/libraries/edge/opensearch/gcmdresponsebysolr.py
new file mode 100644
index 0000000..588fc4a
--- /dev/null
+++ b/src/main/python/libraries/edge/opensearch/gcmdresponsebysolr.py
@@ -0,0 +1,123 @@
+import json
+import logging
+
+from edge.opensearch.isoresponse import IsoResponse
+from datetime import date, datetime
+
class GcmdResponseBySolr(IsoResponse):
    """Builds a GCMD DIF document for a single dataset from a Solr JSON response."""

    def __init__(self, configuration):
        # configuration: ConfigParser carrying [author], [podaac] and [nodc] sections.
        super(GcmdResponseBySolr, self).__init__()
        self._configuration = configuration

    def generate(self, solrResponse, pretty=False, allowNone=False):
        """Populate DIF template variables from *solrResponse* and render.

        allowNone -- when False, short placeholder values (none/na/n/a/null)
        are blanked out of the document before templating.
        """
        self._populate(solrResponse, allowNone)
        return super(GcmdResponseBySolr, self).generate(pretty)

    def _populate(self, solrResponse, allowNone):
        # NOTE(review): unlike the FGDC sibling class, nothing is raised when
        # the response is missing or does not hold exactly one dataset document.
        if solrResponse is not None:
            solrJson = json.loads(solrResponse)

            logging.debug('dataset count: '+str(len(solrJson['response']['docs'])))

            if len(solrJson['response']['docs']) == 1:
                # ok now populate variables!
                doc = solrJson['response']['docs'][0]

                # Filter response from solr: if a value contains none, N/A, null
                # set it to the empty string (policy constraint fields exempt).
                if not allowNone:
                    for key, value in doc.iteritems():
                        if key not in ['DatasetPolicy-AccessConstraint', 'DatasetPolicy-UseConstraint'] and isinstance(value[0], basestring) and len(value[0].strip()) <= 4 and value[0].strip().lower() in ['none', 'na', 'n/a', 'null']:
                            doc[key][0] = ""

                self.variables['doc'] = doc

                # Entry_ID: prefer the persistent id, fall back to the short name.
                self.variables['Entry_ID'] = doc['Dataset-PersistentId'][0] if doc['Dataset-PersistentId'][0] != "" else doc['Dataset-ShortName'][0]

                # Entry_Title
                self.variables['Entry_Title'] = doc['Dataset-LongName'][0]

                # Dataset_Citation: one dict per citation; release dates converted
                # from epoch milliseconds to YYYY-MM-DD when numeric.
                datasetCitationCol = ['Dataset_Creator', 'Dataset_Title', 'Dataset_Series_Name', 'Dataset_Release_Date', 'Dataset_Release_Place', 'Dataset_Publisher', 'Version', 'Other_Citation_Details', 'Online_Resource']
                if 'DatasetCitation-Creator' in doc:
                    for i, x in enumerate(doc['DatasetCitation-ReleaseDateLong']):
                        try:
                            doc['DatasetCitation-ReleaseDateLong'][i] = datetime.utcfromtimestamp(float(x) / 1000).strftime('%Y-%m-%d')
                        except:
                            # Leave the raw value in place when it is not numeric.
                            pass
                    self.variables['Dataset_Citation'] = [dict(zip(datasetCitationCol,x)) for x in zip(doc['DatasetCitation-Creator'], doc['DatasetCitation-Title'], doc['DatasetCitation-SeriesName'], doc['DatasetCitation-ReleaseDateLong'], doc['DatasetCitation-ReleasePlace'], doc['DatasetCitation-Publisher'], doc['DatasetCitation-Version'], doc['DatasetCitation-CitationDetail'], doc['DatasetCitation-OnlineResource'])]

                # Personnel: one dict per dataset contact.
                datasetPersonnelCol = ['Role', 'First_Name', 'Middle_Name', 'Last_Name', 'Email', 'Phone', 'Fax', 'Provider_Short_Name']
                if 'DatasetContact-Contact-Role' in doc:
                    self.variables['Personnel'] = [dict(zip(datasetPersonnelCol, x)) for x in zip(doc['DatasetContact-Contact-Role'], doc['DatasetContact-Contact-FirstName'], doc['DatasetContact-Contact-MiddleName'], doc['DatasetContact-Contact-LastName'], doc['DatasetContact-Contact-Email'], doc['DatasetContact-Contact-Phone'], doc['DatasetContact-Contact-Fax'], doc['DatasetContact-Contact-Provider-ShortName'])]

                    # Locate dataset provider contact (None when no contact
                    # matches the dataset's own provider short name).
                    self.variables['Provider_Personnel'] = next((item for item in self.variables['Personnel'] if item["Provider_Short_Name"] == doc['Dataset-Provider-ShortName'][0]), None)

                # Parameters: GCMD science keyword tuples.
                datasetParameterCol = ['Category', 'Topic', 'Term', 'Variable_Level_1', 'Detailed_Variable']
                if 'DatasetParameter-Category' in doc:
                    # Replace all none / None variable details with empty string
                    doc['DatasetParameter-VariableDetail'] = [self._filterString(variableDetail) for variableDetail in doc['DatasetParameter-VariableDetail']]
                    self.variables['Parameters'] = [dict(zip(datasetParameterCol, x)) for x in zip(doc['DatasetParameter-Category'], doc['DatasetParameter-Topic'], doc['DatasetParameter-Term'], doc['DatasetParameter-Variable'], doc['DatasetParameter-VariableDetail'])]

                # Temporal coverage (epoch milliseconds -> YYYY-MM-DD); skipped
                # silently when absent or malformed.
                try:
                    self.variables['Start_Date'] = datetime.utcfromtimestamp(float(doc['DatasetCoverage-StartTimeLong'][0]) / 1000).strftime('%Y-%m-%d')
                    self.variables['Stop_Date'] = datetime.utcfromtimestamp(float(doc['DatasetCoverage-StopTimeLong'][0]) / 1000).strftime('%Y-%m-%d')
                except:
                    pass

                # Project
                projectCol = ['Short_Name', 'Long_Name']
                if 'DatasetProject-Project-ShortName' in doc:
                    self.variables['Project'] = [dict(zip(projectCol, x)) for x in zip(doc['DatasetProject-Project-ShortName'], doc['DatasetProject-Project-LongName'])]

                # Create list of unique dataset sensor indices.
                self.variables['UniqueDatasetSensor'] = {}
                if 'DatasetSource-Sensor-ShortName' in doc:
                    for i, x in enumerate(doc['DatasetSource-Sensor-ShortName']):
                        self.variables['UniqueDatasetSensor'][x] = i
                self.variables['UniqueDatasetSensor'] = self.variables['UniqueDatasetSensor'].values()

                # Create list of unique dataset source indices.
                self.variables['UniqueDatasetSource'] = {}
                if 'DatasetSource-Source-ShortName' in doc:
                    for i, x in enumerate(doc['DatasetSource-Source-ShortName']):
                        self.variables['UniqueDatasetSource'][x] = i
                self.variables['UniqueDatasetSource'] = self.variables['UniqueDatasetSource'].values()

                # Last_DIF_Revision_Date
                self.variables['Last_DIF_Revision_Date'] = datetime.utcfromtimestamp(float(doc['DatasetMetaHistory-LastRevisionDateLong'][0]) / 1000).strftime('%Y-%m-%d')

                # DIF_Revision_History
                self.variables['DIF_Revision_History'] = doc['DatasetMetaHistory-RevisionHistory'][0]

                # DIF_Creation_Date: the time this response is generated.
                self.variables['DIF_Creation_Date'] = datetime.utcnow().strftime('%Y-%m-%d')

                # Set configurable DIF Author contact information
                self.variables['author'] = dict(self._configuration.items('author'))

                # Set configurable PO.DAAC and NODC contact information
                self.variables['podaac'] = dict(self._configuration.items('podaac'))
                self.variables['nodc'] = dict(self._configuration.items('nodc'))

    def _populateChannel(self, solrResponse):
        # No channel-level content for DIF output.
        pass

    def _populateItem(self, solrResponse, doc, item):
        # DIF output is dataset-level only; per-item hook unused.
        pass

    def _filterString(self, str):
        # Map the literal placeholder 'none' (any case) to the empty string.
        # NOTE: parameter name shadows the builtin 'str' (kept for compatibility).
        if str.lower() == 'none':
            return ''
        else:
            return str
http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/granuleatomresponse.py
----------------------------------------------------------------------
diff --git a/src/main/python/libraries/edge/opensearch/granuleatomresponse.py b/src/main/python/libraries/edge/opensearch/granuleatomresponse.py
new file mode 100644
index 0000000..9b38347
--- /dev/null
+++ b/src/main/python/libraries/edge/opensearch/granuleatomresponse.py
@@ -0,0 +1,110 @@
+import datetime
+import urllib
+
+from edge.opensearch.atomresponsebysolr import AtomResponseBySolr
+from edge.dateutility import DateUtility
+
class GranuleAtomResponse(AtomResponseBySolr):
    """Atom feed response for granule-level OpenSearch results."""

    def __init__(self, linkToGranule, host, url):
        # linkToGranule: comma-separated granule reference types, in priority order.
        super(GranuleAtomResponse, self).__init__()

        self.linkToGranule = linkToGranule.split(',')
        self.host = host
        self.url = url

    def _populateChannel(self, solrResponse):
        # Advertise the dataset OpenSearch descriptor document at feed level.
        self.variables.append({'name': 'link', 'attribute': {'href': self.url+self.searchBasePath+'podaac-dataset-osd.xml', 'rel': 'search', 'type': 'application/opensearchdescription+xml' }})

    def _populateItem(self, solrResponse, doc, item):
        """Append Atom entry elements for one granule Solr document to *item*."""
        item.append({'name': 'title', 'value': doc['Granule-Name'][0]})

        # 'updated' is the granule start time when present, otherwise "now".
        updated = None
        startTime = None
        if 'Granule-StartTimeLong' in doc and doc['Granule-StartTimeLong'][0] != '':
            updated = DateUtility.convertTimeLongToIso(doc['Granule-StartTimeLong'][0])
            startTime = updated
        else:
            updated = datetime.datetime.utcnow().isoformat()+'Z'

        item.append({'name': 'updated', 'value': updated})
        item.append({'name': 'id', 'value': doc['Dataset-PersistentId'][0] + ':' + doc['Granule-Name'][0]})

        # Links back to this service for Atom, ISO and FGDC granule metadata.
        parameters = {'datasetId': doc['Dataset-PersistentId'][0], 'granuleName': doc['Granule-Name'][0]}
        parameters['full'] = 'true'
        item.append({'name': 'link', 'attribute': {'href': self.url+self.searchBasePath + 'granule?' + urllib.urlencode(parameters), 'rel': 'enclosure', 'type': 'application/atom+xml', 'title': 'PO.DAAC Metadata' }})
        del parameters['full']
        parameters['format'] = 'iso'
        item.append({'name': 'link', 'attribute': {'href': self.url+self.metadataBasePath + 'granule?' + urllib.urlencode(parameters), 'rel': 'enclosure', 'type': 'text/xml', 'title': 'ISO-19115 Metadata' }})
        parameters['format'] = 'fgdc'
        item.append({'name': 'link', 'attribute': {'href': self.url+self.metadataBasePath + 'granule?' + urllib.urlencode(parameters), 'rel': 'enclosure', 'type': 'text/xml', 'title': 'FGDC Metadata' }})

        if 'GranuleReference-Type' in doc:
            # MIME type for data links, derived from the granule data format.
            if 'Granule-DataFormat' in doc:
                type = 'application/x-' + doc['Granule-DataFormat'][0].lower()
            else:
                type = 'text/plain'
            # Look for ONLINE references only
            granuleRefDict = dict([(doc['GranuleReference-Type'][i], doc['GranuleReference-Path'][i]) for i,x in enumerate(doc['GranuleReference-Status']) if x=="ONLINE"])
            # Prefer LOCAL over REMOTE for both OPeNDAP and FTP links.
            if 'LOCAL-OPENDAP' in granuleRefDict:
                item.append({'name': 'link', 'attribute': {'href': granuleRefDict['LOCAL-OPENDAP'], 'rel': 'enclosure', 'type': 'text/html', 'title': 'OPeNDAP URL' }})
            elif 'REMOTE-OPENDAP' in granuleRefDict:
                item.append({'name': 'link', 'attribute': {'href': granuleRefDict['REMOTE-OPENDAP'], 'rel': 'enclosure', 'type': 'text/html', 'title': 'OPeNDAP URL' }})
            if 'LOCAL-FTP' in granuleRefDict:
                item.append({'name': 'link', 'attribute': {'href': granuleRefDict['LOCAL-FTP'], 'rel': 'enclosure', 'type': type, 'title': 'FTP URL' }})
            elif 'REMOTE-FTP' in granuleRefDict:
                item.append({'name': 'link', 'attribute': {'href': granuleRefDict['REMOTE-FTP'], 'rel': 'enclosure', 'type': type, 'title': 'FTP URL' }})

        item.append({'namespace': 'podaac', 'name': 'datasetId', 'value': doc['Dataset-PersistentId'][0]})
        item.append({'namespace': 'podaac', 'name': 'shortName', 'value': doc['Dataset-ShortName'][0]})

        # georss bounding envelope; GML corners are "lon lat" pairs.
        if 'GranuleSpatial-NorthLat' in doc and 'GranuleSpatial-EastLon' in doc and 'GranuleSpatial-SouthLat' in doc and 'GranuleSpatial-WestLon' in doc:
            item.append({'namespace': 'georss', 'name': 'where', 'value': {'namespace': 'gml', 'name': 'Envelope', 'value': [{'namespace': 'gml', 'name': 'lowerCorner', 'value': ' '.join([doc['GranuleSpatial-WestLon'][0], doc['GranuleSpatial-SouthLat'][0]])}, {'namespace': 'gml', 'name': 'upperCorner', 'value': ' '.join([doc['GranuleSpatial-EastLon'][0], doc['GranuleSpatial-NorthLat'][0]])}]}})

        if startTime is not None:
            item.append({'namespace': 'time', 'name': 'start', 'value': startTime})

        if 'Granule-StopTimeLong' in doc and doc['Granule-StopTimeLong'][0] != '':
            item.append({'namespace': 'time', 'name': 'end', 'value': DateUtility.convertTimeLongToIso(doc['Granule-StopTimeLong'][0])})

        # full=true requests also inline all PO.DAAC metadata fields.
        if 'full' in self.parameters and self.parameters['full']:
            multiValuedElementsKeys = ('GranuleArchive-', 'GranuleReference-')
            self._populateItemWithPodaacMetadata(doc, item, multiValuedElementsKeys)

    # Disabled implementation kept verbatim for reference.
    '''
    def _getLinkToGranule(self, doc):
        attr = {}
        link = None

        if 'GranuleReference-Type' in doc and len(self.linkToGranule) > 0:
            granuleRefDict = dict(zip(doc['GranuleReference-Type'], zip(doc['GranuleReference-Path'], doc['GranuleReference-Status'])))

            for type in self.linkToGranule:
                # check if reference type exists
                if type in granuleRefDict:
                    # check if reference is online
                    if granuleRefDict[type][1] == 'ONLINE':
                        link = granuleRefDict[type][0]
                        break
            if link is not None:
                attr['rel'] = 'http://esipfed.org/ns/discovery/1.1/data#'
                attr['title'] = 'Granule File'

                if 'GranuleArchive-Name' in doc and 'GranuleArchive-Type' in doc and 'GranuleArchive-FileSize':
                    granuleArchiveDict = dict(zip(doc['GranuleArchive-Type'], zip(doc['GranuleArchive-Name'], doc['GranuleArchive-FileSize'])))
                    if link.endswith(granuleArchiveDict['DATA'][0]):
                        attr['size'] = granuleArchiveDict['DATA'][1]

                if 'Granule-DataFormat' in doc:
                    attr['type'] = 'application/x-' + doc['Granule-DataFormat'][0].lower()
        else:
            #No link to granule download provided so create link back to opensearch to retrieve granule metadata
            link = "http://" + self.host + "/granule/opensearch.atom?granule=" + doc['Granule-Name'][0]
        attr['href'] = link
        return attr
    '''
http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/granuledatacastingresponse.py
----------------------------------------------------------------------
diff --git a/src/main/python/libraries/edge/opensearch/granuledatacastingresponse.py b/src/main/python/libraries/edge/opensearch/granuledatacastingresponse.py
new file mode 100644
index 0000000..24b5dc0
--- /dev/null
+++ b/src/main/python/libraries/edge/opensearch/granuledatacastingresponse.py
@@ -0,0 +1,41 @@
+import logging
+
+from edge.dateutility import DateUtility
+from edge.opensearch.datacastingresponsebysolr import DatacastingResponseBySolr
+
class GranuleDatacastingResponse(DatacastingResponseBySolr):
    """Datacasting feed response for granules, backed by Solr documents."""

    def __init__(self, portalUrl, linkToGranule, archivedWithin):
        super(GranuleDatacastingResponse, self).__init__(portalUrl, archivedWithin)
        # Ordered list of reference types to try when choosing a download link.
        self.linkToGranule = linkToGranule.split(',')

    def _populateChannel(self, solrResponse):
        # Channel-level fields are handled by the base class; nothing extra here.
        pass

    def _populateItem(self, solrResponse, doc, item):
        """Convert epoch-millisecond times to RFC-822 and attach link/size data to *doc*."""
        for timeField in ('Granule-StartTimeLong', 'Granule-StopTimeLong', 'Granule-ArchiveTimeLong'):
            doc[timeField][0] = DateUtility.convertTimeLongToRFC822(doc[timeField][0])

        doc['GranuleLink'] = self._getLinkToGranule(doc)

        doc['GranuleFileSize'] = dict(zip(doc['GranuleArchive-Type'], doc['GranuleArchive-FileSize']))

        if 'GranuleReference-Type' in doc:
            # Keep only references whose status is ONLINE (type -> path).
            onlineIndices = [i for i, status in enumerate(doc['GranuleReference-Status']) if status == "ONLINE"]
            doc['GranuleReference'] = dict(
                (doc['GranuleReference-Type'][i], doc['GranuleReference-Path'][i]) for i in onlineIndices)

    def _getLinkToGranule(self, doc):
        """Return the first ONLINE reference path matching the configured priority, else None."""
        if 'GranuleReference-Type' not in doc or len(self.linkToGranule) == 0:
            return None

        referencesByType = dict(zip(doc['GranuleReference-Type'],
                                    zip(doc['GranuleReference-Path'], doc['GranuleReference-Status'])))
        for preferredType in self.linkToGranule:
            path, status = referencesByType.get(preferredType, (None, None))
            if status == 'ONLINE':
                return path
        return None
http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/granulefgdcresponse.py
----------------------------------------------------------------------
diff --git a/src/main/python/libraries/edge/opensearch/granulefgdcresponse.py b/src/main/python/libraries/edge/opensearch/granulefgdcresponse.py
new file mode 100644
index 0000000..0582f60
--- /dev/null
+++ b/src/main/python/libraries/edge/opensearch/granulefgdcresponse.py
@@ -0,0 +1,13 @@
+import datetime
+
+from edge.opensearch.fgdcresponsebysolr import FgdcResponseBySolr
+
class GranuleFgdcResponse(FgdcResponseBySolr):
    """FGDC metadata response for granules.

    All population logic lives in FgdcResponseBySolr._populate; this subclass
    only stubs the channel/item hooks.
    """

    def __init__(self):
        super(GranuleFgdcResponse, self).__init__()

    def _populateChannel(self, solrResponse):
        # No channel-level additions for FGDC granule output.
        pass

    def _populateItem(self, solrResponse, doc, item):
        # Per-granule decoration not needed; base class handles the documents.
        pass
http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/granuleisoresponse.py
----------------------------------------------------------------------
diff --git a/src/main/python/libraries/edge/opensearch/granuleisoresponse.py b/src/main/python/libraries/edge/opensearch/granuleisoresponse.py
new file mode 100644
index 0000000..7b9b0a7
--- /dev/null
+++ b/src/main/python/libraries/edge/opensearch/granuleisoresponse.py
@@ -0,0 +1,33 @@
+import datetime
+
+from edge.opensearch.isoresponsebysolr import IsoResponseBySolr
+
class GranuleIsoResponse(IsoResponseBySolr):
    """ISO-19115 response for granules; injects a download link into each document."""

    def __init__(self, linkToGranule):
        super(GranuleIsoResponse, self).__init__()
        # Reference types to try, highest priority first.
        self.linkToGranule = linkToGranule.split(',')

    def _populateChannel(self, solrResponse):
        # Nothing to add at channel level for ISO output.
        pass

    def _populateItem(self, solrResponse, doc, item):
        # Expose the preferred online download URL to the template, if any.
        downloadUrl = self._getLinkToGranule(doc)
        if downloadUrl is not None:
            doc['link'] = downloadUrl

    def _getLinkToGranule(self, doc):
        """Return the first ONLINE reference path matching the configured priority, else None."""
        if 'GranuleReference-Type' not in doc or len(self.linkToGranule) == 0:
            return None

        referencesByType = dict(zip(doc['GranuleReference-Type'],
                                    zip(doc['GranuleReference-Path'], doc['GranuleReference-Status'])))
        for preferredType in self.linkToGranule:
            path, status = referencesByType.get(preferredType, (None, None))
            if status == 'ONLINE':
                return path
        return None
http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/granuleresponsebysolr.py
----------------------------------------------------------------------
diff --git a/src/main/python/libraries/edge/opensearch/granuleresponsebysolr.py b/src/main/python/libraries/edge/opensearch/granuleresponsebysolr.py
new file mode 100644
index 0000000..1b1ca80
--- /dev/null
+++ b/src/main/python/libraries/edge/opensearch/granuleresponsebysolr.py
@@ -0,0 +1,37 @@
+from edge.opensearch.responsebysolr import ResponseBySolr
+
class GranuleResponseBySolr(ResponseBySolr):
    """Simple feed response for granules with a single configured link type."""

    def __init__(self, linkToGranule):
        super(GranuleResponseBySolr, self).__init__()
        # Single reference type whose path is used as the item link.
        self.linkToGranule = linkToGranule

    def _populateChannel(self, solrResponse):
        # No channel-level content.
        pass

    def _populateItem(self, solrResponse, doc, item):
        # Title and description are both the granule name.
        granuleName = doc['Granule-Name'][0]
        item.append({'name': 'title', 'value': granuleName})
        item.append({'name': 'description', 'value': granuleName})
        link = self._getLinkToGranule(doc)
        if link is not None:
            item.append({'name': 'link', 'value': link})

    def _getLinkToGranule(self, doc):
        """Return the reference path whose type equals self.linkToGranule, or None."""
        if 'GranuleReference-Type' not in doc:
            return None
        try:
            typeIndex = doc['GranuleReference-Type'].index(self.linkToGranule)
        except ValueError:
            return None
        paths = doc.get('GranuleReference-Path', [])
        if len(paths) > typeIndex:
            return paths[typeIndex]
        return None
http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/granulerssresponse.py
----------------------------------------------------------------------
diff --git a/src/main/python/libraries/edge/opensearch/granulerssresponse.py b/src/main/python/libraries/edge/opensearch/granulerssresponse.py
new file mode 100644
index 0000000..a514cca
--- /dev/null
+++ b/src/main/python/libraries/edge/opensearch/granulerssresponse.py
@@ -0,0 +1,96 @@
+import urllib
+from edge.opensearch.rssresponsebysolr import RssResponseBySolr
+from edge.dateutility import DateUtility
+
class GranuleRssResponse(RssResponseBySolr):
    """RSS 2.0 response for granule-level OpenSearch results."""

    def __init__(self, linkToGranule, host, url):
        # linkToGranule: comma-separated granule reference types, in priority order.
        super(GranuleRssResponse, self).__init__()

        self.linkToGranule = linkToGranule.split(',')
        self.host = host
        self.url = url

    def _populateChannel(self, solrResponse):
        # Advertise the dataset OpenSearch descriptor at channel level.
        self.variables.append({'namespace':'atom', 'name': 'link', 'attribute': {'href': self.url+self.searchBasePath+'podaac-dataset-osd.xml', 'rel': 'search', 'type': 'application/opensearchdescription+xml' }})

    def _populateItem(self, solrResponse, doc, item):
        """Append RSS item elements for one granule Solr document to *item*."""
        # BUG FIX: this module never imported datetime at the top, so the
        # fallback branch below raised NameError for any granule that had no
        # start time; a function-scope import keeps the module interface unchanged.
        import datetime

        item.append({'name': 'title', 'value': doc['Granule-Name'][0]})
        item.append({'name': 'description', 'value': doc['Granule-Name'][0]})

        # pubDate: granule start time when available, otherwise "now".
        if 'Granule-StartTimeLong' in doc and doc['Granule-StartTimeLong'][0] != '':
            updated = DateUtility.convertTimeLongToIso(doc['Granule-StartTimeLong'][0])
        else:
            updated = datetime.datetime.utcnow().isoformat()+'Z'

        item.append({'name': 'pubDate', 'value': updated})
        item.append({'name': 'guid', 'value': doc['Dataset-PersistentId'][0] + ':' + doc['Granule-Name'][0]})

        link = self._getLinkToGranule(doc)
        if link is not None:
            item.append({'name': 'link', 'value': link})

        # Enclosures pointing back at this service for RSS / ISO / FGDC metadata.
        parameters = {'datasetId': doc['Dataset-PersistentId'][0], 'granuleName': doc['Granule-Name'][0]}
        parameters['full'] = 'true'
        parameters['format'] = 'rss'
        item.append({'name': 'enclosure', 'attribute': {'url': self.url+self.searchBasePath + 'granule?' + urllib.urlencode(parameters), 'type': 'application/rss+xml', 'length': '0'}})
        del parameters['full']
        parameters['format'] = 'iso'
        item.append({'name': 'enclosure', 'attribute': {'url': self.url+self.metadataBasePath + 'granule?' + urllib.urlencode(parameters), 'type': 'text/xml', 'length': '0'}})
        parameters['format'] = 'fgdc'
        item.append({'name': 'enclosure', 'attribute': {'url': self.url+self.metadataBasePath + 'granule?' + urllib.urlencode(parameters), 'type': 'text/xml', 'length': '0'}})

        if 'GranuleReference-Type' in doc:
            # MIME type for data enclosures, derived from the granule format.
            # (renamed from 'type', which shadowed the builtin)
            if 'Granule-DataFormat' in doc:
                mimeType = 'application/x-' + doc['Granule-DataFormat'][0].lower()
            else:
                mimeType = 'text/plain'
            # Look for ONLINE references only
            granuleRefDict = dict([(doc['GranuleReference-Type'][i], doc['GranuleReference-Path'][i]) for i,x in enumerate(doc['GranuleReference-Status']) if x=="ONLINE"])
            # Prefer LOCAL over REMOTE for both OPeNDAP and FTP links.
            if 'LOCAL-OPENDAP' in granuleRefDict:
                item.append({'name': 'enclosure', 'attribute': {'url': granuleRefDict['LOCAL-OPENDAP'], 'type': 'text/html', 'length': '0'}})
            elif 'REMOTE-OPENDAP' in granuleRefDict:
                item.append({'name': 'enclosure', 'attribute': {'url': granuleRefDict['REMOTE-OPENDAP'], 'type': 'text/html', 'length': '0'}})
            if 'LOCAL-FTP' in granuleRefDict:
                item.append({'name': 'enclosure', 'attribute': {'url': granuleRefDict['LOCAL-FTP'], 'type': mimeType, 'length': '0'}})
            elif 'REMOTE-FTP' in granuleRefDict:
                item.append({'name': 'enclosure', 'attribute': {'url': granuleRefDict['REMOTE-FTP'], 'type': mimeType, 'length': '0'}})

        item.append({'namespace': 'podaac', 'name': 'datasetId', 'value': doc['Dataset-PersistentId'][0]})
        item.append({'namespace': 'podaac', 'name': 'shortName', 'value': doc['Dataset-ShortName'][0]})

        # georss bounding envelope; GML corners are "lon lat" pairs.
        if 'GranuleSpatial-NorthLat' in doc and 'GranuleSpatial-EastLon' in doc and 'GranuleSpatial-SouthLat' in doc and 'GranuleSpatial-WestLon' in doc:
            item.append({'namespace': 'georss', 'name': 'where', 'value': {'namespace': 'gml', 'name': 'Envelope', 'value': [{'namespace': 'gml', 'name': 'lowerCorner', 'value': ' '.join([doc['GranuleSpatial-WestLon'][0], doc['GranuleSpatial-SouthLat'][0]])}, {'namespace': 'gml', 'name': 'upperCorner', 'value': ' '.join([doc['GranuleSpatial-EastLon'][0], doc['GranuleSpatial-NorthLat'][0]])}]}})

        if 'Granule-StartTimeLong' in doc and doc['Granule-StartTimeLong'][0] != '':
            item.append({'namespace': 'time', 'name': 'start', 'value': DateUtility.convertTimeLongToIso(doc['Granule-StartTimeLong'][0])})

        if 'Granule-StopTimeLong' in doc and doc['Granule-StopTimeLong'][0] != '':
            item.append({'namespace': 'time', 'name': 'end', 'value': DateUtility.convertTimeLongToIso(doc['Granule-StopTimeLong'][0])})

        # full=true requests also inline all PO.DAAC metadata fields.
        if 'full' in self.parameters and self.parameters['full']:
            multiValuedElementsKeys = ('GranuleArchive-', 'GranuleReference-')
            self._populateItemWithPodaacMetadata(doc, item, multiValuedElementsKeys)

    def _getLinkToGranule(self, doc):
        """Return the first ONLINE reference path matching the configured priority.

        When the document carries no references at all, fall back to an
        OpenSearch metadata URL for the granule; return None when references
        exist but none of the preferred types is ONLINE.
        """
        link = None

        if 'GranuleReference-Type' in doc and len(self.linkToGranule) > 0:
            granuleRefDict = dict(zip(doc['GranuleReference-Type'], zip(doc['GranuleReference-Path'], doc['GranuleReference-Status'])))

            for refType in self.linkToGranule:
                # check that the reference type exists and is online
                if refType in granuleRefDict:
                    if granuleRefDict[refType][1] == 'ONLINE':
                        link = granuleRefDict[refType][0]
                        break
        else:
            # No link to granule download provided so create link back to
            # opensearch to retrieve granule metadata.
            link = "http://" + self.host + "/granule/opensearch.rss?granule=" + doc['Granule-Name'][0]

        return link
http://git-wip-us.apache.org/repos/asf/incubator-sdap-edge/blob/53351bf3/src/main/python/libraries/edge/opensearch/granulewriter.py
----------------------------------------------------------------------
diff --git a/src/main/python/libraries/edge/opensearch/granulewriter.py b/src/main/python/libraries/edge/opensearch/granulewriter.py
new file mode 100644
index 0000000..ddbb194
--- /dev/null
+++ b/src/main/python/libraries/edge/opensearch/granulewriter.py
@@ -0,0 +1,251 @@
+from types import *
+import logging
+import urllib
+import json
+
+from edge.opensearch.responsewriter import ResponseWriter
+from edge.dateutility import DateUtility
+from edge.httputility import HttpUtility
+from edge.spatialsearch import SpatialSearch
+import re
+
class GranuleWriter(ResponseWriter):
    """Tornado response writer that serves granule-level OpenSearch queries.

    Collects the request's search parameters, looks up the target dataset's
    metadata to determine its processing level, and then dispatches the
    granule search to one of three back ends: an external L2 (swath) search
    service, a local spatial database, or Solr.
    """

    def __init__(self, configFilePath, requiredParams=None):
        super(GranuleWriter, self).__init__(configFilePath, requiredParams)
        # Paging defaults; both may be overridden per request in get().
        self.startIndex = 0
        self.entriesPerPage = self._configuration.getint('solr', 'entriesPerPage')

    def get(self, requestHandler):
        """Handle an HTTP GET request for a granule search.

        Every optional argument is read best-effort: a missing argument makes
        get_argument() raise, and the argument is simply skipped. Ends by
        kicking off an asynchronous dataset-metadata lookup whose callback
        (_onSolrDetermineProcessLevelResponse) performs the actual search.
        """
        super(GranuleWriter, self).get(requestHandler)

        try:
            self.startIndex = requestHandler.get_argument('startIndex')
        except Exception:
            pass

        try:
            self.entriesPerPage = requestHandler.get_argument('itemsPerPage')
            # Cap entries per page at 400.
            if int(self.entriesPerPage) > 400:
                self.entriesPerPage = 400
            self.searchParameters['itemsPerPage'] = self.entriesPerPage
        except Exception:
            pass

        try:
            if requestHandler.get_argument('pretty').lower() == 'false':
                self.pretty = False
                self.searchParameters['pretty'] = 'false'
        except Exception:
            pass

        try:
            if requestHandler.get_argument('full').lower() == 'true':
                self.searchParameters['full'] = 'true'
        except Exception:
            pass

        try:
            self.searchParameters['format'] = requestHandler.get_argument('format')
        except Exception:
            pass

        # Pass-through search parameters: copied verbatim into both the
        # query variables and the echoed searchParameters.
        parameters = ['startTime', 'endTime', 'keyword', 'granuleName',
                      'datasetId', 'shortName', 'bbox', 'sortBy']
        for parameter in parameters:
            try:
                value = requestHandler.get_argument(parameter)
                self.variables[parameter] = value
                self.searchParameters[parameter] = value
            except Exception:
                pass

        if 'keyword' in self.variables:
            # Strip wildcards and lower-case to match the
            # SearchableText-LowerCased Solr field.
            self.variables['keyword'] = self.variables['keyword'].replace('*', '')
            self.variables['keyword'] = self.variables['keyword'].lower()

        # Fetch dataset metadata from Solr (to determine processing level).
        datasetVariables = {}
        if 'datasetId' in self.variables:
            datasetVariables['datasetId'] = self.variables['datasetId']
        if 'shortName' in self.variables:
            datasetVariables['shortName'] = self.variables['shortName']
        self._getSingleSolrDatasetResponse(datasetVariables, self._onSolrDetermineProcessLevelResponse)

    def _getSolrResponse(self, startIndex, entriesPerPage, variables):
        """Issue the granule query to Solr asynchronously.

        The inherited _onSolrResponse callback receives the raw result.
        """
        query = self._constructSolrQuery(startIndex, entriesPerPage, variables)
        url = self._configuration.get('solr', 'granuleUrl')

        httpUtility = HttpUtility()
        httpUtility.getResponse(url + '/select/?' + query, self._onSolrResponse)

    def _constructSolrQuery(self, startIndex, entriesPerPage, variables):
        """Build the URL query string for a granule Solr search.

        Translates each recognized key in `variables` into a Solr clause;
        clauses are AND-ed together. Returns the assembled query string
        (already %-escaped where needed).
        """
        # Default sort order: newest granules first.
        sort = 'Granule-StartTimeLong+desc'
        filterQuery = None
        queries = []
        # .items() instead of the Python-2-only .iteritems(): identical
        # behavior in Python 2, and forward-compatible with Python 3.
        for key, value in variables.items():
            if key == 'startTime':
                startTime = DateUtility.convertISOToUTCTimestamp(value)
                if startTime is not None:
                    # Granule must end at or after the requested start time.
                    query = 'Granule-StopTimeLong:'
                    query += '[' + str(startTime) + '%20TO%20*]'
                    queries.append(query)
            elif key == 'endTime':
                stopTime = DateUtility.convertISOToUTCTimestamp(value)
                if stopTime is not None:
                    # Granule must begin at or before the requested end time.
                    query = 'Granule-StartTimeLong:'
                    query += '[*%20TO%20' + str(stopTime) + ']'
                    queries.append(query)
            elif key == 'keyword':
                newValue = urllib.quote(value)
                query = 'SearchableText-LowerCased:(' + newValue + ')'
                queries.append(query)
            elif key == 'datasetId':
                query = 'Dataset-PersistentId:' + self._urlEncodeSolrQueryValue(value)
                queries.append(query)
            elif key == 'shortName':
                query = 'Dataset-ShortName-Full:' + self._urlEncodeSolrQueryValue(value)
                queries.append(query)
            elif key == 'granuleName':
                query = 'Granule-Name-Full:' + self._urlEncodeSolrQueryValue(value)
                queries.append(query)
            elif key == 'granuleIds':
                granuleIds = [str(granuleId) for granuleId in value]
                query = 'Granule-Id:(' + '+OR+'.join(granuleIds) + ')'
                queries.append(query)
                # The id list already reflects paging done by the spatial
                # search, so Solr must start at 0.
                startIndex = 0
            elif key == 'sortBy':
                sortByMapping = {'timeAsc': 'Granule-StartTimeLong+asc'}
                if value in sortByMapping:
                    sort = sortByMapping[value]
            elif key == 'bbox':
                filterQuery = self._constructBoundingBoxQuery(value)

        if len(queries) == 0:
            queries.append('*')

        query = 'q=' + '+AND+'.join(queries) + '&fq=Granule-AccessType:(OPEN+OR+PREVIEW+OR+SIMULATED+OR+REMOTE)+AND+Granule-Status:ONLINE&version=2.2&start=' + str(startIndex) + '&rows=' + str(entriesPerPage) + '&indent=on&wt=json&sort=' + sort
        if filterQuery is not None:
            query += '&' + filterQuery
        logging.debug('solr query: ' + query)

        return query

    def _onSolrDetermineProcessLevelResponse(self, response):
        """Callback for the dataset-metadata lookup; routes the search.

        For a bounding-box search on an L2 dataset, delegates either to the
        external L2 service or to the local spatial database (per the
        'service.bbox' configuration); all other searches go to Solr. When
        no dataset metadata is found an empty result is written.
        """
        try:
            # Determine dataset processing level.
            processingLevel = None
            solrJson = json.loads(response.body)
            if len(solrJson['response']['docs']) >= 1:
                if 'bbox' in self.variables:
                    processingLevel = solrJson['response']['docs'][0]['Dataset-ProcessingLevel-Full'][0]
                    if processingLevel is not None and processingLevel.find('2') != -1:
                        if self._configuration.get('service', 'bbox') == 'l2':
                            # Delegate the bbox search to the external L2
                            # (swath) search service, forwarding the query.
                            httpUtility = HttpUtility()
                            url = self._configuration.get('service', 'l2') + '?'
                            if 'format' not in self.requestHandler.request.arguments:
                                url += 'format=atom&'
                            url += self.requestHandler.request.query
                            logging.debug("Calling L2 Service: " + url)
                            httpUtility.getResponse(url, self._onL2Response)
                        else:
                            # Resolve the bbox against the local spatial DB.
                            points = self.variables['bbox'].split(',')
                            if len(points) == 4:
                                spatialSearch = SpatialSearch(
                                    self._configuration.get('service', 'database')
                                )
                                spatialResult = spatialSearch.searchGranules(
                                    int(self.startIndex),
                                    int(self.entriesPerPage),
                                    float(points[0]),
                                    float(points[1]),
                                    float(points[2]),
                                    float(points[3])
                                )
                                logging.debug("Granule spatial search returned")
                                self.variables['granuleIds'] = spatialResult[0]
                                self.variables['granuleIdsFound'] = spatialResult[1]

                            del self.variables['bbox']
                            # Synthesize a minimal Solr-shaped response that
                            # carries only the matched granule names.
                            solrJson = {'responseHeader': {'params': {}}, 'response': {}}
                            solrJson['response']['numFound'] = int(self.variables['granuleIdsFound'])
                            solrJson['response']['start'] = int(self.startIndex)
                            solrJson['responseHeader']['params']['rows'] = int(self.entriesPerPage)
                            solrJson['response']['docs'] = []
                            for name in self.variables['granuleIds']:
                                solrJson['response']['docs'].append({'Granule-Name': [name]})
                            solrResponse = json.dumps(solrJson)

                            searchText = ''
                            if 'keyword' in self.variables:
                                searchText = self.variables['keyword']
                            openSearchResponse = self._generateOpenSearchResponse(
                                solrResponse,
                                searchText,
                                self._configuration.get('service', 'url') + self.requestHandler.request.path,
                                self.searchParameters,
                                self.pretty
                            )

                            self.requestHandler.set_header("Content-Type", "application/xml")
                            self.requestHandler.write(openSearchResponse)
                            self.requestHandler.finish()
                    else:
                        # Dataset is not an L2 dataset so handle search via Solr.
                        try:
                            self._getSolrResponse(self.startIndex, self.entriesPerPage, self.variables)
                        except Exception:
                            logging.exception('Failed to get solr response.')
                else:
                    # Not a bounding box search so handle search via Solr.
                    try:
                        self._getSolrResponse(self.startIndex, self.entriesPerPage, self.variables)
                    except Exception:
                        logging.exception('Failed to get solr response.')
            else:
                # Dataset metadata cannot be retrieved so return an empty
                # search result.
                solrJson = {'responseHeader': {'params': {}}, 'response': {}}
                solrJson['response']['numFound'] = 0
                solrJson['response']['start'] = int(self.startIndex)
                solrJson['responseHeader']['params']['rows'] = int(self.entriesPerPage)
                solrJson['response']['docs'] = []
                solrResponse = json.dumps(solrJson)

                self._writeResponse(solrResponse)
        except BaseException as exception:
            logging.exception('Failed to determine dataset processing level for bbox search ' + str(exception))
            self._handleException(str(exception))

    def _onL2Response(self, response):
        """Proxy the L2 service response back to the client.

        Forwards Content-Type/Content-Length headers when present; the body
        is forwarded regardless.
        """
        if response.error:
            self._handleException(str(response.error))
        else:
            try:
                logging.debug('header: Content-Type ' + response.headers['Content-Type'])
                self.requestHandler.set_header('Content-Type', response.headers['Content-Type'])
                logging.debug('header: Content-Length ' + response.headers['Content-Length'])
                self.requestHandler.set_header('Content-Length', response.headers['Content-Length'])
            except Exception:
                # Headers are optional; still forward the body.
                pass
            self.requestHandler.write(response.body)
            self.requestHandler.finish()
+