You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@sdap.apache.org by GitBox <gi...@apache.org> on 2018/10/23 17:31:32 UTC

[GitHub] fgreg closed pull request #10: SDAP-155 add processor to extract timestamp from granule metadata

fgreg closed pull request #10: SDAP-155 add processor to extract timestamp from granule metadata
URL: https://github.com/apache/incubator-sdap-ningesterpy/pull/10
 
 
   

This pull request was merged from a forked repository.
Because GitHub does not display the original diff of a fork-based
pull request once it has been merged, the diff is reproduced below
for the sake of provenance:

diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 0000000..bdf9b07
--- /dev/null
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,21 @@
+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
+      <option name="ignoredPackages">
+        <value>
+          <list size="8">
+            <item index="0" class="java.lang.String" itemvalue="flask-accept" />
+            <item index="1" class="java.lang.String" itemvalue="protobuf" />
+            <item index="2" class="java.lang.String" itemvalue="werkzeug" />
+            <item index="3" class="java.lang.String" itemvalue="six" />
+            <item index="4" class="java.lang.String" itemvalue="PyYAML" />
+            <item index="5" class="java.lang.String" itemvalue="nexusproto" />
+            <item index="6" class="java.lang.String" itemvalue="pytz" />
+            <item index="7" class="java.lang.String" itemvalue="flask" />
+          </list>
+        </value>
+      </option>
+    </inspection_tool>
+  </profile>
+</component>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 0218159..3c94e77 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,4 +1,4 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.5.4 (~/anaconda/envs/ningesterpy/bin/python)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6.6 (~/anaconda3/envs/sdap-ningesterpy/bin/python)" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/.idea/ningesterpy.iml b/.idea/ningesterpy.iml
index 1f079f2..30a41bc 100644
--- a/.idea/ningesterpy.iml
+++ b/.idea/ningesterpy.iml
@@ -4,12 +4,9 @@
     <content url="file://$MODULE_DIR$">
       <sourceFolder url="file://$MODULE_DIR$" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.5.4 (~/anaconda/envs/ningesterpy/bin/python)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.6.6 (~/anaconda3/envs/sdap-ningesterpy/bin/python)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
-  <component name="PyDocumentationSettings">
-    <option name="renderExternalDocumentation" value="true" />
-  </component>
   <component name="TemplatesService">
     <option name="TEMPLATE_FOLDERS">
       <list>
diff --git a/sdap/processors/__init__.py b/sdap/processors/__init__.py
index 6d4a679..2cb2f73 100644
--- a/sdap/processors/__init__.py
+++ b/sdap/processors/__init__.py
@@ -62,6 +62,7 @@ def process_nexus_tile(self, nexus_tile):
 from sdap.processors.tilereadingprocessor import GridReadingProcessor, SwathReadingProcessor, TimeSeriesReadingProcessor
 from sdap.processors.tilesummarizingprocessor import TileSummarizingProcessor
 from sdap.processors.winddirspeedtouv import WindDirSpeedToUV
+from sdap.processors.extracttimestampprocessor import ExtractTimestampProcessor
 
 INSTALLED_PROCESSORS = {
     "CallNcpdq": CallNcpdq,
@@ -78,5 +79,7 @@ def process_nexus_tile(self, nexus_tile):
     "SwathReadingProcessor": SwathReadingProcessor,
     "TimeSeriesReadingProcessor": TimeSeriesReadingProcessor,
     "TileSummarizingProcessor": TileSummarizingProcessor,
-    "WindDirSpeedToUV": WindDirSpeedToUV
+    "WindDirSpeedToUV": WindDirSpeedToUV,
+    "ExtractTimestampProcessor": ExtractTimestampProcessor
 }
+
diff --git a/sdap/processors/extracttimestampprocessor.py b/sdap/processors/extracttimestampprocessor.py
new file mode 100644
index 0000000..98b4147
--- /dev/null
+++ b/sdap/processors/extracttimestampprocessor.py
@@ -0,0 +1,98 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import nexusproto
+from nexusproto.serialization import from_shaped_array
+
+import datetime
+import time
+import logging
+from netCDF4 import Dataset, num2date
+from pytz import timezone
+
+from sdap.processors import NexusTileProcessor
+
+EPOCH = timezone('UTC').localize(datetime.datetime(1970, 1, 1))
+
+
+class BadTimestampExtractionException(Exception):
+    """Raised when a granule timestamp cannot be extracted or applied to a tile."""
+
+
+def to_seconds_from_epoch(timestamp, pattern):
+    """Convert a formatted timestamp string to whole seconds since the Unix epoch.
+
+    A timestamp parsed without a UTC offset is interpreted as UTC, so the
+    result does not depend on the time zone of the host doing the ingestion.
+
+    :param timestamp: timestamp text read from the granule metadata
+    :param pattern: ``strptime`` format string describing ``timestamp``
+    :return: integer number of seconds since 1970-01-01T00:00:00 UTC
+    :raises BadTimestampExtractionException: if ``timestamp`` does not match ``pattern``
+    """
+    try:
+        parsed = datetime.datetime.strptime(timestamp, pattern)
+    except (TypeError, ValueError) as e:
+        message = '{} timestamp is not of the format {}'.format(timestamp, pattern)
+        logging.error(message)
+        # Raise instead of falling through and returning None, which the caller
+        # would otherwise try to store as the tile time.
+        raise BadTimestampExtractionException(message) from e
+
+    # Only localize naive values; a pattern containing %z already yields an aware datetime.
+    if parsed.tzinfo is None:
+        parsed = timezone('UTC').localize(parsed)
+
+    return int((parsed - EPOCH).total_seconds())
+
+
+class ExtractTimestampProcessor(NexusTileProcessor):
+    """Set a grid tile's time from an attribute of the tile's source granule dataset."""
+
+    def __init__(self, timestamp_name, timestamp_pattern, *args, **kwargs):
+        """
+        :param timestamp_name: name of the granule attribute that holds the timestamp
+        :param timestamp_pattern: ``strptime`` format string of that attribute's value
+        """
+        super().__init__(*args, **kwargs)
+
+        self.timestamp_name = timestamp_name
+        self.timestamp_pattern = timestamp_pattern
+
+    def process_nexus_tile(self, nexus_tile):
+        """Read the configured timestamp attribute and store it as the tile's time.
+
+        The input tile is modified in place and then yielded.
+
+        :param nexus_tile: tile whose ``summary.granule`` names the source file
+        :return: generator yielding the updated tile
+        :raises BadTimestampExtractionException: if the tile is not a grid tile or the
+            attribute value does not match the configured pattern
+        """
+        tile_type = nexus_tile.tile.WhichOneof("tile_type")
+        if tile_type != "grid_tile":
+            # Reject unsupported tiles before opening the granule file.
+            raise BadTimestampExtractionException("Unsupported tile type: {}".format(tile_type))
+
+        file_path = nexus_tile.summary.granule
+        if file_path.startswith('file:'):
+            file_path = file_path[len('file:'):]
+
+        with Dataset(file_path) as ds:
+            timestamp = getattr(ds, self.timestamp_name)
+
+        nexus_tile.tile.grid_tile.time = to_seconds_from_epoch(timestamp, self.timestamp_pattern)
+
+        yield nexus_tile
diff --git a/tests/datafiles/not_empty_gpm.HDF5 b/tests/datafiles/not_empty_gpm.HDF5
new file mode 100644
index 0000000..01397a7
Binary files /dev/null and b/tests/datafiles/not_empty_gpm.HDF5 differ
diff --git a/tests/datafiles/not_empty_modis.nc b/tests/datafiles/not_empty_modis.nc
new file mode 100644
index 0000000..11bc775
Binary files /dev/null and b/tests/datafiles/not_empty_modis.nc differ
diff --git a/tests/extracttimestamp_test.py b/tests/extracttimestamp_test.py
new file mode 100644
index 0000000..e2800ab
--- /dev/null
+++ b/tests/extracttimestamp_test.py
@@ -0,0 +1,68 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import unittest
+from os import path
+import logging
+
+import sdap.processors
+from nexusproto import DataTile_pb2 as nexusproto
+from sdap.processors.extracttimestampprocessor import BadTimestampExtractionException
+
+
+class TestExtractTimestamp(unittest.TestCase):
+    """Exercise ExtractTimestampProcessor against the not_empty_modis.nc test granule."""
+
+    def setUp(self):
+        self.module = sdap.processors.ExtractTimestampProcessor('time_coverage_start', '%Y-%m-%dT%H:%M:%S.000Z')
+
+    @staticmethod
+    def make_input_tile(tile_field, empty_tile):
+        """Build a NexusTile for the test granule with ``tile_field`` set to ``empty_tile``."""
+        granule = path.join(path.dirname(__file__), 'datafiles', 'not_empty_modis.nc')
+
+        summary = nexusproto.TileSummary()
+        summary.granule = "file:%s" % granule
+        summary.section_spec = "time:0:1,lat:0:10,lon:0:10"
+
+        tile = nexusproto.NexusTile()
+        tile.summary.CopyFrom(summary)
+        getattr(tile.tile, tile_field).CopyFrom(empty_tile)
+        return tile
+
+    def test_extract_timestamp_from_metadata(self):
+        # A grid tile receives the granule's time_coverage_start as seconds since the epoch.
+        input_tile = self.make_input_tile('grid_tile', nexusproto.GridTile())
+
+        nexus_tile_after = list(self.module.process_nexus_tile(input_tile))[0]
+
+        self.assertEqual(1537428301, nexus_tile_after.tile.grid_tile.time)
+
+    def test_extract_timestamp_swath_exception(self):
+        # Swath tiles are rejected; the generator raises once it is consumed.
+        input_tile = self.make_input_tile('swath_tile', nexusproto.SwathTile())
+
+        with self.assertRaises(BadTimestampExtractionException):
+            list(self.module.process_nexus_tile(input_tile))
+
+    def test_extract_timestamp_timeseries_exception(self):
+        # Time series tiles are rejected; the generator raises once it is consumed.
+        input_tile = self.make_input_tile('time_series_tile', nexusproto.TimeSeriesTile())
+
+        with self.assertRaises(BadTimestampExtractionException):
+            list(self.module.process_nexus_tile(input_tile))
+
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services