You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@vxquery.apache.org by pr...@apache.org on 2013/10/01 19:44:38 UTC
svn commit: r1528157 - in /incubator/vxquery/trunk/vxquery/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts: weather_data_files.py weather_download_files.py

Author: prestonc
Date: Tue Oct  1 17:44:37 2013
New Revision: 1528157

URL: http://svn.apache.org/r1528157
Log:
Simplified some of the output so we are now using all custom code instead of items from stackoverflow.

Modified:
    incubator/vxquery/trunk/vxquery/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
    incubator/vxquery/trunk/vxquery/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py

Modified: incubator/vxquery/trunk/vxquery/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py
URL: http://svn.apache.org/viewvc/incubator/vxquery/trunk/vxquery/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py?rev=1528157&r1=1528156&r2=1528157&view=diff
==============================================================================
--- incubator/vxquery/trunk/vxquery/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py (original)
+++ incubator/vxquery/trunk/vxquery/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py Tue Oct  1 17:44:37 2013
@@ -175,7 +175,7 @@ class WeatherDataFiles:
         print "CSV DETAILS OF PROCESSED STATIONS"
         print "Number of stations:\t" + "{:,}".format(station_count)
         print "Number of files:\t" + "{:,}".format(file_count)
-        print "Data size:\t\t" + sizeof_fmt(data_size) + "\n"
+        print "Data size:\t\t" + "{:,}".format(data_size) + " Bytes\n"
 
         print "CSV DETAILS OF unPROCESSED STATIONS"
         print "Number of stations:\t" + "{:,}".format(station_count_missing) + "\n"
@@ -183,7 +183,7 @@ class WeatherDataFiles:
         print "FOLDER DETAILS"
         print "Number of stations:\t" + "{:,}".format(station_count_actual)
         print "Number of files:\t" + "{:,}".format(file_count_actual)
-        print "Data size:\t\t" + sizeof_fmt(data_size_actual) + "\n"
+        print "Data size:\t\t" + "{:,}".format(data_size_actual) + " Bytes\n"
 
     
     def get_progress_csv_row(self, file_name, status, file_count=-1, data_size=-1):
@@ -247,17 +247,3 @@ class WeatherDataFiles:
             if columns[self.INDEX_DATA_STATUS].strip() != self.DATA_FILE_CREATED:
                 break
         return columns[self.INDEX_DATA_FILE_NAME]
-
-# sizeof_fmt function is taken from an answer posted to stackoverflow.com.
-#
-# Question: 
-#   http://stackoverflow.com/questions/1094841
-# Answer Author: 
-#   http://stackoverflow.com/users/55246/sridhar-ratnakumar
-def sizeof_fmt(num):
-    for x in ['bytes', 'KB', 'MB', 'GB']:
-        if num < 1024.0 and num > -1024.0:
-            return "%3.1f%s" % (num, x)
-        num /= 1024.0
-    return "%3.1f%s" % (num, 'TB')
-    

Modified: incubator/vxquery/trunk/vxquery/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py
URL: http://svn.apache.org/viewvc/incubator/vxquery/trunk/vxquery/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py?rev=1528157&r1=1528156&r2=1528157&view=diff
==============================================================================
--- incubator/vxquery/trunk/vxquery/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py (original)
+++ incubator/vxquery/trunk/vxquery/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py Tue Oct  1 17:44:37 2013
@@ -14,10 +14,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import os.path
 import shutil
 import tarfile
-import urllib2
+import urllib
 
 # Custom modules.
 from weather_dly_config import *
@@ -42,37 +43,9 @@ class WeatherDownloadFiles:
         file_name = self.save_path + "/" + url.split('/')[-1]
 
         if not os.path.isfile(file_name) or reset:
-            download_file_with_status(url, file_name)
-
-    # download_file_with_status function is based on a question posted to
-    # stackoverflow.com.
-    #
-    # Question: 
-    #   http://stackoverflow.com/questions/22676
-    # Answer Authors: 
-    #   http://stackoverflow.com/users/394/pablog
-    #   http://stackoverflow.com/users/160206/bjorn-pollex
-    def download_file_with_status(self, url, file_name):
-        u = urllib2.urlopen(url)
-        f = open(file_name, 'wb')
-        meta = u.info()
-        file_size = int(meta.getheaders("Content-Length")[0])
-        print "Downloading: %s Bytes: %s" % (file_name, file_size)
-
-        file_size_dl = 0
-        block_sz = 8192
-        while True:
-            buffer = u.read(block_sz)
-            if not buffer:
-                break
-
-            file_size_dl += len(buffer)
-            f.write(buffer)
-            status = r"%10d  [%3.2f%%]" % (file_size_dl, file_size_dl * 100. / file_size)
-            status = status + chr(8) * (len(status) + 1)
-            print status,
-
-        f.close()
+            print "Downloading: " + url
+            urllib.urlretrieve(url, file_name, report_download_status)
+            print
 
     # Unzip the package file, unless it exists.
     def unzip_package(self, package, reset=False):
@@ -86,4 +59,21 @@ class WeatherDownloadFiles:
             print "Unzipping: " + file_name
             tar_file = tarfile.open(file_name, 'r:gz')
             tar_file.extractall(unzipped_path)
- 
\ No newline at end of file
+ 
+# Report download status; passed to urllib.urlretrieve as its reporthook callback.
+def report_download_status(count, block, size):
+    line_size = 20
+    erase = "\b" * line_size
+    sys.stdout.write(erase)
+    report = get_report_line( (float(count) * block) / size, line_size)
+    sys.stdout.write(report)
+
+def get_report_line(percentage, line_size):
+    report = ""
+    for i in range(0, line_size):
+        if (float(i) / line_size < percentage):
+            report += "="
+        else:
+            report += "-"
+    return report
+