You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@openoffice.apache.org by hd...@apache.org on 2014/05/28 17:04:50 UTC

svn commit: r1598032 - /openoffice/devtools/aoo-stats/detail-by-day.py

Author: hdu
Date: Wed May 28 15:04:50 2014
New Revision: 1598032

URL: http://svn.apache.org/r1598032
Log:
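Split logic and data to make the script more maintainable.

The logic itself and the resulting data rows remain unchanged; only the CSV header now spells the locale columns with hyphens (e.g. "zh-TW" instead of "zh_TW").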

Modified:
    openoffice/devtools/aoo-stats/detail-by-day.py

Modified: openoffice/devtools/aoo-stats/detail-by-day.py
URL: http://svn.apache.org/viewvc/openoffice/devtools/aoo-stats/detail-by-day.py?rev=1598032&r1=1598031&r2=1598032&view=diff
==============================================================================
--- openoffice/devtools/aoo-stats/detail-by-day.py (original)
+++ openoffice/devtools/aoo-stats/detail-by-day.py Wed May 28 15:04:50 2014
@@ -1,290 +1,119 @@
-################################################################
-# 
-#  Licensed to the Apache Software Foundation (ASF) under one
-#  or more contributor license agreements.  See the NOTICE file
-#  distributed with this work for additional information
-#  regarding copyright ownership.  The ASF licenses this file
-#  to you under the Apache License, Version 2.0 (the
-#  "License"); you may not use this file except in compliance
-#  with the License.  You may obtain a copy of the License at
-#  
-#    http://www.apache.org/licenses/LICENSE-2.0
-#  
-#  Unless required by applicable law or agreed to in writing,
-#  software distributed under the License is distributed on an
-#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#  KIND, either express or implied.  See the License for the
-#  specific language governing permissions and limitations
-#  under the License.
-#  
-################################################################
-
-
-# This script queries the SourceForce REST API for download statistics for
-# sets of files on SourceForge, for a range of dates, in ISO format (YYYY-MM-DD)
-# passed in as a command line argument.  The data, in CSV format is written to stdout
-
-
-import urllib
-import json
-import sys
-import datetime
-
-from urllib import urlencode
-
-def getSourceForgeStats(download, startDate, endDate):
-
-    url = download + "/stats/json?start_date=" + startDate + "&" "end_date=" + endDate
-
-    #print >> sys.stderr, url
-
-    attempts = 0
-
-    while attempts < 3:
-        try:    
-            conn = urllib.urlopen(url)
-            data = conn.read()
-            
-            return data
-
-        except:
-            attempts += 1
-            print url
-            print >> sys.stderr, "error " + download + "(" + str(attempts) + ")"
-
-    return ""
-    
-    
-if len(sys.argv) != 4:
-    print "syntax:  python detail-by-day.py <urls.lst> <start-date> <end-date>"
-    print "where <file.list> is a list of files URL's to gather stats on, and <start-date> and <end-date> are in YYYY-MM-DD format."
-    exit(-1)
-
-
-downloads = [line.strip() for line in open(sys.argv[1])]
-start_date =  datetime.datetime.strptime(sys.argv[2], '%Y-%m-%d')
-end_date = datetime.datetime.strptime(sys.argv[3], '%Y-%m-%d')
-
-print '"date","count_total","count_340","count_341","count_400","count_401","count_410","windows","mac","linux","linux32","linux64","deb","rpm","ar","ast","eu","zh_TW","zh_CN","cs","da","nl","en_GB","en_US","fi","fr","gd","gl","de","hu","it","ja","km","ko","lt","nb","pl","pt_BR","ru","sk","sl","es","sv","el","pt","ta","sr","tr","vi","he","bg","hi","th"'
-
-today = start_date
-
-while today <= end_date:
-
-    linux32 = 0
-    linux64 = 0
-    windows = 0
-    mac = 0
-    linux = 0
-    count_total = 0
-    count_340 = 0
-    count_341 = 0
-    count_400 = 0
-    count_401 = 0
-    count_410 = 0
-    deb = 0
-    rpm = 0
-
-    ar = 0
-    ast = 0 
-    eu = 0
-    zh_TW = 0
-    zh_CN = 0
-    cs = 0
-    da = 0
-    nl = 0
-    el = 0
-    en_GB = 0
-    en_US = 0
-    fi = 0
-    fr = 0
-    gd = 0
-    gl = 0
-    de = 0
-    hu = 0
-    it = 0
-    ja = 0
-    km = 0
-    ko = 0
-    lt = 0
-    nb = 0
-    pl = 0
-    pt = 0
-    pt_BR = 0
-    ru = 0
-    sk = 0
-    sl = 0
-    es = 0
-    sv = 0
-    ta = 0
-    sr = 0
-    tr = 0
-    vi = 0
-    he = 0
-    bg = 0
-    hi = 0
-    th = 0
-
-    date_string = today.strftime("%Y-%m-%d")
-
-    print >> sys.stderr, date_string
-
-    for download in downloads :
-        
-        try:
-            data = json.loads(getSourceForgeStats(download,date_string,date_string))
-            day_count = data["total"]
-        except ValueError:
-            data = ""
-            day_count = 0
-
-        count_total = count_total + day_count
-
-#versions
-
-        if download.find("3.4.0") != -1:
-            count_340 = count_340 + day_count
-
-        if download.find("3.4.1") != -1:
-            count_341 = count_341 + day_count
-
-        if download.find("4.0.0") != -1:
-            count_400 = count_400 + day_count
-
-        if download.find("4.0.1") != -1:
-            count_401 = count_401 + day_count
-
-        if download.find("4.1.0") != -1:
-            count_410 = count_410 + day_count
-
-
-
-#platforms
-
-        if download.find("Win_x86") != -1:
-            windows = windows + day_count
-
-        if download.find("MacOS") != -1:
-            mac = mac + day_count
-
-        if download.find("Linux") != -1:
-            linux = linux + day_count
-
-#architecture
-
-        if download.find("Linux_x86_") != -1:
-            linux32 = linux32 + day_count
-
-        if download.find("Linux_x86-64_") != -1:
-            linux64 = linux64 + day_count
-
-#packaging
-
-        if download.find("install-deb_") != -1:
-            deb = deb + day_count
-
-        if download.find("install-rpm_") != -1:
-            rpm = rpm + day_count
-
-#languages
-
-        if download.find("_ar.") != -1:
-            ar = ar + day_count
-        if download.find("_ast.") != -1:
-            ast = ast+ day_count
-        if download.find("_eu.") != -1:
-            eu = eu + day_count
-        if download.find("_zh-TW.") != -1:
-            zh_TW = zh_TW + day_count
-        if download.find("_zh-CN.") != -1:
-            zh_CN = zh_CN + day_count
-        if download.find("_cs.") != -1:
-            cs = cs + day_count
-        if download.find("_da.") != -1:
-            da = da + day_count
-        if download.find("_nl.") != -1:
-            nl = nl + day_count
-        if download.find("_el.") != -1:
-            el = el + day_count
-        if download.find("_en-GB.") != -1:
-            en_GB = en_GB + day_count
-        if download.find("_en-US.") != -1:
-            en_US = en_US + day_count
-        if download.find("_fi.") != -1:
-            fi = fi + day_count
-        if download.find("_fr.") != -1:
-            fr = fr + day_count
-        if download.find("_gd.") != -1:
-            gd = gd + day_count
-        if download.find("_gl.") != -1:
-            gl = gl + day_count
-        if download.find("_de.") != -1:
-            de = de + day_count
-        if download.find("_hu.") != -1:
-            hu = hu + day_count
-        if download.find("_it.") != -1:
-            it = it + day_count
-        if download.find("_ja.") != -1:
-            ja = ja + day_count
-        if download.find("_km.") != -1:
-            km = km + day_count
-        if download.find("_ko.") != -1:
-            ko = ko + day_count
-        if download.find("_lt.") != -1:
-            lt = lt + day_count
-        if download.find("_nb.") != -1:
-            nb = nb + day_count
-        if download.find("_pl.") != -1:
-            pl = pl + day_count
-        if download.find("_pt.") != -1:
-            pt = pt + day_count
-        if download.find("_pt-BR.") != -1:
-            pt_BR = pt_BR + day_count
-        if download.find("_ru.") != -1:
-            ru = ru + day_count
-        if download.find("_sk.") != -1:
-            sk = sk + day_count
-        if download.find("_sl.") != -1:
-            sl = sl + day_count
-        if download.find("_es.") != -1:
-            es = es + day_count
-        if download.find("_sv.") != -1:
-            sv = sv + day_count
-        if download.find("_ta.") != -1:
-            ta = ta + day_count
-        if download.find("_sr.") != -1:
-            sr = sr + day_count
-        if download.find("_tr.") != -1:
-            tr = tr + day_count
-        if download.find("_vi.") != -1:
-            vi = vi + day_count
-        if download.find("_he.") != -1:
-            he = he + day_count
-        if download.find("_bg.") != -1:
-            bg = bg + day_count
-        if download.find("_hi.") != -1:
-            hi = hi + day_count
-        if download.find("_th.") != -1:
-            th = th + day_count
-
-    print date_string + "," + str(count_total) + "," + str(count_340) + "," + str(count_341) + "," + str(count_400) + "," + str(count_401) + "," + str(count_410) + "," + \
-        str(windows) + "," + str(mac) + "," + str(linux) + "," + str(linux32) + "," + str(linux64) + "," + \
-        str(deb) + "," + str(rpm) + "," +  \
-        str(ar) + "," + str(ast) + "," + str(eu) + "," + str(zh_TW) + "," + \
-        str(zh_CN) + "," + str(cs) + "," + str(da) + "," + \
-        str(nl) + "," + str(en_GB) + "," + str(en_US) + "," + \
-        str(fi) + "," + str(fr) + "," + str(gd) + "," + \
-        str(gl) + "," + str(de) + "," + str(hu) + "," + \
-        str(it) + "," + str(ja) + "," + str(km) + "," + \
-        str(ko) + "," + str(lt) + "," + str(nb) + "," + str(pl) + "," + str(pt_BR) + "," + \
-        str(ru) + "," + str(sk) + "," + str(sl) + "," + \
-        str(es) + "," + str(sv) + "," + str(el) + "," + \
-        str(pt) + "," + str(ta) + "," + str(sr) + "," + str(tr) + "," + str(vi) + "," + str(he) + "," + str(bg)+ "," + str(hi)+ "," + str(th)
-
-
-    today += datetime.timedelta(days=1)
-
-     
-
-
-
+################################################################
+# 
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#  
+#    http://www.apache.org/licenses/LICENSE-2.0
+#  
+#  Unless required by applicable law or agreed to in writing,
+#  software distributed under the License is distributed on an
+#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#  KIND, either express or implied.  See the License for the
+#  specific language governing permissions and limitations
+#  under the License.
+#  
+################################################################
+
+
+# This script queries the SourceForge REST API for download statistics for
+# sets of files on SourceForge, for a range of dates in ISO format (YYYY-MM-DD)
+# passed in as command-line arguments.  The data is written to stdout in CSV format.
+
+
+import urllib
+import json
+import sys
+import datetime
+
+from urllib import urlencode
+
+def getSourceForgeStats(download, startDate, endDate):
+
+    url = download + "/stats/json?start_date=" + startDate + "&" "end_date=" + endDate
+
+    #print >> sys.stderr, url
+
+    attempts = 0
+
+    while attempts < 3:
+        try:    
+            conn = urllib.urlopen(url)
+            data = conn.read()
+            
+            return data
+
+        except:
+            attempts += 1
+            print url
+            print >> sys.stderr, "error " + download + "(" + str(attempts) + ")"
+
+    return ""
+    
+    
+if len(sys.argv) != 4:
+    print "syntax:  python detail-by-day.py <urls.lst> <start-date> <end-date>"
+    print "where <file.list> is a list of files URL's to gather stats on, and <start-date> and <end-date> are in YYYY-MM-DD format."
+    exit(-1)
+
+
+downloads = [line.strip() for line in open(sys.argv[1])]
+start_date =  datetime.datetime.strptime(sys.argv[2], '%Y-%m-%d')
+end_date = datetime.datetime.strptime(sys.argv[3], '%Y-%m-%d')
+
+# columns of interest
+columns = [ "count_total","count_340","count_341","count_400","count_401","count_410","windows","mac","linux","linux32","linux64","deb","rpm","ar","ast","eu","zh-TW","zh-CN","cs","da","nl","en-GB","en-US","fi","fr","gd","gl","de","hu","it","ja","km","ko","lt","nb","pl","pt-BR","ru","sk","sl","es","sv","el","pt","ta","sr","tr","vi","he","bg","hi","th"]
+
+# A column's counter is updated when the download name contains that column's search pattern.
+# The dictionary below maps column names to these search patterns.
+# A column with no entry is treated as a language column and uses the pattern "_<column>."
+patternDict = {
+    "count_total" : "",
+    "count_340" : "3.4.0",
+    "count_341" : "3.4.1",
+    "count_400" : "4.0.0",
+    "count_401" : "4.0.1",
+    "count_410" : "4.1.0",
+    "windows"   : "Win_x86",
+    "mac"       : "MacOS",
+    "linux"     : "Linux",
+    "linux32"   : "Linux_x86_",
+    "linux64"   : "Linux_x86-64_",
+    "deb"       : "install-deb_",
+    "rpm"       : "install-rpm_"
+}
+
+
+print( '"date","' + '","'.join(columns) + '"')
+
+today = start_date
+
+while today <= end_date:
+
+    counts = dict( [(c,0) for c in columns])
+
+    date_string = today.strftime("%Y-%m-%d")
+    print >> sys.stderr, date_string
+
+    for download in downloads :
+        
+        try:
+            data = json.loads(getSourceForgeStats(download,date_string,date_string))
+            day_count = data["total"]
+        except ValueError:
+            continue
+
+        # update the per column counts
+        for c in columns:
+            pattern = patternDict[c] if c in patternDict else ("_%s." % c)
+            if download.find(pattern) != -1:
+               	counts[c] += day_count
+
+    print( date_string + ',' + ','.join( [str(counts[c]) for c in columns]))
+
+    today += datetime.timedelta(days=1)
+