Posted to commits@bluemarlin.apache.org by ra...@apache.org on 2022/02/18 01:21:15 UTC

[incubator-bluemarlin] branch main updated: refactoring

This is an automated email from the ASF dual-hosted git repository.

radibnia pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-bluemarlin.git


The following commit(s) were added to refs/heads/main by this push:
     new b15ce89  refactoring
     new f75442f  Merge pull request #50 from radibnia77/main
b15ce89 is described below

commit b15ce89611a2c3b74d1a296a88cbe714da7e5f4b
Author: Reza <re...@yahoo.com>
AuthorDate: Thu Feb 17 17:19:53 2022 -0800

    refactoring
---
 Model/predictor-dl-model/VERSION.md                |  16 +-
 .../{tests/trainer => docs}/README.md              |   0
 .../{tests/pipeline => docs/design}/README.md      |   0
 .../{scripts => docs/release}/README.md            |   0
 .../docs/test-reports/01012022.txt                 |  41 +++
 .../readme.txt => docs/test-reports/README.md}     |   0
 .../checkpoints/feeder.cpt.data-00000-of-00001     | Bin 86915721 -> 0 bytes
 .../experiments/checkpoints/feeder.cpt.index       | Bin 537 -> 0 bytes
 .../experiments/checkpoints/feeder_meta.pkl        | Bin 740 -> 0 bytes
 .../models/reza-01062022/readme.txt.txt            |   1 -
 Model/predictor-dl-model/setup.py                  |   2 +-
 .../{ => tests}/datagen/README.md                  |   0
 .../{ => tests}/datagen/algorithm.txt              |   0
 .../{ => tests}/datagen/generate_traffic.py        |   0
 .../{ => tests}/datagen/generate_user_file.py      |   0
 .../tests/datagen/region_mapping.md                | 374 +++++++++++++++++++++
 .../{ => tests}/datagen/slice_factdata.py          |   0
 .../{ => tests}/datagen/users.txt                  |   0
 .../7day_variance_uckey_weight_in_slotid.py        |  32 +-
 .../predictor-dl-model/tests/experiments/README.md |   3 +
 .../{ => tests}/experiments/aggregate_impr_data.py |   3 -
 .../{trainer => experiments/models}/README.md      |   0
 .../models/model-01062022}/20211222/saved_model.pb | Bin
 .../variables/variables.data-00000-of-00001        | Bin
 .../20211222/variables/variables.index             | Bin
 .../models/model-01062022/story.txt.txt            |   2 +
 .../tests/{trainer => scripts}/README.md           |   0
 .../{ => tests}/scripts/check_stable_si.py         |   0
 .../{ => tests}/scripts/import_factdata_files.py   |   0
 .../{ => tests}/scripts/import_factdata_files_1.py |   0
 .../{ => tests}/scripts/trainready_data_cmp.py     |   0
 .../{ => tests}/troubleshooting/check_model.py     |  17 +-
 .../{ => tests}/troubleshooting/check_pipeline.py  |   0
 .../{ => tests}/troubleshooting/client_rest_dl2.py |   0
 .../tests/troubleshooting/client_rest_dl2.pyc      | Bin 0 -> 20567 bytes
 .../{ => tests}/troubleshooting/get_model_diff.py  |   0
 .../troubleshooting/get_model_output.py            |   0
 .../{trainer => unit-tests/pipeline}/README.md     |   0
 .../tests/{ => unit-tests}/pipeline/config.yml     |   0
 .../{ => unit-tests}/pipeline/data/__init__.py     |   0
 .../tests/unit-tests/pipeline/data/__init__.pyc    | Bin 0 -> 170 bytes
 .../{ => unit-tests}/pipeline/data/test_set.py     |   0
 .../tests/unit-tests/pipeline/data/test_set.pyc    | Bin 0 -> 5338 bytes
 .../tests/{ => unit-tests}/pipeline/test_base.py   |   0
 .../{ => unit-tests}/pipeline/test_base_hive_db.py |   0
 .../pipeline/test_factdata_health.py               |   0
 .../{ => unit-tests}/pipeline/test_main_cluster.py |   0
 .../pipeline/test_main_filter_si_region_bucket.py  |   0
 .../{ => unit-tests}/pipeline/test_main_norm.py    |   0
 .../{ => unit-tests}/pipeline/test_main_ts.py      |   0
 .../tests/{ => unit-tests}/pipeline/test_util.py   |   0
 .../tests/{ => unit-tests}/trainer/README.md       |   0
 .../trainer/api_test/client_rest_api_test.py       |   0
 .../trainer/sample_model/00000123/saved_model.pb   | Bin
 .../variables/variables.data-00000-of-00001        | Bin
 .../00000123/variables/variables.index             | Bin
 .../trainer/sample_model/readme.txt                |   0
 Processes/dlpredictor/VERSION.md                   |   8 +-
 .../dlpredictor/docs}/README.md                    |   0
 .../dlpredictor/docs/design}/README.md             |   0
 .../dlpredictor/docs/release}/README.md            |   0
 .../dlpredictor/docs/test-reports/01012022.txt     |   7 +
 .../dlpredictor/docs/test-reports}/README.md       |   0
 Processes/dlpredictor/setup.py                     |   2 +-
 Processes/dlpredictor/test.sh                      |  16 -
 .../config.yml                                     |  54 +++
 .../main_spark_data_process.py                     |  65 +++-
 .../main_spark_predict_es.py                       | 122 ++++---
 .../sparkesutil.py                                 |   0
 .../experiments/elasticsearch-hadoop-6.8.0.jar     | Bin
 .../experiments/scripts/test_spark_es_big_write.py |   0
 .../experiments/scripts/test_spark_es_write.py     |   0
 .../scripts}/dense_sparse_count.py                 |   0
 .../scripts}/dense_sparse_uckeys.py                |   0
 .../scripts}/fact_data_count.py                    |   0
 .../scripts}/filter_si_tmp_area.py                 |   0
 .../scripts}/imp_of_traffic.py                     |   0
 .../si_traffic_prediction_check_2.py               |   4 +-
 .../dlpredictor/tests/unit-tests}/README.md        |   0
 Processes/dlpredictor/util.sh                      |  20 --
 80 files changed, 656 insertions(+), 133 deletions(-)

diff --git a/Model/predictor-dl-model/VERSION.md b/Model/predictor-dl-model/VERSION.md
index 5cc2f09..18590e5 100644
--- a/Model/predictor-dl-model/VERSION.md
+++ b/Model/predictor-dl-model/VERSION.md
@@ -1,4 +1,4 @@
-### 1.5
+### 0.1.1
 1. Project structure has been changed slightly. The tests and datagen folders are moved outside of the main package.
 2. Factdata Health-Check test is added to the tests.
 3. A step to sanitize Factdata is added to the pipeline. This step is to:
@@ -6,12 +6,10 @@
     b. Remove region
     c. Remap ip based on ip-mapping table
     d. Recalculate bucket-id
+4. Add TAG to config file. The whole set of tmp tables is named by product_tag and pipeline_tag; the user no longer needs to review the names of those tables.
+5. Remove residency from UCKey. The value of residency is replaced by an empty string. The number of commas stays the same.
+6. Remove region mapping for IPL.
+7. Remove normalization of residency and IPL in main_norm.
 
-### 1.6
-1. Add region and IPL features
-2. Add TAG to config file. The whole set of tmp tables are named by product_tag and pipeline_tag. The user does not need to review the name of those tables anymore. 
-
-### 1.7
-1. Remove residency from UCKey. The value of residency is repleace by an empty string. The number of commas are still the same.
-2. Remove region mapping for IPL.
-3. Remove normalization of residency and IPL in main_norm.
\ No newline at end of file
+### 0.1.2
+1. Add docs folder
\ No newline at end of file
diff --git a/Model/predictor-dl-model/tests/trainer/README.md b/Model/predictor-dl-model/docs/README.md
similarity index 100%
copy from Model/predictor-dl-model/tests/trainer/README.md
copy to Model/predictor-dl-model/docs/README.md
diff --git a/Model/predictor-dl-model/tests/pipeline/README.md b/Model/predictor-dl-model/docs/design/README.md
similarity index 100%
rename from Model/predictor-dl-model/tests/pipeline/README.md
rename to Model/predictor-dl-model/docs/design/README.md
diff --git a/Model/predictor-dl-model/scripts/README.md b/Model/predictor-dl-model/docs/release/README.md
similarity index 100%
rename from Model/predictor-dl-model/scripts/README.md
rename to Model/predictor-dl-model/docs/release/README.md
diff --git a/Model/predictor-dl-model/docs/test-reports/01012022.txt b/Model/predictor-dl-model/docs/test-reports/01012022.txt
new file mode 100644
index 0000000..695ce16
--- /dev/null
+++ b/Model/predictor-dl-model/docs/test-reports/01012022.txt
@@ -0,0 +1,41 @@
+The following are some data from the pipeline's check-points.
+
+Area_map:
+•	Record number: 2,025,819,694
+•	Number of Bucket_id:  10
+•	Total number of days: 91 days (2021-06-28 is missing)
+•	First day: 2021-05-01
+•	Last day: 2021-07-31
+
+Note: only 82 days are used for the trainer, see Trainready.
+
+Ts:
+•	Number of Uckeys: 1,289,460
+•	Length of ts is 82 days, from May 1st to July 21st 
+•	No buckets 
+
+Pre_cluster:
+•	Number of Uckeys: 559,442
+•	Sparse uckeys: 406,957
+•	Dense uckeys: 152,485
+•	Length of ts is 82 days, from May 1st to July 21st
+
+Cluster:
+•	Number of Uckeys: 154,780
+
+Trainready:
+•	Number of Uckeys: 154,780
+•	Length of ts is 82 days, from May 1st to July 21st
+•	No buckets
+•	Total traffic: 15,864,359,742
+
+Trainer:
+The SMAPE error rate of the model is 11%.
+
+Predictor:
+The evaluation script (in the dlpredictor process) shows an aggregated error rate of 5% for prediction.
+This error rate is a traffic-weighted average over all valid slots:
+
+traffic(si=A)xprediction-error(si=A) + traffic(si=B)xprediction-error(si=B) + traffic(si=C)xprediction-error(si=C)
+--------------------------------------------------------------------------------------------------------------------
+                                    traffic(si=A) + traffic(si=B) + traffic(si=C)
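
For reference, this weighted average can be computed directly. A minimal Python sketch (the traffic volumes and per-slot error rates below are illustrative placeholders, not measured values):

def traffic_weighted_error(traffic, error):
    # traffic and error are dicts keyed by slot id (si); error values are fractions.
    total_traffic = sum(traffic.values())
    return sum(traffic[si] * error[si] for si in traffic) / total_traffic

traffic = {'A': 1000000, 'B': 250000, 'C': 50000}   # illustrative numbers only
error = {'A': 0.04, 'B': 0.08, 'C': 0.12}
print(traffic_weighted_error(traffic, error))       # ~0.051, i.e. about 5%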
diff --git a/Model/predictor-dl-model/tests/trainer/sample_model/readme.txt b/Model/predictor-dl-model/docs/test-reports/README.md
similarity index 100%
copy from Model/predictor-dl-model/tests/trainer/sample_model/readme.txt
copy to Model/predictor-dl-model/docs/test-reports/README.md
diff --git a/Model/predictor-dl-model/experiments/checkpoints/feeder.cpt.data-00000-of-00001 b/Model/predictor-dl-model/experiments/checkpoints/feeder.cpt.data-00000-of-00001
deleted file mode 100644
index 47a73e9..0000000
Binary files a/Model/predictor-dl-model/experiments/checkpoints/feeder.cpt.data-00000-of-00001 and /dev/null differ
diff --git a/Model/predictor-dl-model/experiments/checkpoints/feeder.cpt.index b/Model/predictor-dl-model/experiments/checkpoints/feeder.cpt.index
deleted file mode 100644
index 3388893..0000000
Binary files a/Model/predictor-dl-model/experiments/checkpoints/feeder.cpt.index and /dev/null differ
diff --git a/Model/predictor-dl-model/experiments/checkpoints/feeder_meta.pkl b/Model/predictor-dl-model/experiments/checkpoints/feeder_meta.pkl
deleted file mode 100644
index a9fc062..0000000
Binary files a/Model/predictor-dl-model/experiments/checkpoints/feeder_meta.pkl and /dev/null differ
diff --git a/Model/predictor-dl-model/experiments/models/reza-01062022/readme.txt.txt b/Model/predictor-dl-model/experiments/models/reza-01062022/readme.txt.txt
deleted file mode 100644
index 39e244e..0000000
--- a/Model/predictor-dl-model/experiments/models/reza-01062022/readme.txt.txt
+++ /dev/null
@@ -1 +0,0 @@
-This a model I created with your tfrecrods.
\ No newline at end of file
diff --git a/Model/predictor-dl-model/setup.py b/Model/predictor-dl-model/setup.py
index 41d3664..b33f6fa 100644
--- a/Model/predictor-dl-model/setup.py
+++ b/Model/predictor-dl-model/setup.py
@@ -21,7 +21,7 @@ with open("README.md", "r") as fh:
 
 setuptools.setup(
     name="predictor_dl_model", # This is the package name.
-    version="1.6.0",
+    version="0.1.2",
     author="Reza Adibnia",
     author_email="radibnia@futurewei.com",
     description="All the packages required for running predictor pipeline",
diff --git a/Model/predictor-dl-model/datagen/README.md b/Model/predictor-dl-model/tests/datagen/README.md
similarity index 100%
rename from Model/predictor-dl-model/datagen/README.md
rename to Model/predictor-dl-model/tests/datagen/README.md
diff --git a/Model/predictor-dl-model/datagen/algorithm.txt b/Model/predictor-dl-model/tests/datagen/algorithm.txt
similarity index 100%
rename from Model/predictor-dl-model/datagen/algorithm.txt
rename to Model/predictor-dl-model/tests/datagen/algorithm.txt
diff --git a/Model/predictor-dl-model/datagen/generate_traffic.py b/Model/predictor-dl-model/tests/datagen/generate_traffic.py
similarity index 100%
rename from Model/predictor-dl-model/datagen/generate_traffic.py
rename to Model/predictor-dl-model/tests/datagen/generate_traffic.py
diff --git a/Model/predictor-dl-model/datagen/generate_user_file.py b/Model/predictor-dl-model/tests/datagen/generate_user_file.py
similarity index 100%
rename from Model/predictor-dl-model/datagen/generate_user_file.py
rename to Model/predictor-dl-model/tests/datagen/generate_user_file.py
diff --git a/Model/predictor-dl-model/tests/datagen/region_mapping.md b/Model/predictor-dl-model/tests/datagen/region_mapping.md
new file mode 100644
index 0000000..f05442a
--- /dev/null
+++ b/Model/predictor-dl-model/tests/datagen/region_mapping.md
@@ -0,0 +1,374 @@
+
+### Region Mapping
+
+This is a mapping of regions onto fewer, more populated regions.
+region-id --> new region-id
+
+1,1
+2,2
+3,3
+4,4
+5,5
+6,6
+7,7
+8,8
+9,9
+10,10
+11,11
+12,12
+13,13
+14,14
+15,15
+16,16
+17,17
+18,18
+19,19
+20,20
+21,21
+22,22
+23,23
+24,24
+25,25
+26,26
+27,27
+28,28
+29,29
+30,30
+31,31
+32,32
+33,33
+34,34
+35,35
+36,36
+37,37
+38,38
+39,39
+40,40
+41,41
+42,42
+43,43
+44,44
+45,45
+46,46
+47,47
+48,48
+49,49
+50,50
+51,51
+52,52
+53,53
+54,54
+55,55
+56,56
+57,57
+58,58
+59,59
+60,60
+61,61
+62,62
+63,63
+64,64
+65,65
+66,66
+67,67
+68,68
+69,69
+70,69
+71,69
+72,69
+73,69
+74,69
+75,69
+76,69
+77,69
+78,70
+79,70
+80,70
+81,70
+82,70
+83,70
+84,70
+85,70
+86,70
+87,70
+88,70
+89,70
+90,70
+91,70
+92,70
+93,70
+94,70
+95,70
+96,70
+97,70
+98,70
+99,70
+100,70
+101,71
+102,71
+103,71
+104,71
+105,71
+106,71
+107,71
+108,71
+109,71
+110,71
+111,71
+112,71
+113,71
+114,71
+115,71
+116,71
+117,71
+118,71
+119,71
+120,71
+121,71
+122,71
+123,71
+124,72
+125,72
+126,72
+127,72
+128,72
+129,72
+130,72
+131,72
+132,72
+133,72
+134,72
+135,72
+136,72
+137,72
+138,72
+139,72
+140,72
+141,72
+142,72
+143,72
+144,72
+145,72
+146,72
+147,72
+148,72
+149,72
+150,72
+151,73
+152,73
+153,73
+154,73
+155,74
+156,74
+157,74
+158,74
+159,74
+160,74
+161,74
+162,74
+163,74
+164,74
+165,74
+166,74
+167,74
+168,74
+169,74
+170,74
+171,74
+172,74
+173,74
+174,74
+175,74
+176,74
+177,74
+178,75
+179,75
+180,75
+181,75
+182,75
+183,75
+184,75
+185,76
+186,76
+187,76
+188,76
+189,76
+190,76
+191,76
+192,76
+193,76
+194,76
+195,76
+196,76
+197,76
+198,76
+199,76
+200,76
+201,76
+202,76
+203,76
+204,76
+205,76
+206,76
+207,76
+208,76
+209,76
+210,76
+211,76
+212,76
+213,76
+214,77
+215,77
+216,77
+217,77
+218,77
+219,77
+220,77
+221,77
+222,78
+223,78
+224,78
+225,78
+226,78
+227,78
+228,78
+229,78
+230,78
+231,78
+232,78
+233,78
+234,78
+235,78
+236,78
+237,78
+238,78
+239,78
+240,78
+241,78
+242,79
+243,79
+244,79
+245,80
+246,80
+247,80
+248,80
+249,80
+250,80
+251,80
+252,80
+253,80
+254,80
+255,80
+256,80
+257,80
+258,80
+259,80
+260,80
+261,80
+262,80
+263,80
+264,80
+265,80
+266,80
+267,80
+268,80
+269,80
+270,80
+271,80
+272,80
+273,80
+274,80
+275,80
+276,80
+277,80
+278,80
+279,80
+280,80
+281,80
+282,80
+283,80
+284,80
+285,80
+286,80
+287,80
+288,80
+289,80
+290,80
+291,80
+292,80
+293,80
+294,80
+295,81
+296,81
+297,81
+298,81
+299,81
+300,81
+301,81
+302,81
+303,81
+304,81
+305,81
+306,81
+307,81
+308,81
+309,81
+310,81
+311,81
+312,81
+313,81
+314,81
+315,81
+316,81
+317,81
+318,81
+319,81
+320,81
+321,81
+322,81
+323,81
+324,81
+325,81
+326,81
+327,81
+328,81
+329,81
+330,81
+331,81
+332,81
+333,81
+334,81
+335,81
+336,81
+337,81
+338,81
+339,81
+340,82
+341,82
+342,82
+343,82
+344,82
+345,82
+346,82
+347,82
+348,82
+349,82
+350,82
+351,82
+352,82
+353,82
+354,82
+355,82
+356,82
+357,82
+358,82
+359,82
+360,82
+361,82
+362,82
+363,82
+364,82
+365,82
+366,82
+367,82
+368,82
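
The mapping above is many-to-one: ids 1-69 keep their value, while the sparser regions 70-368 are folded into ids 69-82. A minimal sketch of loading and applying such a mapping (the loader and file path are illustrative assumptions):

def load_region_map(path):
    # Each data line is 'old-id,new-id'; header and blank lines are skipped.
    mapping = {}
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line or not line[0].isdigit():
                continue
            old_id, new_id = line.split(',')
            mapping[int(old_id)] = int(new_id)
    return mapping

region_map = load_region_map('region_mapping.md')
print(region_map[70])   # 69: region 70 is folded into region 69
print(region_map[368])  # 82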
diff --git a/Model/predictor-dl-model/datagen/slice_factdata.py b/Model/predictor-dl-model/tests/datagen/slice_factdata.py
similarity index 100%
rename from Model/predictor-dl-model/datagen/slice_factdata.py
rename to Model/predictor-dl-model/tests/datagen/slice_factdata.py
diff --git a/Model/predictor-dl-model/datagen/users.txt b/Model/predictor-dl-model/tests/datagen/users.txt
similarity index 100%
rename from Model/predictor-dl-model/datagen/users.txt
rename to Model/predictor-dl-model/tests/datagen/users.txt
diff --git a/Model/predictor-dl-model/experiments/7day_variance_uckey_weight_in_slotid.py b/Model/predictor-dl-model/tests/experiments/7day_variance_uckey_weight_in_slotid.py
similarity index 57%
rename from Model/predictor-dl-model/experiments/7day_variance_uckey_weight_in_slotid.py
rename to Model/predictor-dl-model/tests/experiments/7day_variance_uckey_weight_in_slotid.py
index d5d9731..6667684 100644
--- a/Model/predictor-dl-model/experiments/7day_variance_uckey_weight_in_slotid.py
+++ b/Model/predictor-dl-model/tests/experiments/7day_variance_uckey_weight_in_slotid.py
@@ -1,10 +1,11 @@
-from pyspark import SparkContext, SparkConf,SQLContext
+from pyspark import SparkContext, SparkConf, SQLContext
 from pyspark.sql.functions import count, lit, col, udf, expr, collect_list, explode
-from pyspark.sql.types import IntegerType, StringType, MapType, ArrayType, BooleanType,FloatType
+from pyspark.sql.types import IntegerType, StringType, MapType, ArrayType, BooleanType, FloatType
 from pyspark.sql import HiveContext
 from datetime import datetime, timedelta
 from pyspark.sql.functions import broadcast
 
+
 def _list_to_map(count_array):
     count_map = {}
     for item in count_array:
@@ -20,15 +21,15 @@ def add_count_map(df):
     df = df.withColumn('count_map', list_to_map_udf(df.count_array))
     return df
 
+
 def variance(plist):
-    l=len(plist)
-    ex=sum(plist)/l
-    ex2=sum([i*i for i in plist])/l
+    l = len(plist)
+    ex = sum(plist)/l
+    ex2 = sum([i*i for i in plist])/l
     return ex2-ex*ex
 
 
-
-query="select count_array,day,uckey from factdata where day in ('2020-05-15','2020-05-14','2020-05-13','2020-05-12','2020-05-11','2020-05-10','2020-05-09')"
+query = "select count_array,day,uckey from factdata where day in ('2020-05-15','2020-05-14','2020-05-13','2020-05-12','2020-05-11','2020-05-10','2020-05-09')"
 sc = SparkContext()
 hive_context = HiveContext(sc)
 
@@ -43,21 +44,20 @@ df = df.groupBy('uckey', 'day').sum('impr_count').withColumnRenamed("sum(impr_co
 
 split_uckey_udf = udf(lambda x: x.split(","), ArrayType(StringType()))
 df = df.withColumn('col', split_uckey_udf(df.uckey))
-df = df.select('uckey','impr_count', 'day', df.col[1]).withColumnRenamed("col[1]", 'slot_id')
+df = df.select('uckey', 'impr_count', 'day', df.col[1]).withColumnRenamed("col[1]", 'slot_id')
 
 
-df_slot=df.select('slot_id','impr_count', 'day')
-df_slot=df_slot.groupBy('slot_id','day').sum('impr_count').withColumnRenamed("sum(impr_count)", "impr_total")
+df_slot = df.select('slot_id', 'impr_count', 'day')
+df_slot = df_slot.groupBy('slot_id', 'day').sum('impr_count').withColumnRenamed("sum(impr_count)", "impr_total")
 bc_df_slot = broadcast(df_slot)
 
-df_new = df.join(bc_df_slot, on=["slot_id",'day'],how="inner")
+df_new = df.join(bc_df_slot, on=["slot_id", 'day'], how="inner")
 
-df_new = df_new.withColumn('percent', udf(lambda x,y: (x*100)/y, FloatType())(df_new.impr_count,df_new.impr_total))
+df_new = df_new.withColumn('percent', udf(lambda x, y: (x*100)/y, FloatType())(df_new.impr_count, df_new.impr_total))
 
 
-df2=df_new.groupBy("uckey").agg(collect_list('percent').alias('percent'))
+df2 = df_new.groupBy("uckey").agg(collect_list('percent').alias('percent'))
 df2 = df2.withColumn('var', udf(lambda x: variance(x), FloatType())(df2.percent))
-df2.select("uckey","var").orderBy(["var"],ascending=False).show(300,truncate=False)
+df2.select("uckey", "var").orderBy(["var"], ascending=False).show(300, truncate=False)
 df2.cache()
-print("% uckeys having varience > 0.01  ",df2.filter((df2.var <= 0.01)).count()*100/df2.count())
-
+print("% uckeys having varience > 0.01  ", df2.filter((df2.var <= 0.01)).count()*100/df2.count())
diff --git a/Model/predictor-dl-model/tests/experiments/README.md b/Model/predictor-dl-model/tests/experiments/README.md
new file mode 100644
index 0000000..8753993
--- /dev/null
+++ b/Model/predictor-dl-model/tests/experiments/README.md
@@ -0,0 +1,3 @@
+This folder contains experiments.
+Every individual experiment has to be in a separate folder.
+Every experiment shall have a story file.
\ No newline at end of file
diff --git a/Model/predictor-dl-model/experiments/aggregate_impr_data.py b/Model/predictor-dl-model/tests/experiments/aggregate_impr_data.py
similarity index 97%
rename from Model/predictor-dl-model/experiments/aggregate_impr_data.py
rename to Model/predictor-dl-model/tests/experiments/aggregate_impr_data.py
index 19b284b..78d4018 100644
--- a/Model/predictor-dl-model/experiments/aggregate_impr_data.py
+++ b/Model/predictor-dl-model/tests/experiments/aggregate_impr_data.py
@@ -1,6 +1,3 @@
-import yaml
-import argparse
-from datetime import datetime
 from pyspark import SparkContext
 from pyspark.sql.functions import count, lit, col, udf, collect_list, explode, sqrt, mean
 from pyspark.sql.types import IntegerType, StringType, MapType, ArrayType, BooleanType, FloatType
diff --git a/Model/predictor-dl-model/tests/trainer/README.md b/Model/predictor-dl-model/tests/experiments/models/README.md
similarity index 100%
copy from Model/predictor-dl-model/tests/trainer/README.md
copy to Model/predictor-dl-model/tests/experiments/models/README.md
diff --git a/Model/predictor-dl-model/experiments/models/reza-01062022/20211222/saved_model.pb b/Model/predictor-dl-model/tests/experiments/models/model-01062022/20211222/saved_model.pb
similarity index 100%
rename from Model/predictor-dl-model/experiments/models/reza-01062022/20211222/saved_model.pb
rename to Model/predictor-dl-model/tests/experiments/models/model-01062022/20211222/saved_model.pb
diff --git a/Model/predictor-dl-model/experiments/models/reza-01062022/20211222/variables/variables.data-00000-of-00001 b/Model/predictor-dl-model/tests/experiments/models/model-01062022/20211222/variables/variables.data-00000-of-00001
similarity index 100%
rename from Model/predictor-dl-model/experiments/models/reza-01062022/20211222/variables/variables.data-00000-of-00001
rename to Model/predictor-dl-model/tests/experiments/models/model-01062022/20211222/variables/variables.data-00000-of-00001
diff --git a/Model/predictor-dl-model/experiments/models/reza-01062022/20211222/variables/variables.index b/Model/predictor-dl-model/tests/experiments/models/model-01062022/20211222/variables/variables.index
similarity index 100%
rename from Model/predictor-dl-model/experiments/models/reza-01062022/20211222/variables/variables.index
rename to Model/predictor-dl-model/tests/experiments/models/model-01062022/20211222/variables/variables.index
diff --git a/Model/predictor-dl-model/tests/experiments/models/model-01062022/story.txt.txt b/Model/predictor-dl-model/tests/experiments/models/model-01062022/story.txt.txt
new file mode 100644
index 0000000..b2638f5
--- /dev/null
+++ b/Model/predictor-dl-model/tests/experiments/models/model-01062022/story.txt.txt
@@ -0,0 +1,2 @@
+This is a model I created with your 3rd-party tfrecords.
+The model performance was acceptable.
\ No newline at end of file
diff --git a/Model/predictor-dl-model/tests/trainer/README.md b/Model/predictor-dl-model/tests/scripts/README.md
similarity index 100%
copy from Model/predictor-dl-model/tests/trainer/README.md
copy to Model/predictor-dl-model/tests/scripts/README.md
diff --git a/Model/predictor-dl-model/scripts/check_stable_si.py b/Model/predictor-dl-model/tests/scripts/check_stable_si.py
similarity index 100%
rename from Model/predictor-dl-model/scripts/check_stable_si.py
rename to Model/predictor-dl-model/tests/scripts/check_stable_si.py
diff --git a/Model/predictor-dl-model/scripts/import_factdata_files.py b/Model/predictor-dl-model/tests/scripts/import_factdata_files.py
similarity index 100%
rename from Model/predictor-dl-model/scripts/import_factdata_files.py
rename to Model/predictor-dl-model/tests/scripts/import_factdata_files.py
diff --git a/Model/predictor-dl-model/scripts/import_factdata_files_1.py b/Model/predictor-dl-model/tests/scripts/import_factdata_files_1.py
similarity index 100%
rename from Model/predictor-dl-model/scripts/import_factdata_files_1.py
rename to Model/predictor-dl-model/tests/scripts/import_factdata_files_1.py
diff --git a/Model/predictor-dl-model/scripts/trainready_data_cmp.py b/Model/predictor-dl-model/tests/scripts/trainready_data_cmp.py
similarity index 100%
rename from Model/predictor-dl-model/scripts/trainready_data_cmp.py
rename to Model/predictor-dl-model/tests/scripts/trainready_data_cmp.py
diff --git a/Model/predictor-dl-model/troubleshooting/check_model.py b/Model/predictor-dl-model/tests/troubleshooting/check_model.py
similarity index 98%
rename from Model/predictor-dl-model/troubleshooting/check_model.py
rename to Model/predictor-dl-model/tests/troubleshooting/check_model.py
index 387e458..81144d7 100644
--- a/Model/predictor-dl-model/troubleshooting/check_model.py
+++ b/Model/predictor-dl-model/tests/troubleshooting/check_model.py
@@ -212,7 +212,7 @@ def run(cfg, hive_context):
     local = False
     if not local:
         df_trainready = hive_context.sql(
-            'SELECT * FROM {}'.format(cfg['trainready_table']))
+            'SELECT * FROM {} WHERE uckey="native,b6le0s4qo8,4G,g_f,5,CPC,,1156320000" and price_cat="1" '.format(cfg['trainready_table']))
         df_dist = hive_context.sql(
             'SELECT * FROM {} WHERE ratio=1'.format(cfg['dist_table']))
         df = df_trainready.join(
@@ -261,24 +261,23 @@ def run(cfg, hive_context):
                 predicted[i] = 0
         print(zip(expected, predicted))
         e = error_m(expected, predicted)[0]
-        print(e)
+        print(e*100)
         if e < 0:
             e = 0
         errs.append(e)
 
-    print(sum(errs)/(len(errs)*1.0))
+    print(sum(errs)/(len(errs)*1.0)*100)
 
 
 if __name__ == '__main__':
 
     cfg = {
         'log_level': 'warn',
-        'trainready_table': 'dlpm_110221_no_residency_no_mapping_trainready',
-        'dist_table': 'dlpm_110221_no_residency_no_mapping_tmp_distribution',
-        'serving_url': 'http://10.193.217.105:8506/v1/models/dl_no_mapped_ipl:predict',
-        'sample_ratio': 1,
-        'max_calls': 1000,
-        'model_stat_table': 'dlpm_110221_no_residency_no_mapping_model_stat',
+        'trainready_table': 'dlpm_111021_no_residency_no_mapping_trainready_test_12212021',
+        'dist_table': 'dlpm_111021_no_residency_no_mapping_tmp_distribution_test_12212021',
+        'serving_url': 'http://10.193.217.126:8503/v1/models/dl_test_1221:predict',
+        'max_calls': 4,
+        'model_stat_table': 'dlpm_111021_no_residency_no_mapping_model_stat_test_12212021',
         'yesterday': 'WILL BE SET IN PROGRAM'}
 
     sc = SparkContext.getOrCreate()
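
error_m above returns the model error as a fraction, hence the *100 when printing percentages. The test reports quote SMAPE; the following is a standalone sketch of a SMAPE-style score, an illustrative stand-in rather than the project's actual error_m:

def smape(expected, predicted):
    # Symmetric mean absolute percentage error: 0 is perfect, 2 (i.e. 200%) is worst.
    total = 0.0
    for e, p in zip(expected, predicted):
        denom = abs(e) + abs(p)
        if denom > 0:  # skip terms where both values are zero
            total += 2.0 * abs(e - p) / denom
    return total / len(expected)

print(smape([100, 200, 300], [90, 210, 330]) * 100)  # ~8.3 percent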
diff --git a/Model/predictor-dl-model/troubleshooting/check_pipeline.py b/Model/predictor-dl-model/tests/troubleshooting/check_pipeline.py
similarity index 100%
rename from Model/predictor-dl-model/troubleshooting/check_pipeline.py
rename to Model/predictor-dl-model/tests/troubleshooting/check_pipeline.py
diff --git a/Model/predictor-dl-model/troubleshooting/client_rest_dl2.py b/Model/predictor-dl-model/tests/troubleshooting/client_rest_dl2.py
similarity index 100%
rename from Model/predictor-dl-model/troubleshooting/client_rest_dl2.py
rename to Model/predictor-dl-model/tests/troubleshooting/client_rest_dl2.py
diff --git a/Model/predictor-dl-model/tests/troubleshooting/client_rest_dl2.pyc b/Model/predictor-dl-model/tests/troubleshooting/client_rest_dl2.pyc
new file mode 100644
index 0000000..20cb92f
Binary files /dev/null and b/Model/predictor-dl-model/tests/troubleshooting/client_rest_dl2.pyc differ
diff --git a/Model/predictor-dl-model/troubleshooting/get_model_diff.py b/Model/predictor-dl-model/tests/troubleshooting/get_model_diff.py
similarity index 100%
rename from Model/predictor-dl-model/troubleshooting/get_model_diff.py
rename to Model/predictor-dl-model/tests/troubleshooting/get_model_diff.py
diff --git a/Model/predictor-dl-model/troubleshooting/get_model_output.py b/Model/predictor-dl-model/tests/troubleshooting/get_model_output.py
similarity index 100%
rename from Model/predictor-dl-model/troubleshooting/get_model_output.py
rename to Model/predictor-dl-model/tests/troubleshooting/get_model_output.py
diff --git a/Model/predictor-dl-model/tests/trainer/README.md b/Model/predictor-dl-model/tests/unit-tests/pipeline/README.md
similarity index 100%
copy from Model/predictor-dl-model/tests/trainer/README.md
copy to Model/predictor-dl-model/tests/unit-tests/pipeline/README.md
diff --git a/Model/predictor-dl-model/tests/pipeline/config.yml b/Model/predictor-dl-model/tests/unit-tests/pipeline/config.yml
similarity index 100%
rename from Model/predictor-dl-model/tests/pipeline/config.yml
rename to Model/predictor-dl-model/tests/unit-tests/pipeline/config.yml
diff --git a/Model/predictor-dl-model/tests/pipeline/data/__init__.py b/Model/predictor-dl-model/tests/unit-tests/pipeline/data/__init__.py
similarity index 100%
rename from Model/predictor-dl-model/tests/pipeline/data/__init__.py
rename to Model/predictor-dl-model/tests/unit-tests/pipeline/data/__init__.py
diff --git a/Model/predictor-dl-model/tests/unit-tests/pipeline/data/__init__.pyc b/Model/predictor-dl-model/tests/unit-tests/pipeline/data/__init__.pyc
new file mode 100644
index 0000000..1bf0c86
Binary files /dev/null and b/Model/predictor-dl-model/tests/unit-tests/pipeline/data/__init__.pyc differ
diff --git a/Model/predictor-dl-model/tests/pipeline/data/test_set.py b/Model/predictor-dl-model/tests/unit-tests/pipeline/data/test_set.py
similarity index 100%
rename from Model/predictor-dl-model/tests/pipeline/data/test_set.py
rename to Model/predictor-dl-model/tests/unit-tests/pipeline/data/test_set.py
diff --git a/Model/predictor-dl-model/tests/unit-tests/pipeline/data/test_set.pyc b/Model/predictor-dl-model/tests/unit-tests/pipeline/data/test_set.pyc
new file mode 100644
index 0000000..f666e77
Binary files /dev/null and b/Model/predictor-dl-model/tests/unit-tests/pipeline/data/test_set.pyc differ
diff --git a/Model/predictor-dl-model/tests/pipeline/test_base.py b/Model/predictor-dl-model/tests/unit-tests/pipeline/test_base.py
similarity index 100%
rename from Model/predictor-dl-model/tests/pipeline/test_base.py
rename to Model/predictor-dl-model/tests/unit-tests/pipeline/test_base.py
diff --git a/Model/predictor-dl-model/tests/pipeline/test_base_hive_db.py b/Model/predictor-dl-model/tests/unit-tests/pipeline/test_base_hive_db.py
similarity index 100%
rename from Model/predictor-dl-model/tests/pipeline/test_base_hive_db.py
rename to Model/predictor-dl-model/tests/unit-tests/pipeline/test_base_hive_db.py
diff --git a/Model/predictor-dl-model/tests/pipeline/test_factdata_health.py b/Model/predictor-dl-model/tests/unit-tests/pipeline/test_factdata_health.py
similarity index 100%
rename from Model/predictor-dl-model/tests/pipeline/test_factdata_health.py
rename to Model/predictor-dl-model/tests/unit-tests/pipeline/test_factdata_health.py
diff --git a/Model/predictor-dl-model/tests/pipeline/test_main_cluster.py b/Model/predictor-dl-model/tests/unit-tests/pipeline/test_main_cluster.py
similarity index 100%
rename from Model/predictor-dl-model/tests/pipeline/test_main_cluster.py
rename to Model/predictor-dl-model/tests/unit-tests/pipeline/test_main_cluster.py
diff --git a/Model/predictor-dl-model/tests/pipeline/test_main_filter_si_region_bucket.py b/Model/predictor-dl-model/tests/unit-tests/pipeline/test_main_filter_si_region_bucket.py
similarity index 100%
rename from Model/predictor-dl-model/tests/pipeline/test_main_filter_si_region_bucket.py
rename to Model/predictor-dl-model/tests/unit-tests/pipeline/test_main_filter_si_region_bucket.py
diff --git a/Model/predictor-dl-model/tests/pipeline/test_main_norm.py b/Model/predictor-dl-model/tests/unit-tests/pipeline/test_main_norm.py
similarity index 100%
rename from Model/predictor-dl-model/tests/pipeline/test_main_norm.py
rename to Model/predictor-dl-model/tests/unit-tests/pipeline/test_main_norm.py
diff --git a/Model/predictor-dl-model/tests/pipeline/test_main_ts.py b/Model/predictor-dl-model/tests/unit-tests/pipeline/test_main_ts.py
similarity index 100%
rename from Model/predictor-dl-model/tests/pipeline/test_main_ts.py
rename to Model/predictor-dl-model/tests/unit-tests/pipeline/test_main_ts.py
diff --git a/Model/predictor-dl-model/tests/pipeline/test_util.py b/Model/predictor-dl-model/tests/unit-tests/pipeline/test_util.py
similarity index 100%
rename from Model/predictor-dl-model/tests/pipeline/test_util.py
rename to Model/predictor-dl-model/tests/unit-tests/pipeline/test_util.py
diff --git a/Model/predictor-dl-model/tests/trainer/README.md b/Model/predictor-dl-model/tests/unit-tests/trainer/README.md
similarity index 100%
copy from Model/predictor-dl-model/tests/trainer/README.md
copy to Model/predictor-dl-model/tests/unit-tests/trainer/README.md
diff --git a/Model/predictor-dl-model/tests/trainer/api_test/client_rest_api_test.py b/Model/predictor-dl-model/tests/unit-tests/trainer/api_test/client_rest_api_test.py
similarity index 100%
rename from Model/predictor-dl-model/tests/trainer/api_test/client_rest_api_test.py
rename to Model/predictor-dl-model/tests/unit-tests/trainer/api_test/client_rest_api_test.py
diff --git a/Model/predictor-dl-model/tests/trainer/sample_model/00000123/saved_model.pb b/Model/predictor-dl-model/tests/unit-tests/trainer/sample_model/00000123/saved_model.pb
similarity index 100%
rename from Model/predictor-dl-model/tests/trainer/sample_model/00000123/saved_model.pb
rename to Model/predictor-dl-model/tests/unit-tests/trainer/sample_model/00000123/saved_model.pb
diff --git a/Model/predictor-dl-model/tests/trainer/sample_model/00000123/variables/variables.data-00000-of-00001 b/Model/predictor-dl-model/tests/unit-tests/trainer/sample_model/00000123/variables/variables.data-00000-of-00001
similarity index 100%
rename from Model/predictor-dl-model/tests/trainer/sample_model/00000123/variables/variables.data-00000-of-00001
rename to Model/predictor-dl-model/tests/unit-tests/trainer/sample_model/00000123/variables/variables.data-00000-of-00001
diff --git a/Model/predictor-dl-model/tests/trainer/sample_model/00000123/variables/variables.index b/Model/predictor-dl-model/tests/unit-tests/trainer/sample_model/00000123/variables/variables.index
similarity index 100%
rename from Model/predictor-dl-model/tests/trainer/sample_model/00000123/variables/variables.index
rename to Model/predictor-dl-model/tests/unit-tests/trainer/sample_model/00000123/variables/variables.index
diff --git a/Model/predictor-dl-model/tests/trainer/sample_model/readme.txt b/Model/predictor-dl-model/tests/unit-tests/trainer/sample_model/readme.txt
similarity index 100%
rename from Model/predictor-dl-model/tests/trainer/sample_model/readme.txt
rename to Model/predictor-dl-model/tests/unit-tests/trainer/sample_model/readme.txt
diff --git a/Processes/dlpredictor/VERSION.md b/Processes/dlpredictor/VERSION.md
index c746b0a..330cf9b 100644
--- a/Processes/dlpredictor/VERSION.md
+++ b/Processes/dlpredictor/VERSION.md
@@ -1,7 +1,7 @@
-### 1.6
-1. Add residency and IPL features
+### 0.1.1
+1. Remove redistribution of traffic for mapped IPL.
 2. Add tag to config file. The whole set of tmp and final artifacts is named by product_tag and pipeline_tag. The user does not need to review the name of those tables anymore.
 3. Read model statistics from hive instead of elasticsearch.
 
-### 1.7
-1. Remove re-distributing of traffic for mapped IPL.
+### 0.1.2
+1. Add docs folder
\ No newline at end of file
diff --git a/Model/predictor-dl-model/tests/trainer/README.md b/Processes/dlpredictor/docs/README.md
similarity index 100%
copy from Model/predictor-dl-model/tests/trainer/README.md
copy to Processes/dlpredictor/docs/README.md
diff --git a/Model/predictor-dl-model/tests/trainer/README.md b/Processes/dlpredictor/docs/design/README.md
similarity index 100%
copy from Model/predictor-dl-model/tests/trainer/README.md
copy to Processes/dlpredictor/docs/design/README.md
diff --git a/Model/predictor-dl-model/tests/trainer/README.md b/Processes/dlpredictor/docs/release/README.md
similarity index 100%
copy from Model/predictor-dl-model/tests/trainer/README.md
copy to Processes/dlpredictor/docs/release/README.md
diff --git a/Processes/dlpredictor/docs/test-reports/01012022.txt b/Processes/dlpredictor/docs/test-reports/01012022.txt
new file mode 100644
index 0000000..9b65211
--- /dev/null
+++ b/Processes/dlpredictor/docs/test-reports/01012022.txt
@@ -0,0 +1,7 @@
+Predictor:
+The evaluation script (in the dlpredictor process) shows an aggregated error rate of 5% for prediction.
+This error rate is a traffic-weighted average over all valid slots:
+
+traffic(si=A)xprediction-error(si=A) + traffic(si=B)xprediction-error(si=B) + traffic(si=C)xprediction-error(si=C)
+--------------------------------------------------------------------------------------------------------------------
+                                    traffic(si=A) + traffic(si=B) + traffic(si=C)
\ No newline at end of file
diff --git a/Model/predictor-dl-model/tests/trainer/README.md b/Processes/dlpredictor/docs/test-reports/README.md
similarity index 100%
copy from Model/predictor-dl-model/tests/trainer/README.md
copy to Processes/dlpredictor/docs/test-reports/README.md
diff --git a/Processes/dlpredictor/setup.py b/Processes/dlpredictor/setup.py
index cb3f5d1..d7174be 100644
--- a/Processes/dlpredictor/setup.py
+++ b/Processes/dlpredictor/setup.py
@@ -6,7 +6,7 @@ with open("README.md", "r") as fh:
 
 setup(
     name='dlpredictor',
-    version='1.6.0',
+    version='0.1.2',
     author='Reza Adibnia',
     author_email="reza.adibnia@futurewei.com",
     packages=find_packages(),
diff --git a/Processes/dlpredictor/test.sh b/Processes/dlpredictor/test.sh
deleted file mode 100644
index 322637c..0000000
--- a/Processes/dlpredictor/test.sh
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/bin/bash
-
-SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
-
-if true
-then
-    spark-submit --master yarn --num-executors 8 --executor-cores 5 --conf spark.driver.maxResultSize=2048m --executor-memory 16G --driver-memory 16G --py-files $SCRIPTPATH/lib/imscommon-2.0.0-py2.7.egg $SCRIPTPATH/tests/time_series_prediction_check/test_dlpredictor_system_errors_1.py
-fi
-
-if false
-then
-    spark-submit --master yarn --num-executors 8 --executor-cores 5 --conf spark.driver.maxResultSize=2048m --executor-memory 16G --driver-memory 16G --py-files $SCRIPTPATH/lib/imscommon-2.0.0-py2.7.egg $SCRIPTPATH/tests/fact_data_prediction_check/test_dlpredictor_system_errors_2.py
-fi
-
-
-
diff --git a/Processes/dlpredictor/tests/experiments/dl_predictor_performance_issue_01072022/config.yml b/Processes/dlpredictor/tests/experiments/dl_predictor_performance_issue_01072022/config.yml
new file mode 100644
index 0000000..8d5f814
--- /dev/null
+++ b/Processes/dlpredictor/tests/experiments/dl_predictor_performance_issue_01072022/config.yml
@@ -0,0 +1,54 @@
+log_level: 'info'
+product_tag: 'dlpredictor'
+pipeline_tag: '111021_no_residency_no_mapping_india_test_12212021'
+
+#input tables from dlpm pipeline
+area_map_table: 'dlpm_111021_no_residency_no_mapping_tmp_area_map' # this is raw data with filtered si, remapped r and ipl, partitioned by bucket-id
+distribution_table: 'dlpm_111021_no_residency_no_mapping_tmp_distribution_detail_test_12212021'
+norm_table: 'dlpm_111021_no_residency_no_mapping_trainready_test_12212021'
+model_stat_table: 'dlpm_111021_no_residency_no_mapping_model_stat_india'
+bucket_size: 10
+bucket_step: 2
+condition: ''
+
+yesterday: '2021-07-21'
+serving_url: 'http://10.193.217.126:8504/v1/models/dl_india:predict'
+
+config_table: '{product_tag}_{pipeline_tag}_config'
+
+es_host: '10.213.37.41'
+es_port: '9200'
+es_predictions_index: '{product_tag}_{pipeline_tag}_predictions'
+es_predictions_type: 'doc'
+holiday_list: ['2019-11-09', '2019-11-10', '2019-11-11', '2019-11-25', '2019-11-26', '2019-11-27','2019-11-28', '2019-12-24','2019-12-25', '2019-12-26','2019-12-31', '2020-01-01', '2020-01-02', '2020-01-19','2020-01-20', '2020-01-21', '2020-01-22', '2020-01-23',  '2020-01-24',  '2020-01-25', '2020-02-08']
+traffic_dist: [2.905931696,1.792490513,1.592770122,1.447972838,1.657679249,2.716197324,5.117835031,6.5308568,6.570800879,5.302576393,4.423806671,4.43379269,4.858198522,4.338925504,4.219093269,4.224086279,4.613541043,5.412422608,5.60714999,5.327541442,5.167765129,4.828240463,4.009386858,2.900938686]  
+
+dl_predict_ready_path: '{product_tag}_{pipeline_tag}_dl_predict_ready'
+dl_uckey_cluster_path: '{product_tag}_{pipeline_tag}_dl_uckey_cluster'
+
+eligble_slot_ids: [
+      'a47eavw7ex',
+    '66bcd2720e5011e79bc8fa163e05184e',
+    'x0ej5xhk60kjwq',
+    'l03493p0r3',
+    '7b0d7b55ab0c11e68b7900163e3e481d',
+    'b6le0s4qo8',
+    'e351de37263311e6af7500163e291137',
+    'a290af82884e11e5bdec00163e291137',
+    '68bcd2720e5011e79bc8fa163e05184e',
+    'f1iprgyl13',
+    'w9fmyd5r0i',
+    'w3wx3nv9ow5i97',
+    'd971z9825e',
+    'l2d4ec6csv',
+    'z041bf6g4s',
+    '71bcd2720e5011e79bc8fa163e05184e',
+    '5cd1c663263511e6af7500163e291137',
+    'x2fpfbm8rt',
+    'd9jucwkpr3',
+    'k4werqx13k',
+    'j1430itab9wj3b',
+    'a8syykhszz',
+    's4z85pd1h8',
+    '17dd6d8098bf11e5bdec00163e291137',
+    'd4d7362e879511e5bdec00163e291137']
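
Keys such as config_table, es_predictions_index and the dl_*_path entries are templates over product_tag and pipeline_tag; the scripts below call resolve_placeholder(cfg) right after loading the YAML. A minimal sketch of such a resolver (an assumption for illustration; the actual dlpredictor.configutil implementation may differ):

def resolve_placeholder(cfg):
    # Substitute {product_tag} and {pipeline_tag} into every string value, in place.
    tags = {'product_tag': cfg['product_tag'], 'pipeline_tag': cfg['pipeline_tag']}
    for key, value in cfg.items():
        if isinstance(value, str):
            cfg[key] = value.format(**tags)

cfg = {'product_tag': 'dlpredictor',
       'pipeline_tag': '111021_no_residency_no_mapping_india_test_12212021',
       'config_table': '{product_tag}_{pipeline_tag}_config'}
resolve_placeholder(cfg)
print(cfg['config_table'])  # dlpredictor_111021_no_residency_no_mapping_india_test_12212021_config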
diff --git a/Model/predictor-dl-model/experiments/dl_predictor_performance_issue/main_spark_data_process.py b/Processes/dlpredictor/tests/experiments/dl_predictor_performance_issue_01072022/main_spark_data_process.py
similarity index 73%
rename from Model/predictor-dl-model/experiments/dl_predictor_performance_issue/main_spark_data_process.py
rename to Processes/dlpredictor/tests/experiments/dl_predictor_performance_issue_01072022/main_spark_data_process.py
index 15c020c..c622469 100644
--- a/Model/predictor-dl-model/experiments/dl_predictor_performance_issue/main_spark_data_process.py
+++ b/Processes/dlpredictor/tests/experiments/dl_predictor_performance_issue_01072022/main_spark_data_process.py
@@ -21,15 +21,56 @@ import yaml
 import argparse
 
 from pyspark import SparkContext, SparkConf, Row
-from pyspark.sql.functions import concat_ws, count, lit, col, udf, expr, collect_list, create_map, sum as sum_agg, \
-    struct, explode
+from pyspark.sql.functions import concat_ws, count, lit, col, udf, expr, collect_list, create_map, sum as sum_agg, struct, explode
 from pyspark.sql.types import IntegerType, StringType, ArrayType, MapType, FloatType, BooleanType
 from pyspark.sql import HiveContext
-from forecaster import Forecaster
-from sparkesutil import *
 from datetime import datetime, timedelta
 import pickle
 
+from dlpredictor import transform
+from dlpredictor.configutil import *
+from dlpredictor.log import *
+from dlpredictor.prediction.forecaster import Forecaster
+from dlpredictor.util.sparkesutil import *
+
+
+'''
+spark-submit --master yarn --num-executors 10 --executor-cores 5 --executor-memory 32G --driver-memory 32G --py-files dist/dlpredictor-1.6.0-py2.7.egg,lib/imscommon-2.0.0-py2.7.egg,lib/predictor_dl_model-1.6.0-py2.7.egg --jars lib/elasticsearch-hadoop-6.8.0.jar experiments/dl_predictor_performance_issue/main_spark_data_process.py experiments/dl_predictor_performance_issue/config.yml '2021-07-21'
+'''
+
+# Reza
+def add_count_arrays(ca1, ca2):
+    '''
+    ca1 = [u'1:9']
+    '''
+    result_map = {}
+    for i in ca1+ca2:
+        key, value = i.split(':')
+        if key not in result_map:
+            result_map[key] = 0
+        result_map[key] += int(value)
+    result = []
+    for key, value in result_map.items():
+        result.append(key+":"+str(value))
+    return result
+
+# Reza
+def sum_day_count_array(day_count_arrays):
+    '''
+    [{u'2019-11-02': [u'1:9']}]
+    '''
+    result_map = {}
+    for day_count_array in day_count_arrays:
+        for item in day_count_array:
+            for day, v in item.items():
+                if not day:
+                    continue
+                if day not in result_map:
+                    result_map[day] = []
+                result_map[day] = add_count_arrays(result_map[day], v)
+
+    return [result_map]
+
 
 def sum_count_array(hour_counts):
     result_map = {}
@@ -56,12 +97,15 @@ def run(cfg, yesterday):
     dl_data_path = cfg['dl_predict_ready_path']
     bucket_size = cfg['bucket_size']
     bucket_step = cfg['bucket_step']
-    factdata_area_map = cfg['factdata']
+    factdata_area_map = cfg['area_map_table']
     distribution_table = cfg['distribution_table']
     norm_table = cfg['norm_table']
     dl_uckey_cluster_path = cfg['dl_uckey_cluster_path']
 
-    model_stats = get_model_stats_using_pickel(cfg)
+    # Reza
+    # model_stats = get_model_stats_using_pickel(cfg)
+    model_stat_table = cfg['model_stat_table']
+    model_stats = get_model_stats(hive_context, model_stat_table)
     if not model_stats:
         sys.exit("dl_spark_cmd: " + "null model stats")
 
@@ -127,17 +171,15 @@ def run(cfg, yesterday):
 
         # df_uckey_cluster keeps the ratio and cluster_key for only uckeys that are being processed
 
-        df_uckey_cluster = df.select(
-            'uckey', 'cluster_uckey', 'ratio', 'price_cat')
+        df_uckey_cluster = df.select('uckey', 'cluster_uckey', 'ratio', 'price_cat')
 
-        df = df.groupBy('cluster_uckey', 'price_cat').agg(
-            collect_list('day_price_imp_map_list').alias('cluster_day_price_imp_list'))
+        df = df.groupBy('cluster_uckey', 'price_cat').agg(collect_list('day_price_imp_map_list').alias('cluster_day_price_imp_list'))
         df = df.withColumn('ts', udf(sum_day_count_array,
                                      ArrayType(MapType(StringType(), ArrayType(StringType()))))(
             df.cluster_day_price_imp_list))
 
         df = df.drop('cluster_day_price_imp_list')
-        dl_data_path = 'dl_prediction_ready'
+        #dl_data_path = 'dl_prediction_ready'
 
         if i.value == 0:
             df.coalesce(100).write.mode('overwrite').parquet(dl_data_path)
@@ -162,6 +204,7 @@ if __name__ == '__main__':
     try:
         with open(args.config_file, 'r') as ymlfile:
             cfg = yaml.safe_load(ymlfile)
+            resolve_placeholder(cfg)
 
     except IOError as e:
         print(
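
For example, the two helpers added above merge per-price-cat count arrays across the records of a cluster (assuming add_count_arrays and sum_day_count_array from the hunk above are in scope; list order may vary with dict ordering):

print(add_count_arrays([u'1:9'], [u'1:3', u'2:5']))
# ['1:12', '2:5']

print(sum_day_count_array([[{u'2019-11-02': [u'1:9']},
                            {u'2019-11-02': [u'1:3', u'2:5']}]]))
# [{u'2019-11-02': ['1:12', '2:5']}]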
diff --git a/Model/predictor-dl-model/experiments/dl_predictor_performance_issue/main_spark_predict_es.py b/Processes/dlpredictor/tests/experiments/dl_predictor_performance_issue_01072022/main_spark_predict_es.py
similarity index 70%
rename from Model/predictor-dl-model/experiments/dl_predictor_performance_issue/main_spark_predict_es.py
rename to Processes/dlpredictor/tests/experiments/dl_predictor_performance_issue_01072022/main_spark_predict_es.py
index 05eb3a1..c5d5219 100644
--- a/Model/predictor-dl-model/experiments/dl_predictor_performance_issue/main_spark_predict_es.py
+++ b/Processes/dlpredictor/tests/experiments/dl_predictor_performance_issue_01072022/main_spark_predict_es.py
@@ -20,25 +20,66 @@ import json
 import time
 import yaml
 import argparse
+import logging
 
 from pyspark import SparkContext, SparkConf, Row
-from pyspark.sql.functions import concat_ws, count, lit, col, udf, expr, collect_list, create_map, sum as sum_agg, \
-    struct, explode
+from pyspark.sql.functions import concat_ws, count, lit, col, udf, expr, collect_list, create_map, sum as sum_agg, struct, explode
 from pyspark.sql.types import IntegerType, StringType, ArrayType, MapType, FloatType, BooleanType, LongType
 from pyspark.sql import HiveContext, SQLContext
-from forecaster import Forecaster
-from sparkesutil import *
-import transform
 from datetime import datetime, timedelta
-import secrets
-from imscommon_dl.es.esclient import ESClient
-from imscommon_dl.es.es_predictions_dao import ESPredictionsDAO
-import pickle
+#import secrets
+#import pickle
+
+# from imscommon_dl.es.esclient import ESClient
+# from imscommon_dl.es.es_predictions_dao import ESPredictionsDAO
+
 
-import logging
 from logging.config import fileConfig
 
 
+from dlpredictor import transform
+from dlpredictor.configutil import *
+from dlpredictor.log import *
+from dlpredictor.prediction.forecaster import Forecaster
+from dlpredictor.util.sparkesutil import *
+
+'''
+spark-submit --master yarn --num-executors 10 --executor-cores 5 --executor-memory 32G --driver-memory 32G --py-files dist/dlpredictor-1.6.0-py2.7.egg,lib/imscommon-2.0.0-py2.7.egg,lib/predictor_dl_model-1.6.0-py2.7.egg --jars lib/elasticsearch-hadoop-6.8.0.jar experiments/dl_predictor_performance_issue/main_spark_predict_es.py experiments/dl_predictor_performance_issue/config.yml '2021-07-21'
+'''
+
+def add_count_arrays(ca1, ca2):
+    '''
+    ca1 = [u'1:9']
+    '''
+    result_map = {}
+    for i in ca1+ca2:
+        key, value = i.split(':')
+        if key not in result_map:
+            result_map[key] = 0
+        result_map[key] += int(value)
+    result = []
+    for key, value in result_map.items():
+        result.append(key+":"+str(value))
+    return result
+
+
+def sum_day_count_array(day_count_arrays):
+    '''
+    [{u'2019-11-02': [u'1:9']}]
+    '''
+    result_map = {}
+    for day_count_array in day_count_arrays:
+        for item in day_count_array:
+            for day, v in item.items():
+                if not day:
+                    continue
+                if day not in result_map:
+                    result_map[day] = []
+                result_map[day] = add_count_arrays(result_map[day], v)
+
+    return [result_map]
+
+
 def multiply_each_value_of_map_with_ratio(day_prediction_map, ratio):
     for k, v in day_prediction_map.items():
         day_prediction_map[k] = v * ratio
@@ -121,9 +162,12 @@ def run(cfg, yesterday, serving_url):
     distribution_table = cfg['distribution_table']
     norm_table = cfg['norm_table']
 
-    model_stats = get_model_stats_using_pickel(cfg)
+    # Reza
+    # model_stats = get_model_stats_using_pickel(cfg)
+    model_stat_table = cfg['model_stat_table']
+    model_stats = get_model_stats(hive_context, model_stat_table)
     if not model_stats:
-        sys.exit("dl_spark_cmd: ", "null model stats")
+        sys.exit("dl_spark_cmd: " + "null model stats")
 
     # Read dist
     command = "SELECT DIST.uckey, DIST.ratio, DIST.cluster_uckey, DIST.price_cat FROM {} AS DIST ".format(
@@ -150,23 +194,22 @@ def run(cfg, yesterday, serving_url):
     df = sqlcontext.read.parquet(dl_data_path)
     df_uckey_cluster = sqlcontext.read.parquet(dl_uckey_cluster_path)
 
+    # TODO: where is sum_day_count_array?
     df = df.groupBy('cluster_uckey', 'price_cat').agg(collect_list('ts').alias('ts_list'))
     df = df.withColumn('ts',
                        udf(sum_day_count_array, ArrayType(MapType(StringType(), ArrayType(StringType()))))(df.ts_list))
     df = df.drop('ts_list')
 
     df = df.join(df_norm, on=['cluster_uckey', 'price_cat'], how='inner')
-    df = df.join(df_uckey_cluster, on=[
-        'cluster_uckey', 'price_cat'], how='inner')
+    df = df.join(df_uckey_cluster, on=['cluster_uckey', 'price_cat'], how='inner')
 
-    df = df.where(df.uckey.like('%native,b6le0s4qo8,4G,g_f,5,CPC,,1156320000%'))
+    # df = df.where(df.uckey.like('%native,b6le0s4qo8,4G,g_f,5,CPC,,1156320000%'))
     predictor_udf = udf(transform.predict_daily_uckey(days=day_list,
                                                       serving_url=serving_url, forecaster=forecaster,
-                                                      model_stats=model_stats, columns=df.columns, config=cfg),
+                                                      model_stats=model_stats, columns=df.columns),
                         MapType(StringType(), FloatType()))
 
-    df = df.withColumn('day_prediction_map',
-                       predictor_udf(struct([df[name] for name in df.columns])))
+    df = df.withColumn('day_prediction_map',predictor_udf(struct([df[name] for name in df.columns])))
 
     df = df.select('cluster_uckey', 'price_cat', 'day_prediction_map', 'ratio', 'uckey')
 
@@ -182,13 +225,13 @@ def run(cfg, yesterday, serving_url):
     df.cache()
     hdfs_df = df
 
-    df = df.withColumn('hits', udf(lambda uckey, maps: add_uckey_to_json(uckey, maps), StringType())(df.uckey,
-                                                                                                     df.day_count_map)).select(
-        "hits")
-    
+    df = df.withColumn('hits', udf(lambda uckey, maps: add_uckey_to_json(uckey, maps), StringType())(df.uckey,df.day_count_map)).select("hits")
+
     hdfs_df = get_preditction_in_hdfs_formate(hdfs_df)
     hdfs_df.show()
-    hdfs_df.coalesce(hdfs_write_threads).write.mode('overwrite').partitionBy("day").parquet(cfg["hdfs_prefix_path"])
+    
+    #hdfs_df.coalesce(hdfs_write_threads).write.mode('overwrite').partitionBy("day").parquet(cfg["hdfs_prefix_path"])
+    hdfs_df.write.option('header', 'true').mode('overwrite').format('hive').saveAsTable(cfg["es_predictions_index"])
 
     sc.stop()
 
@@ -202,6 +245,7 @@ if __name__ == '__main__':
     try:
         with open(args.config_file, 'r') as ymlfile:
             cfg = yaml.safe_load(ymlfile)
+            resolve_placeholder(cfg)
 
     except IOError as e:
         print(
@@ -213,34 +257,32 @@ if __name__ == '__main__':
         ymlfile.close()
 
     yesterday = args.yesterday
-    es_json_dir = cfg["es_json_dir"]
-    index_data = dict()
-    with open(es_json_dir + '/put_predictor_index_css.json', 'r') as myfile:
-        index_data['css'] = myfile.read()
-
+    # es_json_dir = cfg["es_json_dir"]
+    # index_data = dict()
+    # with open(es_json_dir + '/put_predictor_index_css.json', 'r') as myfile:
+    #     index_data['css'] = myfile.read()
 
     eligble_slot_ids = cfg['eligble_slot_ids']
     yesterday = str(yesterday)
     es_prediction_index = cfg["es_predictions_index"] + "_" + yesterday
     es_prediction_type = cfg['es_predictions_type']
-    refresh_index_wait_time = cfg["refresh_index_wait_time"]
-    es_write_threads = cfg["es_write_threads"]
-    hdfs_write_threads = cfg["hdfs_write_threads"]
+    # refresh_index_wait_time = cfg["refresh_index_wait_time"]
+    # es_write_threads = cfg["es_write_threads"]
+    # hdfs_write_threads = cfg["hdfs_write_threads"]
     serving_url = cfg["serving_url"]
 
-    es_cfg = dict()
-    es_cfg['es_mode'] = cfg["es_mode"]
-    es_cfg['css_url'] = cfg["css_url"]
-    es_cfg['pem_path'] = cfg['pem_path']
+    # es_cfg = dict()
+    # es_cfg['es_mode'] = cfg["es_mode"]
+    # es_cfg['css_url'] = cfg["css_url"]
+    # es_cfg['pem_path'] = cfg['pem_path']
 
-    predictions_type = dict()
-    predictions_type['css'] = cfg['es_predictions_type']
+    # predictions_type = dict()
+    # predictions_type['css'] = cfg['es_predictions_type']
 
     es_predictions = " value removed"
     es_predictions_dao = "value removed"
     cfg["signKey"] = 'provide value'
     cfg["sign_prefix"] = 'provide value'
     run(cfg, yesterday, serving_url)
-    mesg = "dl_spark_cmd: ", "prediction save in ES index: ", es_prediction_index, "  ,and save the one copy  in hdfs at path: ", \
-           cfg["hdfs_prefix_path"]
-
+    # mesg = "dl_spark_cmd: ", "prediction save in ES index: ", es_prediction_index, "  ,and save the one copy  in hdfs at path: ", \
+    #        cfg["hdfs_prefix_path"]
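
After prediction, each uckey's daily counts are recovered from its cluster's prediction by scaling with the uckey's traffic ratio inside the cluster, as multiply_each_value_of_map_with_ratio above does. A standalone illustration (the explicit return is added here for readability):

def multiply_each_value_of_map_with_ratio(day_prediction_map, ratio):
    # Scale every day's predicted count by the uckey's share of its cluster's traffic.
    for k, v in day_prediction_map.items():
        day_prediction_map[k] = v * ratio
    return day_prediction_map

cluster_prediction = {'2021-07-22': 1000.0, '2021-07-23': 1200.0}
print(multiply_each_value_of_map_with_ratio(cluster_prediction, 0.25))
# {'2021-07-22': 250.0, '2021-07-23': 300.0}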
diff --git a/Model/predictor-dl-model/experiments/dl_predictor_performance_issue/sparkesutil.py b/Processes/dlpredictor/tests/experiments/dl_predictor_performance_issue_01072022/sparkesutil.py
similarity index 100%
rename from Model/predictor-dl-model/experiments/dl_predictor_performance_issue/sparkesutil.py
rename to Processes/dlpredictor/tests/experiments/dl_predictor_performance_issue_01072022/sparkesutil.py
diff --git a/Processes/dlpredictor/experiments/elasticsearch-hadoop-6.8.0.jar b/Processes/dlpredictor/tests/experiments/elasticsearch-hadoop-6.8.0.jar
similarity index 100%
rename from Processes/dlpredictor/experiments/elasticsearch-hadoop-6.8.0.jar
rename to Processes/dlpredictor/tests/experiments/elasticsearch-hadoop-6.8.0.jar
diff --git a/Processes/dlpredictor/experiments/scripts/test_spark_es_big_write.py b/Processes/dlpredictor/tests/experiments/scripts/test_spark_es_big_write.py
similarity index 100%
rename from Processes/dlpredictor/experiments/scripts/test_spark_es_big_write.py
rename to Processes/dlpredictor/tests/experiments/scripts/test_spark_es_big_write.py
diff --git a/Processes/dlpredictor/experiments/scripts/test_spark_es_write.py b/Processes/dlpredictor/tests/experiments/scripts/test_spark_es_write.py
similarity index 100%
rename from Processes/dlpredictor/experiments/scripts/test_spark_es_write.py
rename to Processes/dlpredictor/tests/experiments/scripts/test_spark_es_write.py
diff --git a/Processes/dlpredictor/util-scripts/dense_sparse_count.py b/Processes/dlpredictor/tests/scripts/dense_sparse_count.py
similarity index 100%
rename from Processes/dlpredictor/util-scripts/dense_sparse_count.py
rename to Processes/dlpredictor/tests/scripts/dense_sparse_count.py
diff --git a/Processes/dlpredictor/util-scripts/dense_sparse_uckeys.py b/Processes/dlpredictor/tests/scripts/dense_sparse_uckeys.py
similarity index 100%
rename from Processes/dlpredictor/util-scripts/dense_sparse_uckeys.py
rename to Processes/dlpredictor/tests/scripts/dense_sparse_uckeys.py
diff --git a/Processes/dlpredictor/util-scripts/fact_data_count.py b/Processes/dlpredictor/tests/scripts/fact_data_count.py
similarity index 100%
rename from Processes/dlpredictor/util-scripts/fact_data_count.py
rename to Processes/dlpredictor/tests/scripts/fact_data_count.py
diff --git a/Processes/dlpredictor/util-scripts/filter_si_tmp_area.py b/Processes/dlpredictor/tests/scripts/filter_si_tmp_area.py
similarity index 100%
rename from Processes/dlpredictor/util-scripts/filter_si_tmp_area.py
rename to Processes/dlpredictor/tests/scripts/filter_si_tmp_area.py
diff --git a/Processes/dlpredictor/util-scripts/imp_of_traffic.py b/Processes/dlpredictor/tests/scripts/imp_of_traffic.py
similarity index 100%
rename from Processes/dlpredictor/util-scripts/imp_of_traffic.py
rename to Processes/dlpredictor/tests/scripts/imp_of_traffic.py
diff --git a/Processes/dlpredictor/tests/si_traffic_prediction_ckeck/si_traffic_prediction_check_2.py b/Processes/dlpredictor/tests/si_traffic_prediction_ckeck/si_traffic_prediction_check_2.py
index 69a4180..b522b10 100644
--- a/Processes/dlpredictor/tests/si_traffic_prediction_ckeck/si_traffic_prediction_check_2.py
+++ b/Processes/dlpredictor/tests/si_traffic_prediction_ckeck/si_traffic_prediction_check_2.py
@@ -235,9 +235,9 @@ if __name__ == "__main__":
         'uckey_attrs': ['m', 'si', 't', 'g', 'a', 'pm', 'r', 'ipl'],
         'es_host': '10.213.37.41',
         'es_port': '9200',
-        'es_predictions_index': 'dlpredictor_111021_no_residency_no_mapping_predictions',
+        'es_predictions_index': 'dlpredictor_111021_no_residency_no_mapping_predictions_test_12212021',
         'es_predictions_type': 'doc',
-        'report_table': 'si_traffic_prediction_check_111021_12012021'
+        'report_table': 'si_traffic_prediction_check_111021_12212021'
     }
 
     # list of days in ts_ver in ts table.
diff --git a/Model/predictor-dl-model/tests/trainer/README.md b/Processes/dlpredictor/tests/unit-tests/README.md
similarity index 100%
rename from Model/predictor-dl-model/tests/trainer/README.md
rename to Processes/dlpredictor/tests/unit-tests/README.md
diff --git a/Processes/dlpredictor/util.sh b/Processes/dlpredictor/util.sh
deleted file mode 100644
index c19055d..0000000
--- a/Processes/dlpredictor/util.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/bash
-
-SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
-
-if false
-then
-    spark-submit --master yarn --num-executors 8 --executor-cores 5 --conf spark.driver.maxResultSize=2048m --executor-memory 16G --driver-memory 16G $SCRIPTPATH/util-scripts/dense_sparse_uckeys.py
-fi
-
-if false
-then
-    spark-submit --master yarn --num-executors 8 --executor-cores 5 --conf spark.driver.maxResultSize=2048m --executor-memory 16G --driver-memory 16G $SCRIPTPATH/util-scripts/dense_sparse_count.py
-fi
-
-if true
-then
-    spark-submit --master yarn --num-executors 8 --executor-cores 5 --conf spark.driver.maxResultSize=2048m --executor-memory 16G --driver-memory 16G $SCRIPTPATH/util-scripts/fact_data_count.py
-fi
-
-