You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by zh...@apache.org on 2023/09/12 08:52:33 UTC
[singa] branch dev-postgresql updated: Debug the training process for training free model evaluation metric
This is an automated email from the ASF dual-hosted git repository.
zhaojing pushed a commit to branch dev-postgresql
in repository https://gitbox.apache.org/repos/asf/singa.git
The following commit(s) were added to refs/heads/dev-postgresql by this push:
new 1d2f6e75 Debug the training process for training free model evaluation metric
new 7154dad2 Merge pull request #1095 from NLGithubWP/dev-postgresql
1d2f6e75 is described below
commit 1d2f6e758666f4fd40779ae0ef8186a755c5bbd4
Author: working <57...@users.noreply.github.com>
AuthorDate: Tue Sep 12 12:56:05 2023 +0800
Debug the training process for training free model evaluation metric
---
examples/model_selection_psql/ms_mlp/train_mlp.py | 40 ++++++++++++++---------
1 file changed, 25 insertions(+), 15 deletions(-)
diff --git a/examples/model_selection_psql/ms_mlp/train_mlp.py b/examples/model_selection_psql/ms_mlp/train_mlp.py
index 027e659a..d7b21fb5 100644
--- a/examples/model_selection_psql/ms_mlp/train_mlp.py
+++ b/examples/model_selection_psql/ms_mlp/train_mlp.py
@@ -42,12 +42,17 @@ class MSOptimizer(Optimizer):
return pn_p_g_list
def call_with_returns(self, loss):
+ print ("call_with_returns loss.data: \n", loss.data)
pn_p_g_list = []
for p, g in autograd.backward(loss):
if p.name is None:
p.name = id(p)
self.apply(p.name, p, g)
pn_p_g_list.append(p.name, p, g)
+ print ("call with returns")
+ print ("p.name: \n", p.name)
+ print ("p.data: \n", p.data)
+ print ("g.data: \n", g.data)
return pn_p_g_list
class MSSGD(MSOptimizer):
@@ -218,8 +223,8 @@ def augmentation(x, batch_size):
for data_num in range(0, batch_size):
offset = np.random.randint(8, size=2)
x[data_num, :, :, :] = xpad[data_num, :,
- offset[0]:offset[0] + x.shape[2],
- offset[1]:offset[1] + x.shape[2]]
+ offset[0]:offset[0] + x.shape[2],
+ offset[1]:offset[1] + x.shape[2]]
if_flip = np.random.randint(2)
if (if_flip):
x[data_num, :, :, :] = x[data_num, :, :, ::-1]
@@ -271,7 +276,7 @@ def resize_dataset(x, image_size):
for d in range(0, dim):
X[n, d, :, :] = np.array(Image.fromarray(x[n, d, :, :]).resize(
(image_size, image_size), Image.BILINEAR),
- dtype=np.float32)
+ dtype=np.float32)
return X
@@ -333,8 +338,8 @@ def run(global_rank,
sys.path.insert(0, parent)
from mlp import model
model = model.create_model(data_size=data_size,
- num_classes=num_classes)
-
+ num_classes=num_classes)
+
elif model == 'msmlp':
import os, sys, inspect
current = os.path.dirname(
@@ -343,7 +348,7 @@ def run(global_rank,
sys.path.insert(0, parent)
from msmlp import model
model = model.create_model(data_size=data_size,
- num_classes=num_classes)
+ num_classes=num_classes)
# For distributed training, sequential has better performance
if hasattr(mssgd, "communicator"):
@@ -413,34 +418,39 @@ def run(global_rank,
synflow_flag = True
### step 1: all one input
# Copy the patch data into input tensors
- tx.copy_from_numpy(np.ones(x.shape))
+ tx.copy_from_numpy(np.ones(x.shape, dtype=np.float32))
ty.copy_from_numpy(y)
### step 2: all weights turned to positive (done)
### step 3: new loss (done)
- pn_p_g_list, out, loss = model(tx, ty, synflow_flag, dist_option, spars)
+ pn_p_g_list, out, loss = model(tx, ty, dist_option, spars, synflow_flag)
### step 4: calculate the multiplication of weights
synflow_score = 0.0
for pn_p_g_item in pn_p_g_list:
print ("calculate weight param * grad parameter name: \n", pn_p_g_item[0])
- if len(pn_p_g_item[1].data.shape) == 2: # param_value.data is "weight"
- synflow_score += np.sum(np.absolute(tensor.to_numpy(pn_p_g_item[1].data) * tensor.to_numpy(pn_p_g_item[2].data)))
+ if len(pn_p_g_item[1].shape) == 2: # param_value.data is "weight"
+ print ("pn_p_g_item[1].shape: \n", pn_p_g_item[1].shape)
+ synflow_score += np.sum(np.absolute(tensor.to_numpy(pn_p_g_item[1]) * tensor.to_numpy(pn_p_g_item[2])))
print ("synflow_score: \n", synflow_score)
elif epoch == (max_epoch - 1) and b == (num_train_batch - 2): # all weights turned to positive
# Copy the patch data into input tensors
tx.copy_from_numpy(x)
ty.copy_from_numpy(y)
- pn_p_g_list, out, loss = model(tx, ty, synflow_flag, dist_option, spars)
+ pn_p_g_list, out, loss = model(tx, ty, dist_option, spars, synflow_flag)
train_correct += accuracy(tensor.to_numpy(out), y)
train_loss += tensor.to_numpy(loss)[0]
# all params turned to positive
for pn_p_g_item in pn_p_g_list:
print ("absolute value parameter name: \n", pn_p_g_item[0])
- pn_p_g_item[1].data = tensor.abs(pn_p_g_item[1].data)
+ pn_p_g_item[1] = tensor.abs(pn_p_g_item[1]) # return tensor already
else: # normal train steps
# Copy the patch data into input tensors
tx.copy_from_numpy(x)
ty.copy_from_numpy(y)
- pn_p_g_list, out, loss = model(tx, ty, synflow_flag, dist_option, spars)
+ # print ("normal before model(tx, ty, synflow_flag, dist_option, spars)")
+ # print ("train_cnn tx: \n", tx)
+ # print ("train_cnn ty: \n", ty)
+ pn_p_g_list, out, loss = model(tx, ty, dist_option, spars, synflow_flag)
+ # print ("normal after model(tx, ty, synflow_flag, dist_option, spars)")
train_correct += accuracy(tensor.to_numpy(out), y)
train_loss += tensor.to_numpy(loss)[0]
@@ -490,7 +500,7 @@ if __name__ == '__main__':
description='Training using the autograd and graph.')
parser.add_argument(
'model',
- choices=['cnn', 'resnet', 'xceptionnet', 'mlp', 'alexnet'],
+ choices=['cnn', 'resnet', 'xceptionnet', 'mlp', 'msmlp', 'alexnet'],
default='cnn')
parser.add_argument('data',
choices=['mnist', 'cifar10', 'cifar100'],
@@ -501,7 +511,7 @@ if __name__ == '__main__':
dest='precision')
parser.add_argument('-m',
'--max-epoch',
- default=100,
+ default=10,
type=int,
help='maximum epochs',
dest='max_epoch')