
# list the names of all nodes in the default graph
[n.name for n in tf.get_default_graph().as_graph_def().node]

# or iterate over the operations directly
for op in tf.get_default_graph().get_operations():
    print(op.name)

list_tensors | node_info | print_tensor | list_inputs | list_outputs | run_info | help
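The commands above are the tfdbg CLI commands. A minimal sketch of how a TF 1.x session can be wrapped so those commands become available at each run call (the constant graph here is only a placeholder example):

import tensorflow as tf
from tensorflow.python import debug as tf_debug

a = tf.constant([1.0, 2.0], name='a')
b = tf.reduce_sum(a, name='b')

sess = tf.Session()
# Wrap the session with the tfdbg CLI; every sess.run() now drops into the
# debugger prompt, where list_tensors / print_tensor / node_info can be issued.
sess = tf_debug.LocalCLIDebugWrapperSession(sess)
print(sess.run(b))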

If the gradients are not zeroed, each Variable's gradient is accumulated on every backward() call.

optimizer.zero_grad()                # clear gradients left over from the previous iteration
outputs = model(inputs)              # forward pass
loss = criterion(outputs, targets)
loss.backward()                      # compute gradients
optimizer.step()                     # update the parameters
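The accumulation can also be exploited deliberately to simulate a larger batch. A sketch, assuming names model, train_loader and accum_steps that are not from the original:

accum_steps = 4                      # assumed virtual-batch multiplier
optimizer.zero_grad()
for i, (inputs, targets) in enumerate(train_loader):
    outputs = model(inputs)
    # scale the loss so the accumulated gradients match one large batch
    loss = criterion(outputs, targets) / accum_steps
    loss.backward()                  # gradients keep adding up in .grad
    if (i + 1) % accum_steps == 0:
        optimizer.step()             # update with the accumulated gradients
        optimizer.zero_grad()        # then reset them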

Parameter groups

# The constructed optimizer has two param_groups, i.e. len(optimizer.param_groups) == 2;
# model.base uses the default lr=1e-2, model.classifier overrides it with lr=1e-3
optim.SGD([
    {'params': model.base.parameters()},
    {'params': model.classifier.parameters(), 'lr': 1e-3}
], lr=1e-2, momentum=0.9)

# a single parameter group
optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)

def lr_decay(optimizer, step, lr, decay_step, gamma):
    # exponential decay: scale the base lr by gamma every decay_step steps,
    # then write the new lr into every parameter group
    lr = lr * gamma ** (step / decay_step)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr

Freezing batch_norm

During training, this layer keeps a running estimate of its computed mean and variance. The running sum is kept with a default momentum of 0.1.

During evaluation, this running mean/variance is used for normalization.
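One common way to "lock" the BatchNorm layers, so their running mean/variance stop updating during fine-tuning, is to switch just those submodules to eval mode. A sketch, assuming a torch.nn model named model:

import torch.nn as nn

def freeze_bn(m):
    # put every BatchNorm layer into eval mode: the stored running
    # mean/variance are used for normalization and no longer updated
    if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
        m.eval()

model.apply(freeze_bn)

Note that this has to be re-applied after each model.train() call, since train() switches every submodule back to training mode.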

The purpose of declaring global_step is to keep track of the optimizer's internal step count (e.g. to drive learning-rate decay).

print('>>> epoch: {} | lr: {:.5f}'.format(epoch + 1, lr_now))
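Putting the pieces together, a sketch of how a per-iteration glob_step counter could drive lr_decay; the names glob_step, opt.lr, opt.lr_decay and opt.lr_gamma are assumptions, not from the original:

glob_step = 0
lr_now = opt.lr
for epoch in range(opt.epochs):
    for inputs, targets in train_loader:
        glob_step += 1
        # decay the learning rate every opt.lr_decay optimizer steps
        if glob_step % opt.lr_decay == 0:
            lr_now = lr_decay(optimizer, glob_step, opt.lr, opt.lr_decay, opt.lr_gamma)
        optimizer.zero_grad()
        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer.step()
    print('>>> epoch: {} | lr: {:.5f}'.format(epoch + 1, lr_now))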

pin_memory=True puts the loaded batches in page-locked (pinned) memory, so those pages cannot be swapped out to disk.

test_loader = DataLoader(
    dataset=Human36M(actions=actions, data_path=opt.data_dir, use_hg=opt.use_hg, is_train=False),
    batch_size=opt.test_batch,
    shuffle=False,
    num_workers=opt.job,
    pin_memory=True)
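Pinned host memory is also what allows the subsequent host-to-GPU copy to run asynchronously. A sketch of the matching transfer in the loop, assuming a CUDA device and PyTorch >= 0.4 (where the non_blocking keyword is available):

for inputs, targets in test_loader:
    # non_blocking=True only helps when the source tensor lives in pinned memory
    inputs = inputs.cuda(non_blocking=True)
    targets = targets.cuda(non_blocking=True)
    outputs = model(inputs)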