Initialization in DL

Category

Lecun

Xavier (or Glorot)

He / MSRA initialization

如果使用了 ReLU / Leaky ReLU 激活函数,最好使用这种初始化方法。

Glorot_He

  • Uniform

  • Normal

RandomUniform

TensorFlow 中对应的函数: `tf.random_uniform_initializer`

TruncatedNormal

TensorFlow 中对应的函数: `tf.truncated_normal_initializer`

Orthogonal

Template in TensorFlow

def init_weights():
    """Create the TensorFlow weight variable ``W`` from a NumPy-sampled array.

    The function takes no arguments. It reads ``initialization``,
    ``input_dim``, ``output_dim`` and ``_weights_stdev`` from the enclosing
    scope; none of them is defined in this block, so the surrounding code
    must provide them.

    Supported values of ``initialization`` (see also
    https://keras.io/initializers/):

    * ``'lecun'`` -- uniform, standard deviation ``sqrt(1 / input_dim)``.
    * ``'glorot'``, ``'xavier'`` or ``None`` -- uniform, standard deviation
      ``sqrt(2 / (input_dim + output_dim))``.
    * ``'he'`` -- uniform, standard deviation ``sqrt(2 / input_dim)``.
    * ``'glorot_he'`` -- uniform, standard deviation
      ``sqrt(4 / (input_dim + output_dim))``.
    * ``'orthogonal'`` -- a factor of the SVD of a standard-normal matrix.
    * ``('uniform', limit)`` -- uniform on ``[-limit, limit]``.

    When ``_weights_stdev`` is not ``None`` it replaces the standard
    deviation of the first four schemes.

    Returns:
        The variable created by ``tf.get_variable(name='W', ...)``, whose
        initial value is a float32 array of shape ``(input_dim, output_dim)``.

    Raises:
        Exception: if ``initialization`` is none of the values listed above.
    """

    def uniform(stdev, size):
        """Sample float32 values uniformly with standard deviation ``stdev``."""
        if _weights_stdev is not None:
            stdev = _weights_stdev
        # U(-a, a) has variance a**2 / 3, so a = stdev * sqrt(3) yields the
        # requested standard deviation.
        limit = stdev * np.sqrt(3)
        return np.random.uniform(
            low=-limit,
            high=limit,
            size=size,
        ).astype('float32')

    def orthogonal(shape):
        """Sample a float32 orthogonal initialiser (adapted from Lasagne)."""
        if len(shape) < 2:
            raise RuntimeError("Only shapes of length 2 or more are "
                               "supported.")
        flat_shape = (shape[0], np.prod(shape[1:]))
        # TODO: why normal and not uniform?
        a = np.random.normal(0.0, 1.0, flat_shape)
        u, _, v = np.linalg.svd(a, full_matrices=False)
        # Pick the SVD factor that has the requested (flattened) shape.
        q = u if u.shape == flat_shape else v
        return q.reshape(shape).astype('float32')

    shape = (input_dim, output_dim)

    if initialization == 'lecun':
        # Equivalent to tf.contrib.layers.variance_scaling_initializer(
        #     factor=1.0, mode='FAN_IN', uniform=True).
        weight_values = uniform(np.sqrt(1. / input_dim), shape)
    elif initialization in ('glorot', 'xavier') or initialization is None:
        # Equivalent to tf.contrib.layers.variance_scaling_initializer(
        #     factor=1.0, mode='FAN_AVG', uniform=True).
        # This is also the default when no scheme is requested.
        weight_values = uniform(np.sqrt(2. / (input_dim + output_dim)), shape)
    elif initialization == 'he':
        # Equivalent to tf.contrib.layers.variance_scaling_initializer(
        #     factor=2.0, mode='FAN_IN', uniform=True).
        weight_values = uniform(np.sqrt(2. / input_dim), shape)
    elif initialization == 'glorot_he':
        # Equivalent to tf.contrib.layers.variance_scaling_initializer(
        #     factor=2.0, mode='FAN_AVG', uniform=True).
        weight_values = uniform(np.sqrt(4. / (input_dim + output_dim)), shape)
    elif initialization == 'orthogonal':
        # Orthogonal init is only used when requested explicitly: it is
        # disabled as a default because it is too slow. (The former
        # "None and input_dim == output_dim" clause here was unreachable,
        # since None is already handled by the glorot branch above.)
        weight_values = orthogonal(shape)
    elif (isinstance(initialization, (tuple, list))
          and len(initialization) >= 2
          and initialization[0] == 'uniform'):
        # The isinstance/len guard makes malformed values fall through to the
        # explicit error below instead of failing while being indexed.
        weight_values = np.random.uniform(
            low=-initialization[1],
            high=initialization[1],
            size=shape,
        ).astype('float32')
    else:
        raise Exception('Invalid initialization!')

    weight = tf.get_variable(name='W', dtype=tf.float32,
                             initializer=weight_values)
    # Hand the variable back so callers can actually use it.
    return weight

问题

Reference