我试图通过编写一个新的层,在一个标准的完全连接的神经网络中实现层规范化。我复制了Dense
层的几乎所有代码,并添加了layer normalization
函数和相应的参数。我的代码如下:
class DenseLN(Layer):
def __init__(self, output_dim, init='glorot_uniform', activation='linear', weights=None,
W_regularizer=None, b_regularizer=None, activity_regularizer=None,
W_constraint=None, b_constraint=None, bias=True, input_dim=None, gamma_init=1., **kwargs):
self.init = initializations.get(init)
self.activation = activations.get(activation)
self.output_dim = output_dim
self.input_dim = input_dim
def gamma_init_func(shape, c=gamma_init):
if c == 1.:
return initializations.get('one')(shape)
return K.variable(np.ones(shape) * c, **kwargs)
self.gamma_init = gamma_init_func
self.beta_init = initializations.get('zero')
self.epsilon = 1e-5
self.W_regularizer = regularizers.get(W_regularizer)
self.b_regularizer = regularizers.get(b_regularizer)
self.activity_regularizer = regularizers.get(activity_regularizer)
self.W_constraint = constraints.get(W_constraint)
self.b_constraint = constraints.get(b_constraint)
self.bias = bias
self.initial_weights = weights
self.input_spec = [InputSpec(ndim=2)]
if self.input_dim:
kwargs['input_shape'] = (self.input_dim,)
super(DenseLN, self).__init__(**kwargs)
def ln(self, x):
m = K.mean(x, axis=-1, keepdims=True)
std = K.sqrt(K.var(x, axis=-1, keepdims=True) + self.epsilon)
x_normed = (x - m) / (std + self.epsilon)
x_normed = self.gamma * x_normed + self.beta
return x_normed
def build(self, input_shape):
assert len(input_shape) == 2
input_dim = input_shape[1]
self.input_spec = [InputSpec(dtype=K.floatx(),
shape=(None, input_dim))]
self.gamma = self.gamma_init(input_dim)
self.beta = self.beta_init(input_dim)
self.W = self.init((input_dim, self.output_dim),
name='{}_W'.format(self.name))
if self.bias:
self.b = K.zeros((self.output_dim,),
name='{}_b'.format(self.name))
self.trainable_weights = [self.W, self.gamma, self.beta, self.b]
else:
self.trainable_weights = [self.W, self.gamma, self.beta]
self.regularizers = []
if self.W_regularizer:
self.W_regularizer.set_param(self.W)
self.regularizers.append(self.W_regularizer)
if self.bias and self.b_regularizer:
self.b_regularizer.set_param(self.b)
self.regularizers.append(self.b_regularizer)
if self.activity_regularizer:
self.activity_regularizer.set_layer(self)
self.regularizers.append(self.activity_regularizer)
self.constraints = {}
if self.W_constraint:
self.constraints[self.W] = self.W_constraint
if self.bias and self.b_constraint:
self.constraints[self.b] = self.b_constraint
if self.initial_weights is not None:
self.set_weights(self.initial_weights)
del self.initial_weights
def call(self, x, mask=None):
output = K.dot(x, self.W)
#output = self.ln(output)
if self.bias:
output += self.b
return self.activation(output)
def get_output_shape_for(self, input_shape):
assert input_shape and len(input_shape) == 2
return (input_shape[0], self.output_dim)
但在fit
期间,它得到了一个TypeError
:unorderable types: NoneType() < NoneType()
。根据日志消息,原因似乎是trainable_weights
:
你能告诉我我做错什么了吗?我该怎么解决它?提前谢谢你!在
编辑:
以下是构建模型并进行拟合的代码:
model = Sequential()
model.add(DenseLN(12, input_dim=12))
model.add(Activation('relu'))
#model.add(Dropout(0.5))
model.add(DenseLN(108))
model.add(Activation('relu'))
model.add(DenseLN(108))
model.add(Activation('relu'))
model.add(Dense(1))
model.add(Activation('relu'))
adadelta = Adadelta(lr=0.1, rho=0.95, epsilon=1e-08)
adagrad = Adagrad(lr=0.003, epsilon=1e-08)
model.compile(loss='poisson',
optimizer=adagrad,
metrics=['accuracy'])
model.fit(X_train_scale,
Y_train,
batch_size=3000,
nb_epoch=300)
编辑2:
我将name
属性添加到beta
和gamma
中,似乎源代码错误已经修复,但是又发生了另一个错误。编辑后的代码和新的日志消息如下:
self.gamma = K.ones((output_dim,), name='{}_gamma'.format(self.name))
self.beta = K.zeros((output_dim,), name='{}_beta'.format(self.name))
DisconnectedInputError:
Backtrace when that variable is created:
File "/home/lcc/anaconda3/envs/sensequant/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2809, in run_ast_nodes
if self.run_code(code, result):
File "/home/lcc/anaconda3/envs/sensequant/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2869, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-464-a90d5bdc38d3>", line 15, in <module>
model.add(DenseLN(108))
File "/home/lcc/anaconda3/envs/sensequant/lib/python3.5/site-packages/keras/models.py", line 146, in add
output_tensor = layer(self.outputs[0])
File "/home/lcc/anaconda3/envs/sensequant/lib/python3.5/site-packages/keras/engine/topology.py", line 458, in __call__
self.build(input_shapes[0])
File "<ipython-input-463-901584f9945c>", line 46, in build
self.beta = K.zeros((output_dim,), name='{}_beta'.format(self.name))
File "/home/lcc/anaconda3/envs/sensequant/lib/python3.5/site-packages/keras/backend/theano_backend.py", line 77, in zeros
return variable(np.zeros(shape), dtype, name)
File "/home/lcc/anaconda3/envs/sensequant/lib/python3.5/site-packages/keras/backend/theano_backend.py", line 31, in variable
return theano.shared(value=value, name=name, strict=False)
目前没有回答
相关问题 更多 >
编程相关推荐