I am working on the encoder layer of a Keras Transformer model. I can't understand the dropout rate parameter, and I don't know how to fix the error below. My code:
# UNQ_C4 (UNIQUE CELL IDENTIFIER, DO NOT EDIT)
# GRADED FUNCTION EncoderLayer
class EncoderLayer(tf.keras.layers.Layer):
    """
    The encoder layer is composed of a multi-head self-attention mechanism,
    followed by a simple, position-wise fully connected feed-forward network.
    This architecture includes a residual connection around each of the two
    sub-layers, followed by layer normalization.
    """
    def __init__(self, embedding_dim, num_heads, fully_connected_dim,
                 dropout_rate=0.1, layernorm_eps=1e-6):
        super(EncoderLayer, self).__init__()

        self.mha = MultiHeadAttention(num_heads=num_heads,
                                      key_dim=embedding_dim,
                                      dropout=dropout_rate)

        self.ffn = FullyConnected(embedding_dim=embedding_dim,
                                  fully_connected_dim=fully_connected_dim)

        self.layernorm1 = LayerNormalization(epsilon=layernorm_eps)
        self.layernorm2 = LayerNormalization(epsilon=layernorm_eps)

        self.dropout_ffn = Dropout(dropout_rate)
    def call(self, x, training, mask):
        """
        Forward pass for the Encoder Layer

        Arguments:
            x -- Tensor of shape (batch_size, input_seq_len, fully_connected_dim)
            training -- Boolean, set to true to activate
                        the training mode for dropout layers
            mask -- Boolean mask to ensure that the padding is not
                    treated as part of the input

        Returns:
            encoder_layer_out -- Tensor of shape (batch_size, input_seq_len, fully_connected_dim)
        """
        # START CODE HERE
        # calculate self-attention using mha (~1 line). Dropout will be applied during training
        attn_output = None  # Self-attention (batch_size, input_seq_len, fully_connected_dim)

        # apply layer normalization to the sum of the input and the attention output to get the
        # output of the multi-head attention layer (~1 line)
        out1 = None  # (batch_size, input_seq_len, fully_connected_dim)

        # pass the output of the multi-head attention layer through the ffn (~1 line)
        ffn_output = None  # (batch_size, input_seq_len, fully_connected_dim)

        # apply a dropout layer to the ffn output during training (~1 line)
        ffn_output = None

        # apply layer normalization to the sum of the multi-head attention output and the ffn
        # output to get the output of the encoder layer (~1 line)
        encoder_layer_out = None  # (batch_size, input_seq_len, fully_connected_dim)
        # END CODE HERE

        return encoder_layer_out
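On the dropout_rate parameter itself, my current understanding from the TensorFlow docs is that it is the fraction of activations randomly zeroed out while training=True, with the surviving entries rescaled by 1 / (1 - rate) so the expected sum is unchanged, and that it does nothing at inference. A minimal standalone check of tf.keras.layers.Dropout (outside the graded cell):

import tensorflow as tf

# Standalone check of what dropout_rate means (not part of the assignment).
# With rate=0.5, roughly half of the entries are zeroed when training=True,
# and the survivors are scaled by 1 / (1 - 0.5) = 2.0.
drop = tf.keras.layers.Dropout(rate=0.5)
x = tf.ones((1, 10))

print(drop(x, training=True))   # random zeros, remaining entries become 2.0
print(drop(x, training=False))  # identity: all ones, dropout is off at inference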
The test code is:
# UNIT TEST
EncoderLayer_test(EncoderLayer)
I get the error below:
---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
<ipython-input-16-00617004b1af> in <module>
      1 # UNIT TEST
----> 2 EncoderLayer_test(EncoderLayer)

~/work/W4A1/public_tests.py in EncoderLayer_test(target)
     92         [[ 0.23017104, -0.98100424, -0.78707516,  1.5379084 ],
     93          [-1.2280797 ,  0.76477575, -0.7169283 ,  1.1802323 ],
---> 94          [ 0.14880152, -0.48318022, -1.1908402 ,  1.5252188 ]]), "Wrong values when training=True"
     95
     96     encoded = encoder_layer1(q, False, np.array([[1, 1, 0]]))

AssertionError: Wrong values when training=True
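For reference, this is my reading of how the graded body is meant to look. It is a sketch only, assuming mha is tf.keras.layers.MultiHeadAttention and FullyConnected is the helper defined earlier in the notebook, so the exact grader-accepted form may differ:

        # START CODE HERE (sketch, not grader-verified)
        # self-attention; the MultiHeadAttention layer applies its own internal
        # dropout (dropout=dropout_rate from __init__) only when training=True
        attn_output = self.mha(x, x, x, attention_mask=mask)

        # residual connection + layer normalization around the attention sub-layer
        out1 = self.layernorm1(x + attn_output)

        # position-wise feed-forward network
        ffn_output = self.ffn(out1)

        # dropout on the ffn output; forwarding the training flag explicitly is
        # what makes training=True behave differently from training=False
        ffn_output = self.dropout_ffn(ffn_output, training=training)

        # residual connection + layer normalization around the ffn sub-layer
        encoder_layer_out = self.layernorm2(out1 + ffn_output)
        # END CODE HERE

If that is roughly what you have and the assertion still fires only for training=True, the dropout path seems like the usual suspect: if the training flag never reaches self.dropout_ffn, dropout stays inactive and the training-mode values drift from the expected ones.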