Keras Transformer model encoder layer

Posted 2024-04-26 20:37:29

I am working through the Keras Transformer model encoder layer from this tutorial:

https://colab.research.google.com/github/tensorflow/examples/blob/master/community/en/transformer_chatbot.ipynb

How can I fix the problem below?

I also do not really understand how the dropout_rate parameter is supposed to be used here.
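If I understand the Keras docs correctly, dropout_rate is the fraction of values that the Dropout layers (and the dropout built into MultiHeadAttention) randomly zero out, but only when training=True; outside training, dropout is a no-op, and during training the surviving values are scaled by 1/(1 - rate) so the expected sum stays the same. A quick standalone check I wrote (my own sketch, not part of the assignment):

import tensorflow as tf

drop = tf.keras.layers.Dropout(rate=0.1)   # dropout_rate = 0.1 -> roughly 10% of values zeroed
x = tf.ones((1, 3, 4))

print(drop(x, training=True))    # some entries become 0.0, the rest are scaled by 1/(1 - 0.1)
print(drop(x, training=False))   # identical to x: dropout is inactive when training=False

The graded cell I am trying to complete is: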

# UNQ_C4 (UNIQUE CELL IDENTIFIER, DO NOT EDIT)
# GRADED FUNCTION EncoderLayer
class EncoderLayer(tf.keras.layers.Layer):
    """
    The encoder layer is composed of a multi-head self-attention mechanism,
    followed by a simple, position-wise fully connected feed-forward network.
    This architecture includes a residual connection around each of the two
    sub-layers, followed by layer normalization.
    """
    def __init__(self, embedding_dim, num_heads, fully_connected_dim,
                 dropout_rate=0.1, layernorm_eps=1e-6):
        super(EncoderLayer, self).__init__()

        self.mha = MultiHeadAttention(num_heads=num_heads,
                                      key_dim=embedding_dim,
                                      dropout=dropout_rate)

        self.ffn = FullyConnected(embedding_dim=embedding_dim,
                                  fully_connected_dim=fully_connected_dim)

        self.layernorm1 = LayerNormalization(epsilon=layernorm_eps)
        self.layernorm2 = LayerNormalization(epsilon=layernorm_eps)

        self.dropout_ffn = Dropout(dropout_rate)
    
    def call(self, x, training, mask):
        """
        Forward pass for the Encoder Layer
        
        Arguments:
            x -- Tensor of shape (batch_size, input_seq_len, fully_connected_dim)
            training -- Boolean, set to true to activate
                        the training mode for dropout layers
            mask -- Boolean mask to ensure that the padding is not 
                    treated as part of the input
        Returns:
            encoder_layer_out -- Tensor of shape (batch_size, input_seq_len, fully_connected_dim)
        """
        # START CODE HERE
        # calculate self-attention using mha(~1 line). Dropout will be applied during training
        attn_output = None # Self attention (batch_size, input_seq_len, fully_connected_dim)
        
        # apply layer normalization on sum of the input and the attention output to get the  
        # output of the multi-head attention layer (~1 line)
        out1 = None  # (batch_size, input_seq_len, fully_connected_dim)

        # pass the output of the multi-head attention layer through a ffn (~1 line)
        ffn_output = None  # (batch_size, input_seq_len, fully_connected_dim)
        
        # apply dropout layer to ffn output during training (~1 line)
        ffn_output =  None
        
        # apply layer normalization on sum of the output from multi-head attention and ffn output to get the
        # output of the encoder layer (~1 line)
        encoder_layer_out = None  # (batch_size, input_seq_len, fully_connected_dim)
        # END CODE HERE
        
        return encoder_layer_out

The test code is:

# UNIT TEST
EncoderLayer_test(EncoderLayer)

I get the following error:

---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
<ipython-input-16-00617004b1af> in <module>
      1 # UNIT TEST
----> 2 EncoderLayer_test(EncoderLayer)

~/work/W4A1/public_tests.py in EncoderLayer_test(target)
     92                        [[ 0.23017104, -0.98100424, -0.78707516,  1.5379084 ],
     93                        [-1.2280797 ,  0.76477575, -0.7169283 ,  1.1802323 ],
---> 94                        [ 0.14880152, -0.48318022, -1.1908402 ,  1.5252188 ]]), "Wrong values when training=True"
     95 
     96     encoded = encoder_layer1(q, False, np.array([[1, 1, 0]]))

AssertionError: Wrong values when training=True
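
As far as I can tell, the assertion is raised on the output computed with training=True, so my guess is that the training flag has to reach the dropout layers (the dropout inside self.mha and self.dropout_ffn). My current understanding is that the block between START CODE HERE and END CODE HERE should look roughly like the sketch below, using the helper layers already defined in the notebook (MultiHeadAttention, FullyConnected, LayerNormalization, Dropout), but I am not sure this is exactly what the grader expects:

        # self-attention over x; Keras MultiHeadAttention takes (query, value, key)
        # and applies its internal dropout only when training=True
        attn_output = self.mha(x, x, x, attention_mask=mask, training=training)

        # residual connection + layer normalization around the attention sub-layer
        out1 = self.layernorm1(x + attn_output)

        # position-wise feed-forward network
        ffn_output = self.ffn(out1)

        # dropout on the ffn output, gated by the same training flag
        ffn_output = self.dropout_ffn(ffn_output, training=training)

        # residual connection + layer normalization around the ffn sub-layer
        encoder_layer_out = self.layernorm2(out1 + ffn_output)

Is forwarding training=training to both self.mha and self.dropout_ffn the right way to handle dropout_rate here, or is that what leads to the "Wrong values when training=True" failure?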
