ConcatOp : Dimensions of inputs should match: shape[0] = [1,24] vs. shape[1] = [256,8] [Op:ConcatV2] name: concat


I trained an RNN classifier. Here is my code:

class RNNClassifier:

def __init__(self, vocab_size, sequence_length):
    # Define the hyperparameters
    self.learning_rate = 0.2  # Should be OK as-is
    self.training_epochs = 10  # How long to train for - chosen to fit within class time
    self.display_epoch_freq = 5  # How often to test and print out statistics
    self.dim = 24  # Dimension of the RNN hidden state
    self.embedding_dim = 8  # Dimension of the learned word embeddings
    self.batch_size = 256  # Somewhat arbitrary - can be tuned, but usually for speed, not accuracy
    self.vocab_size = vocab_size  # Defined by the file reader above
    self.sequence_length = sequence_length  # Defined by the file reader above
    self.l2_lambda = 0.001

    self.trainable_variables = []

    # Define the parameters
    self.E = tf.Variable(tf.random.normal([self.vocab_size, self.embedding_dim], stddev=0.1))
    self.trainable_variables.append(self.E)
    
    self.W_cl = tf.Variable(tf.random.normal([self.dim, 5], stddev=0.1))
    self.b_cl = tf.Variable(tf.random.normal([5], stddev=0.1))
    self.trainable_variables.append(self.W_cl)
    self.trainable_variables.append(self.b_cl)
    
    # TODO 1: Define the RNN parameters
    self.W = tf.Variable(tf.random.normal([self.dim + self.embedding_dim, self.dim], stddev=0.1)) 
    self.b = tf.Variable(tf.random.normal([self.dim], stddev=0.1))
    self.trainable_variables.append(self.W)
    self.trainable_variables.append(self.b)

    
def model_1(self,x):
    # Split up the inputs into individual tensors
    self.x_slices = tf.split(x, self.sequence_length, 1)
    #print('x slices')
    #print(self.x_slices)
    # Define the start state of the RNN
    self.h_zero = tf.zeros([1, self.dim])  
    
    # TODO 2: Write a (very short) Python function that defines one step of an RNN
    def step_1(x, h_prev):
        #add your code here
            
        x_e=tf.nn.embedding_lookup(self.E,x)
        #print(x_e.shape)
        #print(h_prev.shape)
        concat=tf.concat([h_prev,x_e],1)
        #concat=tf.concat(h_prev,x_e)
        h = tf.tanh(tf.matmul(concat,self.W)+self.b)
        return h
    
    # TODO 3: Unroll the RNN using a for loop, and obtain the sentence representation with the final hidden state
    sentence_representation = None
    sentence_representation = self.h_zero
   # tf.print(sentence_representation)

    #for slice in self.x_slices:
    #    tf.print(slice)
     
    for slice in self.x_slices:
        slice_1=tf.reshape(slice,shape=[-1])
        sentence_representation=step_1(slice_1, sentence_representation)
   

    # Compute the logits using one last linear layer
    logits = tf.matmul(sentence_representation, self.W_cl) + self.b_cl
    
    return logits 

def train(self, training_set, dev_set):
    def get_minibatch(dataset, start_index, end_index):
        indices = range(start_index, end_index)
        vectors = np.vstack([dataset[i]['index_sequence'] for i in indices])
        labels = [dataset[i]['label'] for i in indices]
        return vectors, labels
  
    print('Training.')

    # Training cycle
    for epoch in range(self.training_epochs):
        random.shuffle(training_set)
        avg_cost = 0.
        total_batch = int(len(training_set) / self.batch_size)
        
        # Loop over all batches in epoch
        for i in range(total_batch):
            # Assemble a minibatch of the next B examples
            minibatch_vectors, minibatch_labels = get_minibatch(training_set, 
                                                                self.batch_size * i, 
                                                                self.batch_size * (i + 1))

            # Run the optimizer to take a gradient step, and also fetch the value of the 
            # cost function for logging
            with tf.GradientTape() as tape:
              logits = self.model_1(minibatch_vectors)
            
              # Define the L2 cost
              self.l2_cost = self.l2_lambda * (tf.reduce_sum(tf.square(self.W)) +
                                              tf.reduce_sum(tf.square(self.W_cl)))

              # Define the cost function (here, the softmax exp and sum are built in)
              total_cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=minibatch_labels, logits=logits) + self.l2_cost)
    
            # This  performs the main SGD update equation with gradient clipping
            optimizer = tf.optimizers.SGD(self.learning_rate)
            gradients = tape.gradient(total_cost, self.trainable_variables)
            gvs = zip(gradients, self.trainable_variables)
            capped_gvs = [(tf.clip_by_norm(grad, 5.0), var) for grad, var in gvs if grad is not None]
            optimizer.apply_gradients(capped_gvs)
                                                                        
            # Compute average loss
            avg_cost += total_cost / total_batch
            
        # Display some statistics about the step
        # Evaluating only one batch worth of data -- simplifies implementation slightly
        if (epoch+1) % self.display_epoch_freq == 0:
            tf.print("Epoch:", (epoch+1), "Cost:", avg_cost, \
                "Dev acc:", evaluate_classifier(self.classify, dev_set[0:256]), \
                "Train acc:", evaluate_classifier(self.classify, training_set[0:256]))  

def classify(self, examples):
    # This classifies a list of examples
    vectors = np.int32(np.vstack([example['index_sequence'] for example in examples]))
    logits = self.model_1(vectors)
    return np.argmax(logits, axis=1)

I get the following error: ConcatOp : Dimensions of inputs should match: shape[0] = [1,24] vs. shape[1] = [256,8] [Op:ConcatV2] name: concat

What should I do to fix this?
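
Based on the shapes in the message, the mismatch comes from the start state: self.h_zero is created as tf.zeros([1, self.dim]) (shape [1, 24]), while each embedded slice x_e has shape [batch_size, embedding_dim] ([256, 8]), so tf.concat([h_prev, x_e], 1) cannot line up dimension 0 on the first step. Below is a minimal sketch of one possible fix (not a confirmed solution; it assumes the rest of the class stays as posted): give the start state the same batch dimension as the input.

def model_1(self, x):
    # Split up the inputs into individual tensors
    self.x_slices = tf.split(x, self.sequence_length, 1)

    # Changed line: take the batch size from the input itself, so h_prev and
    # x_e agree in dimension 0 (256 for a full minibatch) when concatenated.
    self.h_zero = tf.zeros([tf.shape(x)[0], self.dim])

    def step_1(x_t, h_prev):
        x_e = tf.nn.embedding_lookup(self.E, x_t)   # [batch_size, embedding_dim]
        concat = tf.concat([h_prev, x_e], 1)        # [batch_size, dim + embedding_dim]
        return tf.tanh(tf.matmul(concat, self.W) + self.b)

    # Unroll the RNN; the final hidden state is the sentence representation
    sentence_representation = self.h_zero
    for slice in self.x_slices:
        sentence_representation = step_1(tf.reshape(slice, shape=[-1]), sentence_representation)

    # Compute the logits using one last linear layer
    logits = tf.matmul(sentence_representation, self.W_cl) + self.b_cl
    return logits

Hard-coding tf.zeros([self.batch_size, self.dim]) would also make the concat shapes match, but reading the batch size from tf.shape(x)[0] keeps classify() working when it is called on a number of examples other than 256.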