我训练了一个RNN分类器。这是我的密码:
类RNN标识符: definit(自身、语音大小、序列长度): #定义超参数 自学率=0.2应该是正确的 self.training_epochs=10#培训时间-选择适合上课时间的时间 self.display_epoch_freq=5#测试和打印统计数据的频率 self.dim=24#RNN隐藏状态的维度 self.embedding_dim=8#学习单词嵌入的维度 self.batch_size=256#有点随意-可以调整,但通常调整速度,而不是准确性 self.vocab_size=vocab_size#由上述文件读取器定义 self.sequence_length=sequence_length#由上述文件读取器定义 self.l2_lambda=0.001
self.trainable_variables = []
# Define the parameters
self.E = tf.Variable(tf.random.normal([self.vocab_size, self.embedding_dim], stddev=0.1))
self.trainable_variables.append(self.E)
self.W_cl = tf.Variable(tf.random.normal([self.dim, 5], stddev=0.1))
self.b_cl = tf.Variable(tf.random.normal([5], stddev=0.1))
self.trainable_variables.append(self.W_cl)
self.trainable_variables.append(self.b_cl)
# TODO 1: Define the RNN parameters
self.W = tf.Variable(tf.random.normal([self.dim + self.embedding_dim, self.dim], stddev=0.1))
self.b = tf.Variable(tf.random.normal([self.dim], stddev=0.1))
self.trainable_variables.append(self.W)
self.trainable_variables.append(self.b)
def model_1(self,x):
# Split up the inputs into individual tensors
self.x_slices = tf.split(x, self.sequence_length, 1)
#print('x slices')
#print(self.x_slices)
# Define the start state of the RNN
self.h_zero = tf.zeros([1, self.dim])
# TODO 2: Write a (very short) Python function that defines one step of an RNN
def step_1(x, h_prev):
#add your code here
x_e=tf.nn.embedding_lookup(self.E,x)
#print(x_e.shape)
#print(h_prev.shape)
concat=tf.concat([h_prev,x_e],1)
#concat=tf.concat(h_prev,x_e)
h = tf.tanh(tf.matmul(concat,self.W)+self.b)
return h
# TODO 3: Unroll the RNN using a for loop, and obtain the sentence representation with the final hidden state
sentence_representation = None
sentence_representation = self.h_zero
# tf.print(sentence_representation)
#for slice in self.x_slices:
# tf.print(slice)
for slice in self.x_slices:
slice_1=tf.reshape(slice,shape=[-1])
sentence_representation=step_1(slice_1, sentence_representation)
# Compute the logits using one last linear layer
logits = tf.matmul(sentence_representation, self.W_cl) + self.b_cl
return logits
def train(self, training_data, dev_set):
def get_minibatch(dataset, start_index, end_index):
indices = range(start_index, end_index)
vectors = np.vstack([dataset[i]['index_sequence'] for i in indices])
labels = [dataset[i]['label'] for i in indices]
return vectors, labels
print('Training.')
# Training cycle
for epoch in range(self.training_epochs):
random.shuffle(training_set)
avg_cost = 0.
total_batch = int(len(training_set) / self.batch_size)
# Loop over all batches in epoch
for i in range(total_batch):
# Assemble a minibatch of the next B examples
minibatch_vectors, minibatch_labels = get_minibatch(training_set,
self.batch_size * i,
self.batch_size * (i + 1))
# Run the optimizer to take a gradient step, and also fetch the value of the
# cost function for logging
with tf.GradientTape() as tape:
logits = self.model_1(minibatch_vectors)
# Define the L2 cost
self.l2_cost = self.l2_lambda * (tf.reduce_sum(tf.square(self.W)) +
tf.reduce_sum(tf.square(self.W_cl)))
# Define the cost function (here, the softmax exp and sum are built in)
total_cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=minibatch_labels, logits=logits) + self.l2_cost)
# This performs the main SGD update equation with gradient clipping
optimizer = tf.optimizers.SGD(self.learning_rate)
gradients = tape.gradient(total_cost, self.trainable_variables)
gvs = zip(gradients, self.trainable_variables)
capped_gvs = [(tf.clip_by_norm(grad, 5.0), var) for grad, var in gvs if grad is not None]
optimizer.apply_gradients(capped_gvs)
# Compute average loss
avg_cost += total_cost / total_batch
# Display some statistics about the step
# Evaluating only one batch worth of data -- simplifies implementation slightly
if (epoch+1) % self.display_epoch_freq == 0:
tf.print("Epoch:", (epoch+1), "Cost:", avg_cost, \
"Dev acc:", evaluate_classifier(self.classify, dev_set[0:256]), \
"Train acc:", evaluate_classifier(self.classify, training_set[0:256]))
def classify(self, examples):
# This classifies a list of examples
vectors = np.int32(np.vstack([example['index_sequence'] for example in examples]))
logits = self.model_1(vectors)
return np.argmax(logits, axis=1)
我得到以下错误: ConcatOp:输入的维度应该匹配:shape[0]=[1,24]与shape[1]=[256,8][Op:ConcatV2]名称:concat
我应该怎么做才能解决这个问题
目前没有回答
相关问题 更多 >
编程相关推荐