我得到了集成神经网络的0%堆栈测试精度,其中基本学习者是CNN和BiLSTM

2024-04-26 22:01:08 发布

您现在位置:Python中文网/ 问答频道 /正文

我正在根据极性对评论进行二元分类。 我在Keras中创建了一个集成模型,使用CNN和BiLSTM作为基础模型,使用神经网络作为元学习器。我正在使用堆叠。但我得到的堆栈集合精度为0%。我应该在代码中做哪些更改才能顺利运行?我哪里做错了

text_input_layer = Input(shape=(length_trainshuffle,))
embedding_layer = Embedding(vocab_size, 100)(text_input_layer)

text_layer_cnn = Conv1D(128, 5, activation='relu')(embedding_layer)
text_layer_cnn = GlobalMaxPooling1D()(text_layer_cnn)
text_layer_cnn = Dropout(0.2)(text_layer_cnn)
text_layer_cnn = Dense(5,kernel_initializer='glorot_uniform', activation='tanh')(text_layer_cnn)
output_layer_cnn = Dense(1, kernel_initializer='glorot_uniform',activation='sigmoid')(text_layer_cnn)
model_cnn = Model(text_input_layer, output_layer_cnn)
optimizer = Adamax(lr=0.001,decay=0.0001)
model_cnn.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
filepath_cnn="cnnmodel.best.hdf5"
checkpoint_cnn = ModelCheckpoint(filepath_cnn, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
callbacks_list_cnn = [checkpoint_cnn]
    # Fit the model    
model_cnn.fit(trainX, array(trainlabelshuffle), epochs=10,batch_size=80, validation_data = (validateX, array(validatelabelshuffle)), callbacks=callbacks_list_cnn, verbose=1) 
model_cnn.save(filepath_cnn)
print('>Saved %s' % filepath_cnn)    
loss_cnn, acc_cnn = model_cnn.evaluate(testX,array(testlabelshuffle), verbose=0)
print('Test Accuracy CNN: %f' % (acc_cnn*100))
print('Loss CNN: %f' %(loss_cnn))     




text_layer_bilstm = Bidirectional(CuDNNLSTM(256))(embedding_layer)
output_layer_bilstm = Dense(1, kernel_initializer='glorot_uniform',activation='sigmoid')(text_layer_bilstm)
model_bilstm = Model(text_input_layer, output_layer_bilstm)
optimizer_bilstm = Adamax(lr=0.001,decay=0.0001)
model_bilstm.compile(optimizer=optimizer_bilstm, loss='binary_crossentropy', metrics=['accuracy'])
filepath_bilstm="bilstm_model.best.hdf5"
checkpoint_bilstm = ModelCheckpoint(filepath_bilstm, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
callbacks_list_bilstm = [checkpoint_bilstm]
    # Fit the model    
model_bilstm.fit(trainX, array(trainlabelshuffle), epochs=10,batch_size=80, validation_data = (validateX, array(validatelabelshuffle)), callbacks=callbacks_list_bilstm, verbose=1) 
model_bilstm.save(filepath_bilstm)
print('>Saved %s' % filepath_bilstm) 
loss_bilstm, acc_bilstm = model_bilstm.evaluate(testX,array(testlabelshuffle), verbose=0)
print('Test Accuracy bilstm: %f' % (acc_bilstm*100))
print('Loss bilstm: %f' %(loss_bilstm))





all_models = list()
cnnmodel = load_model(filepath_cnn)
# add to list of members
all_models.append(cnnmodel)
print('>loaded %s' % filepath_cnn)
bilstmmodel = load_model(filepath_bilstm)
# add to list of members
all_models.append(bilstmmodel)
print('>loaded %s' % filepath_bilstm)






def define_stacked_model(all_models):
    # update all layers in all models to not be trainable
    for i in range(len(all_models)):
        model = all_models[i]
        for layer in model.layers:
            # make not trainable
            layer.trainable = False
            # rename to avoid 'unique layer name' issue
            layer.name = 'ensemble_' + str(i+1) + '_' + layer.name
    # define multi-headed input
    ensemble_visible = [model.input for model in all_models]
    # concatenate merge output from each model
    ensemble_outputs = [model.output for model in all_models]
    merge = concatenate(ensemble_outputs)

    hidden = Dense(10, activation='relu')(merge)
  #hidden = Flatten()(hidden)
    output = Dense(2, activation='softmax')(hidden)
    model = Model(inputs=ensemble_visible, outputs=output)
    # plot graph of ensemble
    plot_model(model, show_shapes=True, to_file='model_graph.png')
    # compile
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


def fit_stacked_model(model, inputX, inputy):
    # prepare input data
    X = [inputX for _ in range(len(model.input))]
    # encode output data
    inputy_enc = to_categorical(inputy)
    # fit model
    model.fit(X, inputy_enc, epochs=10, verbose=1)

# make a prediction with a stacked model
def predict_stacked_model(model, inputX):
    # prepare input data
    X = [inputX for _ in range(len(model.input))]
    # make prediction
    return model.predict(X, verbose=0)


stacked_model = define_stacked_model(all_models)
stacked_model.summary()
# fit stacked model on test dataset
fit_stacked_model(stacked_model,validateX,array(validatelabelshuffle))
#stacked_model.fit(X=testX,y=array(testlabelshuffle),epochs=10, verbose=1)
# make predictions and evaluate
yhat = predict_stacked_model(stacked_model, testX)
yhat = argmax(yhat, axis=1)
acc = accuracy_score(array(testlabelshuffle), yhat)
print('Stacked Test Accuracy: %.3f' % acc)