我正在尝试用 Keras 在自定义数据集上训练一个 VGG8 + ArcFace 模型。训练过程看起来是正常的(并且收敛):随着批次和轮次(epoch)的推进,准确率上升、损失下降。但是当我用训练好的模型提取嵌入(embedding)时,无论输入哪张人脸,返回的嵌入都完全相同。ArcFace 层的代码基于:https://github.com/4uiiurz1/keras-arcface
我还尝试了不同的网络结构(VGG16、VGG19、ResNet101、ResNet50)、不同的批量大小(128、32、16、1),以及分别使用 Adam 和 SGD 并调整学习率(lr),但结果都一样。我也试过不同的 ArcFace 参数(s=1、30、50、64,m=0.2、0.3、0.5、1.3),同样没有变化,尽管训练在 11–14 个 epoch 后就收敛了。
训练样本 1645 个,测试样本 705 个,验证(val)样本 705 个。图像大小为 112×112(RGB)。
有人能帮忙吗?
class ArcFace(Layer):
    """Additive angular margin (ArcFace) classification head.

    The layer is called on a pair ``[x, y]``: ``x`` is the feature tensor and
    ``y`` is a label mask with one column per class (presumably one-hot --
    the margin is applied wherever ``y`` is 1).

    The output is a softmax distribution over ``n_classes``; it is NOT an
    embedding. To obtain embeddings, read the tensor that is fed into this
    layer as ``x`` rather than this layer's output.

    Args:
        n_classes: Number of classes (columns of the weight matrix).
        s: Scale applied to the cosine logits before the softmax.
        m: Angular margin (radians) added to the target-class angle.
        regularizer: Optional regularizer (anything accepted by
            ``regularizers.get``) applied to the class weight matrix.
        **kwargs: Forwarded to the base ``Layer`` constructor.
    """

    def __init__(self, n_classes=47, s=16.0, m=0.2, regularizer=None, **kwargs):
        super(ArcFace, self).__init__(**kwargs)
        self.n_classes = n_classes
        self.s = s
        self.m = m
        self.regularizer = regularizers.get(regularizer)

    def build(self, input_shape):
        """Create the (feature_dim, n_classes) class weight matrix.

        ``input_shape`` is a list ``[feature_shape, label_shape]``; only the
        last dimension of the feature shape is used.
        """
        super(ArcFace, self).build(input_shape[0])
        self.W = self.add_weight(name='W',
                                 shape=(input_shape[0][-1], self.n_classes),
                                 initializer='glorot_uniform',
                                 trainable=True,
                                 regularizer=self.regularizer)

    def call(self, inputs):
        """Return softmax probabilities with the margin applied where ``y`` is 1."""
        x, y = inputs
        # L2-normalise each feature row and each class weight column so the
        # dot product below is the cosine between feature and class centre.
        x = tf.nn.l2_normalize(x, axis=1)
        W = tf.nn.l2_normalize(self.W, axis=0)
        logits = x @ W
        # Clip strictly inside (-1, 1): the gradient of acos is unbounded at
        # +/-1, which would otherwise cause a division by zero on backward.
        theta = tf.acos(K.clip(logits, -1.0 + K.epsilon(), 1.0 - K.epsilon()))
        target_logits = tf.cos(theta + self.m)
        # Use the margin-penalised cosine where y == 1, the plain cosine elsewhere.
        logits = logits * (1 - y) + target_logits * y
        # Feature re-scale.
        logits *= self.s
        return tf.nn.softmax(logits)

    def compute_output_shape(self, input_shape):
        """Output is one probability per class for each sample."""
        return (None, self.n_classes)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created
        when a model containing it is serialized and reloaded."""
        config = super(ArcFace, self).get_config()
        config.update({
            'n_classes': self.n_classes,
            's': self.s,
            'm': self.m,
            'regularizer': regularizers.serialize(self.regularizer),
        })
        return config
# Body of a model-building function (its `def` line is not part of this
# excerpt). Builds a two-input Keras model: an image plus a label vector,
# ending in the ArcFace head.
n_classes = 47
# 112x112 RGB image input.
input = Input(shape=(112, 112, 3))
# Label input with one entry per class; it is passed to the ArcFace layer
# together with the features (presumably one-hot -- confirm against the
# data generator).
y = Input(shape=(n_classes,))
# Convolutional trunk: six vgg_block + 2x2 max-pool stages. The second
# argument grows 16 -> 512 (presumably the filter count; vgg_block is
# defined elsewhere -- TODO confirm its signature).
x = vgg_block(input, 16, 2)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = vgg_block(x, 32, 2)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = vgg_block(x, 64, 2)
x = MaxPooling2D(pool_size=(2, 2))(x)
# The author's additions: three further stages (128, 256, 512).
# NOTE(review): assuming vgg_block preserves spatial size, six 2x2 pools
# shrink 112 -> 56 -> 28 -> 14 -> 7 -> 3 -> 1 -- confirm this is intended.
x = vgg_block(x, 128, 2)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = vgg_block(x, 256, 2)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = vgg_block(x, 512, 2)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = BatchNormalization()(x)
x = Dropout(0.25)(x)
#x = PReLU()(x)
x = Flatten()(x)
# Dense projection to args.num_features units, followed by batch norm.
# This tensor is what the ArcFace layer receives as its feature input, so
# it is the tensor to read when extracting embeddings.
x = Dense(args.num_features, kernel_initializer='he_normal', bias_initializer='he_uniform',
kernel_regularizer=regularizers.l2(weight_decay))(x)
x = BatchNormalization()(x)
# ArcFace head: takes [features, labels] and outputs softmax probabilities
# over n_classes, so the model output is NOT the embedding.
output = ArcFace(n_classes=n_classes, regularizer=regularizers.l2(weight_decay), name='arcFace')([x, y])
return Model([input, y], output)
# Training callbacks: per-epoch CSV log under <model_path>/<name>/log.csv,
# TensorBoard summaries, and an early abort if the loss becomes NaN.
csv_log_path = os.path.join(args.model_path, args.name, 'log.csv')
callbacks = [CSVLogger(csv_log_path)]
callbacks.append(TensorBoard(log_dir=log_p, profile_batch=0))
callbacks.append(TerminateOnNaN())

# Optional cosine-annealing learning-rate schedule spanning all epochs.
if args.scheduler == 'CosineAnnealing':
    cosine_schedule = CosineAnnealingScheduler(
        T_max=args.epochs,
        eta_max=args.lr,
        eta_min=args.min_lr,
        verbose=1)
    callbacks.append(cosine_schedule)

# Validate on whole batches only: number of validation items divided by
# the batch size, truncated to an integer.
n_validation_items = len(args.image_dic['validation'])
model.fit_generator(
    training_generator,
    epochs=args.epochs,
    validation_data=validation_generator,
    validation_steps=int(n_validation_items / args.batch_size),
    use_multiprocessing=False,
    callbacks=callbacks,
    verbose=1,
)
目前没有回答
相关问题 更多 >
编程相关推荐