# Characters the recognizer can emit: the ten digits followed by a-z.
alphabet = "0123456789" "abcdefghijklmnopqrstuvwxyz"

# Shape handed to the network input; original note: images are 96 x 32
# with 1 channel of color (gray).
input_dim_model = (1, 32, 96)

# One class per alphabet character plus one extra class
# (presumably the CTC blank, cf. blankTokenId=0 further down -- TODO confirm).
num_output_classes = 1 + len(alphabet)

# Hidden size passed to every LSTM in the recurrent part of the model.
ltsm_hidden = 256

def bidirectionalLTSM(features, nHidden, nOut):
    """Run a forward and a backward LSTM over `features` and project the result.

    Args:
        features: input that both recurrences are applied to.
        nHidden: size given to each `C.layers.LSTM`.
        nOut: size given to the final `C.layers.Dense` layer.

    Returns:
        The dense projection of the spliced forward and backward outputs.
    """
    forward_states = C.layers.Recurrence(C.layers.LSTM(nHidden))(features)
    backward_states = C.layers.Recurrence(
        C.layers.LSTM(nHidden), go_backwards=True
    )(features)
    # Join the outputs of the two directions before the projection.
    both_directions = C.splice(forward_states, backward_states)
    return C.layers.Dense(nOut)(both_directions)

def create_model_rnn(features):
    """Stack two bidirectional LSTM layers on top of `features`.

    The first layer outputs `ltsm_hidden` values per step; the second one
    projects to `num_output_classes` values per step.
    """
    hidden = bidirectionalLTSM(features, ltsm_hidden, ltsm_hidden)
    return bidirectionalLTSM(hidden, ltsm_hidden, num_output_classes)

def create_model_cnn(features):
    """Apply the convolutional part of the model to `features`.

    Only the first convolution ('conv_0') and the last batch normalization
    ('batchnorm_6') are visible here; the layers in between are elided in
    this listing.

    Returns:
        The output of the final batch-normalization layer.
    """
    # Layers created inside this scope use Glorot-uniform initialization and
    # ReLU activation as their defaults.
    with C.layers.default_options(init=C.glorot_uniform(), activation=C.relu):
        h = features

        # 3x3 convolution with padding enabled.
        h = C.layers.Convolution2D(filter_shape=(3,3), 
                                    pad=True, name='conv_0')(h)

        #more layers...

        h = C.layers.BatchNormalization(name="batchnorm_6")(h)

        return h

# Network input "x": a single tensor with the static shape input_dim_model.
x = C.input_variable(input_dim_model, name="x")
# Target "y": a sequence whose elements each hold num_output_classes values.
label = C.sequence.input(num_output_classes, name="y")

def create_model(features):
    """Build the full network: CNN features, sequence conversion, then RNN.

    The CNN output is reshaped to (512, 23), transposed to (23, 512) and then
    turned into a sequence laid out like the module-level `label` input
    before being passed to the recurrent layers.
    """
    #Composite(x: Tensor[1,32,96]) -> Tensor[512,1,23]
    cnn_out = create_model_cnn(features)

    #Drop the singleton axis, then move the 23-long axis to the front:
    #Tensor[512,1,23] -> Tensor[512,23] -> Tensor[23,512]
    columns = C.swapaxes(C.reshape(cnn_out, (512, 23)), 0, 1)

    #is there a better way to convert to sequence and still be compatible with forward_backwards() ?
    #Composite(x: Tensor[1,32,96], y: Sequence[Tensor[37]]) -> Sequence[Tensor[512]]
    as_sequence = C.to_sequence_like(columns, label)

    #Composite(x: Tensor[1,32,96], y: Sequence[Tensor[37]]) -> Sequence[Tensor[37]]
    return create_model_rnn(as_sequence)

# Full model applied to the image input.
#Composite(x: Tensor[1,32,96], y: Sequence[Tensor[37]]) -> Sequence[Tensor[37]]
z = create_model(x)

# Label graph consumed by the CTC criterion.
#LabelsToGraph(y: Sequence[Tensor[37]]) -> Sequence[Tensor[37]]
graph = C.labels_to_graph(label)

# CTC loss with token 0 as the blank. Reuses `graph` instead of building a
# second, identical labels_to_graph node.
#Composite(y: Sequence[Tensor[37]], x: Tensor[1,32,96]) -> np.float32
criteria = C.forward_backward(graph, z, blankTokenId=0)

# Edit-distance metric; token 0 (the blank) is ignored and inputs are squashed.
err = C.edit_distance_error(z, label, squashInputs=True, tokensToIgnore=[0])
lr = C.learning_rate_schedule(0.01, C.UnitType.sample)
# use_mean_gradient=True is the correction this file itself applies further
# down. It has to be set here, because the Trainer below captures this learner
# object and would not see a later rebinding of `learner`.
learner = C.adadelta(z.parameters, lr, use_mean_gradient=True)

# Progress printer constructed with a frequency argument of 50 and first=10.
progress_printer = C.logging.progress_print.ProgressPrinter(50, first=10, tag='Training')
trainer = C.Trainer(z, (criteria, err), learner, progress_writers=[progress_printer])

#some more custom code ...
#below is how I'm feeding the data

# Train indefinitely; this loop has no exit condition of its own.
while True:
    # x1: list of numpy arrays containing training images
    # y1: list of numpy arrays with one hot encoded labels
    x1, y1 = custom_datareader.next_minibatch()

    # Bind the batch to the model's input variables and run one update.
    minibatch_feed = {x: x1, label: y1}
    trainer.train_minibatch(minibatch_feed)




^ 0 是 CTC 解码中空白字符的索引。“:”前面的其他值是标签的字符代码。1 表示对序列中的每个向量进行独热(one-hot)编码。末尾有一串空白字符,用于确保序列长度与所支持的最大序列长度一致,因为在撰写本文时,CTC 损失函数的实现尚不支持可变长度序列。


# Corrected learner: same parameters and schedule, with use_mean_gradient=True.
# NOTE(review): rebinding `learner` here does not alter a Trainer that was
# already constructed with the previous learner object; create the Trainer
# after this line for the setting to take effect.
learner = C.adadelta(
    z.parameters,
    lr,
    use_mean_gradient=True,
)

