如何在googleml引擎上保存一个Tensorflow估计器模型

2024-04-24 10:14:04 发布

您现在位置:Python中文网/ 问答频道 /正文

我对Tensorflow很陌生。我试图建立和服务的模型上使用谷歌ML引擎估计器。但是,在尝试了一些方法之后,我不确定如何保存模型以供使用。你知道吗

我成功地训练了模型,精度可以接受。当我试图保存模型以供使用时,我四处搜索并找到了一些方法。但是,我还是遇到了一些问题。。。你知道吗

我尝试了3种出口方式,基于对其他几个问题的建议:

1)获取序列化示例作为输入-我遇到了一个错误“TypeError:bytes类型的对象不是JSON可序列化的”。另外,我也找不到一个好的方法来提供一个序列化的示例来有效地提供服务。由于我使用ML引擎提供服务,使用JSON输入似乎更容易。你知道吗

2)通过“基本”预处理获得JSON作为输入-我能够成功导出模型。在将模型加载到ML引擎之后,我试着做了一些预测。尽管返回了一个预测结果,但我发现,无论如何更改JSON输入,都会返回相同的结果。我查看了培训期间获得的验证结果。模型应该能够返回各种结果。我觉得服务函数中的预处理有问题,所以我尝试了第三种方法。。。你知道吗

3)具有“相同”预处理的JSON输入-我无法理解这一点,但我认为可能需要与在模型培训期间处理数据的方式完全相同的预处理。但是,作为服务输入函数tf.占位符,我不知道如何复制相同的预处理以使导出的模型工作。。。你知道吗

(请原谅我糟糕的编码风格…)


培训代码:

col_names = ['featureA','featureB','featureC']
target_name = 'langIntel'

col_def = {}
col_def['featureA'] = {'type':'float','tfType':tf.float32,'len':'fixed'}
col_def['featureB'] = {'type':'int','tfType':tf.int64,'len':'fixed'}
col_def['featureC'] = {'type':'bytes','tfType':tf.string,'len':'var'}


def _float_feature(value):
    if not isinstance(value, list): value = [value]
    return tf.train.Feature(float_list=tf.train.FloatList(value=value))

def _int_feature(value):
    if not isinstance(value, list): value = [value]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))

def _bytes_feature(value):
    if not isinstance(value, list): value = [value]
    return tf.train.Feature(
        bytes_list=tf.train.BytesList(
            value=[p.encode('utf-8') for p in value]
        )
    )

functDict = {'float':_float_feature,
    'int':_int_feature,'bytes':_bytes_feature
}

training_targets = []
# Omitted validatin partition


with open('[JSON FILE PATH]') as jfile:
    json_data_input = json.load(jfile)

random.shuffle(json_data_input)


with tf.python_io.TFRecordWriter('savefile1.tfrecord') as writer:
    for item in json_data_input:
        if item[target_name] > 0:
            feature = {}

            for col in col_names:
                feature[col] = functDict[col_def[col]['type']](item[col])

            training_targets.append(item[target_name])

            example = tf.train.Example(
                features=tf.train.Features(feature=feature)
            )
            writer.write(example.SerializeToString())


def _parse_function(example_proto):
        example = {}

        for col in col_names:
            if col_def[col]['len'] == 'fixed':
                example[col] = tf.FixedLenFeature([], col_def[col]['tfType'])
            else:
                example[col] = tf.VarLenFeature(col_def[col]['tfType'])

        parsed_example = tf.parse_single_example(example_proto, example)

        features = {}

        for col in col_names:
            features[col] = parsed_example[col]

        labels = parsed_example.get(target_name)

        return features, labels


def my_input_fn(batch_size=1,num_epochs=None):
    dataset = tf.data.TFRecordDataset('savefile1.tfrecord')

    dataset = dataset.map(_parse_function)
    dataset = dataset.shuffle(10000)
    dataset = dataset.repeat(num_epochs)
    dataset = dataset.batch(batch_size)
    iterator = dataset.make_one_shot_iterator()
    features, labels = iterator.get_next()

    return features, labels

allColumns = None

def train_model(
    learning_rate,
    n_trees,
    n_batchespl,
    batch_size):

    periods = 10

    vocab_list = ('vocab1', 'vocab2', 'vocab3')

    featureA_bucket = tf.feature_column.bucketized_column(
        tf.feature_column.numeric_column(
            key="featureA",dtype=tf.int64
            ), [5,10,15]
    )
    featureB_bucket = tf.feature_column.bucketized_column(
        tf.feature_column.numeric_column(
            key="featureB",dtype=tf.float32
        ), [0.25,0.5,0.75]
    )
    featureC_cat = tf.feature_column.indicator_column(
        tf.feature_column.categorical_column_with_vocabulary_list(
            key="featureC",vocabulary_list=vocab_list,
            num_oov_buckets=1
        )
    )


    theColumns = [featureA_bucket,featureB_bucket,featureC_cat]

    global allColumns
    allColumns = theColumns

    regressor = tf.estimator.BoostedTreesRegressor(
        feature_columns=theColumns,
        n_batches_per_layer=n_batchespl,
        n_trees=n_trees,
        learning_rate=learning_rate
    )

    training_input_fn = lambda: my_input_fn(batch_size=batch_size,num_epochs=5)
    predict_input_fn = lambda: my_input_fn(num_epochs=1)

    regressor.train(
        input_fn=training_input_fn
    )

    # omitted evaluation part

    return regressor

regressor = train_model(
    learning_rate=0.05,
    n_trees=100,
    n_batchespl=50,
    batch_size=20)

出口试用1:

def _serving_input_receiver_fn():
    serialized_tf_example = tf.placeholder(dtype=tf.string, shape=None, 
        name='input_example_tensor'
    )

    receiver_tensors = {'examples': serialized_tf_example}
    features = tf.parse_example(serialized_tf_example, feature_spec)
    return tf.estimator.export.ServingInputReceiver(features, 
        receiver_tensors
    )

servable_model_dir = "[OUT PATH]"
servable_model_path = regressor.export_savedmodel(servable_model_dir,
    _serving_input_receiver_fn
)

出口试用2:

def serving_input_fn():
    feature_placeholders = {
        'featureA': tf.placeholder(tf.int64, [None]),
        'featureB': tf.placeholder(tf.float32, [None]),
        'featureC': tf.placeholder(tf.string, [None, None])
    }

    receiver_tensors = {'inputs': feature_placeholders}

    feature_spec = tf.feature_column.make_parse_example_spec(allColumns)

    features = tf.parse_example(feature_placeholders, feature_spec)
    return tf.estimator.export.ServingInputReceiver(features, 
        feature_placeholders
    )

servable_model_dir = "[OUT PATH]"
servable_model_path = regressor.export_savedmodel(
    servable_model_dir, serving_input_fn
)

出口试用3:

def serving_input_fn():
    feature_placeholders = {
        'featureA': tf.placeholder(tf.int64, [None]),
        'featureB': tf.placeholder(tf.float32, [None]),
        'featureC': tf.placeholder(tf.string, [None, None])
    }    

    def toBytes(t):
        t = str(t)
        return t.encode('utf-8')

    tmpFeatures = {}

    tmpFeatures['featureA'] = tf.train.Feature(
        int64_list=feature_placeholders['featureA']
    )
    # TypeError: Parameter to MergeFrom() must be instance
    # of same class: expected tensorflow.Int64List got Tensor.
    tmpFeatures['featureB'] = tf.train.Feature(
        float_list=feature_placeholders['featureB']
    )
    tmpFeatures['featureC'] = tf.train.Feature(
        bytes_list=feature_placeholders['featureC']
    )

    tmpExample = tf.train.Example(
        features=tf.train.Features(feature=tmpFeatures)
    )
    tmpExample_proto = tmpExample.SerializeToString()

    example = {}

    for key, tensor in feature_placeholders.items():
        if col_def[key]['len'] == 'fixed':
            example[key] = tf.FixedLenFeature(
                [], col_def[key]['tfType']
            )
        else:
            example[key] = tf.VarLenFeature(
                col_def[key]['tfType']
            )

    parsed_example = tf.parse_single_example(
        tmpExample_proto, example
    )

    features = {}

    for key in tmpFeatures.keys():
        features[key] = parsed_example[key]

    return tf.estimator.export.ServingInputReceiver(
        features, feature_placeholders
    )

servable_model_dir = "[OUT PATH]"
servable_model_path = regressor.export_savedmodel(
    servable_model_dir, serving_input_fn
)

服务输入函数应该如何构造,以便输入JSON文件进行预测?非常感谢您的真知灼见!你知道吗


Tags: keynoneinputmodelvalueexampletfdef
1条回答
网友
1楼 · 发布于 2024-04-24 10:14:04

只是提供一个更新-我仍然无法得到出口完成。然后,我用Keras重建了训练模型,并成功地导出了模型以供服务(在我的例子中,重建模型可能占用了我较少的时间来找出如何导出估计器模型……)

相关问题 更多 >