更新帖子:
我仔细研究了最初的建议,以找到问题的根源。最初的问题是,几个 feature 列产生了 AttributeError(与 'name' 或 'get_sparse_tensors' 属性相关)。
以下是“tuple”对象没有属性“name”的示例代码:
metro = tf.feature_column.indicator_column(tf.feature_column.categorical_column_with_identity("metro",94)),
tf.feature_column.indicator_column(metro),
Metro列的数据如下所示:
(Metro 列的数据示例在原帖中未能正确显示。)下面是一个关于“稀疏张量错误”(get_sparse_tensors)的示例代码:
browser = tf.feature_column.indicator_column(tf.feature_column.categorical_column_with_identity("browser",54))
tf.feature_column.indicator_column(browser),
我可以解决这两个错误,方法是用词汇表列表替换分类列
metro = tf.feature_column.categorical_column_with_vocabulary_list('metro',
vocabulary_list=['(not set)','Abilene-Sweetwater TX','Albany-Schenectady-Troy NY','Atlanta GA'])
由于使用 tf.feature_column 的 categorical_column_with_identity 比编写冗长的词汇表(vocabulary list)快得多,我很想知道为什么会出现这种错误?
这是MCVE:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import pandas as pd
import argparse
import tensorflow as tf
# Specify the CSV column names, in file order.
names = [
    'browser',
    'metro',
    'transactionRevenue'
]
# Specify the dtype for each column.
dtypes = {
    'browser': str,
    'metro': str,
    'transactionRevenue': np.float32
}
# Load the training data; '?' marks missing values in the file.
# Fixed: the original had "n a_values='?'" (a stray space inside the keyword
# argument name), which is a SyntaxError — it must be na_values='?'.
df = pd.read_csv('dropped_train.csv', names=names, dtype=dtypes,
                 na_values='?', encoding="ISO-8859-1")
def load_data(y_name="transactionRevenue", train_fraction=0.7, seed=None,
              data=None):
    """Split a DataFrame into shuffled (features, label) train/test sets.

    Args:
        y_name: Name of the label column to pop off the feature frames.
        train_fraction: Fraction of rows sampled into the training set.
        seed: Seed for numpy's global RNG and the pandas sampler.
        data: Optional DataFrame to split. Defaults to the module-level
            `df` loaded from dropped_train.csv (backward compatible).

    Returns:
        ((x_train, y_train), (x_test, y_test)) — feature DataFrames and
        label Series for the train and test splits.
    """
    if data is None:
        # Fall back to the globally loaded CSV, as the original code did.
        data = df
    # Seed numpy's global RNG; the split itself uses random_state below.
    np.random.seed(seed)
    # sample() both shuffles and selects the training fraction; the test
    # set is every row that was not sampled.
    x_train = data.sample(frac=train_fraction, random_state=seed)
    x_test = data.drop(x_train.index)
    # Extract the label column from the features.
    y_train = x_train.pop(y_name)
    y_test = x_test.pop(y_name)
    return (x_train, y_train), (x_test, y_test)
def features_columns():
    """Build the model's indicator feature columns.

    Fixes the two errors from the original post:
    - The trailing comma after the `metro` assignment made it a one-element
      tuple, which caused "'tuple' object has no attribute 'name'".
    - Both columns were wrapped in indicator_column twice (once at
      assignment and again in the list), which caused the
      'get_sparse_tensors' AttributeError — an indicator_column is not a
      categorical column and cannot be wrapped again.
    """
    metro = tf.feature_column.categorical_column_with_identity("metro", 94)
    browser = tf.feature_column.categorical_column_with_identity("browser", 54)
    feature_columns = [
        tf.feature_column.indicator_column(browser),
        tf.feature_column.indicator_column(metro),
    ]
    return feature_columns
features_columns()
# Directory where the Estimator writes checkpoints and summaries.
log_dir = ("C:\\…\\gs sales\\model")

# Command-line flags controlling training; defined via a small flag table.
parser = argparse.ArgumentParser()
_FLAGS = (
    ('--batch_size', 500, int, 'batch size'),
    ('--train_steps', 10000, int, 'number of training steps'),
    ('--norm_factor', 10., float, 'normalization factor'),
)
for _name, _default, _type, _help in _FLAGS:
    parser.add_argument(_name, default=_default, type=_type, help=_help)
def main(argv):
    """Builds, trains, and evaluates the model.

    Args:
        argv: Command-line arguments; argv[1:] is parsed by the module
            parser (--batch_size, --train_steps, --norm_factor).
    """
    args = parser.parse_args(argv[1:])
    (train_x, train_y), (test_x, test_y) = load_data()
    # Scale the regression target so the network trains on smaller values.
    train_y /= args.norm_factor
    test_y /= args.norm_factor
    # Build the training input pipeline. Fixed: use the --batch_size flag
    # instead of the hard-coded 64, so the parsed option actually applies.
    training_input_fn = tf.estimator.inputs.pandas_input_fn(
        x=train_x, y=train_y, batch_size=args.batch_size,
        shuffle=True, num_epochs=None)
    # Build the Estimator.
    model = tf.estimator.DNNRegressor(hidden_units=[50, 30, 10],
                                      feature_columns=features_columns(),
                                      model_dir=log_dir)
    # Train the model.
    model.train(input_fn=training_input_fn, steps=args.train_steps)
if __name__ == "__main__":
    # Enable INFO-level logging so training progress is printed to stdout.
    tf.logging.set_verbosity(tf.logging.INFO)
    # tf.app.run parses flags and invokes main(argv) (TF1-style entry point).
    tf.app.run(main=main)
目前没有回答
相关问题 更多 >
编程相关推荐