tensorflow.python.framework.errors_impl.OutOfRangeError: 读取的字节数少于请求的字节数 [[{{node checkpoint_initializer_196}}]]

2024-03-29 07:15:27 发布

您现在位置:Python中文网/ 问答频道 /正文

我使用 Google Research 发布的 BERT 对我自己的数据集进行分类。我在源代码中创建了一个新类来指定我自己的分类任务,并在 Colab 上运行代码。运行 run_classifier.py 时发生了错误。这是我的代码

创建我自己的任务

class MyProcessor(DataProcessor):
  """Processor for a custom three-class text classification data set.

  Reads `train.tsv`, `val.tsv` and `test.tsv` from the data directory.
  Rows of the train/val files hold the label in column 0 and the text in
  column 1; rows of the test file hold the text in column 0 and no label.
  """

  def __init__(self):
    # Closed set of class labels; returned unchanged by `get_labels`.
    self.labels = ['货物', '服务', '工程']

  def get_train_examples(self, data_dir):
    """Gets a collection of `InputExample`s for the train set."""
    return self._create_examples(
        self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")

  def get_dev_examples(self, data_dir):
    """Gets a collection of `InputExample`s for the dev set."""
    return self._create_examples(
        self._read_tsv(os.path.join(data_dir, "val.tsv")), "val")

  def get_test_examples(self, data_dir):
    """Gets a collection of `InputExample`s for prediction."""
    return self._create_examples(
        self._read_tsv(os.path.join(data_dir, "test.tsv")), "test")

  def get_labels(self):
    """Gets the list of labels for this data set."""
    return self.labels

  def _create_examples(self, lines, set_type):
    """Creates `InputExample`s from parsed TSV rows.

    Args:
      lines: Iterable of rows, each a list of column strings.
      set_type: Name of the split ("train", "val" or "test"). It prefixes
        every guid, and "test" selects the unlabeled row layout.

    Returns:
      A list of `InputExample`s, one per non-empty row.
    """
    examples = []
    for (i, line) in enumerate(lines):
      if not line:
        # csv.reader yields an empty list for a blank line (e.g. a trailing
        # newline); indexing it would raise IndexError, so skip it. The
        # guid still uses the original row index `i`.
        continue
      guid = "%s-%s" % (set_type, i)
      if set_type == "test":
        text_a = tokenization.convert_to_unicode(line[0])
        # Test rows carry no label; use a placeholder taken from the label
        # set so it is always a valid label.
        label = self.labels[0]
      else:
        text_a = tokenization.convert_to_unicode(line[1])
        label = tokenization.convert_to_unicode(line[0])
      examples.append(
          InputExample(guid=guid, text_a=text_a, text_b=None, label=label))
    return examples

  @classmethod
  def _read_tsv(cls, input_file, quotechar=None):
    """Reads a tab separated value file.

    Args:
      input_file: Path of the TSV file to read.
      quotechar: Quote character passed through to `csv.reader`.

    Returns:
      A list of rows, each a list of column strings.
    """
    with tf.gfile.Open(input_file, "r") as f:
      reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
      # Materialize all rows before the file is closed.
      return list(reader)

运行

# Train and evaluate the classifier on the custom task (notebook shell escape).
!python run_classifier.py \
        --task_name=mytask \
        --data_dir=data/ \
        --bert_config_file=checkpoint/bert_config.json \
        --init_checkpoint=checkpoint/bert_model.ckpt \
        --vocab_file=checkpoint/vocab.txt \
        --output_dir=./output/result_dir/ \
        --max_seq_length=50 \
        --do_train=true \
        --do_eval=true \
        --train_batch_size=128

Tags: text, self, for, data, get, return, tsv, def