在Google Colab中使用pandas读取数据集时,如何解决解析错误(ParserError)

-1 投票
1 回答
31 浏览
提问于 2025-04-12 23:31

movies = pd.read_csv('/content/tmdb_5000_movies.csv', on_bad_lines='skip')
    209                 else:
    210                     kwargs[new_arg_name] = new_arg_value
--> 211             return func(*args, **kwargs)
    212 
    213         return cast(F, wrapper)

/usr/local/lib/python3.10/dist-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
    329                     stacklevel=find_stack_level(),
    330                 )
--> 331             return func(*args, **kwargs)
    332 
    333         # error: "Callable[[VarArg(Any), KwArg(Any)], Any]" has no

/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
    948     kwds.update(kwds_defaults)
    949 
--> 950     return _read(filepath_or_buffer, kwds)
    951 
    952 

/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py in _read(filepath_or_buffer, kwds)
    609 
    610     with parser:
--> 611         return parser.read(nrows)
    612 
    613 

/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py in read(self, nrows)
   1776                     columns,
   1777                     col_dict,
-> 1778                 ) = self._engine.read(  # type: ignore[attr-defined]
   1779                     nrows
   1780                 )

/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/c_parser_wrapper.py in read(self, nrows)
    228         try:
    229             if self.low_memory:
--> 230                 chunks = self._reader.read_low_memory(nrows)
    231                 # destructive to chunks
    232                 data = _concatenate_chunks(chunks)

/usr/local/lib/python3.10/dist-packages/pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.read_low_memory()

/usr/local/lib/python3.10/dist-packages/pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._read_rows()

/usr/local/lib/python3.10/dist-packages/pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._tokenize_rows()

/usr/local/lib/python3.10/dist-packages/pandas/_libs/parsers.pyx in pandas._libs.parsers.raise_parser_error()

ParserError: Error tokenizing data. C error: EOF inside string starting at row 3336

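我需要在 Google Colab 中用 pandas 读取这个 CSV 文件。如上所示,即使已经传入 on_bad_lines='skip',read_csv 仍然抛出 “ParserError: Error tokenizing data. C error: EOF inside string starting at row 3336”。请问应该如何解决这个解析错误?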

1 个回答

0
movies = pd.read_csv('/content/tmdb_5000_movies.csv', error_bad_lines=False, skiprows=lambda x: x in skip_rows)

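请试试这个。说明:上面代码中的 skip_rows 需要事先自行定义为要跳过的行号集合(行号从 0 开始计数),例如把报错信息中提到的第 3336 行附近的行放进去,具体行号请以实际文件为准。另外,error_bad_lines 参数自 pandas 1.3 起已弃用,并在 pandas 2.0 中被移除;在较新的版本中请改用 on_bad_lines='skip'。“EOF inside string” 通常意味着文件中存在未闭合的引号,或者文件本身不完整(例如上传到 Colab 时被截断),因此也建议检查该行附近的原始数据,必要时重新上传文件。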

撰写回答