如何在Google Colab中使用pandas读取数据集时解决解析错误
movies = pd.read_csv('/content/tmdb_5000_movies.csv', on_bad_lines='skip')
209 else:
210 kwargs[new_arg_name] = new_arg_value
--> 211 return func(*args, **kwargs)
212
213 return cast(F, wrapper)
/usr/local/lib/python3.10/dist-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
329 stacklevel=find_stack_level(),
330 )
--> 331 return func(*args, **kwargs)
332
333 # error: "Callable[[VarArg(Any), KwArg(Any)], Any]" has no
/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
948 kwds.update(kwds_defaults)
949
--> 950 return _read(filepath_or_buffer, kwds)
951
952
/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py in _read(filepath_or_buffer, kwds)
609
610 with parser:
--> 611 return parser.read(nrows)
612
613
/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py in read(self, nrows)
1776 columns,
1777 col_dict,
-> 1778 ) = self._engine.read( # type: ignore[attr-defined]
1779 nrows
1780 )
/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/c_parser_wrapper.py in read(self, nrows)
228 try:
229 if self.low_memory:
--> 230 chunks = self._reader.read_low_memory(nrows)
231 # destructive to chunks
232 data = _concatenate_chunks(chunks)
/usr/local/lib/python3.10/dist-packages/pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.read_low_memory()
/usr/local/lib/python3.10/dist-packages/pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._read_rows()
/usr/local/lib/python3.10/dist-packages/pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._tokenize_rows()
/usr/local/lib/python3.10/dist-packages/pandas/_libs/parsers.pyx in pandas._libs.parsers.raise_parser_error()
ParserError: Error tokenizing data. C error: EOF inside string starting at row 3336
我需要读取一个csv文件
1 个回答
0
movies = pd.read_csv('/content/tmdb_5000_movies.csv', error_bad_lines=False, skiprows=lambda x: x in skip_rows)
请试试这个