pandas 不可变 DataFrame
我想要一个不可变的数据框(DataFrame),用作程序中的参考表:在它最初创建好之后(在我的情况下,是在某个类的 __init__() 方法里创建的),它的内容应该是只读的,不能再被修改。
我注意到索引对象是冻结的。
有没有办法让整个数据框变成不可变的呢?
4 个回答
通过查看pandas的实现方式和利用它的功能,我们可以对DataFrame对象进行一些修改,以实现特定的行为。我写了一个叫做 make_dataframe_immutable(dataframe)
的方法来解决这个问题。这是针对pandas版本0.25.3写的。
编辑:我还添加了针对pandas版本1.0.5和1.1.4的解决方案。
新的pandas版本可能需要一些调整,希望根据下面的测试内容来修改不会太难。
这个解决方案是新的,测试得还不够全面,任何反馈都非常欢迎。
如果有人能在这里发布一个相反的 make_dataframe_mutable()
方法,那就太好了。
import functools
import numpy as np
import pandas as pd
from pandas.core.indexing import _NDFrameIndexer
def make_dataframe_immutable(df: pd.DataFrame):
    """
    Freeze ``df`` in place so the pandas interface can no longer modify it.

    Mutating entry points on this instance are replaced with a function that
    raises ``ImmutablePandas``, and methods that accept ``inplace`` are wrapped
    so that ``inplace=True`` is rejected. A frame that is already marked as
    immutable is left untouched.
    """
    already_frozen = getattr(df, "_is_immutable", False)
    if already_frozen:
        return
    df._is_immutable = True

    # (owner, attribute) pairs whose call must fail; patched in this fixed order.
    blocked = (
        (df, "_set_value"),
        (df, "_setitem_slice"),
        (df, "_setitem_frame"),
        (df, "_setitem_array"),
        (df, "_set_item"),
        (df._data, "delete"),
        (df, "update"),
        (df, "insert"),
    )
    for owner, name in blocked:
        original = getattr(owner, name)
        setattr(owner, name, functools.wraps(original)(_raise_immutable_exception))

    # Series returned by the item-cache lookup are made immutable as well.
    df._get_item_cache = _make_result_immutable(df._get_item_cache)
    # prevent modification through numpy arrays
    df._data.as_array = _make_numpy_result_readonly(df._data.as_array)

    # This list was obtained by manual inspection +
    # [attr for attr in dir(d) if hasattr(getattr(pd.DataFrame, attr, None), '__code__') and
    #  'inplace' in getattr(pd.DataFrame, attr).__code__.co_varnames]
    inplace_capable = (
        "bfill",
        "clip",
        "clip_lower",
        "clip_upper",
        "drop",
        "drop_duplicates",
        "dropna",
        "eval",
        "ffill",
        "fillna",
        "interpolate",
        "mask",
        "query",
        "replace",
        "reset_index",
        "set_axis",
        "set_index",
        "sort_index",
        "sort_values",
        "where",
        "astype",
        "assign",
        "reindex",
        "rename",
    )
    _prevent_inplace_argument_in_function_calls(df, inplace_capable)
def make_series_immutable(series: pd.Series):
    """
    Freeze ``series`` in place so the pandas interface can no longer modify it.

    Mutating entry points on this instance are replaced with a function that
    raises ``ImmutablePandas``, value accessors on the underlying manager are
    wrapped to hand out read-only numpy arrays, and methods that accept
    ``inplace`` are wrapped so that ``inplace=True`` is rejected. A series that
    is already marked as immutable is left untouched.
    """
    already_frozen = getattr(series, "_is_immutable", False)
    if already_frozen:
        return
    series._is_immutable = True

    for name in ("_set_with_engine", "_set_with", "set_value"):
        original = getattr(series, name)
        setattr(series, name, functools.wraps(original)(_raise_immutable_exception))

    # prevent modification through numpy arrays
    manager = series._data
    for name in ("external_values", "internal_values", "get_values"):
        setattr(manager, name, _make_numpy_result_readonly(getattr(manager, name)))

    # This list was obtained by manual inspection +
    # [attr for attr in dir(d) if hasattr(getattr(pd.Series, attr, None), '__code__') and
    #  'inplace' in getattr(pd.Series, attr).__code__.co_varnames]
    inplace_capable = (
        "astype",
        "bfill",
        "clip",
        "clip_lower",
        "clip_upper",
        "drop",
        "drop_duplicates",
        "dropna",
        "ffill",
        "fillna",
        "interpolate",
        "mask",
        "replace",
        "reset_index",
        "set_axis",
        "sort_index",
        "sort_values",
        "valid",
        "where",
        "_set_name",
    )
    _prevent_inplace_argument_in_function_calls(series, inplace_capable)
class ImmutablePandas(Exception):
    """Raised when code tries to modify a pandas object that was made immutable."""
def _raise_immutable_exception(*args, **kwargs):
    """Accept any call signature and always raise ``ImmutablePandas``."""
    message = "Cannot modify immutable dataframe. Please use df.copy()"
    raise ImmutablePandas(message)
def _get_df_or_series_from_args(args):
if len(args) >= 2 and (isinstance(args[1], pd.DataFrame) or isinstance(args[1], pd.Series)):
return args[1]
def _safe__init__(self, *args, **kwargs):
    """
    Replacement for ``_NDFrameIndexer.__init__``.

    Runs the parent initialiser, then - when the second positional argument is
    a DataFrame/Series flagged ``_is_immutable`` - makes this indexer's
    ``_get_setitem_indexer`` raise ``ImmutablePandas``.
    """
    super(_NDFrameIndexer, self).__init__(*args, **kwargs)
    target = _get_df_or_series_from_args(args)
    if target is None or not getattr(target, "_is_immutable", False):
        return
    self._get_setitem_indexer = functools.wraps(self._get_setitem_indexer)(_raise_immutable_exception)
# NOTE: this assignment is a global patch - it replaces _NDFrameIndexer.__init__ for the whole process,
# not only for objects that were made immutable.
# Reloading this module runs the assignment again with the freshly defined _safe__init__; the author
# states that this is supported.
_NDFrameIndexer.__init__ = functools.wraps(_NDFrameIndexer.__init__)(_safe__init__)
def _make_numpy_result_readonly(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
res = func(*args, **kwargs)
if isinstance(res, np.ndarray):
res.flags.writeable = False
return res
return wrapper
def _make_result_immutable(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
res = func(*args, **kwargs)
if isinstance(res, pd.Series):
make_series_immutable(res)
return res
return wrapper
def _prevent_inplace_operation(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
# TODO: here we assume that in-place is not given as a positional.
# remove this assumption, either by hard-coding the position for each method or by parsing the
# function signature.
if kwargs.get("inplace", False):
_raise_immutable_exception()
return func(*args, **kwargs)
return wrapper
def _prevent_inplace_argument_in_function_calls(obj, attributes):
    """Replace each named attribute of ``obj`` with its ``_prevent_inplace_operation`` wrapper."""
    for name in attributes:
        guarded = _prevent_inplace_operation(getattr(obj, name))
        setattr(obj, name, guarded)
pytest单元测试
import importlib
import warnings
import pandas as pd
import pytest
import immutable_pandas
from immutable_pandas import ImmutablePandas
from immutable_pandas import make_dataframe_immutable
from immutable_pandas import make_series_immutable
def create_immutable_dataframe() -> pd.DataFrame:
    """Build a fresh two-column frame and freeze it with ``make_dataframe_immutable``."""
    # Cannot be used as a fixture because pytest copies objects transparently, which makes the tests flaky
    frame = pd.DataFrame({"x": [1, 2, 3, 4], "y": [4, 5, 6, 7]})
    make_dataframe_immutable(frame)
    return frame
def test_immutable_dataframe_cannot_change_with_direct_access():
    """Attribute/item assignment and insert() must fail on the frozen frame."""
    frozen = create_immutable_dataframe()
    # The frame returned by query() is expected to accept a new column.
    derived = frozen.query("x == 2")
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        derived["moshe"] = 123

    with pytest.raises(ImmutablePandas):
        frozen.x = 2
    with pytest.raises(ImmutablePandas):
        frozen["moshe"] = 56
    with pytest.raises(ImmutablePandas):
        frozen.insert(0, "z", [1, 2, 3, 4])
def test_immutable_dataframe_cannot_change_with_inplace_operations():
    """Passing inplace=True to wrapped methods must raise ImmutablePandas."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ImmutablePandas):
        frozen.eval("y=x+1", inplace=True)
    with pytest.raises(ImmutablePandas):
        frozen.assign(y=2, inplace=True)
def test_immutable_dataframe_cannot_change_with_loc():
    """Assignment through .loc and .iloc must raise ImmutablePandas."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ImmutablePandas):
        frozen.loc[2] = 1
    with pytest.raises(ImmutablePandas):
        frozen.iloc[1] = 4
def test_immutable_dataframe_cannot_change_with_columns_access():
    """A column taken from the frozen frame must reject item and .loc assignment."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ImmutablePandas):
        frozen["x"][2] = 123
    with pytest.raises(ImmutablePandas):
        frozen["x"].loc[2] = 123
def test_immutable_dataframe_cannot_del_column():
    """Deleting a column from the frozen frame must raise ImmutablePandas."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ImmutablePandas):
        del frozen["x"]
def test_immutable_dataframe_cannot_be_modified_through_values():
    """Arrays from .values and as_matrix() must be read-only."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ValueError, match="read-only"):
        frozen.values[0, 0] = 1
    with pytest.raises(ValueError, match="read-only"):
        frozen.as_matrix()[0, 0] = 1
def test_immutable_series_cannot_change_with_loc():
    """Assignment through .loc and .iloc on a frozen series must raise ImmutablePandas."""
    frozen = pd.Series([1, 2, 3, 4])
    make_series_immutable(frozen)
    with pytest.raises(ImmutablePandas):
        frozen.loc[0] = 1
    with pytest.raises(ImmutablePandas):
        frozen.iloc[0] = 1
def test_immutable_series_cannot_change_with_inplace_operations():
    """Passing inplace=True to wrapped Series methods must raise ImmutablePandas."""
    frozen = pd.Series([1, 2, 3, 4])
    make_series_immutable(frozen)
    with pytest.raises(ImmutablePandas):
        frozen.sort_index(inplace=True)
    with pytest.raises(ImmutablePandas):
        frozen.sort_values(inplace=True)
    with pytest.raises(ImmutablePandas):
        frozen.astype(int, inplace=True)
def test_series_cannot_be_modeified_through_values():
    """Arrays from get_values() and .values of a frozen series must be read-only."""
    via_get_values = pd.Series([1, 2, 3, 4])
    make_series_immutable(via_get_values)
    with pytest.raises(ValueError, match="read-only"):
        via_get_values.get_values()[0] = 1234

    # A second, freshly frozen series is used for the .values check.
    via_values = pd.Series([1, 2, 3, 4])
    make_series_immutable(via_values)
    with pytest.raises(ValueError, match="read-only"):
        via_values.values[0] = 1234
def test_reloading_module_immutable_pandas_does_not_break_immutability():
    """Frozen frames stay frozen, and copies stay freezable, after a module reload."""
    # We need to test the effects of reloading the module, because we modify the global variable
    # _NDFrameIndexer.__init__ upon every reload of the module.
    frozen = create_immutable_dataframe()
    writable_copy = frozen.copy()
    reloaded = importlib.reload(immutable_pandas)

    with pytest.raises(reloaded.ImmutablePandas):
        frozen.astype(int, inplace=True)

    # The copy has not been frozen yet, so this call is expected not to raise.
    writable_copy.astype(int, inplace=True)
    reloaded.make_dataframe_immutable(writable_copy)
    with pytest.raises(reloaded.ImmutablePandas):
        writable_copy.astype(int, inplace=True)
编辑:这是在pandas版本1.0.5和1.1.4上测试的更新。
"""
Two methods to make pandas objects immutable.
make_dataframe_immutable()
make_series_immutable()
"""
import functools
import numpy as np
import pandas as pd
from pandas.core.indexing import _iLocIndexer
from pandas.core.indexing import _LocIndexer
from pandas.core.indexing import IndexingMixin
def make_dataframe_immutable(df: pd.DataFrame):
    """
    Makes the given DataFrame immutable.

    After calling this function the dataframe cannot be modified through the
    pandas interface: mutating entry points on this instance raise
    ``ImmutablePandas``, arrays returned by the block manager's ``as_array``
    are flagged read-only, and methods accepting ``inplace`` reject
    ``inplace=True``. Calling it on an already immutable frame is a no-op.
    """
    if getattr(df, "_is_immutable", False):
        return
    df._is_immutable = True

    for name in ("_set_value", "_setitem_slice", "_setitem_frame", "_setitem_array", "_set_item"):
        setattr(df, name, functools.wraps(getattr(df, name))(_raise_immutable_exception))

    # Resolve the block manager once so the delete patch and the as_array patch
    # always target the same object, whichever attribute name this pandas uses.
    if hasattr(df, "_mgr"):
        # pandas==1.1.4
        manager = df._mgr
        manager.idelete = functools.wraps(manager.idelete)(_raise_immutable_exception)
    else:
        # pandas==1.0.5
        manager = df._data
        manager.delete = functools.wraps(manager.delete)(_raise_immutable_exception)

    df.update = functools.wraps(df.update)(_raise_immutable_exception)
    df.insert = functools.wraps(df.insert)(_raise_immutable_exception)
    # Series returned by the item-cache lookup are made immutable as well.
    df._get_item_cache = _make_result_immutable(df._get_item_cache)
    # prevent modification through numpy arrays
    manager.as_array = _make_numpy_result_readonly(manager.as_array)

    _prevent_inplace_argument_in_function_calls(
        df,
        # This list was obtained by manual inspection +
        # [attr for attr in dir(d) if hasattr(getattr(pd.DataFrame, attr, None), '__code__') and
        #  'inplace' in getattr(pd.DataFrame, attr).__code__.co_varnames]
        (
            "bfill",
            "clip",
            "drop",
            "drop_duplicates",
            "dropna",
            "eval",
            "ffill",
            "fillna",
            "interpolate",
            "mask",
            "query",
            "replace",
            "reset_index",
            "set_axis",
            "set_index",
            "sort_index",
            "sort_values",
            "where",
            "astype",
            "assign",
            "reindex",
            "rename",
        ),
    )
def make_series_immutable(series: pd.Series):
    """
    Makes the given Series immutable.

    After calling this function the series cannot be modified through the
    pandas interface: mutating entry points on this instance raise
    ``ImmutablePandas``, arrays returned by the manager's value accessors are
    flagged read-only, and methods accepting ``inplace`` reject
    ``inplace=True``. Calling it on an already immutable series is a no-op.
    """
    if getattr(series, "_is_immutable", False):
        return
    series._is_immutable = True
    series._set_with_engine = functools.wraps(series._set_with_engine)(_raise_immutable_exception)
    series._set_with = functools.wraps(series._set_with)(_raise_immutable_exception)

    # Use ``_mgr`` when this pandas provides it (as make_dataframe_immutable does for
    # pandas==1.1.4) and fall back to ``_data`` (pandas==1.0.5).
    manager = series._mgr if hasattr(series, "_mgr") else series._data
    # prevent modification through numpy arrays
    manager.external_values = _make_numpy_result_readonly(manager.external_values)
    manager.internal_values = _make_numpy_result_readonly(manager.internal_values)

    _prevent_inplace_argument_in_function_calls(
        series,
        # This list was obtained by manual inspection +
        # [attr for attr in dir(d) if hasattr(getattr(pd.Series, attr, None), '__code__') and
        #  'inplace' in getattr(pd.Series, attr).__code__.co_varnames]
        (
            "astype",
            "bfill",
            "clip",
            "drop",
            "drop_duplicates",
            "dropna",
            "ffill",
            "fillna",
            "interpolate",
            "mask",
            "replace",
            "reset_index",
            "set_axis",
            "sort_index",
            "sort_values",
            "where",
            "_set_name",
        ),
    )
class ImmutablePandas(Exception):
    """Signals an attempt to modify a DataFrame or Series that was made immutable."""
def _raise_immutable_exception(*args, **kwargs):
    """Stand-in for patched mutators: ignore all arguments and raise ``ImmutablePandas``."""
    message = "Cannot modify immutable dataframe. Please use df.copy()"
    raise ImmutablePandas(message)
def _get_df_or_series_from_args(args):
if len(args) >= 2 and (isinstance(args[1], pd.DataFrame) or isinstance(args[1], pd.Series)):
return args[1]
def _protect_indexer(loc_func):
def wrapper(*arg, **kwargs):
res = loc_func(*args, **kwargs)
return res
def _safe__init__(cls, self, *args, **kwargs):
    """
    Initialise ``self`` via the parent of ``cls``, then block setitem on immutable targets.

    When the second positional argument is a DataFrame/Series flagged
    ``_is_immutable``, ``self._get_setitem_indexer`` is replaced so that it
    raises ``ImmutablePandas``.
    """
    super(cls, self).__init__(*args, **kwargs)
    target = _get_df_or_series_from_args(args)
    if target is None or not getattr(target, "_is_immutable", False):
        return
    self._get_setitem_indexer = functools.wraps(self._get_setitem_indexer)(_raise_immutable_exception)
# Getter used for the patched ``loc`` property: builds a _LocIndexer and, for objects flagged
# ``_is_immutable``, makes its ``_get_setitem_indexer`` raise ImmutablePandas.
@functools.wraps(IndexingMixin.loc)
def _safe_loc(self):
    indexer = _LocIndexer("loc", self)
    if not getattr(self, "_is_immutable", False):
        return indexer
    # Edit also indexer._setitem_with_indexer
    indexer._get_setitem_indexer = functools.wraps(indexer._get_setitem_indexer)(_raise_immutable_exception)
    return indexer
# Getter used for the patched ``iloc`` property: builds an _iLocIndexer and, for objects flagged
# ``_is_immutable``, makes its ``_get_setitem_indexer`` raise ImmutablePandas.
@functools.wraps(IndexingMixin.iloc)
def _safe_iloc(self):
    indexer = _iLocIndexer("iloc", self)
    if not getattr(self, "_is_immutable", False):
        return indexer
    # Edit also indexer._setitem_with_indexer
    indexer._get_setitem_indexer = functools.wraps(indexer._get_setitem_indexer)(_raise_immutable_exception)
    return indexer
# Global patch: the loc/iloc properties of DataFrame and Series are replaced with properties
# built from _safe_loc/_safe_iloc. This affects every DataFrame and Series in the process and
# is re-executed each time the module is (re)loaded.
pd.DataFrame.loc = property(_safe_loc)
pd.Series.loc = property(_safe_loc)
pd.DataFrame.iloc = property(_safe_iloc)
pd.Series.iloc = property(_safe_iloc)
def _make_numpy_result_readonly(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
res = func(*args, **kwargs)
if isinstance(res, np.ndarray):
res.flags.writeable = False
return res
return wrapper
def _make_result_immutable(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
res = func(*args, **kwargs)
if isinstance(res, pd.Series):
make_series_immutable(res)
return res
return wrapper
def _prevent_inplace_operation(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
# TODO: here we assume that in-place is not given as a positional.
# remove this assumption, either by hard-coding the position for each method or by parsing the
# function signature.
if kwargs.get("inplace", False):
_raise_immutable_exception()
return func(*args, **kwargs)
return wrapper
def _prevent_inplace_argument_in_function_calls(obj, attributes):
    """Rebind every attribute named in ``attributes`` on ``obj`` to an inplace-rejecting wrapper."""
    for attribute_name in attributes:
        original = getattr(obj, attribute_name)
        setattr(obj, attribute_name, _prevent_inplace_operation(original))
还有pytest文件
import importlib
import warnings
import pandas as pd
import pytest
import immutable_pandas
from immutable_pandas import ImmutablePandas
from immutable_pandas import make_dataframe_immutable
from immutable_pandas import make_series_immutable
def create_immutable_dataframe() -> pd.DataFrame:
    """Return a new frame with columns ``x`` and ``y`` that has been made immutable."""
    # Cannot be used as a fixture because pytest copies objects transparently, which makes the tests flaky
    columns = {"x": [1, 2, 3, 4], "y": [4, 5, 6, 7]}
    frame = pd.DataFrame(columns)
    make_dataframe_immutable(frame)
    return frame
def test_immutable_dataframe_cannot_change_with_direct_access():
    """Attribute/item assignment and insert() must fail on the frozen frame."""
    frozen = create_immutable_dataframe()
    # The frame returned by query() is expected to accept a new column.
    derived = frozen.query("x == 2")
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        derived["moshe"] = 123

    with pytest.raises(ImmutablePandas):
        frozen.x = 2
    with pytest.raises(ImmutablePandas):
        frozen["moshe"] = 56
    with pytest.raises(ImmutablePandas):
        frozen.insert(0, "z", [1, 2, 3, 4])
def test_immutable_dataframe_cannot_change_with_inplace_operations():
    """Passing inplace=True to wrapped methods must raise ImmutablePandas."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ImmutablePandas):
        frozen.eval("y=x+1", inplace=True)
    with pytest.raises(ImmutablePandas):
        frozen.assign(y=2, inplace=True)
def test_immutable_dataframe_cannot_change_with_loc():
    """Assignment through .loc and .iloc must raise ImmutablePandas."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ImmutablePandas):
        frozen.loc[2] = 1
    with pytest.raises(ImmutablePandas):
        frozen.iloc[1] = 4
def test_immutable_dataframe_cannot_change_with_columns_access():
    """A column taken from the frozen frame must reject item and .loc assignment."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ImmutablePandas):
        frozen["x"][2] = 123
    with pytest.raises(ImmutablePandas):
        frozen["x"].loc[2] = 123
def test_immutable_dataframe_cannot_del_column():
    """Deleting a column from the frozen frame must raise ImmutablePandas."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ImmutablePandas):
        del frozen["x"]
def test_immutable_dataframe_cannot_be_modified_through_values():
    """The array returned by .values must be read-only."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ValueError, match="read-only"):
        frozen.values[0, 0] = 1
    # The as_matrix() variant of this check is disabled in this version of the tests:
    # with pytest.raises(ValueError, match="read-only"):
    #     frozen.as_matrix()[0, 0] = 1
def test_immutable_series_cannot_change_with_loc():
    """Assignment through .loc and .iloc on a frozen series must raise ImmutablePandas."""
    frozen = pd.Series([1, 2, 3, 4])
    make_series_immutable(frozen)
    with pytest.raises(ImmutablePandas):
        frozen.loc[0] = 1
    with pytest.raises(ImmutablePandas):
        frozen.iloc[0] = 1
def test_immutable_series_cannot_change_with_inplace_operations():
    """Passing inplace=True to wrapped Series methods must raise ImmutablePandas."""
    frozen = pd.Series([1, 2, 3, 4])
    make_series_immutable(frozen)
    with pytest.raises(ImmutablePandas):
        frozen.sort_index(inplace=True)
    with pytest.raises(ImmutablePandas):
        frozen.sort_values(inplace=True)
    with pytest.raises(ImmutablePandas):
        frozen.astype(int, inplace=True)
def test_series_cannot_be_modeified_through_values():
    """The array returned by .values of a frozen series must be read-only."""
    series = pd.Series([1, 2, 3, 4])
    make_series_immutable(series)
    with pytest.raises(ValueError, match="read-only"):
        series.values[0] = 1234
def test_reloading_module_immutable_pandas_does_not_break_immutability():
    """Frozen frames stay frozen, and copies stay freezable, after a module reload."""
    # We need to test the effects of reloading the module, because we modify the global variable
    # pd.DataFrame.loc, pd.DataFrame.iloc,
    # pd.Series.loc, pd.Series.iloc
    # upon every reload of the module.
    frozen = create_immutable_dataframe()
    writable_copy = frozen.copy()
    reloaded = importlib.reload(immutable_pandas)

    with pytest.raises(reloaded.ImmutablePandas):
        frozen.astype(int, inplace=True)

    reloaded.make_dataframe_immutable(writable_copy)
    with pytest.raises(reloaded.ImmutablePandas):
        writable_copy.astype(int, inplace=True)
def test_at_and_iat_crash():
    """Scalar assignment through .iat and .at must raise ImmutablePandas."""
    frozen = create_immutable_dataframe()
    with pytest.raises(immutable_pandas.ImmutablePandas):
        frozen.iat[0, 0] = 1
    with pytest.raises(immutable_pandas.ImmutablePandas):
        frozen.at[0, "x"] = 1
如果你真的想让 DataFrame
像个不可变的对象,而不是使用 @Joop 提出的 copy
方法(我推荐这个方法),你可以基于以下结构来构建。
注意,这只是一个起点。
这个结构基本上是一个代理数据对象,它隐藏了所有可能改变状态的东西,并且允许自己被哈希(也就是生成一个唯一的标识符),所有相同原始数据的实例都会有相同的哈希值。可能有一些模块能更酷地实现下面的功能,但我觉得作为一个例子,这样做可以帮助理解。
一些警告:
- 代理对象的哈希值是根据被代理对象的字符串表示计算出来的,因此两个不同的被代理对象有可能得到相同的哈希值;不过这个实现与 DataFrame 等对象是兼容的。
- 对原始对象的更改会影响代理对象。
- 如果另一个对象把“是否相等”的判断又反过来交给代理对象,就会导致麻烦的无限递归(这就是为什么对 list 做了特殊处理)。
- DataFrame 代理的辅助类只是一个起点:任何会改变原始对象状态的方法都不能被放行,要么由辅助类手动重写,要么在实例化 _ReadOnly 时通过 extraFilter 参数完全屏蔽。请参考 DataFrameProxy.sort。
- 代理对象不会表现为被代理类型的派生类型。
通用只读代理
这个可以用于任何对象。
import md5
import warnings
class _ReadOnly(object):
def __init__(self, obj, extraFilter=tuple()):
self.__dict__['_obj'] = obj
self.__dict__['_d'] = None
self.__dict__['_extraFilter'] = extraFilter
self.__dict__['_hash'] = int(md5.md5(str(obj)).hexdigest(), 16)
@staticmethod
def _cloak(obj):
try:
hash(obj)
return obj
except TypeError:
return _ReadOnly(obj)
def __getitem__(self, value):
return _ReadOnly._cloak(self._obj[value])
def __setitem__(self, key, value):
raise TypeError(
"{0} has a _ReadOnly proxy around it".format(type(self._obj)))
def __delitem__(self, key):
raise TypeError(
"{0} has a _ReadOnly proxy around it".format(type(self._obj)))
def __getattr__(self, value):
if value in self.__dir__():
return _ReadOnly._cloak(getattr(self._obj, value))
elif value in dir(self._obj):
raise AttributeError("{0} attribute {1} is cloaked".format(
type(self._obj), value))
else:
raise AttributeError("{0} has no {1}".format(
type(self._obj), value))
def __setattr__(self, key, value):
raise TypeError(
"{0} has a _ReadOnly proxy around it".format(type(self._obj)))
def __delattr__(self, key):
raise TypeError(
"{0} has a _ReadOnly proxy around it".format(type(self._obj)))
def __dir__(self):
if self._d is None:
self.__dict__['_d'] = [
i for i in dir(self._obj) if not i.startswith('set')
and i not in self._extraFilter]
return self._d
def __repr__(self):
return self._obj.__repr__()
def __call__(self, *args, **kwargs):
if hasattr(self._obj, "__call__"):
return self._obj(*args, **kwargs)
else:
raise TypeError("{0} not callable".format(type(self._obj)))
def __hash__(self):
return self._hash
def __eq__(self, other):
try:
return hash(self) == hash(other)
except TypeError:
if isinstance(other, list):
try:
return all(zip(self, other))
except:
return False
return other == self
DataFrame 代理
应该像 sort 那样为它扩展更多的方法,并过滤掉其余所有不需要的、会改变状态的方法。
你可以只用一个 DataFrame
实例作为参数来实例化,或者像创建 DataFrame
时那样提供参数。
import pandas as pd
class DataFrameProxy(_ReadOnly):
    """Read-only proxy for a pandas DataFrame.

    Instantiate it either with a single existing ``DataFrame`` (which is then
    wrapped directly) or with the same arguments ``pd.DataFrame`` accepts (a
    new frame is built and wrapped).
    """

    # Attribute names cloaked in addition to the ``set*`` names filtered by _ReadOnly.
    EXTRA_FILTER = ('drop', 'drop_duplicates', 'dropna')

    def __init__(self, *args, **kwargs):
        wraps_existing = (
            len(args) == 1 and not kwargs and isinstance(args[0], pd.DataFrame)
        )
        frame = args[0] if wraps_existing else pd.DataFrame(*args, **kwargs)
        super(DataFrameProxy, self).__init__(frame, DataFrameProxy.EXTRA_FILTER)

    def sort(self, inplace=False, *args, **kwargs):
        """Sort without touching the wrapped frame; a truthy ``inplace`` only triggers a warning."""
        if inplace:
            warnings.warn("Inplace sorting overridden")
        # NOTE(review): DataFrame.sort is not available in recent pandas releases
        # (sort_values/sort_index replace it) - confirm the targeted pandas version.
        return self._obj.sort(*args, **kwargs)
最后:
不过,虽然制作这个玩意儿很有趣,但为什么不简单地使用一个不被改变的 DataFrame
呢?如果它只对你可见,最好还是你自己确保不去改变它……
试试写类似这样的代码
class Bla(object):
    """Holds a private DataFrame and only ever hands out copies of it."""

    def __init__(self):
        self._df = pd.DataFrame(index=[1, 2, 3])

    @property
    def df(self):
        """Return a copy of the private frame (the property defines no setter)."""
        snapshot = self._df.copy()
        return snapshot
这样你可以通过 b.df 来获取数据框(df),但是你不能对它进行赋值。简单来说,你在这个类里有一个数据框,它的表现像是“不可变的数据框”,也就是说它阻止对原始数据的修改。不过,返回的对象仍然是一个可变的数据框,所以在其他方面它不会像不可变的那样表现。比如,你不能把它用作字典的键等等。
StaticFrame这个包(我也是作者之一)提供了一种类似Pandas的操作方式,并且实现了很多常见的Pandas功能,同时确保底层的NumPy数组和不可变的Series和Frame容器是不可更改的。
你可以通过使用static_frame.Frame.from_pandas(df)
将整个Pandas DataFrame转换为一个StaticFrame的Frame
,这样就可以把它变成一个真正只读的表格。
想了解这个方法的更多信息,可以查看StaticFrame的文档: https://static-frame.readthedocs.io/en/latest/api_detail/frame.html#frame-constructor