pandas 不可变 DataFrame

30 投票
4 回答
23182 浏览
提问于 2025-04-18 14:34

我想要一个不可变的数据框(DataFrame),用作程序中的参考表,也就是说在它最初创建好之后(在我的情况下,是在一个类的 def __init__() 方法里),它的属性应该是只读的,不能被修改。

我注意到索引对象是冻结的。

有没有办法让整个数据框变成不可变的呢?

4 个回答

5

通过研究 pandas 的内部实现并利用它自身的机制,可以给 DataFrame 对象打补丁(monkey patch),使其表现为不可变。我为此写了一个函数 make_dataframe_immutable(dataframe)。这段代码是针对 pandas 0.25.3 编写的。

编辑:我还添加了针对pandas版本1.0.5和1.1.4的解决方案。

新的pandas版本可能需要一些调整,希望根据下面的测试内容来修改不会太难。

这个解决方案是新的,测试得还不够全面,任何反馈都非常欢迎。

如果有人能在这里发布一个相反的 make_dataframe_mutable() 方法,那就太好了。

import functools

import numpy as np
import pandas as pd
from pandas.core.indexing import _NDFrameIndexer


def make_dataframe_immutable(df: pd.DataFrame):
    """
    Patch ``df`` in place so that writes through the pandas interface fail.

    After this call:
      * the private setter hooks, ``update``, ``insert`` and the ``delete``
        method of ``df._data`` raise ImmutablePandas;
      * Series returned by ``df._get_item_cache`` are made immutable too;
      * numpy arrays returned by ``df._data.as_array`` are flagged read-only;
      * the listed methods raise ImmutablePandas when called with
        ``inplace=True``.

    Calling it on a frame that is already marked immutable does nothing.
    Returns None.
    """
    if getattr(df, "_is_immutable", False):
        return
    df._is_immutable = True

    def block(target, name):
        # Swap ``target.<name>`` for the raising stub, copying the metadata of the original.
        setattr(target, name, functools.wraps(getattr(target, name))(_raise_immutable_exception))

    for setter in ("_set_value", "_setitem_slice", "_setitem_frame", "_setitem_array", "_set_item"):
        block(df, setter)
    block(df._data, "delete")
    for mutator in ("update", "insert"):
        block(df, mutator)

    df._get_item_cache = _make_result_immutable(df._get_item_cache)

    # prevent modification through numpy arrays
    manager = df._data
    manager.as_array = _make_numpy_result_readonly(manager.as_array)

    # This list was obtained by manual inspection +
    #  [attr for attr in dir(d) if hasattr(getattr(pd.DataFrame, attr, None), '__code__') and
    #  'inplace' in getattr(pd.DataFrame, attr).__code__.co_varnames]
    inplace_capable = (
        'bfill',
        'clip',
        'clip_lower',
        'clip_upper',
        'drop',
        'drop_duplicates',
        'dropna',
        'eval',
        'ffill',
        'fillna',
        'interpolate',
        'mask',
        'query',
        'replace',
        'reset_index',
        'set_axis',
        'set_index',
        'sort_index',
        'sort_values',
        'where',
        "astype",
        "assign",
        "reindex",
        "rename",
    )
    _prevent_inplace_argument_in_function_calls(df, inplace_capable)


def make_series_immutable(series: pd.Series):
    """
    Patch ``series`` in place so that writes through the pandas interface fail.

    The setter hooks ``_set_with_engine``, ``_set_with`` and ``set_value`` are
    replaced by a stub that raises ImmutablePandas, the value accessors of
    ``series._data`` return read-only numpy arrays, and the listed methods
    raise ImmutablePandas when called with ``inplace=True``.

    Calling it on a series that is already marked immutable does nothing.
    Returns None.
    """
    if getattr(series, "_is_immutable", False):
        return
    series._is_immutable = True

    for setter in ("_set_with_engine", "_set_with", "set_value"):
        original = getattr(series, setter)
        setattr(series, setter, functools.wraps(original)(_raise_immutable_exception))

    # prevent modification through numpy arrays
    manager = series._data
    for accessor in ("external_values", "internal_values", "get_values"):
        setattr(manager, accessor, _make_numpy_result_readonly(getattr(manager, accessor)))

    # This list was obtained by manual inspection +
    #  [attr for attr in dir(d) if hasattr(getattr(pd.Series, attr, None), '__code__') and
    #  'inplace' in getattr(pd.Series, attr).__code__.co_varnames]
    inplace_capable = (
        "astype",
        'bfill',
        'clip',
        'clip_lower',
        'clip_upper',
        'drop',
        'drop_duplicates',
        'dropna',
        'ffill',
        'fillna',
        'interpolate',
        'mask',
        'replace',
        'reset_index',
        'set_axis',
        'sort_index',
        'sort_values',
        "valid",
        'where',
        "_set_name",
    )
    _prevent_inplace_argument_in_function_calls(series, inplace_capable)


class ImmutablePandas(Exception):
    """Raised when code tries to modify a pandas object that was made immutable."""


def _raise_immutable_exception(*args, **kwargs):
    """Ignore all arguments and raise ImmutablePandas; used as a stand-in for mutating methods."""
    message = "Cannot modify immutable dataframe. Please use df.copy()"
    raise ImmutablePandas(message)


def _get_df_or_series_from_args(args):
    if len(args) >= 2 and (isinstance(args[1], pd.DataFrame) or isinstance(args[1], pd.Series)):
        return args[1]


def _safe__init__(self, *args, **kwargs):
    """
    Replacement ``__init__`` for ``_NDFrameIndexer``.

    Runs the parent initializer, then, if the second positional argument is a
    DataFrame/Series marked ``_is_immutable``, replaces this indexer's
    ``_get_setitem_indexer`` with a stub that raises ImmutablePandas.
    """
    super(_NDFrameIndexer, self).__init__(*args, **kwargs)
    target = _get_df_or_series_from_args(args)
    if target is None or not getattr(target, "_is_immutable", False):
        return
    original_hook = self._get_setitem_indexer
    self._get_setitem_indexer = functools.wraps(original_hook)(_raise_immutable_exception)


# This line is the greatest foul in this module - as it performs a global patch:
# it replaces __init__ on the _NDFrameIndexer class itself, so _safe__init__ runs for
# every indexer created afterwards, not only for indexers of immutable objects.
# Notice that a reload of this module incurs overriding this variable again and again. It is supported.
_NDFrameIndexer.__init__ = functools.wraps(_NDFrameIndexer.__init__)(_safe__init__)


def _make_numpy_result_readonly(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        res = func(*args, **kwargs)
        if isinstance(res, np.ndarray):
            res.flags.writeable = False
        return res

    return wrapper


def _make_result_immutable(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        res = func(*args, **kwargs)
        if isinstance(res, pd.Series):
            make_series_immutable(res)
        return res

    return wrapper


def _prevent_inplace_operation(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # TODO: here we assume that in-place is not given as a positional.
        #  remove this assumption, either by hard-coding the position for each method or by parsing the
        #  function signature.
        if kwargs.get("inplace", False):
            _raise_immutable_exception()
        return func(*args, **kwargs)

    return wrapper


def _prevent_inplace_argument_in_function_calls(obj, attributes):
    """Replace each named attribute of ``obj`` with a version wrapped by ``_prevent_inplace_operation``."""
    for name in attributes:
        guarded = _prevent_inplace_operation(getattr(obj, name))
        setattr(obj, name, guarded)


pytest单元测试

import importlib
import warnings

import pandas as pd
import pytest

import immutable_pandas
from immutable_pandas import ImmutablePandas
from immutable_pandas import make_dataframe_immutable
from immutable_pandas import make_series_immutable



def create_immutable_dataframe() -> pd.DataFrame:
    """Build a fresh 4-row frame with columns ``x`` and ``y`` and make it immutable."""
    # Cannot be used as a fixture because pytest copies objects transparently, which makes the tests flaky
    frame = pd.DataFrame({"x": [1, 2, 3, 4], "y": [4, 5, 6, 7]})
    make_dataframe_immutable(frame)
    return frame


def test_immutable_dataframe_cannot_change_with_direct_access():
    """Attribute/item assignment and ``insert`` fail on the frozen frame; a frame derived via ``query`` stays writable."""
    frozen = create_immutable_dataframe()
    derived = frozen.query("x == 2")
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # Must not raise: the query result is a different, unpatched object.
        derived["moshe"] = 123
    with pytest.raises(ImmutablePandas):
        frozen.x = 2
    with pytest.raises(ImmutablePandas):
        frozen["moshe"] = 56
    with pytest.raises(ImmutablePandas):
        frozen.insert(0, "z", [1, 2, 3, 4])


def test_immutable_dataframe_cannot_change_with_inplace_operations():
    """Passing ``inplace=True`` to ``eval`` or ``assign`` on a frozen frame raises."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ImmutablePandas):
        frozen.eval("y=x+1", inplace=True)
    with pytest.raises(ImmutablePandas):
        frozen.assign(y=2, inplace=True)


def test_immutable_dataframe_cannot_change_with_loc():
    """Row assignment through ``.loc`` and ``.iloc`` on a frozen frame raises."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ImmutablePandas):
        frozen.loc[2] = 1
    with pytest.raises(ImmutablePandas):
        frozen.iloc[1] = 4


def test_immutable_dataframe_cannot_change_with_columns_access():
    """Writing into a column Series taken from a frozen frame raises."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ImmutablePandas):
        frozen["x"][2] = 123
    with pytest.raises(ImmutablePandas):
        frozen["x"].loc[2] = 123


def test_immutable_dataframe_cannot_del_column():
    """``del frame[column]`` on a frozen frame raises."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ImmutablePandas):
        del frozen["x"]


def test_immutable_dataframe_cannot_be_modified_through_values():
    """Arrays obtained via ``.values`` and ``.as_matrix()`` are read-only."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ValueError, match="read-only"):
        frozen.values[0, 0] = 1
    with pytest.raises(ValueError, match="read-only"):
        frozen.as_matrix()[0, 0] = 1


def test_immutable_series_cannot_change_with_loc():
    """Element assignment through ``.loc`` and ``.iloc`` on a frozen series raises."""
    frozen_series = pd.Series([1, 2, 3, 4])
    make_series_immutable(frozen_series)
    with pytest.raises(ImmutablePandas):
        frozen_series.loc[0] = 1
    with pytest.raises(ImmutablePandas):
        frozen_series.iloc[0] = 1


def test_immutable_series_cannot_change_with_inplace_operations():
    """``sort_index``, ``sort_values`` and ``astype`` reject ``inplace=True`` on a frozen series."""
    frozen_series = pd.Series([1, 2, 3, 4])
    make_series_immutable(frozen_series)
    with pytest.raises(ImmutablePandas):
        frozen_series.sort_index(inplace=True)
    with pytest.raises(ImmutablePandas):
        frozen_series.sort_values(inplace=True)
    with pytest.raises(ImmutablePandas):
        frozen_series.astype(int, inplace=True)


def test_series_cannot_be_modeified_through_values():
    """Arrays obtained via ``get_values()`` and ``.values`` of a frozen series are read-only."""
    first = pd.Series([1, 2, 3, 4])
    make_series_immutable(first)
    with pytest.raises(ValueError, match="read-only"):
        first.get_values()[0] = 1234
    # A second, freshly frozen series is used for the ``.values`` check.
    second = pd.Series([1, 2, 3, 4])
    make_series_immutable(second)
    with pytest.raises(ValueError, match="read-only"):
        second.values[0] = 1234


def test_reloading_module_immutable_pandas_does_not_break_immutability():
    """Immutability survives a module reload, and the reloaded module can still freeze new frames."""
    # We need to test the effects of reloading the module, because we modify the global variable
    #       _NDFrameIndexer.__init__ upon every reload of the module.
    frozen = create_immutable_dataframe()
    mutable_copy = frozen.copy()
    reloaded = importlib.reload(immutable_pandas)
    with pytest.raises(reloaded.ImmutablePandas):
        frozen.astype(int, inplace=True)
    # The copy is still mutable at this point, so this call must not raise ImmutablePandas.
    mutable_copy.astype(int, inplace=True)
    reloaded.make_dataframe_immutable(mutable_copy)
    with pytest.raises(reloaded.ImmutablePandas):
        mutable_copy.astype(int, inplace=True)


编辑:这是在pandas版本1.0.5和1.1.4上测试的更新。

"""
Two methods to make pandas objects immutable.
    make_dataframe_immutable()
    make_series_immutable()
"""
import functools

import numpy as np
import pandas as pd
from pandas.core.indexing import _iLocIndexer
from pandas.core.indexing import _LocIndexer
from pandas.core.indexing import IndexingMixin


def make_dataframe_immutable(df: pd.DataFrame):
    """
    Makes the given DataFrame immutable.
    I.e. after calling this method - one cannot modify the dataframe using pandas interface.

    Upon a trial to modify an immutable dataframe, an exception of type ImmutablePandas is raised.

    The frame is patched in place and nothing is returned; calling this on a
    frame that is already marked immutable is a no-op. The block manager is
    looked up once (``_mgr`` when present, otherwise ``_data``) and that same
    object is used for both the delete hook and the ``as_array`` hook.
    """
    if getattr(df, "_is_immutable", False):
        return
    df._is_immutable = True
    # Replace the private setter hooks with a stub that raises ImmutablePandas.
    df._set_value = functools.wraps(df._set_value)(_raise_immutable_exception)
    df._setitem_slice = functools.wraps(df._setitem_slice)(_raise_immutable_exception)
    df._setitem_frame = functools.wraps(df._setitem_frame)(_raise_immutable_exception)
    df._setitem_array = functools.wraps(df._setitem_array)(_raise_immutable_exception)
    df._set_item = functools.wraps(df._set_item)(_raise_immutable_exception)
    if hasattr(df, "_mgr"):
        # pandas==1.1.4
        manager = df._mgr
        manager.idelete = functools.wraps(manager.idelete)(_raise_immutable_exception)
    else:
        # pandas==1.0.5
        manager = df._data
        manager.delete = functools.wraps(manager.delete)(_raise_immutable_exception)
    df.update = functools.wraps(df.update)(_raise_immutable_exception)
    df.insert = functools.wraps(df.insert)(_raise_immutable_exception)

    # Column Series handed out by the item cache become immutable as well.
    df._get_item_cache = _make_result_immutable(df._get_item_cache)

    # prevent modification through numpy arrays
    manager.as_array = _make_numpy_result_readonly(manager.as_array)

    _prevent_inplace_argument_in_function_calls(
        df,
        # This list was obtained by manual inspection +
        #  [attr for attr in dir(d) if hasattr(getattr(pd.DataFrame, attr, None), '__code__') and
        #  'inplace' in getattr(pd.DataFrame, attr).__code__.co_varnames]
        (
            "bfill",
            "clip",
            "drop",
            "drop_duplicates",
            "dropna",
            "eval",
            "ffill",
            "fillna",
            "interpolate",
            "mask",
            "query",
            "replace",
            "reset_index",
            "set_axis",
            "set_index",
            "sort_index",
            "sort_values",
            "where",
            "astype",
            "assign",
            "reindex",
            "rename",
        ),
    )


def make_series_immutable(series: pd.Series):
    """
    Makes the given Series immutable.
    I.e. after calling this method - one cannot modify the series using pandas interface.

    Upon a trial to modify an immutable series, an exception of type ImmutablePandas is raised.

    The series is patched in place and nothing is returned; calling this on a
    series that is already marked immutable is a no-op. The block manager is
    taken from ``_mgr`` when present and from ``_data`` otherwise, matching
    the lookup used for DataFrames.
    """
    if getattr(series, "_is_immutable", False):
        return
    series._is_immutable = True
    # Replace the private setter hooks with a stub that raises ImmutablePandas.
    series._set_with_engine = functools.wraps(series._set_with_engine)(_raise_immutable_exception)
    series._set_with = functools.wraps(series._set_with)(_raise_immutable_exception)

    # prevent modification through numpy arrays
    manager = series._mgr if hasattr(series, "_mgr") else series._data
    manager.external_values = _make_numpy_result_readonly(manager.external_values)
    manager.internal_values = _make_numpy_result_readonly(manager.internal_values)

    _prevent_inplace_argument_in_function_calls(
        series,
        # This list was obtained by manual inspection +
        #  [attr for attr in dir(d) if hasattr(getattr(pd.Series, attr, None), '__code__') and
        #  'inplace' in getattr(pd.Series, attr).__code__.co_varnames]
        (
            "astype",
            "bfill",
            "clip",
            "drop",
            "drop_duplicates",
            "dropna",
            "ffill",
            "fillna",
            "interpolate",
            "mask",
            "replace",
            "reset_index",
            "set_axis",
            "sort_index",
            "sort_values",
            "where",
            "_set_name",
        ),
    )


class ImmutablePandas(Exception):
    """Signals an attempt to modify a DataFrame or Series that was made immutable."""


def _raise_immutable_exception(*args, **kwargs):
    """Stand-in for mutating methods: accepts any arguments and always raises ImmutablePandas."""
    error = ImmutablePandas("Cannot modify immutable dataframe. Please use df.copy()")
    raise error


def _get_df_or_series_from_args(args):
    if len(args) >= 2 and (isinstance(args[1], pd.DataFrame) or isinstance(args[1], pd.Series)):
        return args[1]


def _protect_indexer(loc_func):
    def wrapper(*arg, **kwargs):
        res = loc_func(*args, **kwargs)
        return res


def _safe__init__(cls, self, *args, **kwargs):
    """
    Initialize ``self`` through the parent of ``cls`` and block writes for immutable targets.

    If the second positional argument is a DataFrame/Series marked
    ``_is_immutable``, the instance's ``_get_setitem_indexer`` is replaced by a
    stub that raises ImmutablePandas.

    NOTE(review): this helper appears unused in the visible snippet - confirm before relying on it.
    """
    super(cls, self).__init__(*args, **kwargs)
    target = _get_df_or_series_from_args(args)
    if target is None or not getattr(target, "_is_immutable", False):
        return
    original_hook = self._get_setitem_indexer
    self._get_setitem_indexer = functools.wraps(original_hook)(_raise_immutable_exception)


@functools.wraps(IndexingMixin.loc)
def _safe_loc(self):
    """Build the ``loc`` indexer for ``self``; for immutable objects its setitem hook raises ImmutablePandas."""
    indexer = _LocIndexer("loc", self)
    if not getattr(self, "_is_immutable", False):
        return indexer
    # Edit also loc._setitem_with_indexer
    blocker = functools.wraps(indexer._get_setitem_indexer)(_raise_immutable_exception)
    indexer._get_setitem_indexer = blocker
    return indexer


@functools.wraps(IndexingMixin.iloc)
def _safe_iloc(self):
    """Build the ``iloc`` indexer for ``self``; for immutable objects its setitem hook raises ImmutablePandas."""
    indexer = _iLocIndexer("iloc", self)
    if not getattr(self, "_is_immutable", False):
        return indexer
    # Edit also iloc._setitem_with_indexer
    blocker = functools.wraps(indexer._get_setitem_indexer)(_raise_immutable_exception)
    indexer._get_setitem_indexer = blocker
    return indexer


# Global patch: replace the ``loc`` / ``iloc`` accessors on the DataFrame and Series
# classes themselves with properties built from _safe_loc / _safe_iloc. This applies
# to every DataFrame and Series, not only immutable ones, and is repeated each time
# this module is imported or reloaded.
pd.DataFrame.loc = property(_safe_loc)
pd.Series.loc = property(_safe_loc)
pd.DataFrame.iloc = property(_safe_iloc)
pd.Series.iloc = property(_safe_iloc)


def _make_numpy_result_readonly(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        res = func(*args, **kwargs)
        if isinstance(res, np.ndarray):
            res.flags.writeable = False
        return res

    return wrapper


def _make_result_immutable(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        res = func(*args, **kwargs)
        if isinstance(res, pd.Series):
            make_series_immutable(res)
        return res

    return wrapper


def _prevent_inplace_operation(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # TODO: here we assume that in-place is not given as a positional.
        #  remove this assumption, either by hard-coding the position for each method or by parsing the
        #  function signature.
        if kwargs.get("inplace", False):
            _raise_immutable_exception()
        return func(*args, **kwargs)

    return wrapper


def _prevent_inplace_argument_in_function_calls(obj, attributes):
    """For every name in ``attributes``, rebind ``obj.<name>`` to its ``_prevent_inplace_operation`` wrapper."""
    for method_name in attributes:
        original = getattr(obj, method_name)
        wrapped = _prevent_inplace_operation(original)
        setattr(obj, method_name, wrapped)


还有pytest文件

import importlib
import warnings

import pandas as pd
import pytest

import immutable_pandas
from immutable_pandas import ImmutablePandas
from immutable_pandas import make_dataframe_immutable
from immutable_pandas import make_series_immutable


def create_immutable_dataframe() -> pd.DataFrame:
    """Return a new immutable 4-row frame with integer columns ``x`` and ``y``."""
    # Cannot be used as a fixture because pytest copies objects transparently, which makes the tests flaky
    columns = {"x": [1, 2, 3, 4], "y": [4, 5, 6, 7]}
    frame = pd.DataFrame(columns)
    make_dataframe_immutable(frame)
    return frame


def test_immutable_dataframe_cannot_change_with_direct_access():
    """Attribute/item assignment and ``insert`` fail on the frozen frame; a frame derived via ``query`` stays writable."""
    frozen = create_immutable_dataframe()
    derived = frozen.query("x == 2")
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # Must not raise: the query result is a different, unpatched object.
        derived["moshe"] = 123
    with pytest.raises(ImmutablePandas):
        frozen.x = 2
    with pytest.raises(ImmutablePandas):
        frozen["moshe"] = 56
    with pytest.raises(ImmutablePandas):
        frozen.insert(0, "z", [1, 2, 3, 4])


def test_immutable_dataframe_cannot_change_with_inplace_operations():
    """Passing ``inplace=True`` to ``eval`` or ``assign`` on a frozen frame raises."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ImmutablePandas):
        frozen.eval("y=x+1", inplace=True)
    with pytest.raises(ImmutablePandas):
        frozen.assign(y=2, inplace=True)


def test_immutable_dataframe_cannot_change_with_loc():
    """Row assignment through ``.loc`` and ``.iloc`` on a frozen frame raises."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ImmutablePandas):
        frozen.loc[2] = 1
    with pytest.raises(ImmutablePandas):
        frozen.iloc[1] = 4


def test_immutable_dataframe_cannot_change_with_columns_access():
    """Writing into a column Series taken from a frozen frame raises."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ImmutablePandas):
        frozen["x"][2] = 123
    with pytest.raises(ImmutablePandas):
        frozen["x"].loc[2] = 123


def test_immutable_dataframe_cannot_del_column():
    """``del frame[column]`` on a frozen frame raises."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ImmutablePandas):
        del frozen["x"]


def test_immutable_dataframe_cannot_be_modified_through_values():
    """The array obtained via ``.values`` of a frozen frame is read-only."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ValueError, match="read-only"):
        frozen.values[0, 0] = 1
    # NOTE: the earlier version of this test also checked ``as_matrix()``; that check is
    # disabled here (``DataFrame.as_matrix`` no longer exists in pandas 1.0 and later).


def test_immutable_series_cannot_change_with_loc():
    """Element assignment through ``.loc`` and ``.iloc`` on a frozen series raises."""
    frozen_series = pd.Series([1, 2, 3, 4])
    make_series_immutable(frozen_series)
    with pytest.raises(ImmutablePandas):
        frozen_series.loc[0] = 1
    with pytest.raises(ImmutablePandas):
        frozen_series.iloc[0] = 1


def test_immutable_series_cannot_change_with_inplace_operations():
    """``sort_index``, ``sort_values`` and ``astype`` reject ``inplace=True`` on a frozen series."""
    frozen_series = pd.Series([1, 2, 3, 4])
    make_series_immutable(frozen_series)
    with pytest.raises(ImmutablePandas):
        frozen_series.sort_index(inplace=True)
    with pytest.raises(ImmutablePandas):
        frozen_series.sort_values(inplace=True)
    with pytest.raises(ImmutablePandas):
        frozen_series.astype(int, inplace=True)


def test_series_cannot_be_modeified_through_values():
    """The array obtained via ``.values`` of an immutable series is read-only."""
    # The series is built and frozen once; an earlier duplicate setup was never used.
    series = pd.Series([1, 2, 3, 4])
    make_series_immutable(series)
    with pytest.raises(ValueError, match="read-only"):
        series.values[0] = 1234


def test_reloading_module_immutable_pandas_does_not_break_immutability():
    """Immutability survives a module reload, and the reloaded module can still freeze new frames."""
    # We need to test the effects of reloading the module, because we modify the global variable
    #       pd.DataFrame.loc, pd.DataFrame.iloc,
    #       pd.Series.loc, pd.Series.iloc
    #       upon every reload of the module.
    frozen = create_immutable_dataframe()
    fresh_copy = frozen.copy()
    reloaded = importlib.reload(immutable_pandas)
    with pytest.raises(reloaded.ImmutablePandas):
        frozen.astype(int, inplace=True)
    reloaded.make_dataframe_immutable(fresh_copy)
    with pytest.raises(reloaded.ImmutablePandas):
        fresh_copy.astype(int, inplace=True)


def test_at_and_iat_crash():
    """Scalar assignment through ``.iat`` and ``.at`` on a frozen frame raises."""
    frozen = create_immutable_dataframe()
    with pytest.raises(immutable_pandas.ImmutablePandas):
        frozen.iat[0, 0] = 1
    with pytest.raises(immutable_pandas.ImmutablePandas):
        frozen.at[0, "x"] = 1


5

如果你真的想让 DataFrame 像个不可变的对象,而不是使用 @Joop 提出的 copy 方法(我推荐这个方法),你可以基于以下结构来构建。

注意,这只是一个起点。

这个结构基本上是一个代理数据对象,它隐藏了所有可能改变状态的东西,并且允许自己被哈希(也就是生成一个唯一的标识符),所有相同原始数据的实例都会有相同的哈希值。可能有一些模块能更酷地实现下面的功能,但我觉得作为一个例子,这样做可以帮助理解。

一些警告:

  • 哈希值是由被代理对象的字符串表示计算出来的,因此根据该字符串表示的生成方式,两个不同的被代理对象可能得到相同的哈希值;不过这种实现方式可以兼容 DataFrame 以及其他对象。

  • 对原始对象的更改会影响代理对象。

  • 如果对方对象在比较时又把“是否相等”的问题反过来交还给代理对象,相等性比较就会陷入麻烦的无限递归(这就是对 list 做特殊处理的原因)。

  • 这个 DataFrame 代理的辅助工具只是个开始,问题是任何改变原始对象状态的方法都不能被允许,或者需要通过辅助工具手动重写,或者在实例化 _ReadOnly 时完全被 extraFilter 参数屏蔽。请查看 DataFrameProxy.sort

  • 代理对象不会被识别为被代理类型的派生类型(例如,对 DataFrame 的代理执行 isinstance(proxy, pd.DataFrame) 会得到 False)。

通用只读代理

这个可以用于任何对象。

import md5                                                                                              
import warnings                                                                                         

class _ReadOnly(object):                                                                                

    def __init__(self, obj, extraFilter=tuple()):                                                       

        self.__dict__['_obj'] = obj                                                                     
        self.__dict__['_d'] = None                                                                      
        self.__dict__['_extraFilter'] = extraFilter                                                     
        self.__dict__['_hash'] = int(md5.md5(str(obj)).hexdigest(), 16)                                 

    @staticmethod                                                                                       
    def _cloak(obj):                                                                                    
        try:                                                                                            
            hash(obj)                                                                                   
            return obj                                                                                  
        except TypeError:                                                                               
            return _ReadOnly(obj)                                                                       

    def __getitem__(self, value):                                                                       

        return _ReadOnly._cloak(self._obj[value])                                                       

    def __setitem__(self, key, value):                                                                  

        raise TypeError(                                                                                
            "{0} has a _ReadOnly proxy around it".format(type(self._obj)))                              

    def __delitem__(self, key):                                                                         

        raise TypeError(                                                                                
            "{0} has a _ReadOnly proxy around it".format(type(self._obj)))                              

    def __getattr__(self, value):                                                                       

        if value in self.__dir__():                                                                     
            return _ReadOnly._cloak(getattr(self._obj, value))                                          
        elif value in dir(self._obj):                                                                   
            raise AttributeError("{0} attribute {1} is cloaked".format(                                 
                type(self._obj), value))                                                                
        else:                                                                                           
            raise AttributeError("{0} has no {1}".format(                                               
                type(self._obj), value))                                                                

    def __setattr__(self, key, value):                                                                  

        raise TypeError(                                                                                
            "{0} has a _ReadOnly proxy around it".format(type(self._obj)))                              

    def __delattr__(self, key):                                                                         

        raise TypeError(                                                                                
            "{0} has a _ReadOnly proxy around it".format(type(self._obj)))                              

    def __dir__(self):                                                                                  

        if self._d is None:                                                                             
            self.__dict__['_d'] = [                                                                     
                i for i in dir(self._obj) if not i.startswith('set')                                    
                and i not in self._extraFilter]                                                         
        return self._d                                                                                  

    def __repr__(self):                                                                                 

        return self._obj.__repr__()                                                                     

    def __call__(self, *args, **kwargs):                                                                

        if hasattr(self._obj, "__call__"):                                                              
            return self._obj(*args, **kwargs)                                                           
        else:                                                                                           
            raise TypeError("{0} not callable".format(type(self._obj)))                                 

    def __hash__(self):                                                                                 

        return self._hash                                                                               

    def __eq__(self, other):                                                                            

        try:                                                                                            
            return hash(self) == hash(other)                                                            
        except TypeError:                                                                               
            if isinstance(other, list):                                                                 
                try:                                                                                    
                    return all(zip(self, other))                                                        
                except:                                                                                 
                    return False                                                                        
            return other == self    

DataFrame 代理

这个类还应当继续扩展:像 sort 那样重写更多方法,并把其余不需要的、会改变状态的方法全部过滤掉。

你可以只用一个 DataFrame 实例作为参数来实例化,或者像创建 DataFrame 时那样提供参数。

import pandas as pd

class DataFrameProxy(_ReadOnly):
    """
    Read-only proxy specialised for pandas DataFrames.

    Construct it either from a single existing DataFrame (which is wrapped
    as-is) or with the same arguments ``pd.DataFrame`` accepts (a new frame is
    built and wrapped). The methods named in ``EXTRA_FILTER`` are hidden in
    addition to the defaults of ``_ReadOnly``.
    """

    # Mutating DataFrame methods hidden from the proxy.
    EXTRA_FILTER = ('drop', 'drop_duplicates', 'dropna')

    def __init__(self, *args, **kwargs):
        # Check the single argument itself; ``args`` is always a tuple.
        if len(args) == 1 and not kwargs and isinstance(args[0], pd.DataFrame):
            frame = args[0]
        else:
            frame = pd.DataFrame(*args, **kwargs)

        super(DataFrameProxy, self).__init__(frame, DataFrameProxy.EXTRA_FILTER)

    def sort(self, inplace=False, *args, **kwargs):
        """
        Return the result of the wrapped frame's ``sort``; ``inplace`` is never forwarded.

        A truthy ``inplace`` only produces a warning.
        NOTE(review): relies on ``DataFrame.sort``, which is not available in
        recent pandas releases - confirm the pandas version in use.
        """
        if inplace:
            warnings.warn("Inplace sorting overridden")

        return self._obj.sort(*args, **kwargs)

最后:

不过,虽然制作这个玩意儿很有趣,但为什么不简单地使用一个不被改变的 DataFrame 呢?如果它只对你可见,最好还是你自己确保不去改变它……

13

试试写类似这样的代码

class Bla(object):
    """Holds a private DataFrame and only ever hands out copies of it."""

    def __init__(self):
        frame = pd.DataFrame(index=[1, 2, 3])
        self._df = frame

    @property
    def df(self):
        """A fresh copy of the internal frame; changes to it do not reach the original."""
        snapshot = self._df.copy()
        return snapshot

这样你可以通过 b.df 来获取数据框(df),但是你不能对它进行赋值。简单来说,你在这个类里有一个数据框,它的表现像是“不可变的数据框”,也就是说它阻止对原始数据的修改。不过,返回的对象仍然是一个可变的数据框,所以在其他方面它不会像不可变的那样表现。比如,你不能把它用作字典的键等等。

22

StaticFrame这个包(我也是作者之一)提供了一种类似Pandas的操作方式,并且实现了很多常见的Pandas功能,同时确保底层的NumPy数组和不可变的Series和Frame容器是不可更改的。

你可以通过使用static_frame.Frame.from_pandas(df)将整个Pandas DataFrame转换为一个StaticFrame的Frame,这样就可以把它变成一个真正只读的表格。

想了解这个方法的更多信息,可以查看StaticFrame的文档: https://static-frame.readthedocs.io/en/latest/api_detail/frame.html#frame-constructor

撰写回答