使用QSortFilterProxyModel时,PySide GUI冻结

0 投票
1 回答
58 浏览
提问于 2025-04-14 17:38

我正在开发一个使用PySide6和Python3的应用程序,里面有一个QTableView,使用了一个自定义的模型DataFrameTableModel。我想让这个表格支持过滤功能,所以我还使用了QSortFilterProxyModel。

其中一个要求是根据不同的条件来过滤,比如说过滤出在x列中值大于等于5的所有行。为了表示过滤条件,我实现了一个类DataFrameFilter,这个类基本上就是存储一些像{column: 'Price', operator: 'eq', value: 12}这样的信息。为了应用这个自定义的过滤格式,我创建了一个类DataFrameSortFilterProxyModel,它继承自QSortFilterProxyModel。

from enum import Enum
from PySide6.QtCore import QAbstractTableModel, QSortFilterProxyModel, QModelIndex, Qt
import pandas as pd

class DataFrameTableModel(QAbstractTableModel):
    """Read-only Qt table model backed by a pandas DataFrame.

    ``DisplayRole`` returns the cell (or header label) converted with
    ``str``; ``UserRole`` returns the raw value so that proxy models can
    filter and sort on the real data instead of on its text form.
    """

    def __init__(self, df: pd.DataFrame = None):
        super().__init__()

        self._df: pd.DataFrame = df

    def rowCount(self, parent: QModelIndex = QModelIndex()) -> int:
        """Return the number of DataFrame rows, or 0 for a valid parent / no data."""
        # A table is flat: only the invalid (root) index has children.
        if parent.isValid() or self._df is None:
            return 0

        return self._df.shape[0]

    def columnCount(self, parent: QModelIndex = QModelIndex()) -> int:
        """Return the number of DataFrame columns, or 0 for a valid parent / no data."""
        if parent.isValid() or self._df is None:
            return 0

        return self._df.shape[1]

    def data(self, index: QModelIndex, role: int = Qt.ItemDataRole.DisplayRole) -> object:
        """Return the cell at ``index`` as text (DisplayRole) or raw value (UserRole).

        Every other role, an invalid index, or a missing DataFrame yields
        ``None``.
        """
        if not index.isValid() or self._df is None:
            return None

        # Check the role first so that unsupported roles never touch the
        # DataFrame at all.
        if role not in (Qt.ItemDataRole.DisplayRole, Qt.ItemDataRole.UserRole):
            return None

        # ``iat`` is the positional scalar accessor of pandas.
        value = self._df.iat[index.row(), index.column()]
        if role == Qt.ItemDataRole.DisplayRole:
            return str(value)
        return value

    def headerData(self, section: int, orientation: Qt.Orientation,
                   role: int = Qt.ItemDataRole.DisplayRole) -> object:
        """Return the column label (horizontal) or index label (vertical).

        DisplayRole returns the label as a string, UserRole returns the raw
        label object; any other role returns ``None``.
        """
        if self._df is None:
            return None
        if role not in (Qt.ItemDataRole.DisplayRole, Qt.ItemDataRole.UserRole):
            return None

        if orientation == Qt.Orientation.Horizontal:
            label = self._df.columns[section]
        else:
            label = self._df.index[section]

        return str(label) if role == Qt.ItemDataRole.DisplayRole else label

    def flags(self, index: QModelIndex) -> Qt.ItemFlag:
        """Mark every cell as selectable and enabled (not editable)."""
        return Qt.ItemFlag.ItemIsSelectable | Qt.ItemFlag.ItemIsEnabled

    @property
    def df(self) -> pd.DataFrame:
        """The DataFrame currently shown by the model (may be ``None``)."""
        return self._df

    @df.setter
    def df(self, value: pd.DataFrame):
        # Replacing the frame can change the row and column counts, so the
        # whole model is reset. Emitting layoutChanged on its own (without
        # layoutAboutToBeChanged) is not a valid notification for that.
        self.beginResetModel()
        self._df = value
        self.endResetModel()

class DataFrameFilterOperation(Enum):
    """Comparison operators a filter can apply, keyed by a short string code."""

    # Equality checks
    EQUAL = "eq"
    NOT_EQUAL = "ne"

    # Lower-bound checks (strict, then inclusive)
    GREATER_THAN = "gt"
    GREATER_THAN_OR_EQUAL = "ge"

    # Upper-bound checks (strict, then inclusive)
    LESS_THAN = "lt"
    LESS_THAN_OR_EQUAL = "le"

class DataFrameFilter:
    """Immutable description of one filter condition on a DataFrame column.

    Stores the column label, its positional index, the comparison operation
    and the value to compare against. Instances compare equal when all four
    parts are equal and are hashable (as long as ``value`` itself is), so
    they can be stored in sets or used as dictionary keys.
    """

    def __init__(self, column: str, column_index: int,
                 operation: "DataFrameFilterOperation", value):
        self._column = column
        self._column_index = column_index
        self._operation = operation
        self._value = value

    @property
    def column(self) -> str:
        """Label of the column the filter applies to."""
        return self._column

    @property
    def column_index(self) -> int:
        """Positional index of the column the filter applies to."""
        return self._column_index

    @property
    def operation(self) -> "DataFrameFilterOperation":
        """Comparison operation applied between a cell and ``value``."""
        return self._operation

    @property
    def value(self):
        """Right-hand operand of the comparison."""
        return self._value

    def _key(self) -> tuple:
        # Single source of truth for equality and hashing so the two can
        # never disagree.
        return (self._column, self._column_index, self._operation, self._value)

    def __eq__(self, value: object) -> bool:
        if not isinstance(value, DataFrameFilter):
            # Let Python try the reflected comparison; it falls back to
            # "not equal" when the other side does not know this type either.
            return NotImplemented

        return self._key() == value._key()

    # __ne__ is intentionally not defined: Python 3 derives it from __eq__.

    def __hash__(self) -> int:
        # Defining __eq__ alone would set __hash__ to None and make
        # instances unhashable.
        return hash(self._key())

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}(column={self._column!r}, "
            f"column_index={self._column_index!r}, "
            f"operation={self._operation!r}, value={self._value!r})"
        )

class DataFrameSortFilterProxyModel(QSortFilterProxyModel):
    """Proxy model that keeps only the rows satisfying every registered filter.

    Filtering and sorting both read the source model's ``UserRole`` data so
    comparisons run on raw values rather than on display strings.
    """

    # Maps each filter operation to the comparison it performs on
    # (cell value, filter value).
    OPERATIONS = {
        DataFrameFilterOperation.EQUAL: lambda x, y: x == y,
        DataFrameFilterOperation.NOT_EQUAL: lambda x, y: x != y,
        DataFrameFilterOperation.GREATER_THAN: lambda x, y: x > y,
        DataFrameFilterOperation.GREATER_THAN_OR_EQUAL: lambda x, y: x >= y,
        DataFrameFilterOperation.LESS_THAN: lambda x, y: x < y,
        DataFrameFilterOperation.LESS_THAN_OR_EQUAL: lambda x, y: x <= y
    }

    def __init__(self, parent=None):
        super().__init__(parent)

        self._filters = []

    def filterAcceptsRow(self, source_row: int, source_parent: QModelIndex) -> bool:
        """Return True when the source row passes all filters (or none are set).

        Stops at the first failing filter. A cell whose value cannot be
        compared with the filter value (``TypeError``) counts as not matching.
        """
        source = self.sourceModel()

        for row_filter in self._filters:
            cell = source.index(source_row, row_filter.column_index, source_parent)
            value = cell.data(Qt.ItemDataRole.UserRole)
            compare = self.OPERATIONS[row_filter.operation]
            try:
                if not compare(value, row_filter.value):
                    return False
            except TypeError:
                # e.g. ordering a str against an int: treat the row as a
                # non-match instead of raising out of a Qt virtual method.
                return False

        return True

    def lessThan(self, left: QModelIndex, right: QModelIndex) -> bool:
        """Order two cells by their raw (``UserRole``) values.

        Values of types that cannot be ordered against each other are
        ordered by type name, which groups each type together.
        """
        left_value = left.data(Qt.ItemDataRole.UserRole)
        right_value = right.data(Qt.ItemDataRole.UserRole)

        try:
            # bool() so that Qt always receives a plain Python bool.
            return bool(left_value < right_value)
        except TypeError:
            return type(left_value).__name__ < type(right_value).__name__

    def add_filter(self, filter: DataFrameFilter):
        """Register ``filter`` and re-evaluate the proxy."""
        self._filters.append(filter)
        self.invalidate()

    def remove_filter(self, filter: DataFrameFilter):
        """Remove ``filter`` and re-evaluate the proxy.

        Raises:
            ValueError: if no equal filter is registered.
        """
        self._filters.remove(filter)
        self.invalidate()

    def clear_filters(self):
        """Drop every filter and re-evaluate the proxy."""
        self._filters.clear()
        self.invalidate()

问题:基本上,对于小数据集来说,一切都运行得很好。但问题是,对于较大的数据集(大约60000行),过滤明显需要很长时间,导致界面卡顿几秒钟。我考虑把过滤逻辑放到一个单独的线程(QThread)中,但界面只能在主线程中操作,而且因为编辑模型也会改变界面,所以我不能在第二个线程中修改模型。

如果过滤需要几秒钟也没关系,只是界面在这段时间内不应该卡住,这样就可以显示一个进度条或类似的东西。有什么建议或解决方案吗?

编辑 03/11/24

我想出了一个自定义的解决方案:实现一个自定义的表格模型(继承自QAbstractTableModel),完全不使用QSortFilterProxyModel。我的想法是把过滤操作放到第二个线程中执行(通过QThreadPool运行一个QRunnable任务),并由自定义模型的方法来控制。模型本身在过滤完成之前不会被修改。这个单独的线程会返回最终过滤后的DataFrame,并将其应用到启动了该线程的模型实例上。这意味着界面不再卡顿,并且可以通过Qt信号来显示和控制加载动画。

model.py:

from enum import Enum
from PySide6.QtCore import QAbstractTableModel, QModelIndex, Qt, QThreadPool, Signal
import pandas as pd
from thread import DataFrameFilterTask

class DataFrameTableModel(QAbstractTableModel):
    """Read-only table model over a DataFrame with background filtering.

    The model shows ``base_df`` until a filter task delivers a result; from
    then on it shows that result (``transformed_df``). Filtering is done by
    a ``DataFrameFilterTask`` started on the global ``QThreadPool``, and the
    model is only modified from the task's signals.

    Signals:
        beginFiltering / endFiltering: emitted when the first filter task
            starts and when the last running one has finished.
        beginSorting / endSorting, beginTransforming / endTransforming:
            declared for listeners; not emitted by the code in this class.
    """

    beginFiltering = Signal()
    endFiltering = Signal()
    beginSorting = Signal()
    endSorting = Signal()
    beginTransforming = Signal()
    endTransforming = Signal()

    def __init__(self, base_df: pd.DataFrame = None):
        super().__init__()

        self._base_df: pd.DataFrame = base_df
        self._transformed_df: pd.DataFrame = None
        self._filters: list[DataFrameFilter] = []
        self._is_filtering = False
        self._is_sorting = False

        # Number of filter tasks started but not yet finished.
        self._pending_filter_tasks = 0
        # Signals object of the most recently started task. A result coming
        # from any other task is stale and is dropped.
        self._latest_task_signals = None
        # Keeps the signals objects of running tasks referenced from Python
        # until all tasks are done.
        self._live_task_signals = []

    def rowCount(self, parent: QModelIndex = QModelIndex()) -> int:
        """Return the row count of the displayed frame (0 for a valid parent)."""
        current = self._current_df
        if parent.isValid() or current is None:
            return 0

        return current.shape[0]

    def columnCount(self, parent: QModelIndex = QModelIndex()) -> int:
        """Return the column count of the displayed frame (0 for a valid parent)."""
        current = self._current_df
        if parent.isValid() or current is None:
            return 0

        return current.shape[1]

    def data(self, index: QModelIndex, role: int = Qt.ItemDataRole.DisplayRole) -> object:
        """Return the cell as text (DisplayRole) or raw value (UserRole).

        Any other role, an invalid index, or a missing frame yields ``None``.
        """
        current = self._current_df
        if not index.isValid() or current is None:
            return None

        # Check the role first so unsupported roles never touch the frame.
        if role not in (Qt.ItemDataRole.DisplayRole, Qt.ItemDataRole.UserRole):
            return None

        value = current.iat[index.row(), index.column()]
        if role == Qt.ItemDataRole.DisplayRole:
            return str(value)
        return value

    def headerData(self, section: int, orientation: Qt.Orientation,
                   role: int = Qt.ItemDataRole.DisplayRole) -> object:
        """Return the column label (horizontal) or index label (vertical).

        DisplayRole returns the label as a string, UserRole the raw label.
        """
        current = self._current_df
        if current is None:
            return None
        if role not in (Qt.ItemDataRole.DisplayRole, Qt.ItemDataRole.UserRole):
            return None

        if orientation == Qt.Orientation.Horizontal:
            label = current.columns[section]
        else:
            label = current.index[section]

        return str(label) if role == Qt.ItemDataRole.DisplayRole else label

    def flags(self, index: QModelIndex) -> Qt.ItemFlag:
        """Mark every cell as selectable and enabled (not editable)."""
        return Qt.ItemFlag.ItemIsSelectable | Qt.ItemFlag.ItemIsEnabled

    @property
    def base_df(self) -> pd.DataFrame:
        """The unfiltered source DataFrame (may be ``None``)."""
        return self._base_df

    @base_df.setter
    def base_df(self, value: pd.DataFrame):
        # Any task still running was started on the previous frame, so its
        # result must not be shown.
        self._latest_task_signals = None

        # The shape can change, so reset the model instead of emitting
        # layoutChanged on its own.
        self.beginResetModel()
        self._base_df = value
        self._transformed_df = None
        self.endResetModel()

        # Filters registered earlier still apply to the new data.
        if self._filters:
            self._apply_filters()

    @property
    def transformed_df(self) -> pd.DataFrame:
        """The latest filter result, or ``None`` if none has been applied."""
        return self._transformed_df

    @property
    def filters(self) -> list[DataFrameFilter]:
        """The list of registered filters (the internal list, not a copy)."""
        return self._filters

    @property
    def is_filtering(self) -> bool:
        """True while at least one filter task is running."""
        return self._is_filtering

    @property
    def is_sorting(self) -> bool:
        return self._is_sorting

    @property
    def is_transforming(self) -> bool:
        """True while filtering or sorting is in progress."""
        return self._is_filtering or self._is_sorting

    @property
    def _current_df(self) -> pd.DataFrame:
        # The filtered result wins once there is one.
        return self._base_df if self._transformed_df is None else self._transformed_df

    def add_filter(self, filter: DataFrameFilter):
        """Register ``filter`` and start re-filtering in the background."""
        self._filters.append(filter)
        self._apply_filters()

    def remove_filter(self, filter: DataFrameFilter):
        """Remove ``filter`` and start re-filtering in the background.

        Raises:
            ValueError: if no equal filter is registered.
        """
        self._filters.remove(filter)
        self._apply_filters()

    def clear_filters(self):
        """Remove all filters and start re-filtering in the background."""
        self._filters.clear()
        self._apply_filters()

    def _apply_filters(self):
        """Start a background task that filters ``base_df`` with the current filters."""
        if self._base_df is None:
            # Nothing to filter yet; the filters are applied once a frame
            # is assigned through the base_df setter.
            return

        # The task gets its own copy of the frame and of the filter list so
        # that later changes on the GUI thread cannot affect a running task.
        task = DataFrameFilterTask(self._base_df.copy(deep=True), list(self._filters))
        task.signals.data.connect(self._on_filter_task_data)
        task.signals.finished.connect(self._on_filter_task_finished)
        task.signals.error.connect(self._on_filter_task_error)

        self._latest_task_signals = task.signals
        self._live_task_signals.append(task.signals)
        self._pending_filter_tasks += 1

        # Set the flag before notifying, so a listener that checks
        # is_filtering inside its beginFiltering handler sees True.
        if not self._is_filtering:
            self._is_filtering = True
            self.beginFiltering.emit()

        QThreadPool.globalInstance().start(task)

    def _on_filter_task_data(self, df: pd.DataFrame):
        """Show the result of the newest filter task; ignore older ones."""
        sender = self.sender()
        # If the sender cannot be determined, accept the result, as the
        # code did before stale results were tracked.
        if sender is not None and sender is not self._latest_task_signals:
            return

        self.beginResetModel()
        self._transformed_df = df
        self.endResetModel()

    def _on_filter_task_finished(self):
        """Account for a finished task; signal the end when none is left."""
        self._pending_filter_tasks = max(0, self._pending_filter_tasks - 1)
        if self._pending_filter_tasks:
            return

        self._live_task_signals.clear()
        self._is_filtering = False
        self.endFiltering.emit()

    def _on_filter_task_error(self, error: tuple[Exception, type, str]):
        """Re-raise the exception reported by a filter task."""
        # The task reports (exception, exception type, traceback text);
        # a bare exception is accepted as well.
        raise error[0] if isinstance(error, tuple) else error

thread.py:

import sys
import traceback
from PySide6.QtCore import QRunnable, Signal, QObject
import pandas as pd

class DataFrameFilterTaskSignals(QObject):
    """Signals of a ``DataFrameFilterTask``.

    They live on a separate ``QObject`` because the task itself derives from
    ``QRunnable`` and the signals are declared here instead.
    """

    # Emitted once per run, after ``data`` or ``error``.
    finished = Signal()
    # Payload is the tuple (exception, exception type, traceback text), so
    # the signal is declared with the generic ``object`` type rather than
    # ``Exception``.
    error = Signal(object)
    # Payload is the filtered pandas DataFrame.
    data = Signal(object)

class DataFrameFilterTask(QRunnable):
    """Runnable that applies a list of filters to a DataFrame.

    The result is reported through ``self.signals``: ``data`` carries the
    filtered DataFrame, ``error`` carries
    ``(exception, exception type, traceback text)``, and ``finished`` is
    always emitted last.
    """

    # Maps the string value of a filter operation to its comparison.
    OPERATIONS = {
        "eq": lambda x, y: x == y,
        "ne": lambda x, y: x != y,
        "lt": lambda x, y: x < y,
        "le": lambda x, y: x <= y,
        "gt": lambda x, y: x > y,
        "ge": lambda x, y: x >= y
    }

    def __init__(self, df: pd.DataFrame, filters: list):
        super().__init__()

        self.signals = DataFrameFilterTaskSignals()
        self._df = df
        # Snapshot: the caller may keep mutating its own list while this
        # task is running.
        self._filters = list(filters)

    def _apply_filter(self, df: pd.DataFrame, row_filter) -> pd.DataFrame:
        """Return the rows of ``df`` that satisfy ``row_filter``."""
        compare = self.OPERATIONS[row_filter.operation.value]
        column = df[row_filter.column]

        if pd.api.types.is_object_dtype(column):
            # Object columns can hold mixed Python types. Keep only cells of
            # exactly the filter value's type so the comparison is
            # well-defined. astype(bool) keeps the mask boolean even when
            # the column is empty.
            wanted = type(row_filter.value)
            same_type = column.map(lambda cell: type(cell) is wanted).astype(bool)
            df = df[same_type]
            column = df[row_filter.column]

        # Typed columns are compared directly. Narrowing them by Python type
        # would drop every row when, for example, a float column is compared
        # with an int value.
        return df[compare(column, row_filter.value)]

    def run(self):
        """Apply all filters in order and emit the outcome via ``self.signals``."""
        try:
            result = self._df
            for row_filter in self._filters:
                result = self._apply_filter(result, row_filter)

            self._df = result
            self.signals.data.emit(result)
        except Exception:
            traceback.print_exc()
            exctype, value = sys.exc_info()[:2]
            self.signals.error.emit((value, exctype, traceback.format_exc()))
        finally:
            self.signals.finished.emit()

对我的解决方案有什么看法吗?它确实没有出现卡顿的问题,但我不确定这是否是常见的做法,还是一种快速且不太规范的解决方案……是否更好的做法是保持DataFrameTableModel不变,创建一个继承自QAbstractProxyModel的第二个DataFrameSortFilterProxyModel,在里面处理线程逻辑,并通过设置过滤后的DataFrame来更新sourceModel()(一个DataFrameTableModel实例),以遵循关注点分离的原则?

1 个回答

-2

在Qt中,主线程是专门用来处理图形界面的。就像你之前提到的,对于小数据集来说,界面不会卡住,这是因为处理速度很快,你根本看不出卡顿的情况,但当数据集变大时,就会需要更多时间来计算,这时候你就会注意到界面卡住了。解决这个问题的唯一、也是正确的方法就是把计算放到另一个线程去做。

你需要做的其实是创建一个工作类,这个类里会实例化你用来进行计算的类。我会模仿你的工作类和图形界面。

假设你用来计算的类叫做DataFrameSortFilterProxyModel,而你不想去改动这个类的继承关系。

class DataFrameSortFilterProxyModel(QtCore.QObject):
    """Example calculation class that reports its progress through signals.

    Signals can only be declared on ``QObject`` subclasses, hence the base
    class.
    """

    # Progress in percent (0-100).
    progress = QtCore.Signal(int)
    # Emitted once when the work is done.
    finished = QtCore.Signal()

    def filtering(self):
        """Run the (placeholder) work loop, emitting progress and completion."""
        datasetLength = 1000000000
        lastPercent = -1
        for i in range(datasetLength):
            # do the actual work

            # Emit only when the whole-number percentage changes, so the
            # receiver gets at most about a hundred updates.
            percent = (i + 1) * 100 // datasetLength
            if percent != lastPercent:
                lastPercent = percent
                self.progress.emit(percent)
        self.finished.emit()

现在在你的工作类里,你实例化这个计算类:

class Worker(QObject):
    """Owns the calculation object and forwards its signals.

    The work is started by ``run`` and not by the constructor, so that it
    can be triggered (for example by ``QThread.started``) after the worker
    has been moved to another thread.
    """

    finished = QtCore.Signal()
    progress = QtCore.Signal(int)

    def __init__(self):
        # QObject subclasses must initialise the base class before signals
        # can be used.
        super().__init__()
        self.mySortingModel = DataFrameSortFilterProxyModel()
        self.mySortingModel.progress.connect(self.updateProgress)
        self.mySortingModel.finished.connect(self.onFinished)

    def run(self):
        """Perform the calculation; progress and completion arrive via signals."""
        self.mySortingModel.filtering()

    def updateProgress(self, numProgress):
        """Forward the calculation's progress value."""
        self.progress.emit(numProgress)

    def onFinished(self):
        """Forward the calculation's completion."""
        self.finished.emit()

接下来我们回到你的用户界面类,你提到它应该有一个进度条。为了简单起见,我会用QLabel,但你也可以使用QProgressBar,或者用带有GIF动画的控件:

    from PySide2.QtCore import QThread
    ...
    ...
    self.progressText = QLabel("0%")
    ...
    def calculate(self):
        """Run the worker on a separate QThread and reflect its state in the UI."""
        # instantiate different thread
        self.thread = QThread()
        # create a worker
        self.worker = Worker()
        # move worker to the thread
        self.worker.moveToThread(self.thread)
        # connect all signals and slots
        self.thread.started.connect(self.worker.run)
        self.worker.finished.connect(self.thread.quit)
        self.worker.finished.connect(self.worker.deleteLater)
        self.thread.finished.connect(self.thread.deleteLater)
        self.worker.progress.connect(self.updateProgress)

        # connect what happens after the calculations are over; this is
        # done before the thread is started so that every connection is in
        # place by then
        self.thread.finished.connect(
            lambda: self.calculateBtn.setEnabled(True)
        )
        self.thread.finished.connect(
            lambda: self.progressText.setText("0%")
        )

        # if you don't want the user to trigger the calculation again while
        # it is running, disable the action that starts it, for example the
        # button
        self.calculateBtn.setEnabled(False)

        # start the thread so the actual work is started
        self.thread.start()

def updateProgress(self, progressNum):
    """Show ``progressNum`` followed by a percent sign in the progress label."""
    label = self.progressText
    text = f'{progressNum}%'
    label.setText(text)

这样应该能解决你界面卡住的问题,但不幸的是,你需要重新构建你的计算类,以便能够获取计算进度的信息。别忘了在self.thread.finished时更新你的实际图表,以便显示数据。

撰写回答