使用QSortFilterProxyModel时,PySide GUI冻结
我正在开发一个使用 PySide6 和 Python 3 的应用程序,其中有一个 `QTableView`,使用了自定义模型 `DataFrameTableModel`。我想让这个表格支持过滤功能,所以还使用了 `QSortFilterProxyModel`。
其中一个需求是按不同的条件进行过滤,例如筛选出 x 列中值大于等于 5 的所有行。为了表示过滤条件,我实现了一个类 `DataFrameFilter`,它基本上就是存储类似 `{column: 'Price', operation: 'eq', value: 12}` 这样的信息。为了应用这种自定义的过滤格式,我创建了一个继承自 `QSortFilterProxyModel` 的类 `DataFrameSortFilterProxyModel`。
from enum import Enum
from PySide6.QtCore import QAbstractTableModel, QSortFilterProxyModel, QModelIndex, Qt
import pandas as pd
class DataFrameTableModel(QAbstractTableModel):
    """Read-only Qt table model that exposes a pandas DataFrame.

    ``DisplayRole`` yields the cell converted with ``str``; ``UserRole``
    yields the raw cell value so proxies can compare real values.
    """

    def __init__(self, df: pd.DataFrame = None):
        """Create the model, optionally wrapping *df* right away."""
        super().__init__()
        self._df: pd.DataFrame = df

    def rowCount(self, parent: QModelIndex = QModelIndex()) -> int:
        """Return the number of rows; 0 for a valid parent or a missing frame."""
        # The default is a real (invalid) index so the method can be called
        # without arguments; the previous ``...`` default has no isValid().
        if parent.isValid() or self._df is None:
            return 0
        return self._df.shape[0]

    def columnCount(self, parent: QModelIndex = QModelIndex()) -> int:
        """Return the number of columns; 0 for a valid parent or a missing frame."""
        if parent.isValid() or self._df is None:
            return 0
        return self._df.shape[1]

    def data(self, index: QModelIndex, role: int = Qt.ItemDataRole.DisplayRole) -> object:
        """Return the cell at *index* for the display or user role, else None."""
        if not index.isValid() or self._df is None:
            return None
        # Only these two roles need the cell value, so skip the DataFrame
        # lookup entirely for any other role.
        if role == Qt.ItemDataRole.DisplayRole:
            return str(self._df.iloc[index.row(), index.column()])
        if role == Qt.ItemDataRole.UserRole:
            return self._df.iloc[index.row(), index.column()]
        return None

    def headerData(self, section: int, orientation: Qt.Orientation,
                   role: int = Qt.ItemDataRole.DisplayRole) -> object:
        """Return the column label (horizontal) or index label (vertical).

        ``DisplayRole`` returns the label as ``str``; ``UserRole`` returns
        the raw label object. Any other role returns None.
        """
        if self._df is None:
            return None
        if role not in (Qt.ItemDataRole.DisplayRole, Qt.ItemDataRole.UserRole):
            return None
        if orientation == Qt.Orientation.Horizontal:
            label = self._df.columns[section]
        else:
            label = self._df.index[section]
        return str(label) if role == Qt.ItemDataRole.DisplayRole else label

    def flags(self, index: QModelIndex) -> Qt.ItemFlag:
        """Every cell is selectable and enabled, never editable."""
        return Qt.ItemFlag.ItemIsSelectable | Qt.ItemFlag.ItemIsEnabled

    @property
    def df(self) -> pd.DataFrame:
        """The DataFrame currently shown by the model (may be None)."""
        return self._df

    @df.setter
    def df(self, value: pd.DataFrame):
        # Swapping the frame can change rows, columns and every value at
        # once, so announce it as a full model reset. Emitting layoutChanged
        # alone (without layoutAboutToBeChanged) is not a valid notification.
        self.beginResetModel()
        self._df = value
        self.endResetModel()
class DataFrameFilterOperation(Enum):
    """Comparison operators a filter can apply to a column.

    Each member's value is a short string code for the comparison.
    """

    EQUAL = "eq"
    NOT_EQUAL = "ne"
    GREATER_THAN = "gt"
    GREATER_THAN_OR_EQUAL = "ge"
    LESS_THAN = "lt"
    LESS_THAN_OR_EQUAL = "le"
class DataFrameFilter:
    """Immutable description of one filter condition on a DataFrame column.

    Stores the column label, its positional index, the comparison
    operation and the value to compare against. Instances compare equal
    when all four fields are equal, and hash consistently with that
    equality so they can be kept in sets or used as dict keys.
    """

    def __init__(self, column: str, column_index: int,
                 operation: "DataFrameFilterOperation", value):
        self._column = column
        self._column_index = column_index
        self._operation = operation
        self._value = value

    @property
    def column(self) -> str:
        """Label of the column the filter applies to."""
        return self._column

    @property
    def column_index(self) -> int:
        """Positional index of the column the filter applies to."""
        return self._column_index

    @property
    def operation(self) -> "DataFrameFilterOperation":
        """Comparison operation to perform."""
        return self._operation

    @property
    def value(self):
        """Right-hand operand of the comparison."""
        return self._value

    def _key(self) -> tuple:
        """Return the fields that define equality and hashing."""
        return (self._column, self._column_index, self._operation, self._value)

    def __eq__(self, other: object) -> bool:
        # Returning NotImplemented lets Python try the reflected comparison
        # and finally fall back to identity, so ``f == 5`` is still False.
        if not isinstance(other, DataFrameFilter):
            return NotImplemented
        return self._key() == other._key()

    # No explicit __ne__: Python 3 derives ``!=`` from __eq__.

    def __hash__(self) -> int:
        # Defining __eq__ alone would set __hash__ to None. Hashing still
        # raises TypeError when the stored value itself is unhashable.
        return hash(self._key())

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}(column={self._column!r}, "
            f"column_index={self._column_index!r}, "
            f"operation={self._operation!r}, value={self._value!r})"
        )
class DataFrameSortFilterProxyModel(QSortFilterProxyModel):
    """Proxy model that keeps only rows satisfying every registered filter.

    Filters are ``DataFrameFilter`` objects. Each one is evaluated against
    the ``UserRole`` value of its column in the source model.
    """

    # Maps each operation to the binary predicate that implements it.
    OPERATIONS = {
        DataFrameFilterOperation.EQUAL: lambda x, y: x == y,
        DataFrameFilterOperation.NOT_EQUAL: lambda x, y: x != y,
        DataFrameFilterOperation.GREATER_THAN: lambda x, y: x > y,
        DataFrameFilterOperation.GREATER_THAN_OR_EQUAL: lambda x, y: x >= y,
        DataFrameFilterOperation.LESS_THAN: lambda x, y: x < y,
        DataFrameFilterOperation.LESS_THAN_OR_EQUAL: lambda x, y: x <= y,
    }

    def __init__(self):
        super().__init__()
        self._filters = []

    def filterAcceptsRow(self, source_row: int, source_parent: QModelIndex) -> bool:
        """Return True when the source row passes all active filters.

        With no filters registered every row is accepted.
        """
        if not self._filters:
            return True
        # This runs once per source row, so look the model and role up once
        # and stop at the first failing filter instead of evaluating them all.
        source = self.sourceModel()
        role = Qt.ItemDataRole.UserRole
        for condition in self._filters:
            cell = source.index(source_row, condition.column_index, source_parent).data(role)
            if not self.OPERATIONS[condition.operation](cell, condition.value):
                return False
        return True

    def lessThan(self, left: QModelIndex, right: QModelIndex) -> bool:
        """Order two source indexes by their raw ``UserRole`` values."""
        left_value = left.data(Qt.ItemDataRole.UserRole)
        right_value = right.data(Qt.ItemDataRole.UserRole)
        # bool() normalises comparison results that are not plain Python
        # bools (for example values produced by numpy scalars).
        return bool(left_value < right_value)

    def add_filter(self, filter: DataFrameFilter):
        """Register *filter* and re-evaluate the proxy."""
        self._filters.append(filter)
        self.invalidate()

    def remove_filter(self, filter: DataFrameFilter):
        """Remove *filter* and re-evaluate the proxy.

        Raises:
            ValueError: if no equal filter is registered.
        """
        self._filters.remove(filter)
        self.invalidate()

    def clear_filters(self):
        """Drop every filter and re-evaluate the proxy."""
        self._filters.clear()
        self.invalidate()
问题:对于小数据集,一切都运行得很好。但对于较大的数据集(大约 60000 行),过滤明显需要很长时间,导致界面卡顿几秒钟。我考虑过把过滤逻辑放到一个单独的线程(`QThread`)中,但界面只能在主线程中操作,而修改模型同样会改变界面,所以我不能在第二个线程中修改模型。
如果过滤需要几秒钟也没关系,只是界面在这段时间内不应该卡住,这样就可以显示进度条之类的东西。有什么建议或解决方案吗?
编辑 03/11/24
我想出了一个自定义的解决方案:实现一个自定义的表格模型(继承自 `QAbstractTableModel`),完全不使用 `QSortFilterProxyModel`。我的想法是把过滤操作放到第二个线程中执行(下面的代码通过 `QThreadPool` 运行一个 `QRunnable` 任务),并由自定义模型的方法来控制。在过滤完成之前,模型本身不会被修改。该线程返回最终过滤后的 `DataFrame`,再把它应用到启动该线程的模型实例上。这样界面就不再卡顿,并且可以通过 Qt 信号来显示和控制加载动画。
model.py
:
from enum import Enum
from PySide6.QtCore import QAbstractTableModel, QModelIndex, Qt, QThreadPool, Signal
import pandas as pd
from thread import DataFrameFilterTask
class DataFrameTableModel(QAbstractTableModel):
    """Read-only table model over a DataFrame with background filtering.

    The model shows ``transformed_df`` when one exists, otherwise
    ``base_df``. Filters are evaluated by ``DataFrameFilterTask`` objects on
    a thread pool; the model is only touched again when a task delivers its
    result through a signal.

    ``beginFiltering`` is emitted when filtering work starts and
    ``endFiltering`` once no filter task is outstanding any more. The
    sorting/transforming signals are declared here but not emitted by this
    class.
    """

    beginFiltering = Signal()
    endFiltering = Signal()
    beginSorting = Signal()
    endSorting = Signal()
    beginTransforming = Signal()
    endTransforming = Signal()

    def __init__(self, base_df: pd.DataFrame = None):
        super().__init__()
        self._base_df: pd.DataFrame = base_df
        self._transformed_df: pd.DataFrame = None
        self._filters: list[DataFrameFilter] = []
        self._is_filtering = False
        self._is_sorting = False
        # Number of filter tasks started whose ``finished`` has not arrived.
        self._pending_filter_tasks = 0
        # A private pool limited to one thread: queued tasks of equal
        # priority are expected to run in submission order, so the result
        # of the most recent request is the last one applied.
        self._filter_pool = QThreadPool(self)
        self._filter_pool.setMaxThreadCount(1)

    def rowCount(self, parent: QModelIndex = QModelIndex()) -> int:
        """Return the visible row count; 0 for a valid parent or no data."""
        current = self._current_df
        if parent.isValid() or current is None:
            return 0
        return current.shape[0]

    def columnCount(self, parent: QModelIndex = QModelIndex()) -> int:
        """Return the visible column count; 0 for a valid parent or no data."""
        current = self._current_df
        if parent.isValid() or current is None:
            return 0
        return current.shape[1]

    def data(self, index: QModelIndex, role: int = Qt.ItemDataRole.DisplayRole) -> object:
        """Return the cell as ``str`` (DisplayRole) or raw (UserRole), else None."""
        current = self._current_df
        if not index.isValid() or current is None:
            return None
        # Only these two roles need the cell value; skip the lookup otherwise.
        if role == Qt.ItemDataRole.DisplayRole:
            return str(current.iloc[index.row(), index.column()])
        if role == Qt.ItemDataRole.UserRole:
            return current.iloc[index.row(), index.column()]
        return None

    def headerData(self, section: int, orientation: Qt.Orientation,
                   role: int = Qt.ItemDataRole.DisplayRole) -> object:
        """Return the column label (horizontal) or index label (vertical).

        ``DisplayRole`` returns the label as ``str``; ``UserRole`` returns
        the raw label object. Any other role returns None.
        """
        current = self._current_df
        if current is None:
            return None
        if role not in (Qt.ItemDataRole.DisplayRole, Qt.ItemDataRole.UserRole):
            return None
        if orientation == Qt.Orientation.Horizontal:
            label = current.columns[section]
        else:
            label = current.index[section]
        return str(label) if role == Qt.ItemDataRole.DisplayRole else label

    def flags(self, index: QModelIndex) -> Qt.ItemFlag:
        """Every cell is selectable and enabled, never editable."""
        return Qt.ItemFlag.ItemIsSelectable | Qt.ItemFlag.ItemIsEnabled

    @property
    def base_df(self) -> pd.DataFrame:
        """The unfiltered source DataFrame (may be None)."""
        return self._base_df

    @base_df.setter
    def base_df(self, value: pd.DataFrame):
        # Replacing the frame changes the whole table, so report a reset;
        # layoutChanged without layoutAboutToBeChanged is not a valid
        # notification.
        self.beginResetModel()
        self._base_df = value
        self._transformed_df = None
        self.endResetModel()
        # Re-run the filters on the new data. This is also needed while a
        # task for the old data is still outstanding, so that its late
        # result is superseded by one computed from the new frame.
        if self._filters or self._pending_filter_tasks:
            self._apply_filters()

    @property
    def transformed_df(self) -> pd.DataFrame:
        """The most recent filter result, or None if none is applied."""
        return self._transformed_df

    @property
    def filters(self) -> "list[DataFrameFilter]":
        """The list of active filters (the internal list, not a copy)."""
        return self._filters

    @property
    def is_filtering(self) -> bool:
        """True while at least one filter task is outstanding."""
        return self._is_filtering

    @property
    def is_sorting(self) -> bool:
        return self._is_sorting

    @property
    def is_transforming(self) -> bool:
        """True while filtering or sorting is in progress."""
        return self._is_filtering or self._is_sorting

    @property
    def _current_df(self) -> pd.DataFrame:
        """The frame the view should display right now."""
        return self._base_df if self._transformed_df is None else self._transformed_df

    def add_filter(self, filter: "DataFrameFilter"):
        """Register *filter* and start re-filtering in the background."""
        self._filters.append(filter)
        self._apply_filters()

    def remove_filter(self, filter: "DataFrameFilter"):
        """Remove *filter* and start re-filtering in the background.

        Raises:
            ValueError: if no equal filter is registered.
        """
        self._filters.remove(filter)
        self._apply_filters()

    def clear_filters(self):
        """Remove every filter and start re-filtering in the background."""
        self._filters.clear()
        self._apply_filters()

    def _apply_filters(self):
        """Start a background task that filters ``base_df`` with the current filters."""
        if self._base_df is None:
            # Nothing to filter; without this guard ``.copy`` would raise.
            return
        # Give the task its own snapshot of the filter list: the model may
        # mutate ``self._filters`` while the task is still iterating it.
        task = DataFrameFilterTask(self._base_df.copy(deep=True), list(self._filters))
        task.signals.data.connect(self._on_filter_task_data)
        task.signals.finished.connect(self._on_filter_task_finished)
        task.signals.error.connect(self._on_filter_task_error)
        self._pending_filter_tasks += 1
        if self._pending_filter_tasks == 1:
            self._is_filtering = True
            self.beginFiltering.emit()
        self._filter_pool.start(task)

    def _on_filter_task_data(self, df: pd.DataFrame):
        """Install a task's filtered frame as the displayed data."""
        self.beginResetModel()
        self._transformed_df = df
        self.endResetModel()

    def _on_filter_task_finished(self):
        """Account for a finished task; signal the end once none remain."""
        self._pending_filter_tasks = max(0, self._pending_filter_tasks - 1)
        if self._pending_filter_tasks == 0:
            self._is_filtering = False
            self.endFiltering.emit()

    def _on_filter_task_error(self, error: tuple[Exception, type, str]):
        """Re-raise the exception reported by a task.

        *error* is ``(exception, exception type, formatted traceback)``.
        """
        raise error[0]
thread.py
:
import sys
import traceback
from PySide6.QtCore import QRunnable, Signal, QObject
import pandas as pd
class DataFrameFilterTaskSignals(QObject):
    """Signals a ``DataFrameFilterTask`` uses to report back.

    They live on a separate ``QObject`` because the task derives from
    ``QRunnable`` and holds an instance of this class.
    """

    # Emitted last, whether the task succeeded or failed.
    finished = Signal()
    # Payload is a tuple ``(exception, exception type, formatted traceback)``;
    # declared as ``tuple`` to match what the task actually emits.
    error = Signal(tuple)
    # Payload is the filtered DataFrame.
    data = Signal(pd.DataFrame)
class DataFrameFilterTask(QRunnable):
    """Runnable that applies a sequence of filters to a DataFrame.

    The result is reported through ``self.signals``: ``data`` carries the
    filtered frame, ``error`` carries ``(exception, type, traceback text)``,
    and ``finished`` is emitted last in either case.
    """

    # Maps an operation code to the binary comparison that implements it.
    OPERATIONS = {
        "eq": lambda x, y: x == y,
        "ne": lambda x, y: x != y,
        "lt": lambda x, y: x < y,
        "le": lambda x, y: x <= y,
        "gt": lambda x, y: x > y,
        "ge": lambda x, y: x >= y,
    }

    def __init__(self, df: pd.DataFrame, filters: list):
        super().__init__()
        self.signals = DataFrameFilterTaskSignals()
        self._df = df
        self._filters = filters

    @classmethod
    def _apply_filter(cls, df: pd.DataFrame, column, operation: str, value) -> pd.DataFrame:
        """Return the rows of *df* whose *column* satisfies one comparison."""
        series = df[column]
        wanted_type = type(value)
        if series.dtype != wanted_type:
            # The column is not uniformly of the operand's type: keep only
            # cells of exactly that type before comparing.
            same_type = series.apply(lambda cell: type(cell) == wanted_type)
            # On an empty frame the mask is not of bool dtype and would be
            # read as a column selection; force it to a boolean mask.
            df = df.loc[same_type.astype(bool)]
            series = df[column]
        return df[cls.OPERATIONS[operation](series, value)]

    def run(self):
        """Apply every filter in order and emit the outcome."""
        try:
            result = self._df
            for condition in self._filters:
                result = self._apply_filter(
                    result, condition.column, condition.operation.value, condition.value
                )
                self._df = result
            self.signals.data.emit(self._df)
        except Exception:
            # Report the failure to the receiver instead of letting it
            # escape from the pool thread.
            traceback.print_exc()
            exctype, value = sys.exc_info()[:2]
            self.signals.error.emit((value, exctype, traceback.format_exc()))
        finally:
            self.signals.finished.emit()
对我的解决方案有什么看法吗?它确实没有出现卡顿的问题,但我不确定这是常见的做法,还是一种快速但不太规范的解决方案……更好的做法是否是保持 `DataFrameTableModel` 不变,另外创建一个继承自 `QAbstractProxyModel` 的 `DataFrameSortFilterProxyModel`,在其中处理线程逻辑,并通过设置过滤后的 `DataFrame` 来更新 `sourceModel()`(一个 `DataFrameTableModel` 实例),以遵循关注点分离的原则?
1 个回答
在Qt中,主线程是专门用来处理图形界面的。就像你之前提到的,对于小数据集来说,界面不会卡住,这是因为处理速度很快,你根本看不出卡顿的情况,但当数据集变大时,就会需要更多时间来计算,这时候你就会注意到界面卡住了。解决这个问题的唯一、也是正确的方法就是把计算放到另一个线程去做。
你需要做的其实是创建一个工作类,这个类里会实例化你用来进行计算的类。我会模仿你的工作类和图形界面。
假设你用来计算的类叫做DataFrameSortFilterProxyModel,而你不想去改动这个类的继承关系。
class DataFrameSortFilterProxyModel(QtCore.QObject):
    """Sketch of a computation class that reports its own progress.

    Signals can only be emitted from a ``QObject`` subclass, hence the base
    class.
    """

    # Whole-number percentage of the work done so far (0-100).
    progress = QtCore.Signal(int)
    # Emitted once after the loop has completed.
    finished = QtCore.Signal()

    def filtering(self):
        """Run the (placeholder) work loop, emitting progress and finished."""
        datasetLength = 1000000000
        last_percent = -1
        for i in range(datasetLength):
            # do the actual work
            percent = (i + 1) * 100 // datasetLength
            # Emit only when the whole-number percentage changes rather
            # than once per item.
            if percent != last_percent:
                last_percent = percent
                self.progress.emit(percent)
        self.finished.emit()
现在在你的工作类里,你实例化这个计算类:
class Worker(QObject):
    """Runs the filtering computation and forwards its signals.

    ``run`` is the slot meant to be connected to ``QThread.started`` after
    the worker has been moved to that thread.
    """

    finished = QtCore.Signal()
    progress = QtCore.Signal(int)

    def __init__(self):
        super().__init__()
        self.mySortingModel = DataFrameSortFilterProxyModel()
        self.mySortingModel.progress.connect(self.updateProgress)
        self.mySortingModel.finished.connect(self.onFinished)

    def run(self):
        """Start the computation.

        The work is started here, not in ``__init__``: the constructor runs
        in the thread that creates the worker, and calling ``filtering()``
        there would block that thread.
        """
        self.mySortingModel.filtering()

    def updateProgress(self, numProgress):
        """Re-emit the computation's progress value."""
        self.progress.emit(numProgress)

    def onFinished(self):
        """Re-emit the computation's finished notification."""
        self.finished.emit()
接下来我们回到你的用户界面类,你提到它应该有一个进度条。为了简单起见,我会用QLabel,但你也可以使用QProgressBar,或者用带有GIF动画的控件:
from PySide2.QtCore import QThread
...
...
self.progressText = QLabel("0%")
...
def calculate(self):
    """Run the worker on a new thread and keep the UI in sync.

    The trigger button is disabled while the thread runs. When the thread
    finishes, the button is re-enabled and the progress text is reset.
    """
    # instantiate different thread
    self.thread = QThread()
    # create a worker
    self.worker = Worker()
    # move worker to the thread
    self.worker.moveToThread(self.thread)
    # connect all signals and slots
    self.thread.started.connect(self.worker.run)
    self.worker.finished.connect(self.thread.quit)
    self.worker.finished.connect(self.worker.deleteLater)
    self.thread.finished.connect(self.thread.deleteLater)
    self.worker.progress.connect(self.updateProgress)
    # connect signals what happens in between and after the calcs are over
    self.thread.finished.connect(
        lambda: self.calculateBtn.setEnabled(True)
    )
    self.thread.finished.connect(
        lambda: self.progressText.setText("0%")
    )
    # if you don't want the calculation to be interrupted by the user,
    # disable the action that triggers it (here: a button) before starting
    self.calculateBtn.setEnabled(False)
    # start the thread so the actual work is started
    self.thread.start()
def updateProgress(self, progressNum):
    """Show *progressNum* on the progress label, followed by a percent sign."""
    percent_text = "{}%".format(progressNum)
    self.progressText.setText(percent_text)
这样应该能解决你界面卡住的问题,但不幸的是,你需要重新构建你的计算类,以便能够获取计算进度的信息。别忘了在self.thread.finished时更新你的实际图表,以便显示数据。