消除功能略有不同的重复代码

# First function
def fmeasure_kfold1(array, nfolds):
    """Gather calc_f results over k-fold splits, retrieving tests per build.

    For every fold, a correlation is built from the training slice; then,
    for each build in the test slice, the retrieved tests are derived from
    that build's modules. Results for which calc_f yields None are dropped.
    """
    results = []
    # This fold loop is shared by kfold1 and kfold2.
    for train_index, test_index in KFold(len(array), nfolds):
        correlation = analyze(array[train_index])
        for build in array[test_index]:  # <- every function has this loop
            # kfold1 computes the retrieved tests inside the build loop.
            retrieved_tests = get_tests(set(build['modules']), correlation)
            relevant_tests = set(build['tests'])
            fval = calc_f(relevant_tests, retrieved_tests)
            if fval is None:
                continue
            results.append(fval)
    return results


# Second function
def fmeasure_kfold2(array, nfolds):
    """Gather calc_f results over k-fold splits, retrieving tests per fold.

    Same shape as fmeasure_kfold1, except the retrieved tests are computed
    once per fold from the correlation and reused for every build in the
    test slice. Results for which calc_f yields None are dropped.
    """
    results = []
    # This fold loop is shared by kfold1 and kfold2.
    for train_index, test_index in KFold(len(array), nfolds):
        correlation = analyze(array[train_index])
        # kfold2 computes the retrieved tests outside the build loop.
        retrieved_tests = _sum_tests(correlation)
        for build in array[test_index]:  # <- every function has this loop
            relevant_tests = set(build['tests'])
            fval = calc_f(relevant_tests, retrieved_tests)
            if fval is None:
                continue
            results.append(fval)
    return results


# Third function
def fmeasure_all(array):
    """Gather calc_f2 results for every build, without any fold splitting.

    Results for which calc_f2 yields None are dropped.
    """
    results = []
    for build in array:  # <- every function has this loop
        relevant = set(build['tests'])
        fval = calc_f2(relevant)  # <- calc_f2 is called here instead of calc_f
        if fval is None:
            continue
        results.append(fval)
    return results

def calc_f(relevant, retrieved):
    """Calculate the F-measure given relevant and retrieved tests.

    Args:
        relevant: Set of tests that are relevant.
        retrieved: Set of tests that were retrieved.

    Returns:
        A ``(fmeasure, recall, prec)`` tuple, or ``None`` when either set is
        empty, since recall or precision would then divide by zero.
    """
    # Callers already filter on ``fval is not None``, so signal the
    # undefined case with None instead of raising ZeroDivisionError.
    if not relevant or not retrieved:
        return None
    hits = len(relevant & retrieved)
    # NOTE(review): assumes true division (Python 3 or
    # ``from __future__ import division``) -- confirm for this code base.
    recall = hits / len(relevant)
    prec = hits / len(retrieved)
    fmeasure = f_measure(recall, prec)
    return (fmeasure, recall, prec)


def calc_f2(relevant, nbr_tests=1000):
    """Calculate the F-measure given relevant tests.

    Recall is fixed at 1 and precision is the share of ``nbr_tests`` that
    is relevant.

    Args:
        relevant: Collection of relevant tests; only its length is used.
        nbr_tests: Total number of tests used as the precision denominator.

    Returns:
        A ``(fmeasure, recall, prec)`` tuple.
    """
    recall = 1
    prec = len(relevant) / nbr_tests
    fmeasure = f_measure(recall, prec)
    return (fmeasure, recall, prec)

3条回答

网友
1楼 · 编辑于 2024-05-23 14:44:26

典型的解决方案是识别出算法的各个组成部分，并使用模板方法设计模式（Template Method design pattern）在子类中实现不同的阶段。我并不完全理解您的代码，但我猜会有类似 makeGlobalRetrievedTests() 和 makeIndividualRetrievedTests() 这样的方法？

网友
2楼 · 编辑于 2024-05-23 14:44:26

一种方法是将每个内部循环写成一个函数，然后将外部循环写成一个单独的函数，并让它接收这些内部循环函数作为参数。这与排序函数（接收一个用于比较两个元素的函数）的做法非常相似。
当然，困难的部分是找出所有函数之间究竟有什么共同点，这并不总是那么简单。

网友
3楼 · 编辑于 2024-05-23 14:44:26

也可以使用一个公共函数，通过一个额外的参数来控制在何处计算 retrieved_tests，这同样可行。

例如

def fmeasure_kfold_generic(array, nfolds, mode):
    """Collect calc_f results over k-fold splits of ``array``.

    Merges the kfold1 and kfold2 variants; ``mode`` selects where the
    retrieved tests are computed.

    Args:
        array: Builds to split; indexed with the train/test indices that
            KFold yields, and each build is read via ``build['modules']``
            and ``build['tests']``.
        nfolds: Number of folds passed to KFold.
        mode: 1 to compute the retrieved tests per build (from the build's
            modules), 2 to compute them once per fold (from the
            correlation).

    Returns:
        List of the non-None calc_f results, in iteration order.

    Raises:
        ValueError: If ``mode`` is neither 1 nor 2.
    """
    # Reject unknown modes up front; otherwise retrieved_tests would be
    # unbound the first time calc_f is called.
    if mode not in (1, 2):
        raise ValueError("mode must be 1 or 2, got %r" % (mode,))

    ret = []

    # Kfold1 and kfold2 both have this outer loop
    for train_index, test_index in KFold(len(array), nfolds):
        correlation = analyze(array[train_index])

        # Retrieved tests is calculated outside the build loop in kfold2
        if mode == 2:
            retrieved_tests = _sum_tests(correlation)

        for build in array[test_index]:  # <- All functions have this loop
            # Retrieved tests is calculated inside the build loop in kfold1
            if mode == 1:
                retrieved_tests = get_tests(set(build['modules']), correlation)

            relevant_tests = set(build['tests'])
            fval = calc_f(relevant_tests, retrieved_tests)
            if fval is not None:
                ret.append(fval)

    # The collected results were previously built but never returned.
    return ret

相关问题更多 >

编程相关推荐

热门问题

热门文章