python并行比较2个csv文件

import csv

# Compare every row of host.csv against master.csv and write the host row,
# extended with the matching master value (or 'Not Match'), to results.csv.
#
# Column positions used below:
#   host.csv   -> 0 (protocol), 11, 13, 14
#   master.csv -> 4, 6, 7 are compared; 3 is the value copied to the output
#
# `with` closes all three files even if a row turns out to be malformed.
with open('host.csv', 'r') as f1, open('master.csv', 'r') as f2, \
        open('results.csv', 'w') as f3:
    c1 = csv.reader(f1)
    c2 = csv.reader(f2)
    next(c2, None)  # skip the master header row; no error on an empty file
    c3 = csv.writer(f3)

    # Load the master rows once; they are rescanned for every host row.
    master_list = list(c2)

    for row in c1:
        found = False
        colA = row[0]  # protocol
        colB = row[11]
        colD = row[13]
        colE = row[14]

        # Linear scan of the master rows, stopping at the first match.
        for master_row in master_list:
            colBf2 = master_row[4]
            colDf2 = master_row[6]
            colEf2 = master_row[7]
            colFf2 = master_row[3]

            if colA == 'icmp':
                # icmp rows are matched on two columns only.
                matched = colB == colBf2 and colD == colDf2
            else:
                matched = (colB == colBf2 and colD == colDf2
                           and colE == colEf2)

            if matched:
                row.append(colFf2)
                found = True
                break

        if not found:
            row.append('Not Match')
        c3.writerow(row)

1条回答

网友

1楼 · 发布于 2024-06-16 08:25:32

开销最大的部分是内层循环：它会为每一行主机记录重新扫描整个主表。由于 Python 采用协作式多线程（可以搜索“python GIL”），同一时刻只能运行一个线程，因此多线程并不能加快 CPU 密集型操作。你可以生成子进程，但必须权衡把数据传给工作进程的开销与由此获得的速度提升。

或者，优化你的代码。与其并行运行，不如为主表建立索引。这样就可以把对 100000 条记录的昂贵扫描换成快速的字典查找。

我冒昧地在代码中使用了 with 语句，以节省几行代码，并且省略了 colA 等变量（改为使用命名索引），以保持代码简短。

import csv

# Columns of interest. The two files keep the compared fields at different
# positions, so each file needs its own set of indices.
HOST_PROTO, HOST_B, HOST_D, HOST_E = 0, 11, 13, 14
MASTER_B, MASTER_D, MASTER_E, MASTER_F = 4, 6, 7, 3

# Read master.csv once and index column F under two kinds of key:
#   (B, D)    -> looked up for icmp host rows
#   (B, D, E) -> looked up for every other host row
# The two key shapes have different lengths, so they can share one dict.
# setdefault keeps the first row seen for a key and discards later duplicates.
col_index = {}
with open('master.csv') as master:
    master_rows = csv.reader(master)
    # Skip the header as a parsed CSV record; no error on an empty file.
    next(master_rows, None)
    for row in master_rows:
        value = row[MASTER_F]
        short_key = (row[MASTER_B], row[MASTER_D])
        long_key = (row[MASTER_B], row[MASTER_D], row[MASTER_E])
        col_index.setdefault(short_key, value)
        col_index.setdefault(long_key, value)

# Annotate every host row with the matching master value, or 'Not Match'.
with open('host.csv') as f1, open('results.csv', 'w') as f3:
    c3 = csv.writer(f3)
    for row in csv.reader(f1):
        if row[HOST_PROTO] == 'icmp':
            indexer = (row[HOST_B], row[HOST_D])
        else:
            indexer = (row[HOST_B], row[HOST_D], row[HOST_E])
        row.append(col_index.get(indexer, 'Not Match'))
        c3.writerow(row)

相关问题更多 >

编程相关推荐

热门问题

热门文章