如何将文件a中唯一的行合并到文件b中？

2 投票

4 回答

705 浏览

提问于 2025-04-16 14:16

这个问题在这里以某种形式被问过，但我想要的并不完全是那个。我的情况是这样的：我已经有一个文件，叫做 file_a，现在我正在创建另一个文件 file_b。file_a 的大小总是比 file_b 大。file_b 中会有一些重复的行（所以 file_a 中也会有），但两个文件都会有一些独特的行。我想做的是：只把 file_a 中独特的行复制/合并到 file_b 中，然后对行进行排序，这样 file_b 就变成了最新的，包含所有独特的条目。原始文件的大小都不应该超过 10MB。我该怎么做才能最有效（和最快）地完成这个？

我在想类似这样的做法，它确实能合并。

#!/usr/bin/env python

import os, time, sys

def toEpoch(dt):
    """Convert a 'dd/mm/yy HH:MM:SS' string to whole seconds since the epoch.

    The string is interpreted as local time, since the conversion goes
    through time.mktime().
    """
    parsed = time.strptime(dt, '%d/%m/%y %H:%M:%S')
    return int(time.mktime(parsed))

# Input files
o_file = "file_a"
c_file = "file_b"
n_file = [o_file, c_file]

# Output file receiving the data rows of both inputs
m_file = "merged.file"

# Open the merged file once, before the loop: re-opening it with 'w' for
# every input truncated it each time, so only the last input's rows survived.
F = open(m_file, 'w')
try:
    for name in n_file:
        P = open(name, "r")
        try:
            output = P.readlines()
        finally:
            P.close()

        # Sort the output, order by 2nd last field (attempt, still disabled)
        #sp_lines = [ line.split('\t') for line in output ]
        #sp_lines.sort( lambda a, b: cmp(toEpoch(a[-2]),toEpoch(b[-2])) )

        #for line in sp_lines:
        for line in output:
            # Keep only lines containing "group_"; in the sample input this
            # drops the three header lines.
            if "group_" in line:
                F.write(line)
finally:
    F.close()

但是，它有以下问题：

没有只包含独特的行
没有按倒数第二个字段排序
还引入了第三个文件，即 m_file

顺便说一下（长话短说）：我不能在这里使用 sorted()，因为我用的是 v2.3，很不幸。输入文件看起来是这样的：

On 23/03/11 00:40:03
JobID   Group.User          Ctime   Wtime   Status  QDate               CDate
===================================================================================
430792  group_atlas.pltatl16    0   32  4   02/03/11 21:52:38   02/03/11 22:02:15
430793  group_atlas.atlas084    30  472 4   02/03/11 21:57:43   02/03/11 22:09:35
430794  group_atlas.atlas084    12  181 4   02/03/11 22:02:37   02/03/11 22:05:42
430796  group_atlas.atlas084    8   185 4   02/03/11 22:02:38   02/03/11 22:05:46

我试着用 cmp() 按倒数第二个字段排序，但我觉得它不工作，可能是因为输入文件的前三行。

有人能帮忙吗？谢谢！！！

更新 1：

为了将来参考，按照 Jakob 的建议，这里是完整的脚本。它运行得很好。

#!/usr/bin/env python

import os, time, sys
from sets import Set as set

def toEpoch(dt):
    """Return the local-time 'dd/mm/yy HH:MM:SS' string dt as integer epoch seconds."""
    fields = time.strptime(dt, '%d/%m/%y %H:%M:%S')
    seconds = time.mktime(fields)
    return int(seconds)

def yield_lines(fileobj, skip=3):
    """Yield the lines of fileobj that follow its header.

    fileobj -- an open file (or any object with readline() and iteration)
    skip    -- number of leading header lines to discard (default 3, the
               header size of the job listings handled here)
    """
    # Discard the header; readline() simply returns '' on a short file.
    for _ in range(skip):
        fileobj.readline()

    for line in fileobj:
        yield line

def app(path1, path2):
    """Return the set of distinct data lines found in the two files.

    path1, path2 -- paths of the input files; their headers are dropped by
                    yield_lines().
    """
    unique = []
    for path in (path1, path2):
        handle = open(path)
        try:
            # The set must be built while the file is still open, because
            # yield_lines() reads lazily.
            unique.append(set(yield_lines(handle)))
        finally:
            # The original never closed its input files.
            handle.close()
    return unique[0].union(unique[1])

# Input files
o_file = "testScript/03"
c_file = "03.bak"
m_file = "finished.file"

print time.strftime('%H:%M:%S', time.localtime())

# Sorting the output, order by 2nd last field
sp_lines = [ line.split('\t') for line in app(o_file, c_file) ]
sp_lines.sort( lambda a, b: cmp(toEpoch(a[-2]),toEpoch(b[-2])) )

F = open(m_file,'w')
print "No. of lines: ",len(sp_lines)

for line in sp_lines:

    MF = '\t'.join(line)
    F.write(MF)
F.close()

处理 145244 行花了大约 2 分 47 秒。

[testac1@serv07 ~]$ ./uniq-merge.py 
17:19:21
No. of lines:  145244
17:22:08

谢谢！！

更新 2：

嗨 eyquem，这是我运行你的脚本时收到的错误信息。

来自第一个脚本：

[testac1@serv07 ~]$ ./uniq-merge_2.py 
  File "./uniq-merge_2.py", line 44
    fm.writelines( '\n'.join(v)+'\n' for k,v in output )
                                       ^
SyntaxError: invalid syntax

来自第二个脚本：

[testac1@serv07 ~]$ ./uniq-merge_3.py 
  File "./uniq-merge_3.py", line 24
    output = sett(line.rstrip() for line in fa)
                                  ^
SyntaxError: invalid syntax

谢谢！！

更新 3：

之前的那个根本没有对列表进行排序。感谢 eyquem 指出这一点。现在它可以了。这是对 Jakob 版本的进一步修改 - 我把 set:app(path1, path2) 转换成了 list:myList()，然后对 myList 应用了 sort( lambda ... )，以按倒数第二个字段对合并的文件进行排序。这是最终的脚本。

#!/usr/bin/env python

import os, time, sys
from sets import Set as set

def toEpoch(dt):
    """Convert a date/time string ('dd/mm/yy HH:MM:SS', local time) to epoch seconds."""
    broken_down = time.strptime(dt, '%d/%m/%y %H:%M:%S')
    return int(time.mktime(broken_down))

def yield_lines(fileobj, skip=3):
    """Yield the lines of fileobj that follow its header.

    fileobj -- an open file (or any object with readline() and iteration)
    skip    -- number of leading header lines to discard (default 3)
    """
    # Discard the header; readline() simply returns '' on a short file.
    for _ in range(skip):
        fileobj.readline()

    for line in fileobj:
        yield line

def app(path1, path2):
    """Return the set of distinct data lines of both files (duplicates removed).

    path1, path2 -- paths of the input files; their headers are dropped by
                    yield_lines().
    """
    unique = []
    for path in (path1, path2):
        handle = open(path)
        try:
            # Build the set while the file is open: yield_lines() is lazy.
            unique.append(set(yield_lines(handle)))
        finally:
            # The original never closed its input files.
            handle.close()
    return unique[0].union(unique[1])

print time.strftime('%H:%M:%S', time.localtime())

# I/O files
o_file = "testScript/03"
c_file = "03.bak"
m_file = "finished.file"

# Convert set into to list
myList = list(app(o_file, c_file))

# Sort the list by the date
sp_lines = [ line.split('\t') for line in myList ]
sp_lines.sort( lambda a, b: cmp(toEpoch(a[-2]),toEpoch(b[-2])) )

F = open(m_file,'w')
print "No. of lines: ",len(sp_lines)

# Finally write to the outFile
for line in sp_lines:
    MF = '\t'.join(line)
    F.write(MF)
F.close()

速度没有提升，处理同样的 145244 行花了 2 分 50 秒。如果有人看到改进的空间，请告诉我。感谢 Jakob 和 eyquem 的帮助。谢谢！！

更新 4：

为了将来参考，这是 eyquem 代码的一个修改版，它比之前的版本运行得更好、更快。

#!/usr/bin/env python

import os, sys, re
from sets import Set as sett
from time import mktime, strptime, strftime

def sorting_merge(o_file, c_file, m_file ):
    """Merge the distinct data lines of two job listings into m_file, by date.

    Each input starts with an 'On <date>' line followed by header lines; the
    first line containing a date/time marks the start of the data.  The
    output consists of a fresh 'On <now>' line, the header lines of o_file
    and every distinct non-blank data line, ordered by the date/time found
    in its next-to-last tab-separated field.

    o_file, c_file -- paths of the two input files
    m_file         -- path of the merged file (overwritten)
    """

    # RegEx for the Date/time field (raw string: the backslashes belong to re)
    pat = re.compile(r'[0123]\d/[01]\d/\d{2} [012]\d:[0-6]\d:[0-6]\d')

    def kl(lines,pat = pat):
        # Sort key: match only the next to last field, as epoch seconds
        field = lines.split('\t')[-2]
        return mktime(strptime(pat.search(field).group(),'%d/%m/%y %H:%M:%S'))

    output = sett()

    # Separate the header & remove the duplicates
    def rmHead(path):
        # Adds the data lines of one file to `output`, returns its header
        # lines.  A file without any data line contributes nothing (the
        # original added its last header line as data and then crashed).
        head = []
        f_n = open(path, 'r')
        try:
            f_n.readline()      # the 'On <date>' line is regenerated below
            for line in f_n:
                if pat.search(line):
                    output.add(line.rstrip())   # first data line
                    break
                head.append(line)               # line of the header
            for line in f_n:
                output.add(line.rstrip())
        finally:
            f_n.close()
        return head

    head = rmHead(o_file)
    rmHead(c_file)

    # Sorting date-wise; blank lines carry no date and are dropped
    output = [ (kl(line),line) for line in output if line ]
    output.sort()

    fm = open(m_file,'w')
    try:
        # Write to the file & add the header
        fm.write(strftime('On %d/%m/%y %H:%M:%S\n') + ''.join(head))
        for t,line in output:
            fm.write(line + '\n')
    finally:
        fm.close()


# Input files; note that 03_b is the one passed as o_file below
c_f = "03_a"
o_f = "03_b"

# Merge both inputs into outfile.txt
sorting_merge(o_f, c_f, 'outfile.txt')

这个版本快多了 - 处理 145244 行只需 6.99 秒，而之前的版本需要 2 分 47 秒，使用的是 lambda a, b: cmp()。感谢 eyquem 的支持。谢谢！！

标签：文件处理、脚本优化、性能提升、数据去重、文件合并、唯一行、重复行、行排序

4 个回答

最后的代码，希望是最后一次了。

因为我发现了一段非常厉害的代码。

首先，我创建了两个文件“xxA.txt”和“yyB.txt”，每个文件大约有 30000 行，内容如下所示：

430559  group_atlas.atlas084    12  181 4       04/03/10 01:38:02   02/03/11 22:05:42
430502  group_atlas.atlas084    12  181 4       23/01/10 21:45:05   02/03/11 22:05:42
430544  group_atlas.atlas084    12  181 4       17/06/11 12:58:10   02/03/11 22:05:42
430566  group_atlas.atlas084    12  181 4       25/03/10 23:55:22   02/03/11 22:05:42

使用以下代码：

创建 AB.py

from random import choice

# Value pools the random lines are drawn from
n = tuple(str(x) for x in range(500, 600))
days = tuple('%02d' % d for d in range(1, 29))
# not '29','30','31' to avoid problems with strptime() on last days of february
months = days[0:12]
# '00'..'23' (the original slice of `days` left out hour '00')
hours = tuple('%02d' % h for h in range(24))
# '00'..'59' (the original literal list left out '08')
ms = ['%02d' % m for m in range(60)]

repeat = 30000


def write_sample(path, stamp, count):
    """Write a 3-line header and `count` random job lines to `path`.

    path  -- file to create (overwritten)
    stamp -- date/time text placed after 'On ' in the first header line
    count -- number of data lines to generate
    """
    with open(path, 'w') as f:
        # 430794  group_atlas.atlas084    12  181 4     02/03/11 22:02:37   02/03/11 22:05:42
        f.write('On %s\n' % stamp)
        f.write('JobID   Group.User          Ctime   Wtime   Status  QDate               CDate\n'
                '===================================================================================\n')
        for i in range(count):
            line = '430%s  group_atlas.atlas084    12  181 4   \t%s/%s/%s %s:%s:%s\t02/03/11 22:05:42\n' %\
                   (choice(n),
                    choice(days), choice(months), choice(('10', '11')),
                    choice(hours), choice(ms), choice(ms))
            f.write(line)


def report(path):
    """Print the total and the distinct number of lines of `path`."""
    with open(path) as g:
        lines = g.readlines()
    # Single-argument print(): same output under the print statement
    print('readlines of %s : %d' % (path, len(lines)))
    print('set of %s : %d' % (path, len(set(lines))))


write_sample('xxA.txt', '23/03/11 00:40:03', repeat)
write_sample('yyB.txt', '25/03/11 13:45:24', repeat)

report('xxA.txt')
report('yyB.txt')

然后我运行了这三个程序：

“merging regex.py”

#!/usr/bin/env python

from time import clock,mktime,strptime,strftime
from sets import Set
import re

infunc = []

def sorting_merge(o_file, c_file, m_file ):
    """Merge the distinct data lines of o_file and c_file into m_file, by date.

    Variant "regex": the sort key is built by finding the first date/time of
    each line with a regular expression and converting it with
    strptime()/mktime().

    Instrumented for benchmarking: a clock() reading is appended to the
    module-level list `infunc` between the steps, so the caller can print
    how long each step took.
    """
    infunc.append(clock()) #infunc[0]
    # Matches one 'dd/mm/yy HH:MM:SS' timestamp
    pat = re.compile('([0123]\d/[01]\d/\d{2} [012]\d:[0-6]\d:[0-6]\d)')
    output = Set()

    def rmHead(filename, a_set):
        # Add the right-stripped data lines of `filename` to `a_set` and
        # return the header lines (the first line of the file excluded).
        f_n = open(filename, 'r')
        # Skip the first line; a fresh 'On ...' line is written to the output
        f_n.readline()
        head = []
        for line in f_n:
            head.append(line) # line of the header
            # A line made only of '=', blanks and line ends closes the header
            if line.strip('= \r\n')=='':  break
        for line in f_n:
            a_set.add(line.rstrip())
        f_n.close()
        return head

    infunc.append(clock()) #infunc[1]
    head = rmHead(o_file, output)
    infunc.append(clock()) #infunc[2]
    # The header of the second file replaces the one returned above
    head = rmHead(c_file, output)
    infunc.append(clock()) #infunc[3]
    # Blank lines were stripped down to '' and carry no date
    if '' in output:  output.remove('')

    infunc.append(clock()) #infunc[4]
    # Decorate every line with its date as epoch seconds, so that a plain
    # sort() orders the lines by date
    output = [ (mktime(strptime(pat.search(line).group(),'%d/%m/%y %H:%M:%S')),line)
               for line in output ]
    infunc.append(clock()) #infunc[5]
    output.sort()
    infunc.append(clock()) #infunc[6]

    # Write a fresh timestamp line, the header, then the sorted lines
    fm = open(m_file,'w')
    fm.write(strftime('On %d/%m/%y %H:%M:%S\n')+(''.join(head)))
    for t,line in output:
        fm.write(line + '\n')
    fm.close()
    infunc.append(clock()) #infunc[7]



# Input files; yyB.txt is the one passed as o_file below
c_f = "xxA.txt"
o_f = "yyB.txt"

# Time the whole call, then report the per-step timings collected in infunc
t1 = clock()
sorting_merge(o_f, c_f, 'zz_mergedr.txt')
t2 = clock()
print 'merging regex'
print 'total time of execution :',t2-t1
# NOTE: 'launching' runs from just before the call up to infunc[1], so it
# includes the 'preparation' interval printed on the next line
print '              launching :',infunc[1] - t1
print '            preparation :',infunc[1] - infunc[0]
print '    reading of 1st file :',infunc[2] - infunc[1]
print '    reading of 2nd file :',infunc[3] - infunc[2]
print '      output.remove(\'\') :',infunc[4] - infunc[3]
print 'creation of list output :',infunc[5] - infunc[4]
print '      sorting of output :',infunc[6] - infunc[5]
print 'writing of merging file :',infunc[7] - infunc[6]
print 'closing of the function :',t2-infunc[7]

“merging split.py”

#!/usr/bin/env python

from time import clock,mktime,strptime,strftime
from sets import Set

infunc = []

def sorting_merge(o_file, c_file, m_file ):
    """Merge the distinct data lines of o_file and c_file into m_file, by date.

    Variant "split": the sort key is built from the next-to-last
    tab-separated field of each line, converted with strptime()/mktime().

    Instrumented for benchmarking: a clock() reading is appended to the
    module-level list `infunc` between the steps, so the caller can print
    how long each step took.
    """
    infunc.append(clock()) #infunc[0]
    output = Set()

    def rmHead(filename, a_set):
        # Add the right-stripped data lines of `filename` to `a_set` and
        # return the header lines (the first line of the file excluded).
        f_n = open(filename, 'r')
        # Skip the first line; a fresh 'On ...' line is written to the output
        f_n.readline()
        head = []
        for line in f_n:
            head.append(line) # line of the header
            # A line made only of '=', blanks and line ends closes the header
            if line.strip('= \r\n')=='':  break
        for line in f_n:
            a_set.add(line.rstrip())
        f_n.close()
        return head

    infunc.append(clock()) #infunc[1]
    head = rmHead(o_file, output)
    infunc.append(clock()) #infunc[2]
    # The header of the second file replaces the one returned above
    head = rmHead(c_file, output)
    infunc.append(clock()) #infunc[3]
    # Blank lines were stripped down to '' and carry no date
    if '' in output:  output.remove('')

    infunc.append(clock()) #infunc[4]
    # Decorate every line with its date as epoch seconds, so that a plain
    # sort() orders the lines by date
    output = [ (mktime(strptime(line.split('\t')[-2],'%d/%m/%y %H:%M:%S')),line)
               for line in output ]
    infunc.append(clock()) #infunc[5]
    output.sort()
    infunc.append(clock()) #infunc[6]

    # Write a fresh timestamp line, the header, then the sorted lines
    fm = open(m_file,'w')
    fm.write(strftime('On %d/%m/%y %H:%M:%S\n')+(''.join(head)))
    for t,line in output:
        fm.write(line + '\n')
    fm.close()
    infunc.append(clock()) #infunc[7]



# Input files; yyB.txt is the one passed as o_file below
c_f = "xxA.txt"
o_f = "yyB.txt"

# Time the whole call, then report the per-step timings collected in infunc
t1 = clock()
sorting_merge(o_f, c_f, 'zz_mergeds.txt')
t2 = clock()
print 'merging split'
print 'total time of execution :',t2-t1
# NOTE: 'launching' runs from just before the call up to infunc[1], so it
# includes the 'preparation' interval printed on the next line
print '              launching :',infunc[1] - t1
print '            preparation :',infunc[1] - infunc[0]
print '    reading of 1st file :',infunc[2] - infunc[1]
print '    reading of 2nd file :',infunc[3] - infunc[2]
print '      output.remove(\'\') :',infunc[4] - infunc[3]
print 'creation of list output :',infunc[5] - infunc[4]
print '      sorting of output :',infunc[6] - infunc[5]
print 'writing of merging file :',infunc[7] - infunc[6]
print 'closing of the function :',t2-infunc[7]

“merging killer”

#!/usr/bin/env python

from time import clock,strftime
from sets import Set
import re

infunc = []

def sorting_merge(o_file, c_file, m_file ):
    """Merge the distinct data lines of o_file and c_file into m_file, by date.

    Variant "killer": no strptime()/mktime() conversion at all.  The sort key
    is the tuple of strings (year, month, day, time) captured by a regular
    expression from the first date/time of each line.

    Instrumented for benchmarking: a clock() reading is appended to the
    module-level list `infunc` between the steps, so the caller can print
    how long each step took.
    """
    infunc.append(clock()) #infunc[0]
    # Groups: 1 = day, 2 = month, 3 = two-digit year, 4 = 'HH:MM:SS'
    patk = re.compile('([0123]\d)/([01]\d)/(\d{2}) ([012]\d:[0-6]\d:[0-6]\d)')
    output = Set()

    def rmHead(filename, a_set):
        # Add the right-stripped data lines of `filename` to `a_set` and
        # return the header lines (the first line of the file excluded).
        f_n = open(filename, 'r')
        # Skip the first line; a fresh 'On ...' line is written to the output
        f_n.readline()
        head = []
        for line in f_n:
            head.append(line) # line of the header
            # A line made only of '=', blanks and line ends closes the header
            if line.strip('= \r\n')=='':  break
        for line in f_n:
            a_set.add(line.rstrip())
        f_n.close()
        return head

    infunc.append(clock()) #infunc[1]
    head = rmHead(o_file, output)
    infunc.append(clock()) #infunc[2]
    # The header of the second file replaces the one returned above
    head = rmHead(c_file, output)
    infunc.append(clock()) #infunc[3]
    # Blank lines were stripped down to '' and carry no date
    if '' in output:  output.remove('')

    infunc.append(clock()) #infunc[4]
    # Decorate every line with (year, month, day, time) as strings; these
    # zero-padded fields compare in date order as plain text
    # NOTE(review): with a two-digit year this presumably holds only within
    # one century - confirm for the real data
    output = [ (patk.search(line).group(3,2,1,4),line)for line in output ]
    infunc.append(clock()) #infunc[5]
    output.sort()
    infunc.append(clock()) #infunc[6]

    # Write a fresh timestamp line, the header, then the sorted lines
    fm = open(m_file,'w')
    fm.write(strftime('On %d/%m/%y %H:%M:%S\n')+(''.join(head)))
    for t,line in output:
        fm.write(line + '\n')
    fm.close()
    infunc.append(clock()) #infunc[7]



# Input files; yyB.txt is the one passed as o_file below
c_f = "xxA.txt"
o_f = "yyB.txt"

# Time the whole call, then report the per-step timings collected in infunc
t1 = clock()
sorting_merge(o_f, c_f, 'zz_mergedk.txt')
t2 = clock()
print 'merging killer'
print 'total time of execution :',t2-t1
# NOTE: 'launching' runs from just before the call up to infunc[1], so it
# includes the 'preparation' interval printed on the next line
print '              launching :',infunc[1] - t1
print '            preparation :',infunc[1] - infunc[0]
print '    reading of 1st file :',infunc[2] - infunc[1]
print '    reading of 2nd file :',infunc[3] - infunc[2]
print '      output.remove(\'\') :',infunc[4] - infunc[3]
print 'creation of list output :',infunc[5] - infunc[4]
print '      sorting of output :',infunc[6] - infunc[5]
print 'writing of merging file :',infunc[7] - infunc[6]
print 'closing of the function :',t2-infunc[7]

结果如下：

merging regex
total time of execution : 14.2816595405
              launching : 0.00169211450059
            preparation : 0.00168093989599
    reading of 1st file : 0.163582242995
    reading of 2nd file : 0.141301478261
      output.remove('') : 2.37460347614e-05
     creation of output : 13.4460212122
      sorting of output : 0.216363532237
writing of merging file : 0.232923737514
closing of the function : 0.0797514767938

merging split
total time of execution : 13.7824474898
              launching : 4.10666718815e-05
            preparation : 2.70984161395e-05
    reading of 1st file : 0.154349784679
    reading of 2nd file : 0.136050810927
      output.remove('') : 2.06730184981e-05
     creation of output : 12.9691854691
      sorting of output : 0.218704332534
writing of merging file : 0.225259076223
closing of the function : 0.0788362766776

merging killer
total time of execution : 2.14315311024
              launching : 0.00206199391263
            preparation : 0.00205026057781
    reading of 1st file : 0.158711791582
    reading of 2nd file : 0.138976601775
      output.remove('') : 2.37460347614e-05
     creation of output : 0.621466415424
      sorting of output : 0.823161602941
writing of merging file : 0.227701565422
closing of the function : 0.171049393149

在运行“killer”程序时，排序输出所花的时间约为之前的 4 倍，但生成输出列表所花的时间缩短到了原来的约 1/21！总的来说，执行时间减少了约 85%。

回答于 2025-04-16 由 Python大师

分享举报

也许可以考虑这样的方式？

from sets import Set as set

def yield_lines(fileobj, skip=3):
    """Yield the lines of fileobj that follow its header.

    fileobj -- an open file (or any object with readline() and iteration)
    skip    -- number of leading header lines to discard (default 3)
    """
    # Discard the header; readline() simply returns '' on a short file.
    for _ in range(skip):
        fileobj.readline()

    for line in fileobj:
        yield line

def app(path1, path2):
    """Return the set of distinct data lines found in the two files.

    path1, path2 -- paths of the input files; their headers are dropped by
                    yield_lines().
    """
    unique = []
    for path in (path1, path2):
        handle = open(path)
        try:
            # Build the set while the file is open: yield_lines() is lazy.
            unique.append(set(yield_lines(handle)))
        finally:
            # The original never closed its input files.
            handle.close()

    return unique[0].union(unique[1])

编辑：忘了还可以用 with 语句 :$

回答于 2025-04-16 由 Python大师

分享举报

编辑 2

我之前的代码在 output = sett(line.rstrip() for line in fa) 和 output.sort(key=kl) 这两行上有问题。

而且，它们还有一些复杂的地方。

所以我考虑直接用 set() 函数来读取文件，这个方法是Jakob Bowyer在他的代码中使用的。

恭喜Jakob！（顺便说一下，Michal Chruszcz也很棒）：set() 是无敌的，它比逐行读取要快。

因此，我放弃了逐行读取文件的想法。

不过，我保留了避免使用 cmp() 函数进行排序的想法，因为文档中提到：

s.sort([cmpfunc=None])

sort() 方法接受一个可选参数，指定一个比较函数，用于比较两个参数（列表项）(...) 请注意，这会显著减慢排序过程

http://docs.python.org/release/2.3/lib/typesseq-mutable.html

然后，我成功地得到了一个包含元组 (t,line) 的列表，其中t 是

time.mktime(time.strptime( <行中的第一个日期和时间> ,'%d/%m/%y %H:%M:%S'))

通过指令

output = [ (kl(line),line.rstrip()) for line in output]

获得的。

我测试了两个代码。第一个代码是通过正则表达式计算 行中的第一个日期和时间：

# Sort key: the first text of `line` matched by `pat`, parsed as
# 'dd/mm/yy HH:MM:SS' and converted to epoch seconds.  `pat` is the compiled
# pattern of the enclosing code, bound once as a default argument.
def kl(line,pat = pat):
    return time.mktime(time.strptime((pat.search(line).group()),'%d/%m/%y %H:%M:%S'))

# Decorate each non-blank line with its key ...
output = [ (kl(line),line.rstrip()) for line in output if line.rstrip()]

# ... so that a plain sort() (no comparison function) orders the lines by date
output.sort()

第二个代码中 kl() 是：

# Sort key without a regex search: the next-to-last tab-separated field of
# `line`, parsed as 'dd/mm/yy HH:MM:SS' and converted to epoch seconds.
# `pat` is accepted but not used in this variant.
def kl(line,pat = pat):
    return time.mktime(time.strptime(line.split('\t')[-2],'%d/%m/%y %H:%M:%S'))

。

结果是

执行时间：

第一个使用正则表达式的代码：0.03598秒

第二个使用split('\t')的代码：0.03580秒

也就是说，几乎是一样的

这个算法比使用 cmp() 函数的代码要快：

在这个代码中，行的集合 output 不是通过

output = [ (kl(line),line.rstrip()) for line in output]

转换为元组列表，而只是转换为不重复的行列表，然后用 mycmp() 函数进行排序（见文档）：

# Comparison function for list.sort(): orders two lines by the date/time in
# their next-to-last tab-separated field.  Both dates are parsed again on
# every comparison.
def mycmp(a,b):
    return cmp(time.mktime(time.strptime(a.split('\t')[-2],'%d/%m/%y %H:%M:%S')),
               time.mktime(time.strptime(b.split('\t')[-2],'%d/%m/%y %H:%M:%S')))

output = [ line.rstrip() for line in output] # not list(output) , to avoid the problem of newline of the last line of each file
output.sort(mycmp)

# Write the sorted lines, restoring the newline removed by rstrip()
for line in output:
    fm.write(line+'\n')

执行时间为

0.11574秒

。

代码：

#!/usr/bin/env python

import os, time, sys, re
from sets import Set as sett

def sorting_merge(o_file , c_file, m_file ):
    """Merge the distinct data lines of two job listings into m_file, by date.

    A data line is a line holding a date/time directly followed by a second
    date/time; everything before the first such line is the header.  The
    output consists of a fresh 'On <now>' line, the header of o_file without
    its first line, and every distinct data line ordered by the first of
    its two dates.

    o_file, c_file -- paths of the two input files
    m_file         -- path of the merged file (overwritten)
    """

    # A date/time that is followed by a second date/time (raw strings: the
    # backslashes belong to re)
    pat = re.compile(r'[0123]\d/[01]\d/\d{2} [012]\d:[0-6]\d:[0-6]\d'
                     r'(?=[ \t]+[0123]\d/[01]\d/\d{2} [012]\d:[0-6]\d:[0-6]\d)')

    def kl(line,pat = pat):
        # Sort key: the matched date/time as epoch seconds
        return time.mktime(time.strptime((pat.search(line).group()),'%d/%m/%y %H:%M:%S'))

    output = sett()

    def collect(path):
        # Add the data lines of `path` to `output`, return its header lines.
        # The first data line is kept (the original dropped it) and a file
        # without data lines ends the scan (the original looped forever).
        header = []
        f = open(path)
        try:
            for line in f:
                if pat.search(line):
                    output.add(line.rstrip())
                    break
                header.append(line)
            for line in f:
                # Strip before storing, so a last line without newline is
                # recognised as a duplicate
                output.add(line.rstrip())
        finally:
            f.close()
        return header

    head = collect(o_file)[1:]   # first line is skipped, it is regenerated
    collect(c_file)

    # Blank lines carry no date and would break kl()
    if '' in output:  output.remove('')
    output = [ (kl(line),line) for line in output]
    output.sort()

    fm = open(m_file,'w')
    try:
        fm.write(time.strftime('On %d/%m/%y %H:%M:%S\n')+(''.join(head)))
        for t,line in output:
            fm.write(line + '\n')
    finally:
        fm.close()


# Time one complete merge of the two test files and print the elapsed
# time.clock() difference
te = time.clock()
sorting_merge('ytre.txt','tataye.txt','merged.file.txt')
print time.clock()-te

这次，我希望它能正确运行，唯一需要做的就是等待在比我测试的文件大得多的真实文件上的执行时间。

编辑 3

# A 'dd/mm/yy HH:MM:SS' date/time that is followed (lookahead, not part of
# the match) either by blanks and a second date/time, or by blanks and the
# text 'aborted/deleted'
pat = re.compile('[0123]\d/[01]\d/\d{2} [012]\d:[0-6]\d:[0-6]\d'
                 '(?=[ \t]+'
                 '[0123]\d/[01]\d/\d{2} [012]\d:[0-6]\d:[0-6]\d'
                 '|'
                 '[ \t]+aborted/deleted)')

。

编辑 4

#!/usr/bin/env python

import os, time, sys, re
from sets import Set

def sorting_merge(o_file , c_file, m_file ):
    """Merge the distinct data lines of two job listings into m_file, by date.

    A data line is a line holding a date/time followed either by a second
    date/time or by 'aborted/deleted'; everything before the first such line
    is the header.  The output consists of a fresh 'On <now>' line, the
    header of o_file without its first line, and every distinct data line
    ordered by that first date/time.

    o_file, c_file -- paths of the two input files
    m_file         -- path of the merged file (overwritten)
    """

    # Raw strings: the backslashes belong to re
    pat = re.compile(r'[0123]\d/[01]\d/\d{2} [012]\d:[0-6]\d:[0-6]\d'
                     r'(?=[ \t]+'
                     r'[0123]\d/[01]\d/\d{2} [012]\d:[0-6]\d:[0-6]\d'
                     r'|'
                     r'[ \t]+aborted/deleted)')

    def kl(line,pat = pat):
        # Sort key: the matched date/time as epoch seconds
        return time.mktime(time.strptime((pat.search(line).group()),'%d/%m/%y %H:%M:%S'))

    output = Set()

    def collect(path):
        # Add the data lines of `path` to `output`, return its header lines.
        # A file without any data line contributes nothing; the original
        # added its last header line as data (or hit an unbound `line1` on
        # an empty file) and crashed.
        header = []
        f = open(path)
        try:
            for line in f:
                if pat.search(line):
                    output.add(line.rstrip())   # first line after the heading
                    break
                header.append(line)             # line of the header
            for line in f:
                output.add(line.rstrip())
        finally:
            f.close()
        return header

    head = collect(o_file)[1:]   # first line is skipped, it is regenerated
    collect(c_file)

    # Blank lines carry no date and would break kl()
    if '' in output:  output.remove('')
    output = [ (kl(line),line) for line in output]
    output.sort()

    fm = open(m_file,'w')
    try:
        fm.write(time.strftime('On %d/%m/%y %H:%M:%S\n')+(''.join(head)))
        for t,line in output:
            fm.write(line+'\n')
    finally:
        fm.close()

# Time one complete merge of A.txt and B.txt into C.txt and print the
# elapsed time.clock() difference
te = time.clock()
sorting_merge('A.txt','B.txt','C.txt')
print time.clock()-te

回答于 2025-04-16 由 Python大师

分享举报

如何将文件a中唯一的行合并到文件b中？

4 个回答

创建 AB.py

“merging regex.py”

“merging split.py”

“merging killer”

撰写回答