将文件指针倒带到上一行的开头

labtestnames = sorted(tmp) #Now read each line in the inFile and write into outFile ifd = open(inFile, "r") ofd = open(outFile, "w") #read the header header = ifd.readline() #Do nothing with this line. Skip #Write header into the output file nl = "mrn\tspecimen_id\tlab_number\tlogin_dt\tfluid" offset = len(nl.split("\t")) nl = nl + "\t" + "\t".join(labtestnames) ofd.write(nl+"\n") lenFields = len(nl.split("\t")) print "Reading the input file and converting into modified file for further processing (correlation analysis etc..)" prevTup = (0,0,0) rowComplete = 0 k=0 for line in ifd: k=k+1 if (k==200): break items = line.rstrip("\n").split("\t") if((items[0] =='')): continue newline= list('' for i in range(lenFields)) newline[0],newline[1],newline[3],newline[2],newline[4] = items[0], items[1], items[3], items[2], items[4] ltests = [] ltvals = [] while(cmp(prevTup, (items[0], items[1], items[3])) == 0): # If the same mrn, lab_number and specimen_id then fill the same row. else create a new row. ltests.append(items[6]) ltvals.append(items[7]) pos = ifd.tell() line = ifd.readline() prevTup = (items[0], items[1], items[3]) items = line.rstrip("\n").split("\t") rowComplete = 1 if (rowComplete == 1): #If the row is completed, prepare newline and write into outfile indices = [labtestnames.index(x) for x in ltests] j=0 ifd.seek(pos) for i in indices: newline[i+offset] = ltvals[j] j=j+1 if (rowComplete == 0): # currTup = (items[0], items[1], items[3]) ltests = items[6] ltvals = items[7] pos = ifd.tell() line = ifd.readline() items = line.rstrip("\n").split("\t") newTup = (items[0], items[1], items[3]) if(cmp(currTup, newTup) == 0): prevTup = currTup ifd.seek(pos) continue else: indices = labtestnames.index(ltests) newline[indices+offset] = ltvals ofd.write(newline+"\n")

2条回答

网友

1楼 · 编辑于 2024-05-15 12:35:55

这似乎是yield expressions的完美用例。考虑以下示例，该示例从文件中打印行，随机重复其中一些行：

def buflines(fp):
    """Yield items from the iterator *fp*, allowing items to be pushed back.

    Calling ``send(item)`` on the generator pushes *item* back: ``send``
    itself returns ``None``, and the next ``next()`` call (for example the
    next step of a ``for`` loop) yields *item* again before any further
    items are pulled from *fp*.

    The generator finishes cleanly when *fp* is exhausted.
    """
    pushed = None
    while True:
        if pushed is None:
            try:
                item = next(fp)
            except StopIteration:
                # A StopIteration escaping a generator body becomes a
                # RuntimeError on Python 3.7+ (PEP 479), so end explicitly.
                return
        else:
            item = pushed
        pushed = yield item
        # Compare against None rather than truthiness so that falsy items
        # (such as an empty string) can be pushed back too.
        if pushed is not None:
            # This yield supplies the return value of send(); the pushed-back
            # item is delivered on the following next() call.
            yield None

from random import randint

with open('filename') as fp:
    buf = buflines(fp)
    for line in buf:
        # A parenthesised single-argument print behaves the same on
        # Python 2 and Python 3; the bare print statement is a SyntaxError
        # on Python 3.
        print(line)
        # randint(1, 100) > 80 holds for 20 of the 100 possible values, so
        # roughly one line in five is pushed back.
        if randint(1, 100) > 80:
            print('ONCE AGAIN::')
            # Hand the line back to the generator so that the next loop
            # iteration receives it again.
            buf.send(line)

基本上，如果您想再次处理某一项，可以用send将其发送回生成器。在下一次迭代中，您将再次读取到同一项。

网友

2楼 · 编辑于 2024-05-15 12:35:55

使用itertools.groupby可以更简单地处理这个问题。groupby可以对处理相同mrn、样本号和lab num的所有连续行进行聚类

执行此操作的代码是

for key, group in IT.groupby(reader, key = mykey):

其中reader迭代输入文件的行，mykey由

def mykey(row):
    return (row['mrn'], row['specimen_id'], row['lab_num'])

来自reader的每一行都会被传递给mykey，所有具有相同键的连续行都会被归入同一个group中。

在这一过程中，我们不妨使用csv模块将每一行读入dict（我称之为row）。这样就不必处理诸如line.rstrip("\n").split("\t")这样的低级字符串操作；并且不必通过索引号（例如row[3]）来引用列，而是可以用更高级的名称（如row['lab_num']）来编写代码。

import itertools as IT
import csv

# Path of the input file; it is read below as tab-delimited text with a
# header row.
inFile = 'curious.dat'
# Path of the output file; it receives the regrouped, tab-delimited rows.
outFile = 'curious.out'

def mykey(row):
    """Return the grouping key for *row*.

    The key is the tuple of the row's ``mrn``, ``specimen_id`` and
    ``lab_num`` values, in that order.
    """
    return tuple(row[column] for column in ('mrn', 'specimen_id', 'lab_num'))

# Output column order: the four identifying columns first, then the lab-test
# result columns.
fieldnames = [
    'mrn', 'specimen_id', 'date', 'lab_num',
    'Bilirubin', 'Lipase', 'Calcium', 'Magnesium', 'Phosphate',
]

# Collapse each run of consecutive input rows that share the same
# (mrn, specimen_id, lab_num) key into a single output row with one column
# per lab test.
#
# NOTE: the binary modes ('rb'/'wb') are the Python 2 convention for the csv
# module; on Python 3 the csv documentation calls for text mode opened with
# newline=''.
with open(inFile, 'rb') as ifd:
    reader = csv.DictReader(ifd, delimiter='\t')
    with open(outFile, 'wb') as ofd:
        writer = csv.DictWriter(
            ofd, fieldnames, delimiter='\t', lineterminator='\n')
        writer.writeheader()
        # groupby only merges *adjacent* rows with equal keys, so the input
        # is expected to have each group's rows next to one another.
        for _, group in IT.groupby(reader, key=mykey):
            # Every group holds at least one row; the identifying columns
            # are taken from the first one.
            first = next(group)
            new = {}
            for column in ('mrn', 'specimen_id', 'date', 'lab_num'):
                new[column] = first[column]
            # Record the first row's test result once (not once per
            # identifying column), then those of the remaining rows.
            new[first['labtest']] = first['result_val']
            for row in group:
                new[row['labtest']] = row['result_val']
            # Columns absent from `new` are written as empty fields.
            writer.writerow(new)

输出结果为：

mrn specimen_id date    lab_num Bilirubin   Lipase  Calcium Magnesium   Phosphate
4419529 1614487 26.2675 5802791G    0.1             
3319529 1614487 26.2675 5802791G    0.3 153 8.1 2.1 4
5713871 682571  56.0779 9732266E                    4.1

这是我的代码

相关问题更多 >

编程相关推荐

热门问题

热门文章