
2024-05-12 20:32:16 发布

您现在位置:Python中文网/ 问答频道 /正文

我有一个大约16000行的文件。 它们都有相同的格式。以下是一个简单的示例:

ATOM    139  C1  DPPC   18      17.250  58.420  10.850  1.00  0.00


ATOM    189  C1  DPPC   19      23.050  20.800  11.000  1.00  0.00



# Asker's attempt: walk the lines of 'test_file.pdb' that contain "DPPC" and
# keep a counter that is incremented or reset by a comparison on a short
# slice of each line.
cnt = 0
with open('test_file.pdb') as f1:
    # 'out' is opened for appending, but nothing in this snippet writes to it.
    with open('out','a') as f2:
        # Load the whole input file into memory as a list of lines.
        lines = f1.readlines()
        for i, line in enumerate(lines):
             # Only lines containing the text "DPPC" are examined.
             if "DPPC" in line:
                   # A holds at most four characters (offsets 22-25) of the
                   # stripped line.
                   A = line.strip()[22:26]
                   # NOTE(review): i is the index of the line within the
                   # file, yet it is used here to index characters of A, so
                   # A[i+1] raises IndexError as soon as i + 1 >= len(A).
                   # Presumably the intent was to compare this line's field
                   # with the neighbouring line's field -- confirm.
                   if A[i] == A [i+1]:
                       cnt = cnt + 1
                   elif A[i] != A[i+1]:
                       cnt = 0


Tags: 文件, in, if, as, 格式, with, line, 标识符




ATOM    139  C1  DPPC   18      17.250  58.420  10.850  1.00  0.00
ATOM    139  C1  DPPC   18      17.250  58.420  10.850  1.00  0.00
ATOM    139  C1  DPPC   18      17.250  58.420  10.850  1.00  0.00
ATOM    139  C1  DPPC   18      17.250  58.420  10.850  1.00  0.00
ATOM    139  C1  DPPC   18      17.250  58.420  10.850  1.00  0.00
ATOM    139  C1  DPPC   18      17.250  58.420  10.850  1.00  0.00
ATOM    139  C1  DPPC   18      17.250  58.420  10.850  1.00  0.00
ATOM    139  C1  DPPC   18      17.250  58.420  10.850  1.00  0.00
ATOM    139  C1  DPPC   18      17.250  58.420  10.850  1.00  0.00
ATOM    189  C1  DPPC   19      23.050  20.800  11.000  1.00  0.00
ATOM    189  C1  DPPC   19      23.050  20.800  11.000  1.00  0.00
ATOM    189  C1  DPPC   19      23.050  20.800  11.000  1.00  0.00
ATOM    189  C1  DPPC   19      23.050  20.800  11.000  1.00  0.00
ATOM    189  C1  DPPC   19      23.050  20.800  11.000  1.00  0.00
ATOM    189  C1  DPPC   19      23.050  20.800  11.000  1.00  0.00
ATOM    189  C1  DPPC   19      23.050  20.800  11.000  1.00  0.00
ATOM    189  C1  DPPC   20      23.050  20.800  11.000  1.00  0.00
ATOM    189  C1  DPPC   20      23.050  20.800  11.000  1.00  0.00
ATOM    189  C1  DPPC   20      23.050  20.800  11.000  1.00  0.00


import struct

# Count runs of consecutive lines in 'test_file.pdb' that share the same
# value in the fifth fixed-width field, printing one summary line per run.

# negative widths represent ignored padding fields
fieldwidths = 4, -4, 3, -2, 2, -2, 4, -3, 2, -6, 6, -2, 6, -2, 6, -2, 4, -2, 4
fmtstring = ' '.join('{}{}'.format(abs(fw), 'x' if fw < 0 else 's')
                     for fw in fieldwidths)
fieldstruct = struct.Struct(fmtstring)
parse = fieldstruct.unpack_from  # a function to split line up into fields

# struct works on bytes-like objects, so the file is read in binary mode and
# the extracted field is decoded before it is compared or printed.
with open('test_file.pdb', 'rb') as f1:
    prev = None  # value of the fifth field for the run being counted
    cnt = 0      # number of lines seen so far in the current run
    for line in f1:
        if not line.strip():
            continue  # tolerate blank lines (e.g. a trailing newline)
        # Strip padding so a right-justified one-digit value prints cleanly.
        curr = parse(line)[4].decode('ascii').strip()
        if curr == prev:  # same as last one?
            cnt += 1
        else:
            # The run just ended: report it, unless this is the first line.
            if prev is not None:
                print('{} occurred {} times'.format(prev, cnt))
            prev = curr
            cnt = 1
    # Report the final run; nothing is printed for an empty file.
    if prev is not None:
        print('{} occurred {} times'.format(prev, cnt))


18 occurred 9 times
19 occurred 7 times
20 occurred 3 times


# Count how many lines of 'data.txt' carry each distinct value in the fifth
# whitespace-separated column, then print one "value: count" line per value.
# Note this counts total occurrences per value, not consecutive runs.

# Keep every non-blank line; lines read from a file still contain their
# newline, so they must be stripped before the emptiness test means anything.
data = []
with open('data.txt', 'r') as datafile:
    for line in datafile:
        if line.strip():
            data.append(line)

# keywordList holds the distinct fifth-column values in order of first
# appearance; counterList[i] is the number of lines matching keywordList[i].
keywordList = []
counterList = []
positions = {}  # value -> its index in keywordList/counterList
for line in data:
    keyword = line.split()[4]
    if keyword not in positions:
        positions[keyword] = len(keywordList)
        keywordList.append(keyword)
        counterList.append(0)
    counterList[positions[keyword]] += 1

for keyword, counter in zip(keywordList, counterList):
    print("%s: %d" % (keyword, counter))



# Count runs of consecutive lines that share the same value in the fifth
# whitespace-separated column and print "value: count" for each run.
data = """ATOM    139  C1  DPPC   18      17.250  58.420  10.850  1.00  0.00
ATOM    139  C1  DPPC   18      17.250  58.420  10.850  1.00  0.00
ATOM    139  C1  DPPC   18      17.250  58.420  10.850  1.00  0.00
ATOM    139  C1  DPPC   18      17.250  58.420  10.850  1.00  0.00
ATOM    139  C1  DPPC   18      17.250  58.420  10.850  1.00  0.00
ATOM    139  C1  DPPC   18      17.250  58.420  10.850  1.00  0.00
ATOM    139  C1  DPPC   18      17.250  58.420  10.850  1.00  0.00
ATOM    139  C1  DPPC   18      17.250  58.420  10.850  1.00  0.00
ATOM    139  C1  DPPC   18      17.250  58.420  10.850  1.00  0.00
ATOM    189  C1  DPPC   19      23.050  20.800  11.000  1.00  0.00
ATOM    189  C1  DPPC   19      23.050  20.800  11.000  1.00  0.00
ATOM    189  C1  DPPC   19      23.050  20.800  11.000  1.00  0.00
ATOM    189  C1  DPPC   19      23.050  20.800  11.000  1.00  0.00
ATOM    189  C1  DPPC   19      23.050  20.800  11.000  1.00  0.00
ATOM    189  C1  DPPC   19      23.050  20.800  11.000  1.00  0.00
ATOM    189  C1  DPPC   19      23.050  20.800  11.000  1.00  0.00
ATOM    189  C1  DPPC   19      23.050  20.800  11.000  1.00  0.00
ATOM    189  C1  DPPC   19      23.050  20.800  11.000  1.00  0.00
ATOM    189  C1  DPPC   19      23.050  20.800  11.000  1.00  0.00"""

# The last code seen in the 5th column (None until the first data line).
code = None

# The number of lines counted so far for the current code.
count = 0

for line in data.splitlines():
    fields = line.split()
    if len(fields) < 5:
        # Blank or short lines carry no 5th column; skip them rather than
        # crash with an IndexError.
        continue

    # Get the 5th column.
    c = fields[4]

    # The code in the 5th column changed.
    if c != code:
        # If we aren't at the start of the data, print the count
        # for the code that just ended.
        if code is not None:
            print("{}: {}".format(code, count))

        # Remember the new code and start counting its run from zero;
        # without this reset every later run would report a running total.
        code = c
        count = 0

    # Count the line
    count = count + 1

# Print the count for the last code, if any line was counted at all.
if code is not None:
    print("{}: {}".format(code, count))


18: 9
19: 19

相关问题 更多 >