擅长:python、mysql、java
<p>这可能不是最好的方法,具体取决于你的序列大小,但它可以完成任务。</p>
<pre><code>import re
data_file = "location_of_fasta_file"

# A record is reported when its full sequence begins or ends with one of these.
MOTIFS = ("GTACAGTAGG", "CAACGGTTTTGCC")


def has_terminal_motif(sequence, motifs=MOTIFS):
    """Return True if *sequence* starts or ends with any of *motifs*.

    An empty sequence never matches.
    """
    motifs = tuple(motifs)  # startswith/endswith only accept str or tuple
    return sequence.startswith(motifs) or sequence.endswith(motifs)


def iter_fasta_records(lines):
    """Yield ``(header, sequence)`` pairs from an iterable of FASTA lines.

    A line starting with ``>`` opens a new record; every following line, with
    trailing whitespace stripped, is concatenated into that record's sequence.
    Lines that appear before the first header belong to no record and are
    skipped instead of raising a NameError on an unset header.
    """
    header = None
    chunks = []
    for raw in lines:
        line = raw.rstrip()
        if line.startswith(">"):
            if header is not None:
                yield header, "".join(chunks)
            header = line
            chunks = []
        elif header is not None:
            chunks.append(line)
    # The last record is not followed by another header, so flush it here.
    if header is not None:
        yield header, "".join(chunks)


def main(path=data_file):
    """Print the header and sequence of every record in *path* that matches."""
    # The context manager closes the file even if reading fails part-way.
    with open(path) as handle:
        for header, sequence in iter_fasta_records(handle):
            if has_terminal_motif(sequence):
                # Single-argument print() behaves the same on Python 2 and 3.
                print(header)
                print(sequence)


if __name__ == "__main__":
    main()
</code></pre>