将数据从txt提取到csv的Python脚本

Wed Oct 7 09:00:01 UTC 2020
BSS 02:ca:fe:ca:ca:40(on ap0_1)
freq: 2422
capability: IBSS (0x0012)
signal: -60.00 dBm
primary channel: 3
last seen: 30 ms ago
BSS ac:86:74:0a:73:a8(on ap0_1)
TSF: 229102338752 usec (2d, 15:38:22)
freq: 2422
capability: ESS (0x0421)
signal: -62.00 dBm
primary channel: 3

Time                        | BSS                       | freq |capability   |signal| primary channel |
----------------------------+---------------------------+------+-------------+------+-----------------+
Wed Oct 7 09:00:01 UTC 2020 |02:ca:fe:ca:ca:40(on ap0_1)| 2422 |IBSS (0x0012)|-60.00|        3        |
                            |ac:86:74:0a:73:a8(on ap0_1)| 2422 |IBSS (0x0012)|-62.00|        3        |

import csv
import re

# CSV columns; the same names (case-insensitively) are searched for as "<name>: <value>".
fieldnames = ['TIME', 'BSS', 'FREQ','CAPABILITY', 'SIGNAL', 'CHANNEL']
re_fields = re.compile(r'({})+:\s(.*)'.format('|'.join(fieldnames)), re.I)

with open('ap0_1.txt') as f_input, open('ap0_1.csv', 'w', newline='') as f_output:
    csv_output = csv.DictWriter(f_output, fieldnames= fieldnames)
    csv_output.writeheader()
    # True while the lines of a block are being collected.
    start = False

    for line in f_input:
        line = line.strip()

        if len(line):
            if 'BSS' in line:
                if start:
                    # A second line containing 'BSS' closes the block being collected.
                    start = False
                    block.append(line)
                    text_block = '\n'.join(block)

                    for field, value in re_fields.findall(text_block):
                        entry[field.upper()] = value

                    # NOTE(review): `line` is a str here, so line[0] is a single
                    # character and this comparison looks like it can never be true.
                    if line[0] == 'on ap0_1':
                        entry['BSS'] = block[0]

                    csv_output.writerow(entry)
                else:
                    # First line containing 'BSS': open a new block.
                    start = True
                    entry = {}
                    block = [line]
            elif start:
                block.append(line)

3条回答

网友

1楼 · 编辑于 2024-05-29 04:03:47

使用str.startswith

Ex:

import csv

# Column order of the CSV; the non-BSS names double as the "<name>: <value>"
# keys that are picked up from the input.
fieldnames = ('TIME', 'BSS', 'freq', 'capability', 'signal', 'primary channel')
with open(filename) as f_input, open(outfile, 'w', newline='') as f_output:
    csv_output = csv.DictWriter(f_output, fieldnames=fieldnames)
    csv_output.writeheader()
    # The first line of the file is taken as the timestamp ('' for an empty file).
    result = {"TIME": next(f_input, "").strip()}
    for line in f_input:
        line = line.strip()
        if line.startswith('BSS '):
            # Each "BSS ..." line starts a new record. Flush the previous one
            # first, otherwise a later BSS silently overwrites an earlier one.
            if 'BSS' in result:
                csv_output.writerow(result)
                # Follow-up rows leave TIME blank; DictWriter fills missing keys with ''.
                result = {}
            result['BSS'] = line[4:].strip()
            continue

        # partition() never raises, unlike unpacking the result of split(": ")
        # when the value itself contains ": " or the separator is missing.
        key, sep, value = line.partition(": ")
        # Exact key match: a mere prefix match (e.g. "frequency") would make
        # DictWriter reject the row because of the unknown column.
        if sep and key in fieldnames:
            result[key] = value

    csv_output.writerow(result)

根据评论补充

如果输入文件中包含多个上述格式的文本块（每块以一行时间开头）

import re
import csv

# Matches a line that begins with a day-of-week abbreviation, i.e. a timestamp line.
week_ptrn = re.compile(r"\b(" + "|".join(('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')) + r")\b")
# Column order of the CSV; the non-BSS names double as the "<name>: <value>" keys.
fieldnames = ('TIME', 'BSS', 'freq', 'capability', 'signal', 'primary channel')

with open(filename) as f_input, open(outfile, 'w', newline='') as f_output:
    csv_output = csv.DictWriter(f_output, fieldnames=fieldnames)
    csv_output.writeheader()
    result = []    # one dict per output row, in input order
    for line in f_input:
        line = line.strip()
        if week_ptrn.match(line):
            # A timestamp line opens a new row that the next BSS will fill in.
            result.append({"TIME": line})
            continue

        if line.startswith('BSS '):
            # Start a fresh row when the current one already holds a BSS (or
            # when no row exists yet), so BSS entries never overwrite each other.
            # Such follow-up rows have no TIME; DictWriter writes '' for it.
            if not result or 'BSS' in result[-1]:
                result.append({})
            result[-1]['BSS'] = line[4:].strip()
            continue

        # partition() never raises, unlike unpacking the result of split(": ").
        key, sep, value = line.partition(": ")
        # Exact key match keeps unknown columns out of the DictWriter rows.
        if sep and key in fieldnames:
            if not result:
                # Field line before any timestamp/BSS line: avoid result[-1] IndexError.
                result.append({})
            result[-1][key] = value

    csv_output.writerows(result)

网友

2楼 · 编辑于 2024-05-29 04:03:47

你试图用字段名 “TIME” 去匹配时间，但输入数据里并没有 “TIME: …” 这样的文本。因此，输出中时间一列为空是必然的结果。

另外，我认为下面这两行也有问题：

            # NOTE(review): if `line` is a str, line[0] is a single character and
            # cannot equal the 8-character 'on ap0_1', so this branch looks unreachable.
            if line[0] == 'on ap0_1':
                entry['BSS'] = block[0]

在我看来，您是想在 “BSS ac:86:74:0a:73:a8(on ap0_1)” 中找到 “on ap0_1”。但 line[0] 并不是 “on ap0_1”：line 是字符串，line[0] 只是它的第一个字符 “B”（即使按空格拆分成 ['BSS', 'ac:86:74:0a:73:a8(on', 'ap0_1)']，第一个元素也只是 'BSS'），所以这个条件永远不成立。应该改成这样：

            # Substring test on the first collected line of the block
            # (presumably the "BSS <mac>(on ap0_1)" line — confirm against the caller).
            if 'on ap0_1' in block[0]:
                # Skip the first 4 characters (assumed to be the "BSS " prefix)
                # and strip any remaining leading whitespace.
                entry['BSS'] = block[0][4:].lstrip()

网友

3楼 · 编辑于 2024-05-29 04:03:47

这是我的代码版本

import csv, re

# CSV columns; the same names (case-insensitively) are searched for as "<name>: <value>".
fieldnames = ['TIME', 'BSS', 'FREQ', 'CAPABILITY', 'SIGNAL', 'CHANNEL']
re_fields = re.compile(r'({})+:\s(.*)'.format('|'.join(fieldnames)), re.I)
# A line starting with one of these abbreviations is treated as a timestamp line.
weekdays = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')

with open('ap0_1.txt') as f_input, open('ap0_1.csv', 'w', newline='') as f_output:
    csv_output = csv.DictWriter(f_output, fieldnames=fieldnames)
    csv_output.writeheader()

    row = {}
    for line in f_input:
        line = line.strip()
        if not line:
            continue

        if line.startswith(weekdays):
            # A new timestamp must not be attached to a record that is still
            # pending from the previous block, so flush that record first.
            if 'BSS' in row:
                csv_output.writerow(row)
                row = {}
            row['TIME'] = line
            continue

        if line.startswith('BSS '):
            # A "BSS ..." line starts a new record. Flush only when the current
            # row already holds a BSS, so the row that so far contains just the
            # TIME is completed instead of being written out on its own.
            if 'BSS' in row:
                csv_output.writerow(row)
                row = {}
            row['BSS'] = line[4:].strip()
            continue

        # search() finds the field name anywhere in the line, so
        # "primary channel: 3" is stored under CHANNEL; lines whose key is not
        # a CSV column (e.g. "last seen: ...") simply do not match.
        match = re_fields.search(line)
        if match:
            row[match.group(1).upper()] = match.group(2)

    # Write the last pending record; DictWriter needs a mapping and fills
    # missing columns with ''.
    if row:
        csv_output.writerow(row)

看起来“\n”会创建空行

相关问题更多 >

编程相关推荐

热门问题

热门文章