在python中如何从大括号之间的文件中只提取特定的结构集

2024-04-24 10:34:48 发布

您现在位置:Python中文网/ 问答频道 /正文

a.有一个场景,在我的函数中,读入一个包含如下所示的c结构列表的文件,读取该文件并提取每个结构的{}括号之间的所有信息,并将它们存储在数组中。你知道吗

package whatstruct;

  typedef struct packed {
    int [4:0] version;
    char party;
    float parity; 
    char ccc; 
    int [7:0] spend;
  } party_s;

  typedef struct packed {
    int [5:0] cod3;
    int [1:0] group;
    int [51:0] duty;
    char calloff; 
    char selfi;
    int [11:0] snap; 
    int [5:0] longtrip;
    int [1:0] whattodo;
    int [9:0] sky;
    int [7:0] yahoo;
    int [5:0] hurrey;
    int [3:0] appology;
    int [1:0] temp;
    int [2:0] iddd;  
    float dontknow; 
  } dict_s;

  typedef struct packed {
    int [4:0] jan;
    int [12:0] feb;
    int [1:0] mar;
    logic mar; 
    logic april; 
  } months_c;

  typedef struct packed {
    var apple;
    int mango;
    float banana;
    int lichi
  } fruits_s; 
endpackage: whatstruct

函数读入两个参数,一个是包含结构的文件,第二个参数只包含需要从中提取行的结构名列表。为了提取{}之间的所有信息,请尝试下面给出的一些示例。你知道吗

from Tkinter import *
import subprocess
import shlex
import os 
import time
import string
import threading
import sys, argparse
import ttk
import re
import logging
import warnings
import os.path
import gzip


readstructfile = None
filename = None
structnames = None


def readfileanddump(filename, structnames):
    readstructfile = open(filename, "r+")

    test_str = readstructfile.readlines() 

    matches = re.finditer(r"\{(.*?)\}", str(test_str), re.MULTILINE | re.DOTALL)

    for matchNum, match in enumerate(matches):
        for groupNum in range(0, len(match.groups())):
            print (match.group(1))

    # for lines in readstructfile:
    #     print lines

readfileanddump("structpkg.c", all);

c.能够提取{}之间的所有信息,如果第二个参数提供为“all”,则转储这些信息。但是,如果传递的第二个参数是包含结构名称的字符串,则只提取那些需要提取的结构。我不知道怎么做。是否可以使用regexp?或者其他更好的方法?你知道吗

任何建议都非常有用!你知道吗

更新代码:

from Tkinter import *
import subprocess
import shlex
import os 
import time
import string
import threading
import sys, argparse
import ttk
from openpyxl import Workbook, load_workbook
from openpyxl.compat import range
from openpyxl.utils import get_column_letter
from openpyxl.styles import Alignment, PatternFill, Border, Color
from openpyxl.styles.colors import YELLOW 
from openpyxl.styles.borders import Border, Side
import re
import logging
import warnings
import os.path
import gzip

#import xlwt
#import xlrd 

readstructfile = None
filename = None
structnames = []
filename_and_structnames_l = [] 
global found_struct
global found_struct_idx


found_struct_idx = {} 
found_struct = 0

def readfileanddump(filename_and_structnames):
    global found_struct
    filename_and_structnames_l = filename_and_structnames.split(",") 

    if len(filename_and_structnames_l) < 2:
        filename = filename_and_structnames_l[0]
        structnames.append('all')
        print "1. Value of filename %s and structnames %s"%(filename, str(structnames))
    elif len(filename_and_structnames_l) > 1 and len(filename_and_structnames_l) < 3:
        filename = filename_and_structnames_l[0]
        structnames.append(filename_and_structnames_l[1])
        print "2. Value of filename %s and structnames %s"%(filename, str(structnames))
    elif len(filename_and_structnames_l) > 2: 
        filename = filename_and_structnames_l[0]
        for i in range (1, len(filename_and_structnames_l)):
            structnames.append(filename_and_structnames_l[i])
        print "3. Value of filename %s and structnames %s"%(filename, str(structnames))


    if (len(structnames) == 1) and (structnames[0] == 'all'):
        readstructfile = open(filename, "r+")
        test_str = readstructfile.readlines() 
        matches = re.finditer(r"\{(.*?)\}", str(test_str), re.MULTILINE | re.DOTALL)

        for matchNum, match in enumerate(matches):
            for groupNum in range(0, len(match.groups())):
                print (match.group(1))
                # match_group_t = str(str(re.sub('[A-Za-z0-9_[]:]+', '', str(match.group(1)))).strip())
                # match_group_t = match.groups(1)
                #for i in range (len(match_group_t)):
                    # print match_group_t[i].replace("\n","")
                    # print str(str(re.sub('[^A-Za-z0-9[:]]+[\r\n]+', ' ', str(match_group_t[i]))))
                    # print re.sub(r"(?<=[a-z])\r?\n"," ", match_group_t[i]) 
                    # print ''.join(ch for ch in match_group_t[i] if not ch.isspace())
                    # print ''.join(match_group_t[i].strip().split())
                    #print(re.sub(r"(?:[;\n']|\s{2,})",r'',match_group_t[i])[2:])
                # print match_group_t
                print(re.sub(r"(?:[;\n']|\s{2,})",r'',match.group(1))[2:])
    else:
        readstructfile = open(filename, "r+")
        for lines in readstructfile:
            if found_struct == 0 and re.match(r'.*typedef struct', lines):
                found_struct = 1
                matches = re.finditer(r"\{(.*?)\}.*", str(lines), re.MULTILINE | re.DOTALL)
                print "Value in matches", matches
            if found_struct == 1 and re.match(r'.*}.*', lines):
                found_struct = 0
                found_struct_t = str(str(re.sub('[^A-Za-z0-9_]+', ' ', str(lines))).strip()).split(" ")

                for i in range (len(structnames)):
                    if structnames[i] == found_struct_t[0]:
                        # print "value of found_struct_t", found_struct_t
                        # #found_struct_idx.append(found_struct_t[0]) 
                        # found_struct_idx[structnames[i]] = i 
                        # print "Value of found_struct_idx", found_struct_idx
                        # break

                        print "Value in found_struct_t", found_struct_t 
                        # for matchNum, match in enumerate(matches):
                        #     for groupNum in range(0, len(match.groups())):
                        #         print (match.group(1))

readfileanddump('alldetailspkg');

Tags: andinimportreforlenmatchgroup
1条回答
网友
1楼 · 发布于 2024-04-24 10:34:48

按照我的要求工作:

from Tkinter import *
import subprocess
import shlex
import os 
import time
import string
import threading
import sys, argparse
import ttk
import re
import logging
import warnings
import os.path
import gzip

#import xlwt
#import xlrd 

readstructfile = None
filename = None
structnames = []
filename_and_structnames_l = [] 
global found_struct
global found_struct_idx
global temp_struct
global final_struct

found_struct_idx = {} 
found_struct = 0
temp_struct = []
final_struct = [] 

def readfileanddump(filename_and_structnames):
    global found_struct
    global temp_struct
    global final_struct
    filename_and_structnames_l = filename_and_structnames.split(",") 

    if len(filename_and_structnames_l) < 2:
        filename = filename_and_structnames_l[0]
        structnames.append('all')
        print "1. Value of filename %s and structnames %s"%(filename, str(structnames))
    elif len(filename_and_structnames_l) > 1 and len(filename_and_structnames_l) < 3:
        filename = filename_and_structnames_l[0]
        structnames.append(filename_and_structnames_l[1])
        print "2. Value of filename %s and structnames %s"%(filename, str(structnames))
    elif len(filename_and_structnames_l) > 2: 
        filename = filename_and_structnames_l[0]
        for i in range (1, len(filename_and_structnames_l)):
            structnames.append(filename_and_structnames_l[i])
        print "3. Value of filename %s and structnames %s"%(filename, str(structnames))


    if (len(structnames) == 1) and (structnames[0] == 'all'):
        readstructfile = open(filename, "r+")
        for lines in readstructfile:
            if found_struct == 0 and re.match(r'.*typedef struct', lines):
                found_struct = 1
            elif found_struct == 1 and re.match(r'.*}.*', lines):
                found_struct = 0
                print "Value of temp_struct",temp_struct
                final_struct = temp_struct
                print "Value of final_struct", final_struct
            elif found_struct == 1:
                print "Value of lines",lines
                lines = str(lines.strip()).replace(";","")
                print "Value of lines b4",lines
                print "Value of lines a8",lines.strip()
                #print(re.sub(r"(?:[;]|\s{2,})",r'',lines)[2:])
                temp_struct.append(lines)
    else:
        readstructfile = open(filename, "r+")
        for lines in readstructfile:
            if found_struct == 0 and re.match(r'.*typedef struct', lines):
                found_struct = 1
                temp_struct = None; temp_struct = []
            elif found_struct == 1 and re.match(r'.*}.*', lines):
                found_struct = 0
                reached_struct = re.sub(r'.*}.|;',r'',lines)
                for i in range (len(structnames)):
                    print "Value of structnames[%d] %s and lines is %s"%(i, structnames[i], reached_struct)
                    if str(structnames[i]).strip() == str(reached_struct).strip():
                        for i in range (len(temp_struct)):
                            final_struct.append(temp_struct[i])
                        print "Value of temp_struct",temp_struct
                        print "Value of final_struct",final_struct
            elif found_struct == 1:
                # print "Value of lines",lines
                lines = str(lines.strip()).replace(";","")
                # print "Value of lines b4 strip",lines
                # print "Value of lines a8 strip",lines.strip()
                temp_struct.append(lines)

相关问题 更多 >