数据集布局错误

xx4 <--ID
070414 <--DateStrong
5.6 10 <--Force Ratio
Sample Rate: 50/s <--Garbage
220.68 0.14 17.80 92.20
220.80 0.02 9.40 9.40
224.32 0.14 14.60 72.20
227.08 0.14 26.60 130.60
227.78 0.08 19.60 62.00
228.04 0.18 40.40 257.20
231.22 0.12 14.00 61.20

# Script from the question: reads the session file 'ED7120214' with pandas,
# copies its four data columns into module-level lists, and writes the frame
# to 'MainFile'. Line structure restored from a collapsed paste; statements
# are otherwise kept as the author wrote them.
import os
import sys
import csv
import pandas as pd
import numpy as np
import itertools as it
import benFuncts.BenFuncts as bf  # My own functions
import matplotlib.pyplot as plt

ID = []
ID_dict = {}
DATE = []
FORCE = []
RATIO = []
TIME = []
DURR = []
pF = []
TOF = []
ED7 = []
ED6 = []
ED5 = []
ED4 = []
h = 'DATE', 'DAYNUM', 'RATIO', 'CRIT', 'TOTRESP', 'CRITRESP', 'PELLETS', 'AVG_PF', 'AVG_TOF'
Crit = {}
MastList = []
rd_files = []  # List of file strings

# Makes the main file path in this case:
# /Users/benlibman/Desktop/EffortDemandTests/EffortDemandPyTests/
path = str(os.getcwd()) + '/'

# List of files in the working directory (see path above)
mainDir = os.listdir(str(os.getcwd()) + '/')

# Pulls the list files from the mainDir (above)
ID = [i for i in mainDir if len(i) <= 3 and 'ED' in i]

# f_Out = csv.writer(open('MainFile', 'wa'), delimiter=',')
# f_Out = open('MainFile', 'wa')  # , quoting=csv.QUOTE_NONE)

# NOTE(review): 'rb' is the Python 2 convention for csv.reader; on Python 3
# the csv module expects a text-mode file -- confirm the target interpreter.
f_In = csv.reader(open('ED7', 'rb'), delimiter='\t')


def mkPath():
    """Append path + cell to rd_files for every cell of f_In longer than one character."""
    for row in f_In:
        for i in row:
            if len(i) > 1:
                rd_files.append(path + str(i))


# mkPath() has no return statement, so mP is None; the call is made for its
# side effect of filling rd_files.
mP = mkPath()

# pdmF = pd.read_csv('MainFile', sep='\t', engine='python')
# with open('ED7120214', 'r') as f:

# Data records: skip the first five lines, keep the first four tab-separated
# columns.
df = pd.read_csv(open('ED7120214', 'r'), sep='\t', skiprows=5, usecols=(
    0, 1, 2, 3), names=('TIME', 'DURR', 'pF', 'TOF'))

# Header values: each read below skips three of the first four lines and drops
# len(df) lines from the end of the file, then splits what is left on spaces.
frCR = pd.read_csv(open('ED7120214', 'r'), sep=' ', skiprows=(0, 1, 3), skipfooter=(
    len(df)), engine='python', index_col=False, names=('FORCE', 'RATIO'))

date_index = pd.read_csv(open('ED7120214', 'r'), squeeze=True, sep=' ', skiprows=(
    0, 2, 3), skipfooter=(len(df)), engine='python', index_col=False, names=('DATE', 'NaN'))

id_index = pd.read_csv(open('ED7120214', 'r'), squeeze=True, sep=' ', skiprows=(
    1, 2, 3), skipfooter=(len(df)), engine='python', index_col=False, names=('ID', 'NaN'))

pDF = pd.DataFrame(df)

# Copy each data column into its module-level list.
for row in pDF.TIME:
    TIME.append(row)
for row in pDF.DURR:
    DURR.append(row)
for row in pDF.pF:
    pF.append(row)
for row in pDF.TOF:
    TOF.append(row)

# Function-call form of print works on both Python 2 and Python 3.
print(pDF.pF.mean())

FORCE.append(frCR.FORCE)
RATIO.append(frCR.RATIO)
DATE.append(list(date_index.DATE))
ID_dict.update(id_index.ID)
DATE = [str(i).strip('[]') for i in DATE]
# ED7.append(FORCE)
# ED7.append(DATE)
# ED7.append(RATIO)
ED7.append(TIME)
ED7.append(DURR)
ED7.append(pF)
ED7.append(TOF)
Dt = bf.addCol(range(len(TIME)), DATE)

# NOTE(review): 'wa' is not a standard open() mode (Python 3 rejects it), and
# index_names/names do not appear among the documented DataFrame.to_csv
# parameters -- confirm the intended mode and keywords before relying on this.
with open('MainFile', 'wa') as mf:
    pDF.to_csv(mf, header=True, index_names=True, names=(
        'DATE', 'DAYNUM', 'TIME', 'DURR', 'pF', 'TOF'))

1条回答

网友

1楼 · 发布于 2024-04-25 00:54:40

如果您所要做的只是重新格式化数据并将其写回一个文件，那么这应该适用于示例中的文件格式：

# Reformat the data file: prepend the four header values (ID, date, force,
# ratio) to every data record and write each record as one comma-separated
# line of new.txt.
with open('data.txt') as in_file, open('new.txt', 'w') as out_file:
    # get the dataset identifiers
    # next(f) is used instead of f.next(): the method form exists only on
    # Python 2 file objects, the built-in works on Python 2.6+ and 3.
    ID = next(in_file).strip()
    date_strong = next(in_file).strip()
    force_ratio = next(in_file).strip()
    # the third line must hold exactly two whitespace-separated values,
    # otherwise this unpacking raises ValueError
    force_ratio1, force_ratio2 = force_ratio.split()
    next(in_file)  # Garbage line
    # example data has two blank lines
    next(in_file)
    next(in_file)
    dataset_id = (ID, date_strong, force_ratio1, force_ratio2)
    # iterate over the records
    for line in in_file:
        fields = line.split()
        # skip blank lines so they do not become rows that contain only the
        # dataset identifiers
        if not fields:
            continue
        # prepend the dataset id
        record = list(dataset_id)
        record.extend(fields)
        # write to the new file
        out_file.write(','.join(record) + '\n')

相关问题更多 >

编程相关推荐

热门问题

热门文章