CSV查找使用两个CSV文件并生成新fi

2024-04-25 23:46:15 发布

您现在位置:Python中文网/ 问答频道 /正文

我对python世界非常陌生,我有一个不知道如何面对的问题。我所关注的表的示例可以在下面的链接中找到:https://i.stack.imgur.com/mm1it.png

我有一个csv文件,其中包含一个与t1中类似的表(来自链接),充当一个数据库(这只是整个数据库的一个片段)。在

我有另一个表(t2),我想用它作为我的搜索参数。在

我想创建一个查找和返回程序,它将使用t2作为查看t1的方法,并提供一个输出csv文件,该文件为我提供了所有a参数,并用t2中分配给它们的数值替换物种_a、物种b和物种c中的离子。如果物种没有值,那么它应该返回a-1。最后一个表看起来与此类似:t3。在

这种复杂方法的原因是我积累了一个数据库,它的格式与我正在使用的软件不同。我不能简单地将数据库离子更改为t2中的数字,因为每次我用软件开始新的运行时,我都会根据系统中考虑的材料来分配数字值。在


Tags: 文件csv方法数据库示例参数软件链接
3条回答

我认为你应该看看熊猫图书馆。在

http://pandas.pydata.org/pandas-docs/stable/io.html#io-read-csv-table

有更简洁的方法来实现这一点(您可以创建一个字典并将列中的值重新映射为ID),但是您的问题似乎是关于连接数据,因此下面是一个pandas如何连接.csv文件的示例:

df1 = pd.read_csv('../path/t1.csv')
df2 = pd.read_csv('../path/t2.csv')

combined = pd.merge(df1, df2, how='left', left_on='species_a', right_on='aq_species')

combined = pd.merge(combined, df2, how='left', left_on='species_b', right_on='aq_species')

combined = pd.merge(combined, df3, how='left', left_on='species_c', right_on='aq_species')

#this will output 3 ion_id columns which you can then rename

combined.rename(columns={'ion_id' : 'species_a_id', 'ion_id_x' : 'species_b_id', 'ion_id_y' : 'species_c_id'}, inplace=True)

combined.to_csv('../path/t3.csv', index=False)

我自己能够用一段更长的代码来解决这个问题:

import csv
import itertools
from itertools import izip

def all_species_combination():
        'Creates all possible ion combinations so that they can compare against the database'
        with open('t2', 'rb') as f:
            reader = csv.reader(f)
            # to replace empty spacies within list entries
            global combined
            text = []
            species_a = []
            species_b = []
            species_c = []
            combined = []
            # obtaining three lists of the aqueous species so that they can be combined later
            for row in reader:
                species_a.append(row[1])
                species_b.append(row[1])
                species_c.append(row[1])


            # Iteratively combines the three separate species lists to provide a combined list
            list(itertools.product(species_a, species_b, species_c))
            for x in itertools.product(species_a, species_b, species_c):
                combined.append(x)

def lookup_ion_combos():
        'Checks the database csv file whether potential database entries are available'
        with open('t1', 'rb') as f:
            reader = csv.reader(f)
            global species_lookup
            species_lookup = []
            pitzer_id = []
            species_a = []
            species_b = []
            species_c = []

            for row in combined:
                species_a.append(row[0])
                species_b.append(row[1])
                species_c.append(row[2])
            # Iteratively goes through every row of ion combinations and compares to t1.
            for row in reader:
                for i in range(len(species_a)):
                    if row[0] == species_a[i]:
                        if row[1] == species_b[i]: 
                                if row[2] == species_c[i]: 
                                        species_lookup.append(row[0:11])
def gems_ions():
'Creates dictionary of t2 to replace corresponding species'
with open(‘t2.csv’, 'rb') as g:
    reader = csv.reader(g)
    global ion_id, atom, dictionary, zero_dict
    ion_id = []
    atom = []

    for row in reader:
        ion_id.append(row[0])
        atom.append(row[1])

    ion_id = [w.replace(' ','').replace('@','') for w in ion_id]
    atom = [w.replace(' ','').replace('@','') for w in atom]

    dictionary = dict(zip(atom, ion_id))

DATA = {"records": [dictionary]}
for name, datalist in DATA.iteritems():  # Or items() in Python 3.x
    for datadict in datalist:
        for key, value in datadict.items():
            if value == '0':
                datadict[key] = 'zero'

zero_dict = {'zero':'0'}

all_species_combination()
lookup_ion_combos()
gems_ions()

这使我能够生成一个新的数组,其中所有查找到的离子都与它们各自的值相关联。然后我创建了一个字典,将它们转换为基于t2的数值。在

谢谢你的帮忙!在

这是未测试,因为我没有你的CSV文件。csv模块是你的朋友。在

import sys
import csv

# Create a map of species to ion ID
species_map = {}
with open('t2.csv') as fin:
    reader = csv.reader(fin)
    for row in reader:
        species_map[row[1]] = row[0]

# Write the output mapping species names to IDs
fieldnames = ['species_a', 'species_b', 'species_c', 'A0',  'A1',  'A2',  'A3',  'A4',  'A5', 'interaction_type']
writer = csv.DictWriter(sys.stdout, fieldnames)
writer.writeheader()
with open('t1.csv') as fin:
    reader = csv.DictReader(fin)
    for row in reader:
        row['species_a'] = species_map.get(row['species_a'], -1)
        row['species_b'] = species_map.get(row['species_b'], -1)
        row['species_c'] = species_map.get(row['species_c'], -1)
        writer.writerow(row)

相关问题 更多 >