在数据表中应用函数

2024-04-25 15:19:41 发布

您现在位置:Python中文网/ 问答频道 /正文

我正在尝试在CSV文档中应用多个函数。我想有一个第一个函数,重新发送数据到其他函数根据您的列的值

数据(测试.csv):

sentence,language

.,fr

.,en

.,en

.,it

.,es

.,fr

.,fr

.,fr

.,es

.,ge

.,fr

.,fr

"Prezzi",it

"it's not expensive",en

"prix à baisser",fr

"casi 50 euros la alfombra es cara",es

"Prix,fr

"PREZZI più bassi",it

"Preis",ge

"Precio",es

"Price",en

"es ist nicht teuer",fr

脚本:

import string
import pandas as pd

def main(dataset):

    dataset = pd.read_csv(dataset, sep =',')

    text = dataset['sentence'] 
    language = dataset['language'] 

    for language in dataset:

        if language == 'fr':
            cleanText_FR()

        if language == 'es':
            cleanText_ES()

        if language == 'it':
            cleanText_IT()

        if language == 'en':
            cleanText_EN()

        if language == 'ge':
            cleanText_EN()

def cleanText_FR():

    text_lower = text.str.lower()

    punct = string.punctuation
    pattern = r"[{}]".format(punct)
    text_no_punct = text_lower.str.replace(pattern, ' ')

    text_no_blancks = text_no_punct.replace('\s+', ' ', regex=True)
    text_no_blancks = text_no_blancks.str.rstrip()

    text_no_duplicate = text_no_blancks.drop_duplicates(keep=False)

    text_cluster_random = text_no_small.sample(n=1000)

    text_list = text_cluster_random.tolist()


    return text_list

def cleanText_ES():

    text_lower = text.str.lower()

    punct = string.punctuation
    pattern = r"[{}]".format(punct)
    text_no_punct = text_lower.str.replace(pattern, ' ')

    text_no_blancks = text_no_punct.replace('\s+', ' ', regex=True)
    text_no_blancks = text_no_blancks.str.rstrip()

    text_no_duplicate = text_no_blancks.drop_duplicates(keep=False)

    text_cluster_random = text_no_small.sample(n=1000)

    text_list = text_cluster_random.tolist()

    return text_list

def cleanText_IT():

    text_lower = text.str.lower()

    punct = string.punctuation
    pattern = r"[{}]".format(punct)
    text_no_punct = text_lower.str.replace(pattern, ' ')

    text_no_blancks = text_no_punct.replace('\s+', ' ', regex=True)
    text_no_blancks = text_no_blancks.str.rstrip()

    text_no_duplicate = text_no_blancks.drop_duplicates(keep=False)

    text_cluster_random = text_no_small.sample(n=1000)

    text_list = text_cluster_random.tolist()

    return text_list

def cleanText_EN():

    text_lower = text.str.lower()

    punct = string.punctuation
    pattern = r"[{}]".format(punct)
    text_no_punct = text_lower.str.replace(pattern, ' ') 

    text_no_blancks = text_no_punct.replace('\s+', ' ', regex=True)
    text_no_blancks = text_no_blancks.str.rstrip()

    text_no_duplicate = text_no_blancks.drop_duplicates(keep=False)

    text_cluster_random = text_no_small.sample(n=1000)

    text_list = text_cluster_random.tolist()


    return text_list

def cleanText_GE():

    text_lower = text.str.lower()

    punct = string.punctuation
    pattern = r"[{}]".format(punct)
    text_no_punct = text_lower.str.replace(pattern, ' ')

    text_no_blancks = text_no_punct.replace('\s+', ' ', regex=True)
    text_no_blancks = text_no_blancks.str.rstrip()

    text_no_duplicate = text_no_blancks.drop_duplicates(keep=False)

    text_cluster_random = text_no_small.sample(n=1000)

    text_list = text_cluster_random.tolist()

    return text_list

main("test.csv")

我没有任何结果

In [3]: runfile('/home/marin/Bureau/preprocess/preprocess.py', wdir='/home/marin/Bureau/preprocess')

In [4]:

我希望我所有的数据都能作为输出。你知道吗

我的问题不是重复的!不是Python!


Tags: notextesrandomfrlowerlanguagereplace
2条回答

使用.iterrows()遍历数据帧,如下所示:

dataset = pd.read_csv(dataset, sep =',')

for num, row in dataset.iterrows():
    text = row['sentence']
    language = row['language']
    #if statements and language clean method calls go here

相关问题 更多 >