我正在尝试在CSV文档中应用多个函数。我想有一个第一个函数,重新发送数据到其他函数根据您的列的值
数据(测试.csv):
sentence,language
.,fr
.,en
.,en
.,it
.,es
.,fr
.,fr
.,fr
.,es
.,ge
.,fr
.,fr
"Prezzi",it
"it's not expensive",en
"prix à baisser",fr
"casi 50 euros la alfombra es cara",es
"Prix,fr
"PREZZI più bassi",it
"Preis",ge
"Precio",es
"Price",en
"es ist nicht teuer",fr
脚本:
import string
import pandas as pd
def main(dataset):
dataset = pd.read_csv(dataset, sep =',')
text = dataset['sentence']
language = dataset['language']
for language in dataset:
if language == 'fr':
cleanText_FR()
if language == 'es':
cleanText_ES()
if language == 'it':
cleanText_IT()
if language == 'en':
cleanText_EN()
if language == 'ge':
cleanText_EN()
def cleanText_FR():
text_lower = text.str.lower()
punct = string.punctuation
pattern = r"[{}]".format(punct)
text_no_punct = text_lower.str.replace(pattern, ' ')
text_no_blancks = text_no_punct.replace('\s+', ' ', regex=True)
text_no_blancks = text_no_blancks.str.rstrip()
text_no_duplicate = text_no_blancks.drop_duplicates(keep=False)
text_cluster_random = text_no_small.sample(n=1000)
text_list = text_cluster_random.tolist()
return text_list
def cleanText_ES():
text_lower = text.str.lower()
punct = string.punctuation
pattern = r"[{}]".format(punct)
text_no_punct = text_lower.str.replace(pattern, ' ')
text_no_blancks = text_no_punct.replace('\s+', ' ', regex=True)
text_no_blancks = text_no_blancks.str.rstrip()
text_no_duplicate = text_no_blancks.drop_duplicates(keep=False)
text_cluster_random = text_no_small.sample(n=1000)
text_list = text_cluster_random.tolist()
return text_list
def cleanText_IT():
text_lower = text.str.lower()
punct = string.punctuation
pattern = r"[{}]".format(punct)
text_no_punct = text_lower.str.replace(pattern, ' ')
text_no_blancks = text_no_punct.replace('\s+', ' ', regex=True)
text_no_blancks = text_no_blancks.str.rstrip()
text_no_duplicate = text_no_blancks.drop_duplicates(keep=False)
text_cluster_random = text_no_small.sample(n=1000)
text_list = text_cluster_random.tolist()
return text_list
def cleanText_EN():
text_lower = text.str.lower()
punct = string.punctuation
pattern = r"[{}]".format(punct)
text_no_punct = text_lower.str.replace(pattern, ' ')
text_no_blancks = text_no_punct.replace('\s+', ' ', regex=True)
text_no_blancks = text_no_blancks.str.rstrip()
text_no_duplicate = text_no_blancks.drop_duplicates(keep=False)
text_cluster_random = text_no_small.sample(n=1000)
text_list = text_cluster_random.tolist()
return text_list
def cleanText_GE():
text_lower = text.str.lower()
punct = string.punctuation
pattern = r"[{}]".format(punct)
text_no_punct = text_lower.str.replace(pattern, ' ')
text_no_blancks = text_no_punct.replace('\s+', ' ', regex=True)
text_no_blancks = text_no_blancks.str.rstrip()
text_no_duplicate = text_no_blancks.drop_duplicates(keep=False)
text_cluster_random = text_no_small.sample(n=1000)
text_list = text_cluster_random.tolist()
return text_list
main("test.csv")
我没有任何结果
In [3]: runfile('/home/marin/Bureau/preprocess/preprocess.py', wdir='/home/marin/Bureau/preprocess')
In [4]:
我希望我所有的数据都能作为输出。你知道吗
我的问题不是重复的!不是Python!
使用.iterrows()遍历数据帧,如下所示:
相关问题 更多 >
编程相关推荐