Python：ValueError: 'she' is not in list（“she”不在列表中）

# First attempt from the question. The original paste was collapsed onto a
# single line; the layout below is reconstructed from the syntax.
# NOTE(review): indentation is inferred, confirm against the asker's real file.

# Read the four sample emails fully into memory. Only email_four is used below.
email_one = open("email_one.txt", "r").read()
email_two = open("email_two.txt", "r").read()
email_three = open("email_three.txt", "r").read()
email_four = open("email_four.txt", "r").read()

# Terms to censor. Multi-word entries can never equal a single token produced
# by email.split(), so they are never matched by the loops below.
negative_words = ["concerned", "behind", "danger", "dangerous", "alarming", "alarmed", "out of control", "help", "unhappy", "bad", "upset", "awful", "broken", "damage", "damaging", "dismal", "distressed", "distressed", "concerning", "horrible", "horribly", "questionable"]
proprietary_terms = ["she", "personality matrix", "sense of self", "self-preservation", "learning algorithm", "her", "herself"]


def uberCensor(email):
    """Replace flagged words and their left/right neighbours with 'x' runs.

    The email is split on whitespace, scanned once for ``negative_words`` and
    once for ``proprietary_terms``, and re-joined with single spaces.

    Known defect (the subject of the question): the match test is
    case-insensitive, but the position is then looked up with
    ``split_email.index(each_term)``, which searches for the term exactly as
    spelled in the list. When the email contains e.g. "She", the test passes
    for "she" and ``index("she")`` raises ``ValueError: 'she' is not in list``.
    """
    split_email = email.split()
    for each_word in split_email:
        for each_term in negative_words:
            # Case-insensitive comparison of the current word with the term.
            if each_term.lower() == each_word.lower():
                # index() is re-evaluated on every line and finds the FIRST
                # exact occurrence of each_term, not the current position.
                # index - 1 is -1 for the first word (wraps to the last word),
                # and index + 1 raises IndexError for the last word.
                split_email[split_email.index(each_term)-1] = ('x' * len(split_email[split_email.index(each_term)-1]))
                split_email[split_email.index(each_term)+1] = ('x' * len(split_email[split_email.index(each_term)+1]))
                split_email[split_email.index(each_term)] = ('x' * len(split_email[split_email.index(each_term)]))
    for each_word in split_email:
        for each_term in proprietary_terms:
            # Same logic, and same index() problem, for the proprietary terms.
            if each_term.lower() == each_word.lower():
                split_email[split_email.index(each_term)-1] = ('x' * len(split_email[split_email.index(each_term)-1]))
                split_email[split_email.index(each_term)+1] = ('x' * len(split_email[split_email.index(each_term)+1]))
                split_email[split_email.index(each_term)] = ('x' * len(split_email[split_email.index(each_term)]))
    return ' '.join(split_email)


print(uberCensor(email_four))

# Second attempt from the question. The original paste was collapsed onto two
# lines; the layout below is reconstructed from the syntax.
# NOTE(review): indentation is inferred, confirm against the asker's real file.

# Read the sample emails fully into memory. Only email_four is used below.
email_three = open("email_three.txt", "r").read()
email_four = open("email_four.txt", "r").read()

# Terms to censor. Multi-word entries can never be an element of the list
# produced by email.split(), so they are never matched below.
negative_words = ["concerned", "behind", "danger", "dangerous", "alarming", "alarmed", "out of control", "help", "unhappy", "bad", "upset", "awful", "broken", "damage", "damaging", "dismal", "distressed", "distressed", "concerning", "horrible", "horribly", "questionable"]
proprietary_terms = ["she", "personality matrix", "sense of self", "self-preservation", "learning algorithm", "her", "herself"]


def uberCensor(email):
    """Replace flagged words and their left/right neighbours with 'x' runs.

    Each term is checked twice: as listed, and title-cased.

    Known defect (the subject of the question): the title-case branches test
    ``each_term.title() in split_email`` but then look the position up with
    ``split_email.index(each_term)``, the term as listed. If that exact
    spelling is not in the list (or was just x'd out by the branch above),
    ``index`` raises ``ValueError``, e.g. ``'she' is not in list``.
    """
    split_email = email.split()
    # each_word is never read in the loop body; the outer loop only repeats
    # the membership checks once per word.
    for each_word in split_email:
        for each_term in negative_words:
            if each_term in split_email:
                # index() finds the first exact occurrence. index - 1 wraps to
                # the last word when the match is first, and index + 1 raises
                # IndexError when the match is last.
                split_email[split_email.index(each_term)-1] = ('x' * len(split_email[split_email.index(each_term)-1]))
                split_email[split_email.index(each_term)+1] = ('x' * len(split_email[split_email.index(each_term)+1]))
                split_email[split_email.index(each_term)] = ('x' * len(split_email[split_email.index(each_term)]))
            if each_term.title() in split_email:
                # Tested with .title() but looked up without it, see docstring.
                split_email[split_email.index(each_term)-1] = ('x' * len(split_email[split_email.index(each_term)-1]))
                split_email[split_email.index(each_term)+1] = ('x' * len(split_email[split_email.index(each_term)+1]))
                split_email[split_email.index(each_term)] = ('x' * len(split_email[split_email.index(each_term)]))
    for each_word in split_email:
        for each_term in proprietary_terms:
            if each_term in split_email:
                split_email[split_email.index(each_term)-1] = ('x' * len(split_email[split_email.index(each_term)-1]))
                split_email[split_email.index(each_term)+1] = ('x' * len(split_email[split_email.index(each_term)+1]))
                split_email[split_email.index(each_term)] = ('x' * len(split_email[split_email.index(each_term)]))
            if each_term.title() in split_email:
                # Same .title() / index(each_term) mismatch as above.
                split_email[split_email.index(each_term)-1] = ('x' * len(split_email[split_email.index(each_term)-1]))
                split_email[split_email.index(each_term)+1] = ('x' * len(split_email[split_email.index(each_term)+1]))
                split_email[split_email.index(each_term)] = ('x' * len(split_email[split_email.index(each_term)]))
    return ' '.join(split_email)


print(uberCensor(email_four))

2条回答

网友

1楼 · 编辑于 2024-05-18 23:40:27

在if each_term.title() in split_email:之后的代码中，您使用的是split_email.index(each_term)而不是split_email.index(each_term.title())。因此，您试图获取一个不存在的单词的索引（如果存在，则在上一个块中被替换）

由于在split_email和negative_words中循环每个单词，因此不需要使用in或index()。检查这两个词是否相等。使用enumerate()获取split_email中的索引

def uberCensor(email):
    """Censor flagged words in *email* together with their direct neighbours.

    The email is split on whitespace. Every token that equals an entry of the
    module-level ``negative_words`` or ``proprietary_terms`` lists, either as
    listed or title-cased, is replaced by a run of ``'x'`` of the same length,
    and so are the tokens immediately before and after it.

    Args:
        email: The text to censor.

    Returns:
        The censored text, with tokens re-joined by single spaces (original
        whitespace runs and line breaks are not preserved).

    Limitations kept from the original: multi-word terms never match because
    comparison is per token, and a token with attached punctuation ("she,")
    is not equal to the bare term.
    """
    split_email = email.split()

    # Accept each term exactly as listed or in Title Case.
    flagged = set()
    for term_list in (negative_words, proprietary_terms):
        for each_term in term_list:
            flagged.update((each_term, each_term.title()))

    # Decide what to hide by looking at the untouched tokens first. Censoring
    # while scanning would x-out a neighbour before it is examined, so a
    # flagged word right after another flagged word would be missed.
    to_censor = set()
    last_index = len(split_email) - 1
    for i, each_word in enumerate(split_email):
        if each_word in flagged:
            to_censor.add(i)
            if i > 0:
                to_censor.add(i - 1)
            if i < last_index:
                to_censor.add(i + 1)

    for i in to_censor:
        split_email[i] = 'x' * len(split_email[i])

    return ' '.join(split_email)

网友

2楼 · 编辑于 2024-05-18 23:40:27

我使用chain将两个单词列表合并成一个可迭代对象。正如@Barmar所说，你可以直接比较单词，因为对于原始电子邮件中的每个单词，你都会遍历每一个需要审查的词

我在比较之前把双方的单词都转换成了小写

我也找不到如何将.title()方法附加到每个单词，因为它们是字符串，但是请告诉我是否需要与.title()进行比较

from itertools import chain

# Tiny demo term lists; both are scanned as one sequence by uberCensor.
negative_words = ['test']
proprietary_terms = ['2']


def uberCensor(email):
    """Return *email* with flagged words (and some neighbours) x'd out.

    The text is split on single spaces. A token matches when it equals, case
    insensitively, any entry of ``negative_words`` or ``proprietary_terms``.
    A matching token is replaced by 'x' repeated to the term's length, and the
    token before it is always masked too. The token after it is masked only
    when the match is the second-to-last token.
    """
    tokens = email.split(' ')
    penultimate = len(tokens) - 2  # list length never changes below

    for position in range(len(tokens)):
        # Read the token as it is now; earlier matches may have masked it.
        folded = tokens[position].lower()
        for banned in chain(negative_words, proprietary_terms):
            if banned.lower() != folded:
                continue
            tokens[position] = 'x' * len(banned)
            if position > 0:
                left = position - 1
                tokens[left] = 'x' * len(tokens[left])
            if position == penultimate:
                right = position + 1
                tokens[right] = 'x' * len(tokens[right])

    return ' '.join(tokens)


print(uberCensor('a 1 2 test'))

算法有一处细微的改动：通常只审查当前单词左侧（前一个）的单词。如果总是修改index+1处的单词，而它本身也是需要审查的词，那么轮到它时就已经无法匹配了（因此，只有当当前单词是倒数第二个单词时，才会修改它右侧的单词）

编辑

修正了重复x的错误

编辑

相关问题更多 >

编程相关推荐

热门问题

热门文章