条件频率分布

def calculateCFD(cfdconditions, cfdevents):
    """Print two conditional frequency tables for Brown corpus genres.

    The first table counts, per genre, the lower-cased non-stopword tokens
    listed in ``cfdevents``.  The second table counts, per genre, how many
    non-stopword tokens end in ``ed`` and how many end in ``ing``.

    Args:
        cfdconditions: Brown corpus category names used as the conditions
            (table rows).
        cfdevents: Words to show as columns of the first table.

    Returns:
        A 2-tuple holding whatever the two ``tabulate`` calls return, kept
        for compatibility with the original ``return(a, b)``.
        NOTE(review): NLTK's ``tabulate`` prints the table and is not
        documented to return a value, so callers should not rely on this.
    """
    # Imported locally so the function is self-contained; the original
    # referenced ``nltk.ConditionalFreqDist`` without ever binding ``nltk``.
    from nltk import ConditionalFreqDist
    from nltk.corpus import brown, stopwords

    stopword = set(stopwords.words('english'))

    # Tokenise each requested genre once and reuse the pairs for both tables.
    word_pairs = [
        (genre, word.lower())
        for genre in cfdconditions
        for word in brown.words(categories=genre)
        if word.lower() not in stopword
    ]

    cdev_cfd = ConditionalFreqDist(word_pairs)
    # The keyword is ``conditions``; the original spelled it ``condition``.
    a = cdev_cfd.tabulate(conditions=cfdconditions, samples=cfdevents)

    # Collapse every remaining token to its suffix label.  Stopwords such as
    # "being" were never relabelled by the original either, so starting from
    # the stopword-free pairs yields the same 'ed' / 'ing' counts.
    suffix_pairs = []
    for genre, word in word_pairs:
        if word.endswith('ing'):
            suffix_pairs.append((genre, 'ing'))
        elif word.endswith('ed'):
            suffix_pairs.append((genre, 'ed'))

    inged_cfd = ConditionalFreqDist(suffix_pairs)
    b = inged_cfd.tabulate(conditions=cfdconditions, samples=['ed', 'ing'])
    return a, b

                  many  years
      adventure     24     32
        fiction     29     44
science_fiction     11     16

                    ed    ing
      adventure   3281   1844
        fiction   2943   1767
science_fiction    574    293

                  good    bad better
      adventure     39      9     30
        fiction     60     17     27
        mystery     45     13     29
science_fiction     14      1      4

                    ed    ing
      adventure   3281   1844
        fiction   2943   1767
        mystery   2382   1374
science_fiction    574    293

3条回答

网友

1楼 · 编辑于 2024-05-15 03:13:22

试试这段代码，看看它是否有效

from nltk.corpus import brown,stopwords
def calculateCFD(cfdconditions, cfdevents):
    """Print word and suffix frequency tables for the requested Brown genres.

    First prints, for each genre in ``cfdconditions``, the counts of the
    words in ``cfdevents`` among lower-cased non-stopword tokens.  Then
    prints, for the same genres, how many non-stopword tokens end in ``ed``
    and how many end in ``ing``.

    Args:
        cfdconditions: Brown corpus category names to tabulate (table rows).
        cfdevents: Words to show as columns of the first table.
    """
    # ``nltk`` itself is never imported by the surrounding snippet (only
    # ``brown`` and ``stopwords`` are), so bring the class in locally.
    from nltk import ConditionalFreqDist

    stopword = set(stopwords.words('english'))

    # Only the requested genres are ever tabulated, so skip tokenising the
    # others.  Filtering ``brown.categories()`` (rather than iterating
    # ``cfdconditions`` directly) keeps unknown genre names harmless, as in
    # the original.
    wanted = set(cfdconditions)
    word_pairs = [
        (genre, word.lower())
        for genre in brown.categories()
        if genre in wanted
        for word in brown.words(categories=genre)
        if word.lower() not in stopword
    ]

    cdev_cfd = ConditionalFreqDist(word_pairs)
    cdev_cfd.tabulate(conditions=cfdconditions, samples=cfdevents)

    # Replace each remaining token by its suffix label.  The original left
    # stopwords ending in 'ing'/'ed' unlabelled, so they never contributed to
    # these two columns; reusing the stopword-free pairs gives equal counts.
    suffix_pairs = []
    for genre, word in word_pairs:
        if word.endswith('ing'):
            suffix_pairs.append((genre, 'ing'))
        elif word.endswith('ed'):
            suffix_pairs.append((genre, 'ed'))

    inged_cfd = ConditionalFreqDist(suffix_pairs)
    inged_cfd.tabulate(conditions=cfdconditions, samples=['ed', 'ing'])

网友

2楼 · 编辑于 2024-05-15 03:13:22

请尝试下面的代码

# Build the stopword set once: membership is tested for every corpus token,
# and a set lookup is constant-time whereas a list lookup is linear.
stop = set(stopwords.words('english'))

# [genre, lower-cased token] for every non-stopword token of each requested genre.
temp = [
    [genre, word.lower()]
    for genre in cfdconditions
    for word in brown.words(categories=genre)
    if word.lower() not in stop
]

# Table 1: counts of the requested words per genre.
cdev_cfd = nltk.ConditionalFreqDist(temp)
cdev_cfd.tabulate(conditions=cfdconditions, samples=cfdevents)

# Table 2: collapse each token to its suffix label; other tokens are dropped.
lst = []
for genre, word in temp:
    if word.endswith('ing'):
        lst.append((genre, 'ing'))
    elif word.endswith('ed'):
        lst.append((genre, 'ed'))

inged_cfd = nltk.ConditionalFreqDist(lst)
inged_cfd.tabulate(conditions=cfdconditions, samples=['ed', 'ing'])

网友

3楼 · 编辑于 2024-05-15 03:13:22

像下面那样单独计算cdev_cfd，不要再将其转换为列表。剩下的代码看起来不错

# Count lower-cased, non-stopword tokens per requested genre, feeding the
# (genre, token) pairs straight into the distribution.
cdev_cfd = nltk.ConditionalFreqDist(
    (genre, token.lower())
    for genre in cfdconditions
    for token in brown.words(categories=genre)
    if token.lower() not in stopword
)

相关问题更多 >

编程相关推荐

热门问题

热门文章