如何从马尔可夫链输出创建段落？

"""
    from:  http://code.activestate.com/recipes/194364-the-markov-chain-algorithm/?in=lang-python
"""

import random
import sys

# Sentinel token: input is split on whitespace, so "\n" can never be a real word.
stopword = "\n"
# A word ending in one of these characters closes a sentence.
stopsentence = (".", "!", "?",)
# Appended after every printed sentence.
sentencesep = "\n"

# GENERATE TABLE
# table maps each pair of consecutive tokens to the list of tokens that
# followed that pair in the text read from stdin.
w1 = w2 = stopword
table = {}
for line in sys.stdin:
    for word in line.split():
        if word[-1] in stopsentence:
            # Record the word without its trailing punctuation first; the
            # punctuation mark is then recorded below as a token of its own.
            stem = word[:-1]
            table.setdefault((w1, w2), []).append(stem)
            w1, w2, word = w2, stem, word[-1]
        table.setdefault((w1, w2), []).append(word)
        w1, w2 = w2, word
# Mark the end of the file
table.setdefault((w1, w2), []).append(stopword)

# GENERATE SENTENCE OUTPUT
maxsentences = 20
w1 = w2 = stopword
sentencecount = 0
sentence = []
while sentencecount < maxsentences:
    newword = random.choice(table[w1, w2])
    if newword == stopword:
        # The end-of-file marker was drawn: stop generating.
        sys.exit()
    if newword not in stopsentence:
        sentence.append(newword)
    else:
        # Punctuation ends the sentence: print it and start a new one.
        print("%s%s%s" % (" ".join(sentence), newword, sentencesep))
        sentence = []
        sentencecount += 1
    w1, w2 = w2, newword

elif newword in stopsentence: print ("%s%s" % (" ".join(sentence), newword), end=" ") sentence = [] # I have to be here to make the new sentence start as an empty list!!! sentencecount += 1 # increment the sentence counter

###
# usage: python markov_sentences.py < input.txt > output.txt
# from: http://code.activestate.com/recipes/194364-the-markov-chain-algorithm/?in=lang-python
###

import random
import sys

# Sentinel token: input is split on whitespace, so "\n" can never be a real word.
stopword = "\n"
# A word ending in one of these characters closes a sentence.
stopsentence = (".", "!", "?",)
# String used to separate sentences
sentencesep = "\n"

# GENERATE TABLE
# table maps each pair of consecutive tokens to the list of tokens that
# followed that pair in the text read from stdin.
w1 = w2 = stopword
table = {}
for line in sys.stdin:
    for word in line.split():
        if word[-1] in stopsentence:
            # Record the word without its trailing punctuation first; the
            # punctuation mark is then recorded below as a token of its own.
            stem = word[:-1]
            table.setdefault((w1, w2), []).append(stem)
            w1, w2, word = w2, stem, word[-1]
        table.setdefault((w1, w2), []).append(word)
        w1, w2 = w2, word
# Mark the end of the file
table.setdefault((w1, w2), []).append(stopword)

# GENERATE SENTENCE OUTPUT
maxsentences = 20
w1 = w2 = stopword
sentencecount = 0
sentence = []
paragraphsep = "\n"
# Number of sentences still to print in the current paragraph (1 to 4).
count = random.randrange(1,5)
while sentencecount < maxsentences:
    newword = random.choice(table[w1, w2])  # random word from word table
    if newword == stopword:
        # The end-of-file marker was drawn: stop generating.
        sys.exit()
    if newword not in stopsentence:
        sentence.append(newword)
    else:
        # Sentences of one paragraph stay on the same line, separated by a space.
        print("%s%s" % (" ".join(sentence), newword), end=" ")
        sentence = []
        sentencecount += 1
        count -= 1
        if count == 0:
            # Paragraph finished: break the line and draw the next paragraph length.
            print(paragraphsep)
            count = random.randrange(1,5)
    w1, w2 = w2, newword
# EOF

2条回答

网友

1楼 · 编辑于 2024-05-15 05:03:21

你需要把

sentence = []

这一行重新放回到

elif newword in stopsentence:

子句中。

所以

# Outer loop: print one paragraph per pass until maxparagraphs have been printed.
while paragraphs < maxparagraphs:
    # Each paragraph restarts the chain from the (stopword, stopword) state.
    w1 = w2 = stopword
    stopsentence = (".", "!", "?",)
    sentence = []
    sentencecount = 0
    maxsentences = random.randrange(1,5)  # 1 to 4 sentences in this paragraph

    # Inner loop: draw words until this paragraph has maxsentences sentences.
    while sentencecount < maxsentences:
        newword = random.choice(table[w1, w2])
        if newword == stopword:
            sys.exit()
        if newword not in stopsentence:
            sentence.append(newword)
        else:
            # Punctuation closes the sentence; keep it on the current line.
            print("%s%s" % (" ".join(sentence), newword), end=" ")
            # The word list must be emptied here so the next sentence starts fresh.
            sentence = []
            sentencecount += 1
        w1, w2 = w2, newword

    print(paragraphsep)  # end the paragraph
    paragraphs += 1

编辑

这里有一个不使用外部循环的解决方案。

"""
    from:  http://code.activestate.com/recipes/194364-the-markov-chain-algorithm/?in=lang-python
"""

import random;
import sys;

stopword = "\n" # Since we split on whitespace, this can never be a word
stopsentence = (".", "!", "?",) # Cause a "new sentence" if found at the end of a word
sentencesep  = "\n" # String used to separate sentences (not used by the paragraph output below)


# GENERATE TABLE
# table maps each pair of consecutive tokens (w1, w2) to the list of tokens
# that followed that pair in the text read from stdin.
w1 = stopword
w2 = stopword
table = {}

for line in sys.stdin:
    for word in line.split():
        if word[-1] in stopsentence:
            # Record the word without its trailing punctuation first; the
            # punctuation mark is then recorded below as a token of its own.
            table.setdefault( (w1, w2), [] ).append(word[0:-1])
            w1, w2 = w2, word[0:-1]
            word = word[-1]
        table.setdefault( (w1, w2), [] ).append(word)
        w1, w2 = w2, word
# Mark the end of the file
table.setdefault( (w1, w2), [] ).append(stopword)

# GENERATE SENTENCE OUTPUT
maxsentences  = 20

w1 = stopword
w2 = stopword
sentencecount = 0
sentence = []
# Fixed: this line used "==" (a comparison), so paragraphsep was never
# defined and the script raised NameError before printing anything.
paragraphsep = "\n\n"
# Number of sentences still to print in the current paragraph (1 to 4).
count = random.randrange(1,5)

while sentencecount < maxsentences:
    newword = random.choice(table[(w1, w2)])
    # Drawing the end-of-file marker ends generation immediately.
    if newword == stopword: sys.exit()
    if newword in stopsentence:
        # Sentences of one paragraph stay on the same line, separated by a space.
        print ("%s%s" % (" ".join(sentence), newword), end=" ")
        sentence = []
        sentencecount += 1
        count -= 1
        if count == 0:
            # Paragraph finished: draw the next paragraph length and break the line.
            count = random.randrange(1,5)
            print (paragraphsep)
    else:
        sentence.append(newword)
    w1, w2 = w2, newword

网友

2楼 · 编辑于 2024-05-15 05:03:21

你看得懂这段代码吗？我敢说你能找到打印句子的那部分，然后把它改成连续打印几个句子而不换行。你还可以在生成句子的那部分代码外面再套一个 while 循环，以得到多个段落。

语法提示：

print 'hello'
print 'there'
hello
there

print 'hello',
print 'there'
hello there

print 'hello',
print 
print 'there'

关键在于：print 语句末尾的逗号会阻止在行尾输出换行符，而不带参数的 print 语句只输出一个换行符。（以上是 Python 2 的写法；在 Python 3 中对应的写法是 print('hello', end=' ') 和 print()。）

相关问题更多 >

编程相关推荐

热门问题

热门文章