# Word-by-word state machine: collect the words that follow ``speaker``'s
# heading until the heading of another character shows up.
# Expects ``script``, ``speaker``, ``character_names``, ``recording`` and
# ``spoken_text`` to be defined before this loop runs.
# NOTE(review): presumably ``script`` is an iterable of words (e.g. the
# result of ``text.split()``); iterating a plain string would yield single
# characters -- confirm against the caller.
for word in script:
    # Only all-caps tokens are treated as speaker headings; you may want to
    # check that the heading is on its own line as well.
    if word.isupper():
        if word == speaker:
            recording = True
            # The heading itself is not dialogue, so do not record it.
            continue
        if word in character_names:
            recording = False
    if recording:
        spoken_text += word + " "
import re
from collections import Counter, defaultdict
# Per-speaker word frequencies: maps a speaker name to a Counter of the
# whitespace-separated words on the lines attributed to that speaker.
# Expects ``SCRIPT`` (the full script text) to be defined beforehand.
words_spoken = defaultdict(Counter)
# Lines seen before the first speaker heading are attributed to 'Narrator'.
currently_speaking = 'Narrator'
for line in SCRIPT.split('\n'):
    # Drop the "(CONT'D)" continuation marker so that a continued heading
    # still reduces to the bare name.
    name = line.replace('(CONT\'D)', '').strip()
    if re.match('^[A-Z]+$', name):
        # A line consisting solely of capital letters A-Z switches speaker.
        # Names containing spaces or punctuation do not match this pattern.
        currently_speaking = name
    else:
        words_spoken[currently_speaking].update(line.split())
import re
# Tally the number of words each speaker says, keyed by speaker name.
# Expects ``script`` (the full script text) to be defined beforehand.
speaker = ''  # current speaker; '' until the first speaker heading is seen
words = 0  # number of words on the most recent dialogue line
word_count = {}  # dict of speakers and the number of words they speak
for line in script.split('\n'):
    # A speaker heading is indented by exactly 19 spaces.
    if re.match('^[ ]{19}[^ ]{1,}.*', line):
        # Keep only the name: cut any " (...)" suffix, then the indent.
        speaker = line.split(' (')[0][19:]
    # A dialogue line is indented by exactly 6 spaces.
    if re.match('^[ ]{6}[^ ]{1,}.*', line):
        words = len(line.split())
        word_count[speaker] = word_count.get(speaker, 0) + words
如果 JOHN DOE 说了 55 个单词,则生成格式为 {'JOHN DOE': 55} 的 dict。
输出示例:
^{pr2}$
你的实现
下面是上述过程的一个版本,它近似于您的实现。
import re
def wordsspoken(script, name):
    """Print and return the number of words spoken by *name* in *script*.

    A line indented by exactly 19 spaces is read as a speaker heading; the
    name is the text after the indent, up to an optional `` (`` suffix.
    A line indented by exactly 6 spaces is read as dialogue and is credited
    to the most recent speaker heading. Words are whitespace-separated.

    Args:
        script: The full script text, with lines separated by ``'\\n'``.
        name: The speaker name to count words for.

    Returns:
        The word count, which is also printed to standard output.
    """
    word_count = 0
    # Dialogue may appear before any speaker heading; start with no speaker
    # so such lines are skipped instead of raising UnboundLocalError.
    speaker = None
    for line in script.split('\n'):
        if re.match(r'^[ ]{19}[^ ]{1,}.*', line):  # name of speaker
            speaker = line.split(' (')[0][19:]
        if re.match(r'^[ ]{6}[^ ]{1,}.*', line):  # dialogue line
            if speaker is not None and speaker == name:
                word_count += len(line.split())
    print(word_count)
    return word_count
def main():
    """Prompt for two speaker names and print each one's word count.

    Reads the script text from the module-level name ``script`` and passes
    it to ``wordsspoken`` together with each name the user enters.
    """
    name = input("Enter name:")
    wordsspoken(script, name)
    name1 = input("Enter another name:")
    wordsspoken(script, name1)
我会先向用户询问脚本中的所有角色名称,然后问他们想统计哪一个名字。我会逐词搜索文本,直到找到想要的名称,然后将其后的单词复制到变量中,直到遇到与脚本中其他角色匹配的名称。当然,台词里也可能提到另一个角色的名字,但如果假设说话人的标题要么全部大写,要么单独占一行,那么文本应该很容易过滤。
如果你想只遍历一次脚本就完成统计(我想脚本可能会很长),可以跟踪当前是哪个角色在说话;设置一个小的状态机:
您可以使用更复杂的 regex 来检测说话人何时发生变化,但这样应该已经够用了。
demo
我将概述如何生成一个 dict,它可以给出所有说话人所说的单词数量,以及另一个与您现有实现近似的版本。
一般用途
如果我们将单词定义为字符串按 ' '(空格)拆分后得到的任意字符块……
如果 JOHN DOE 说了 55 个单词,则生成格式为
{'JOHN DOE': 55}
的 dict。输出示例:
^{pr2}$你的实现
下面是上述过程的一个版本,它近似于您的实现。
相关问题 更多 >
编程相关推荐