
2024-05-16 17:49:04 发布

您现在位置:Python中文网/ 问答频道 /正文



Hollywood is a neighborhood in the central region of Los Angeles.


Los Angeles


import itertools
import string
import operator

# Sample input: travel directions containing several multi-word capitalized names.
text = (
    "Take any tram, U-bahn or bus which stops at Düsseldorf Hauptbahnhof (HBF). "
    "Leave the station via the main exit Konrad Adenauer Platz, "
    "you will see trams and buses in front of the station. "
    "Walk up Friedrich Ebert Straße turning right into the third street "
    "which is the Oststraße."
)

def fold(it):
    """Combine each item with its successor using ``&``.

    Yields ``it[i] & it[i + 1]`` for every adjacent pair, so the result is
    one item shorter than the input (and empty for fewer than two items).
    The result is a lazy iterator.
    """
    current, following = itertools.tee(it)
    # Drop the first item of the second copy so the two streams are offset by one.
    successors = itertools.islice(following, 1, None)
    return (left & right for left, right in zip(current, successors))

def unfold(it):
    """Combine each item with its predecessor using ``|``, padding with False.

    The input is paired with a copy of itself shifted by one position; the
    missing neighbour at each end is ``False``. The result is a lazy iterator
    that is one item longer than the input.
    """
    leading, trailing = itertools.tee(it)
    padded_at_end = itertools.chain(leading, [False])
    padded_at_start = itertools.chain([False], trailing)
    return (left | right for left, right in zip(padded_at_end, padded_at_start))

def ngrams(it, n):
    """Apply ``fold`` repeatedly, then ``unfold`` the same number of times.

    For ``n <= 1`` the input object is returned unchanged. Otherwise ``fold``
    is applied once per unit of ``n`` above 1, followed by an equal number of
    ``unfold`` passes, which restores the original length.
    """
    passes = 0
    while n > 1:
        it = fold(it)
        n -= 1
        passes += 1
    for _ in range(passes):
        it = unfold(it)
    return it

def ngrams_idx(it, n):
    """Yield index lists for each consecutive run of truthy ``ngrams`` flags.

    The flags produced by ``ngrams(it, n)`` are numbered, grouped into
    consecutive runs of equal value, and for every truthy run the ascending
    list of its positions is yielded. The result is a lazy generator.
    """
    numbered = enumerate(ngrams(it, n))
    runs = itertools.groupby(numbered, key=operator.itemgetter(1))
    # Each group must be drained before groupby advances, hence the inner list.
    return ([position for position, _ in members] for flag, members in runs if flag)

def booleanize(text_vec):
    """Flag each token by whether it starts with an uppercase letter.

    Args:
        text_vec: iterable of string tokens.

    Returns:
        A lazy iterator of booleans, one per token, in input order.

    Uses ``str.isupper`` rather than an ASCII-only membership test so that
    capitals such as ``Ö`` or ``É`` are recognised. Slicing with ``[:1]``
    makes an empty token yield ``False`` instead of raising ``IndexError``.
    """
    return (token[:1].isupper() for token in text_vec)

def ngrams_phrase(text_vec, n):
    """Return the phrases selected by ``ngrams_idx`` over the booleanized tokens.

    ``text_vec`` is indexed by position, so it must be a sequence. Each index
    list yielded by ``ngrams_idx(booleanize(text_vec), n)`` becomes one phrase:
    the tokens at those positions joined by single spaces.
    """
    flags = booleanize(text_vec)
    return [
        ' '.join(text_vec[position] for position in positions)
        for positions in ngrams_idx(flags, n)
    ]


Tags: the, text, in, import, return, def, it, fold

我认为入口调用应该是ngrams_phrase(text.split(), 2)。OP要查找的是所有由至少2个连续的首字母大写单词组成的短语,例如,将上面的代码片段与text一起运行,结果为["Düsseldorf Hauptbahnhof", "Konrad Adenauer Platz,", "Friedrich Ebert Straße"](单词后面的标点会被原样保留)。


from itertools import takewhile

# Sample input: travel directions containing several multi-word capitalized names.
text = (
    "Take any tram, U-bahn or bus which stops at Düsseldorf Hauptbahnhof (HBF). "
    "Leave the station via the main exit Konrad Adenauer Platz, "
    "you will see trams and buses in front of the station. "
    "Walk up Friedrich Ebert Straße turning right into the third street "
    "which is the Oststraße."
)

def take_upper(text):
    """Group whitespace-separated words into runs of capitalized words.

    Returns a list of lists; each inner list is a maximal run of consecutive
    words whose first character is uppercase. Single-word runs are included.
    """
    runs = []
    open_run = None
    for word in text.split():
        if not word[0].isupper():
            # A lowercase-initial word ends whatever run was in progress.
            open_run = None
        elif open_run is None:
            open_run = [word]
            runs.append(open_run)
        else:
            open_run.append(word)
    return runs

def remove_singles(text_uppers):
    """Return only the entries of ``text_uppers`` that have two or more items."""
    return [run for run in text_uppers if len(run) >= 2]



# reduce is no longer a builtin on Python 3; functools.reduce works on 2.6+ and 3.x.
from functools import reduce

p = "Hollywood is a neighborhood in the central region of Los Angeles.".split()


def _pair_step(state, x):
    """Reducer step: collect adjacent word pairs that both start uppercase.

    ``state`` is ``(collected, previous_word)``. When both the previous word
    and ``x`` start with an uppercase character, the pair is appended to a new
    list; otherwise the collected list is passed through. ``x`` always becomes
    the new previous word.

    Written as a named function because tuple parameters in a lambda
    (``lambda (l, v), x: ...``) are a syntax error on Python 3.

    NOTE: every qualifying adjacent pair is appended, so in a run of three or
    more capitalized words the middle words appear twice in the result.
    """
    collected, previous = state
    if previous[0].isupper() and x[0].isupper():
        return collected + [previous, x], x
    return collected, x


# The lowercase seed "a" ensures the first word is never paired with a predecessor.
t, _ = reduce(_pair_step, p, ([], "a"))
['Los', 'Angeles.']

相关问题 更多 >