字母游戏问题？

Question

最近在一次面试中，我遇到了以下问题：

写一个可以在命令行运行的脚本，使用python。
这个脚本应该能够接收两个单词作为命令行输入（或者你也可以选择让用户通过控制台输入这两个单词）。
给定这两个单词： a. 确保它们的长度相同 b. 确保它们都是你下载的有效英语单词字典中的单词。
如果满足条件，计算是否可以通过以下步骤从第一个单词到达第二个单词： a. 每次只能改变一个字母 b. 每次改变字母后，得到的新单词也必须在字典中存在 c. 不能添加或删除字母。
如果这两个单词可以互相到达，脚本应该打印出从一个单词到另一个单词的最短路径。
你可以使用/usr/share/dict/words作为你的单词字典。

我的解决方案是使用广度优先搜索来找到两个单词之间的最短路径。但显然这并不足以让我得到这份工作 :(

你们知道我可能哪里做错了吗？非常感谢。

import collections
import functools
import re

def time_func(func):
    import time

    def wrapper(*args, **kwargs):
        start = time.time()
        res = func(*args, **kwargs)
        timed = time.time() - start

        setattr(wrapper, 'time_taken', timed)
        return res

    functools.update_wrapper(wrapper, func)
    return wrapper

class OneLetterGame:
    def __init__(self, dict_path):
        self.dict_path = dict_path
        self.words = set()

    def run(self, start_word, end_word):
        '''Runs the one letter game with the given start and end words.
        '''
        assert len(start_word) == len(end_word), \
            'Start word and end word must of the same length.'

        self.read_dict(len(start_word))

        path = self.shortest_path(start_word, end_word)
        if not path:
            print 'There is no path between %s and %s (took %.2f sec.)' % (
                start_word, end_word, find_shortest_path.time_taken)
        else:
            print 'The shortest path (found in %.2f sec.) is:\n=> %s' % (
                self.shortest_path.time_taken, ' -- '.join(path))

    def _bfs(self, start):
        '''Implementation of breadth first search as a generator.

        The portion of the graph to explore is given on demand using get_neighboors.
        Care was taken so that a vertex / node is explored only once.
        '''
        queue = collections.deque([(None, start)])
        inqueue = set([start])

        while queue:
            parent, node = queue.popleft()
            yield parent, node

            new = set(self.get_neighbours(node)) - inqueue
            inqueue = inqueue | new
            queue.extend([(node, child) for child in new])

    @time_func
    def shortest_path(self, start, end):
        '''Returns the shortest path from start to end using bfs.
        '''
        assert start in self.words, 'Start word not in dictionnary.'
        assert end in self.words, 'End word not in dictionnary.'

        paths = {None: []}
        for parent, child in self._bfs(start):
            paths[child] = paths[parent] + [child]
            if child == end:
                return paths[child]
        return None

    def get_neighbours(self, word):
        '''Gets every word one letter away from the a given word.

        We do not keep these words in memory because bfs accesses 
        a given vertex only once.
        '''
        neighbours = []

        p_word = ['^' + word[0:i] + '\w' + word[i+1:] + '$' 
            for i, w in enumerate(word)]
        p_word = '|'.join(p_word)

        for w in self.words:
            if w != word and re.match(p_word, w, re.I|re.U):
                neighbours += [w]
        return neighbours

    def read_dict(self, size):
        '''Loads every word of a specific size from the dictionnary into memory.
        '''
        for l in open(self.dict_path):
            l = l.decode('latin-1').strip().lower()
            if len(l) == size:
                self.words.add(l)

if __name__ == '__main__':
    import sys
    if len(sys.argv) not in [3, 4]:
        print 'Usage: python one_letter_game.py start_word end_word'
    else:
        g = OneLetterGame(dict_path = '/usr/share/dict/words')
        try:
            g.run(*sys.argv[1:])
        except AssertionError, e:
            print e

感谢大家的精彩回答。我觉得真正让我困惑的是，我每次都要遍历字典中的所有单词来考虑可能的单词邻居。其实我可以使用反向索引，正如Duncan和Matt Anderson所指出的那样。A*算法也肯定会有所帮助。非常感谢，现在我知道我哪里做错了。

这里是使用反向索引的相同代码：

import collections
import functools
import re

def time_func(func):
    import time

    def wrapper(*args, **kwargs):
        start = time.time()
        res = func(*args, **kwargs)
        timed = time.time() - start

        setattr(wrapper, 'time_taken', timed)
        return res

    functools.update_wrapper(wrapper, func)
    return wrapper

class OneLetterGame:
    def __init__(self, dict_path):
        self.dict_path = dict_path
        self.words = {}

    def run(self, start_word, end_word):
        '''Runs the one letter game with the given start and end words.
        '''
        assert len(start_word) == len(end_word), \
            'Start word and end word must of the same length.'

        self.read_dict(len(start_word))

        path = self.shortest_path(start_word, end_word)
        if not path:
            print 'There is no path between %s and %s (took %.2f sec.)' % (
                start_word, end_word, self.shortest_path.time_taken)
        else:
            print 'The shortest path (found in %.2f sec.) is:\n=> %s' % (
                self.shortest_path.time_taken, ' -- '.join(path))

    def _bfs(self, start):
        '''Implementation of breadth first search as a generator.

        The portion of the graph to explore is given on demand using get_neighboors.
        Care was taken so that a vertex / node is explored only once.
        '''
        queue = collections.deque([(None, start)])
        inqueue = set([start])

        while queue:
            parent, node = queue.popleft()
            yield parent, node

            new = set(self.get_neighbours(node)) - inqueue
            inqueue = inqueue | new
            queue.extend([(node, child) for child in new])

    @time_func
    def shortest_path(self, start, end):
        '''Returns the shortest path from start to end using bfs.
        '''
        assert self.in_dictionnary(start), 'Start word not in dictionnary.'
        assert self.in_dictionnary(end), 'End word not in dictionnary.'

        paths = {None: []}
        for parent, child in self._bfs(start):
            paths[child] = paths[parent] + [child]
            if child == end:
                return paths[child]
        return None

    def in_dictionnary(self, word):
        for s in self.get_steps(word):
            if s in self.words:
                return True
        return False

    def get_neighbours(self, word):
        '''Gets every word one letter away from the a given word.
        '''
        for step in self.get_steps(word):
            for neighbour in self.words[step]:
                yield neighbour

    def get_steps(self, word):
        return (word[0:i] + '*' + word[i+1:] 
            for i, w in enumerate(word))

    def read_dict(self, size):
        '''Loads every word of a specific size from the dictionnary into an inverted index.
        '''
        for w in open(self.dict_path):
            w = w.decode('latin-1').strip().lower()
            if len(w) != size:
                continue
            for step in self.get_steps(w):
                if step not in self.words:
                    self.words[step] = [] 
                self.words[step].append(w)

if __name__ == '__main__':
    import sys
    if len(sys.argv) not in [3, 4]:
        print 'Usage: python one_letter_game.py start_word end_word'
    else:
        g = OneLetterGame(dict_path = '/usr/share/dict/words')
        try:
            g.run(*sys.argv[1:])
        except AssertionError, e:
            print e

还有一个时间比较：

% python one_letter_game_old.py happy hello 找到的最短路径（耗时 91.57秒）是：
=> happy -- harpy -- harps -- harts -- halts -- halls -- hells -- hello

% python one_letter_game.py happy hello 找到的最短路径（耗时 1.71秒）是：
=> happy -- harpy -- harps -- harts -- halts -- halls -- hells -- hello

字符串操作 A*算法广度优先搜索最短路径反向索引命令行输入字母游戏单词字典

字母游戏问题？

5 个回答

撰写回答