Improving the performance of a Python puzzle
I was given a puzzle as part of an interview, and I learned yesterday that I was not selected (because I could not get the puzzle to perform well enough). I am wondering whether anyone can help me with this puzzle so that I can do better next time. It is written in Python; although I took a couple of Python courses a year or two ago, I am still fairly new to it compared with the area I worked in before (18 years of embedded C). Any help or advice is appreciated so that I can learn from it. The puzzle was submitted for interview consideration.
The puzzle challenge is described as follows:

Consider a "word" to be any sequence of capital letters A-Z (not limited to "dictionary words"). For any word with at least two different letters, there are other words composed of the same letters in a different order (for instance, STATIONARILY and ANTIROYALIST are both dictionary words; for our purposes, "AAIILNORSTTY" is also a "word" composed of the letters of these two). We can then assign a number to every word, based on where it falls in an alphabetically sorted list of all words made up of the same letters. One way to do this would be to generate the entire list of words and find the desired one, but this would be slow if the word is long. Write a program which takes a word as a command line argument and prints its number to standard output. Do not use the method above of generating the entire list. Your program should be able to accept any word of 20 letters or less (possibly with some letters repeated), and should use no more than 1 GB of memory and take no more than 500 milliseconds to run. Any answer we check will fit in a 64-bit integer.

Sample words, with their rank:

ABAB = 2
AAAB = 1
BAAA = 4
QUESTION = 24572
BOOKKEEPER = 10743
NONINTUITIVENESS = 8222334634

Your program will be judged on how fast it runs and on how clearly the code is written. We will be running your program as well as reading the source code, so anything you can do to make this process easier would be appreciated.
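For reference, the "generate the entire list" approach that the statement rules out still makes a handy correctness check for short inputs. A minimal sketch (my own, using only the standard library), not a submission:

from itertools import permutations

def rank_by_enumeration(word):
    # Brute force: sort every distinct rearrangement and locate the word.
    # Only practical for short words; the puzzle asks for something much faster.
    variants = sorted(set(permutations(word)))
    return variants.index(tuple(word)) + 1  # ranks are 1-based

print(rank_by_enumeration("ABAB"))  # 2
print(rank_by_enumeration("BAAA"))  # 4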
How to run the puzzle: you can either enter a word on the command line (the current state of the code), or, if you want to read the words listed above from a file, comment out the raw_input code that accepts a single word and uncomment the code that reads the words.txt file.

In the main part of the program:

Taking words one at a time from the command line (the current state of the code -- it will accept your word as command line input):

getInputFromCommandLine()

-- run it like this: python athenaPuzzleIterDeep.py

If you want to read the input from the words.txt file instead, uncomment the following (words.txt will be provided along with the code):

wordList = loadWords()
wordNumberOrdering(wordList)

-- run it like this: python athenaPuzzleIterDeep.py
-- but you must also make sure that words.txt is in the same directory as the Python program
Performance improvements I tried that were not good enough:

Iterative deepening: I tried iterative deepening to get DFS's space advantage together with BFS's time and shallow-solution advantages. The idea is to run DFS with a depth limit: try a tree depth of 1, then 2, 3, and so on. Rather than building the entire graph, at each tree level DFS is called to see whether a solution has been found. DFS searches down the left-hand children of the tree first but eventually visits every node, so it takes a long time while not using much space. If you borrow BFS's level-limiting idea, building the tree only level by level and then searching it with DFS, that is iterative deepening. A generic sketch of the technique is shown below.

Iterative deepening did not provide the needed performance improvement. I also tried to bring in a priority-queue Python library, but I could not get it to install correctly on my version of Linux.
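For reference, iterative deepening in its textbook form looks roughly like the following. This is a minimal illustration of the technique only, not the puzzle code; children_of stands in for whatever successor function the graph provides:

def depth_limited_dfs(node, goal, children_of, limit):
    # Depth-first search that refuses to descend more than `limit` levels.
    if node == goal:
        return node
    if limit == 0:
        return None
    for child in children_of(node):
        found = depth_limited_dfs(child, goal, children_of, limit - 1)
        if found is not None:
            return found
    return None

def iterative_deepening(start, goal, children_of, max_depth=20):
    # Re-run depth-limited DFS with limit 0, 1, 2, ... until the goal turns up.
    for limit in range(max_depth + 1):
        found = depth_limited_dfs(start, goal, children_of, limit)
        if found is not None:
            return found
    return None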
Contents of the words.txt file:
ABAB
AAAB
BAAA
QUESTION
ABCDEFGHIJKLMNOPQRSTUVWXYZ
BOOKKEEPER
BOOKKEEPERS
STATIONARILY
NONINTUITIVENESS
Here is the code:
import random
import string
from math import factorial
import itertools
from functools import update_wrapper
import time
import sys
sys.setrecursionlimit(5000)
# works for functions with hashable (immutable) arguments
# Example usage: permutations = memoize(itertools.permutations)
ALPHABET_LETTERS = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']

globalMemCache = {}

def memoize(f):
    # define "wrapper" function that checks cache for
    # previously computed answer, only calling f if this
    # is a new problem.
    def memf(*x):
        permutationsInCache = []
        if x not in memf.cache:
            memf.cache[x] = f(*x)
        return memf.cache[x]
    # initialize wrapper function's cache. store cache as
    # attribute of function so we can look at its value.
    memf.cache = globalMemCache
    return memf
def isValidWord(word):
    lenWord = len(word)
    if (lenWord > 20):
        print "word > 20 letters is NOT acceptable as input"
        print " "
        return False
    elif (lenWord >= 11):
        print "word >= 11 letters is NOT acceptable as input for this current iterative deepening solution"
        print "my iterative deepening solution takes too much time and space for words >= 11 letters"
        print " "
        return False
    wordInAlphabet = True
    for letter in word:
        if (wordInAlphabet != True) or (letter not in ALPHABET_LETTERS):
            wordInAlphabet = False
    return wordInAlphabet

permutationsMemoized = memoize(itertools.permutations)

WORDLIST_FILENAME = "words.txt"

def loadWords():
    print "Loading word list from file..."
    inFile = open(WORDLIST_FILENAME, 'r', 0)
    wordList = []
    for line in inFile:
        wordList.append(line.strip().lower())
    print " ", len(wordList), "words loaded."
    return wordList

def remove_duplicates(l):
    return list(set(l))

def printPath(path):
    result = ''
    for i in range(len(path)):
        if i == len(path) - 1:
            result = result + str(path[i])
        else:
            result = result + str(path[i]) + '->'
    return result
class Node(object):
    def __init__(self, name, index):
        self.name = str(name)
        self.index = index
    def getName(self):
        return self.name
    def getIndex(self):
        return self.index
    def __str__(self):
        return self.name

class Edge(object):
    def __init__(self, src, dest):
        self.src = src
        self.dest = dest
    def getSource(self):
        return self.src
    def getDestination(self):
        return self.dest
    def __str__(self):
        return str(self.src) + '->' + str(self.dest)

class Queue:
    def __init__(self):
        self.list = []
    def push(self, item):
        self.list.insert(0, item)
    def pop(self):
        return self.list.pop()
    def isEmpty(self):
        return len(self.list) == 0
def DFSShortest(graph, start, end, path = [], shortest = None, index = 1000):
    newGraph = graph
    path = path + [start]
    if str(start) == str(end):
        index = start.index
        newPath = path
        return newPath, index
    else:
        anyChildren = graph.childrenOf(start)
        if (anyChildren != None) and (index == 1000):
            for node in graph.childrenOf(start):
                if node not in path:  # avoid cycles
                    if (shortest == None) or ((shortest != None) and (len(path) < len(shortest))):
                        newPath, index = DFSShortest(newGraph, node, end, path)
                        if newPath != None:
                            shortest = newPath
                            if (index != 1000):
                                return shortest, index
        elif (anyChildren == None) and (index == 1000):
            newPath, index = DFSShortest(newGraph, graph.parents[start], end, path)
            if newPath != None:
                shortest = newPath
                if (index != 1000):
                    return shortest, index
    return shortest, index

def BFS(graph, start, end, q):
    initPath = [start]
    q.append(initPath)
    while len(q) != 0:
        tmpPath = q.pop(0)
        lastNode = tmpPath[len(tmpPath) - 1]
        if str(lastNode) == str(end):
            return lastNode.index
        if (graph.childrenOf(lastNode) != []):
            printPath(graph.childrenOf(lastNode))
            for linkNode in graph.childrenOf(lastNode):
                if linkNode not in tmpPath:
                    newPath = tmpPath + [linkNode]
                    q.append(newPath)
    return None
class Digraph(object):
    def __init__(self):
        self.nodes = set([])
        self.edges = {}
        self.parents = {}
    def addNode(self, node):
        if node in self.nodes:
            raise ValueError('Duplicate node')
        else:
            self.nodes.add(node)
            self.edges[node] = []
            #print "added edges = [] for node " + str(node)
    def addEdge(self, edge):
        src = edge.getSource()
        dest = edge.getDestination()
        self.edges[src].append(dest)
        self.parents[dest] = src
    def childrenOf(self, node):
        if (self.edges[node]):
            return self.edges[node]
        else:
            return None
    def hasNode(self, node):
        return node in self.nodes
    def __str__(self):
        res = ''
        for k in self.edges:
            for d in self.edges[k]:
                res = res + str(k) + '->' + str(d) + '\n'
        return res[:-1]

class Graph(Digraph):
    def addEdge(self, edge):
        Digraph.addEdge(self, edge)

def addEdgesForTreesWith4Nodes(g, childNode, factorNum, i, lenList, wordNodes):
    if (i + factorNum + 1) < lenList:
        g.addEdge(Edge(wordNodes[childNode + 1], wordNodes[i + factorNum + 1]))
    if (i + factorNum + 2) < lenList:
        g.addEdge(Edge(wordNodes[childNode + 1], wordNodes[i + factorNum + 2]))

def addEdgesForTreesWithMoreThan4Nodes(g, childNode, factorNum, i, lenList, wordNodes, numChildrenNodesThisLevel, numChildrenNodesPreviousLevel):
    if (i + factorNum + numChildrenNodesPreviousLevel) < lenList:
        g.addEdge(Edge(wordNodes[childNode + i], wordNodes[i + factorNum + numChildrenNodesPreviousLevel]))
    if (i + factorNum + numChildrenNodesThisLevel + 1) < lenList:
        g.addEdge(Edge(wordNodes[childNode + i], wordNodes[i + factorNum + numChildrenNodesPreviousLevel + 1]))
"""
Can try using iterative deepening to get the DFS space advantage with BFS's time and shallow
solution advantage. So can try running DFS with depth limits: try depth of tree = 1, then 2, 3,...etc
"""
"""
Also - you can avoid the log(n) overhead in DFS/BFS with a priority queue (had trouble downloading and installing one on my computer!)
"""
def iterativeDeepeningSolution(wordNodes, saveWord, saveWordTuple, lenList):
    #rather than building entire graph, at each tree level, call DFS to see if solution found
    #DFS will search going down left side of tree's child nodes first, but will eventually search
    #every node, so takes too much time while not taking much space. However, if you use the level
    #limitation idea from BFS, only building the tree level by level and then searching it with DFS,
    #that is the idea of iterative deepening.
    index = 0
    q = []
    shortest = None
    saveNodes = wordNodes
    i = 0
    totalNodes = 1
    numChildrenNodesPreviousLevel = 0
    while i < lenList:
        index = 0
        if (i > 0):
            numChildrenNodesPreviousLevel = numChildrenNodesThisLevel
        numChildrenNodesThisLevel = 2**i  #number of children nodes at level
        if (i > 0):
            totalNodes += numChildrenNodesThisLevel
        if (numChildrenNodesThisLevel > 1) and (numChildrenNodesThisLevel <= 32):  #only search 32 children nodes or less (level 5 of tree, 2**5 = 32):
            #print "build graph - if previous level already searched - just add this level of children nodes"
            if (numChildrenNodesThisLevel == 2):  #new graph since none built when it was just a root node
                g = Graph()
                for n in range(numChildrenNodesThisLevel + 1):
                    g.addNode(wordNodes[n])
            else:  #use graph from last level of children added - don't rebuild graph
                n = numChildrenNodesThisLevel - 1
                while (n < lenList) and (n < (totalNodes)):
                    g.addNode(wordNodes[n])
                    n += 1
        elif (numChildrenNodesThisLevel > 32):  #only search 32 children nodes or less (level 5 of tree, 2**5 = 32)
            print "word graph just searched: " + str(saveWord)
            print "cannot go further searching in iterative deepening - tree will take too much space and time to search"
            print "Tree Level = " + str(i) + " num children at this level " + str(numChildrenNodesThisLevel) + " total nodes in graph " + str(totalNodes)
            print "Last Level Searched " + str(i - 1) + " num children at this level " + str(numChildrenNodesPreviousLevel) + " total nodes in graph " + str(totalNodes - numChildrenNodesThisLevel)
            print " "
            return
        if (numChildrenNodesThisLevel > 2):
            childNode = 0
            while childNode < numChildrenNodesPreviousLevel:
                if (childNode > 0):
                    factorNum = childNode * 2
                else:
                    factorNum = childNode
                if (numChildrenNodesThisLevel == 4):
                    addEdgesForTreesWith4Nodes(g, childNode, factorNum, i, lenList, wordNodes)
                elif (numChildrenNodesThisLevel > 4):
                    addEdgesForTreesWithMoreThan4Nodes(g, childNode, factorNum, i, lenList, wordNodes, numChildrenNodesThisLevel, numChildrenNodesPreviousLevel)
                childNode += 1
            startNode = wordNodes[0]
            endNode = Node(str(saveWordTuple), 0)
            index = 1000
            path, index = DFSShortest(g, startNode, endNode, q, shortest, index)
            if (index != 1000):  #made up error - not searching 1000 nodes or more at this time - soln found
                print saveWord + " = " + str(index + 1)
                print " "
                return
            i += 1
            wordNodes = saveNodes
        elif (numChildrenNodesThisLevel == 2):  #so new graph just formed of 3 nodes (including root) - no edges on it yet
            g.addEdge(Edge(wordNodes[0], wordNodes[1]))
            g.addEdge(Edge(wordNodes[0], wordNodes[2]))
            startNode = wordNodes[0]
            endNode = Node(str(saveWordTuple), 0)
            index = 1000
            path, index = DFSShortest(g, startNode, endNode, q, shortest, index)
            if (index != 1000):  #made up error - not searching 1000 nodes or more at this time - soln found
                print saveWord + " = " + str(index + 1)
                print " "
                return
            i += 1
            wordNodes = saveNodes
        elif (numChildrenNodesThisLevel == 1):
            startNode = wordNodes[0]
            oneNode = Node(str(saveWordTuple), 0)
            if str(oneNode) == str(startNode):
                print saveWord + " = " + str(startNode.index + 1)
                print " "
                return
            else:
                i += 1
                wordNodes = saveNodes
def wordNumberOrdering(wordList):
    for word in wordList:
        permutationTuples = []
        withDupsList = []
        noDupsList = []
        noDupsStringList = []
        index = 0
        outputDict = {}
        saveWord = ""
        saveWordTuple = []
        wordLen = len(word)
        if (wordLen <= 10):
            saveWord = word
            saveWordTuple = tuple(saveWord,)
            permutationTuples = permutationsMemoized(word)
            for tupleStr in permutationTuples:
                withDupsList.append(tupleStr)
            noDupsList = remove_duplicates(withDupsList)
            lenList = len(noDupsList)
            noDupsList.sort()
            wordNodes = []
            i = 0
            for name in noDupsList:
                wordNodes.append(Node(str(name), i))
                i += 1  #index of list to print when found for this puzzle
            iterativeDeepeningSolution(wordNodes, saveWord, saveWordTuple, lenList)
        elif (wordLen > 20):
            print word
            print "word length too long (> 20 chars): " + str(wordLen)
            print " "
        elif (wordLen >= 11):
            print word
            print "word length too long for this current solution to puzzle (>= 11 chars): " + str(wordLen)
            print " "
def oneWordInputFromCommandLineAtATime(word):
    permutationTuples = []
    withDupsList = []
    noDupsList = []
    noDupsStringList = []
    index = 0
    outputDict = {}
    saveWord = ""
    saveWordTuple = []
    saveWord = word
    saveWordTuple = tuple(saveWord,)
    permutationTuples = permutationsMemoized(word)
    for tupleStr in permutationTuples:
        withDupsList.append(tupleStr)
    noDupsList = remove_duplicates(withDupsList)
    lenList = len(noDupsList)
    noDupsList.sort()
    wordNodes = []
    i = 0
    for name in noDupsList:
        wordNodes.append(Node(str(name), i))
        i += 1  #index of list to print when found for this puzzle
    iterativeDeepeningSolution(wordNodes, saveWord, saveWordTuple, lenList)
def getInputFromCommandLine():
    guessWord = ""
    guessWordLowCase = ""
    validWord = False
    takeInput = True
    while (takeInput == True):
        guessWord = raw_input('Enter word, or a "." to indicate that you are finished: ').decode('utf-8')
        guessWordLowCase = guessWord.lower()
        print "word being considered " + guessWordLowCase
        if (guessWordLowCase == "."):
            takeInput = False
        else:  #otherwise consider this word as an input from command line
            validWord = isValidWord(guessWordLowCase)
            if (validWord == False):
                print guessWordLowCase + " is INVALID"
                print "Invalid word, please try again"
                print " "
            else:
                oneWordInputFromCommandLineAtATime(guessWordLowCase)
    print "Goodbye!"

if __name__ == '__main__':
    #taking input word by word from command line
    getInputFromCommandLine()

    #uncomment the following if you want to take the input from words.txt, a file of words to read in instead
    #wordList = loadWords()
    #wordNumberOrdering(wordList)
3 Answers
I heard about this problem yesterday, from somewhere unrelated to this forum. Below is a simple solution in C. There is some Win32 stuff in it, but it should be easy to convert to Python (or anything else). It relies on the fact that the number of words made from repeated letters can be counted with the multinomial coefficient, (SUM(mi))! / PRODUCT(mi!). It uses lowercase letters (a-z), it is quick and dirty, and there is no error control whatsoever. Here is the program (multinom.exe) in action, along with a second program, multinom2.exe, that solves the problem in reverse: given the letters and an index, it finds the string. The code follows.
multinom.exe question
index = 24572
multinom2.exe noitseuq 24572
string = question
#include <Windows.h>
#include <stdio.h>

ULONGLONG fact( ULONGLONG n )
{
    if ( n == 0 )
        return 1;
    return n * fact(n-1);
}

ULONGLONG multinom( INT mults[] )  // = (SUM(M))! / PROD(M!)
{
    ULONGLONG n = 0;
    for ( INT i=0; i<26; i++ )
        n += mults[i];
    ULONGLONG result = fact(n);
    for ( INT i=0; i<26; i++ )
        if ( mults[i] )
            result /= fact(mults[i]);
    return result;
}

// uses a~z as alphabet; strings up to 20 chars; no safeguards or E/C whatsoever.
INT main ( INT argc, LPSTR* argv )
{
    ULONGLONG index = 1;  // we'll add to this any earlier strings
    CHAR str[21];
    lstrcpy(str, argv[1]);
    INT mults[26] = {0};  // initialize multiplicities to zero
    for ( CHAR *p=str; *p != 0; p++ )  // set multiplicities that are non-zero
    {
        mults[*p - 'a'] += 1;
    }
    for ( CHAR *p = str; *p != 0; p++ )  // iterate through the characters of str
    {
        for ( INT i=0; i < (*p - 'a'); i++ )  // check each character lexicographically before *p
        {
            if ( mults[i] )  // it's in the string; count (as earlier in the list) the strings that start with it
            {
                mults[i] -= 1;
                index += multinom(mults);
                mults[i] += 1;
            }
        }
        // At this point we've counted all the words that start with an earlier character.
        // Any remaining earlier words must match up to this point. So ...
        mults[*p - 'a'] -= 1;  // p will be incremented so, in effect, forget this character and move on
    }
    printf("index = %I64u\n", index);
    return 0;
}
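Since the question asks for Python, a rough, untested translation of the same counting idea might look like this (the names multinomial and word_index are just placeholders of mine; like the C version, it assumes a lowercase a-z input):

from math import factorial

def multinomial(counts):
    # (SUM(m_i))! / PRODUCT(m_i!) -- number of distinct arrangements of the letters
    result = factorial(sum(counts))
    for m in counts:
        result //= factorial(m)
    return result

def word_index(word):
    counts = [0] * 26  # letter multiplicities for 'a'..'z'
    for ch in word:
        counts[ord(ch) - ord('a')] += 1
    index = 1  # ranks are 1-based
    for ch in word:
        c = ord(ch) - ord('a')
        # count every word that starts, at this position, with a strictly smaller letter
        for smaller in range(c):
            if counts[smaller]:
                counts[smaller] -= 1
                index += multinomial(counts)
                counts[smaller] += 1
        # any remaining earlier words must match this character, so consume it and move on
        counts[c] -= 1
    return index

print(word_index("question"))  # expected: 24572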
Here is a non-recursive version, inspired by DSM's solution (the overall idea is entirely his):
from collections import Counter
from math import factorial

def number_of_distinct_permutations(counted):
    result = factorial(sum(counted))
    for each in counted:
        result //= factorial(each)
    return result

def anagram_number(iterable):
    elems = list(iterable)
    tally = Counter()
    index = 1
    while elems:
        current = elems.pop()
        tally[current] += 1
        for item in tally:
            if item < current:
                tally[item] -= 1
                index += number_of_distinct_permutations(tally.values())
                tally[item] += 1
    return index
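To try it against the sample words from the problem statement (a quick driver I added; the function compares the characters directly, so it works on uppercase or lowercase alike as long as the input is consistent):

for word in ["AAAB", "ABAB", "BAAA", "QUESTION", "BOOKKEEPER", "NONINTUITIVENESS"]:
    print(word, anagram_number(word))
# Should print the reference ranks: 1, 2, 4, 24572, 10743, 8222334634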
I think the key here is to think about what the rank of the different permutations means. For example, given a string like BAAA, we know its rank must be greater than that of every permutation starting with A. So if we can count how many permutations start with A, we do not need to enumerate them one by one. And how many are there? Exactly the number of distinct permutations that begin with A. That is easy to compute, and then we have to work out where BAAA falls among the permutations that start with B -- which is the same as working out where AAA falls, counting from the first permutation that starts with B (and we know where that one sits: immediately after all of the permutations that start with A).

Something along these lines should work; it simply generalizes that idea. (Disclaimer: this is untested -- there may be corner cases I have forgotten, and so on -- but I am fairly confident in the basic approach.)
from math import factorial
from collections import Counter

def number_of_distinct_permutations(counts):
    f = factorial(sum(counts.values()))
    for letter, count in counts.items():
        f //= factorial(count)
    return f

def compute_index(word, index=0):
    if not word:
        return index + 1
    pending = Counter(word)
    head = word[0]
    for p in sorted(pending):
        if p < head:
            index += number_of_distinct_permutations(pending - Counter(p))
        if p == head:
            index += compute_index(word[1:])
    return index

test_data = {"ABAB": 2,
             "AAAB": 1,
             "BAAA": 4,
             "QUESTION": 24572,
             "BOOKKEEPER": 10743,
             "NONINTUITIVENESS": 8222334634}

print("word, reference, calculated")
for k, v in sorted(test_data.items()):
    print (k, v, compute_index(k))
This produces:
word, reference, calculated
AAAB 1 1
ABAB 2 2
BAAA 4 4
BOOKKEEPER 10743 10743
NONINTUITIVENESS 8222334634 8222334634
QUESTION 24572 24572