遍历多个文本文件并比较

def Cheaters():
    """Report students whose submissions share lines with another submission.

    Reads student names from ``roster.txt`` (one per line) in the current
    working directory and looks for each student's work in ``<name>.txt``.

    Prints ``"No work submitted"`` for every roster entry whose file cannot
    be read, and ``"Cheated"`` once for every pair of submissions that have
    at least one non-blank line in common.  Returns ``None``.
    """
    # Build the unique file names in roster order.  strip() removes the line
    # terminator without eating a real character when the final roster line
    # has no trailing newline.
    file_names = []
    with open("roster.txt", "r") as roster:
        for entry in roster:
            name = entry.strip()
            if not name:
                continue
            file_name = name + ".txt"
            if file_name not in file_names:
                file_names.append(file_name)

    # Lines of every submission read so far, keyed by file name.
    submissions = {}
    for file_name in file_names:
        try:
            with open(file_name, "r") as handle:
                # Blank lines are ignored: two files both containing an empty
                # line is not evidence of copying.
                lines = {line.strip() for line in handle if line.strip()}
        except (OSError, UnicodeError):
            print("No work submitted")
            continue

        # Compare against each earlier submission so every pair is checked
        # exactly once.
        for earlier_lines in submissions.values():
            if lines & earlier_lines:
                print("Cheated")
        submissions[file_name] = lines

1条回答

网友

1楼 · 发布于 2024-04-28 03:37:40

试试这个。您可能需要根据自己的文件结构对它进行修改，但应该已经很接近可用了。

import re
from itertools import product

def hash_sentences(document):
    """Return the hashes of the normalized sentences in *document*.

    The text is cleaned (anything other than ASCII letters, digits, basic
    punctuation and spaces becomes a space), split into sentences on
    ``?``, ``.`` and ``!``, and each sentence is stripped and lower-cased
    before hashing.  Sentences of five characters or fewer after
    normalization are dropped.

    Args:
        document: The full text of one document as a string.

    Returns:
        A list of ``hash()`` values, one per kept sentence, in document
        order.  String hashes are salted per interpreter process, so the
        values are only comparable within a single run.
    """
    # The letter ranges are spelled out as A-Za-z: the range A-z would also
    # cover the ASCII punctuation that sits between 'Z' and 'a'
    # ([ \ ] ^ _ `), letting those characters through uncleaned.
    cleaned_text = re.sub(r'[^A-Za-z0-9,;:.?! ]', ' ', document)
    sentences = re.split(r'[?.!]', cleaned_text)

    # Normalize first, then filter, so surrounding whitespace neither counts
    # towards the length nor lets whitespace-only fragments through.  The
    # length threshold discards short fragments such as "Dr".
    normalized = (s.strip().lower() for s in sentences)
    return [hash(s) for s in normalized if len(s) > 5]

def compare_documents(doc1, doc2):
    """Return the fraction of doc1's sentences that also occur in doc2.

    Both documents are reduced to sentence hashes with ``hash_sentences``.
    The result is not symmetric: it is measured relative to the number of
    sentences in *doc1*.

    Args:
        doc1: Text of the document whose sentences are looked up.
        doc2: Text of the document that is searched.

    Returns:
        A float between 0.0 and 1.0.  Returns 0.0 when *doc1* contains no
        usable sentences, instead of dividing by zero.
    """
    hash1 = hash_sentences(doc1)
    if not hash1:
        return 0.0
    # A set makes each membership test O(1) instead of scanning a list.
    hash2 = set(hash_sentences(doc2))
    return sum(h in hash2 for h in hash1) / float(len(hash1))

# get list of document file names
with open('roster.txt', 'r') as fp:
    # Strip each roster line before appending the extension: the raw line
    # still carries its trailing newline, which would produce names such as
    # "alice\n.txt" that can never be opened.  Blank lines are skipped.
    doc_fnames = [name + '.txt' for name in (line.strip() for line in fp) if name]

# create dictionary of file names and content
doc_dict = {}
for fname in doc_fnames:
    try:
        with open(fname, 'r') as fp:
            doc_dict[fname] = fp.read()
    except (OSError, UnicodeError):
        # Missing or unreadable file: report it and leave it out of the
        # comparison instead of hiding every possible error.
        print('No submission: %s' % fname)

# iterate through the ordered pairs of documents; the comparison is not
# symmetric, so both (a, b) and (b, a) are reported
for first, second in product(doc_dict, doc_dict):
    if first == second:
        # comparing a document with itself carries no information
        continue
    pct = compare_documents(doc_dict[first], doc_dict[second])
    print('Percentage of %s sentences in %s: %0.2f%%' % (first, second, 100*pct))

相关问题更多 >

编程相关推荐

热门问题

热门文章

遍历多个文本文件并比较

相关问题 更多 >

编程相关推荐

热门问题

热门文章

相关问题更多 >