import os
def get_files(search_path):
    """Yield the full path of every file found below ``search_path``.

    The tree is traversed with ``os.walk``; each yielded value is the
    visited directory path joined with one of the file names in it.
    Directories themselves are never yielded.
    """
    for dirpath, _, filenames in os.walk(search_path):
        for filename in filenames:
            yield os.path.join(dirpath, filename)
list_files = get_files('.')
for filename in list_files:
print(filename)
def file_filter(filename, radical='', extension=''):
    """Check if a filename matches a radical and extension.

    Args:
        filename: Name to test; surrounding whitespace is stripped first.
        radical: Prefix the name must start with (empty matches anything).
        extension: Suffix the name must end with (empty matches anything).

    Returns:
        True when the stripped name starts with ``radical`` and ends with
        ``extension``; False for an empty or ``None`` filename.
    """
    if not filename:
        return False
    name = filename.strip()
    return name.startswith(radical) and name.endswith(extension)
def dir_filter(dirname='', radical='', extension=''):
    """Filter filenames in directory according to radical and extension.

    Args:
        dirname: Directory to list; an empty value means the current
            directory (``'.'``).
        radical: Prefix passed on to ``file_filter``.
        extension: Suffix passed on to ``file_filter``.

    Returns:
        List of the entry names from ``os.listdir`` accepted by
        ``file_filter``. Subdirectories are not entered.
    """
    return [filename for filename in os.listdir(dirname or '.')
            if file_filter(filename, radical, extension)]
def list_files(path):
    """Return the names of all regular files directly inside ``path``.

    Names include the extension but not the directory part. Entries for
    which ``os.path.isfile`` is false (e.g. subdirectories) are skipped.
    """
    return [name for name in os.listdir(path)
            if os.path.isfile(os.path.join(path, name))]
import glob
txtfiles = []
for file in glob.glob("*.txt"):
txtfiles.append(file)
glob with list comprehension
import glob
mylist = [f for f in glob.glob("*.txt")]
glob with a function
import glob
def filebrowser():
    """Return the names in the current directory matched by the ``*`` glob."""
    # glob.glob already returns a new list, so no copy-comprehension is needed.
    return glob.glob("*")
x = filebrowser()
print(x)
>>> ['example.txt', 'fb.py', 'filebrowser.py', 'help']
glob extending the previous code
函数现在返回与作为参数传递的字符串匹配的文件列表
import glob
def filebrowser(word=""):
    """Return the current-directory names that contain ``word``.

    ``word`` is matched as a plain substring anywhere in the name, so it
    can be part of the name or an extension such as ``".py"``. The default
    empty string matches every name returned by ``glob.glob("*")``.
    """
    return [name for name in glob.glob("*") if word in name]
flist = filebrowser("example")
print(flist)
flist = filebrowser(".py")
print(flist)
>>> ['example.txt']
>>> ['fb.py', 'filebrowser.py']
import os
files_path = [os.path.abspath(x) for x in os.listdir()]
print(files_path)
>>> ['F:\\documenti\applications.txt', 'F:\\documenti\collections.txt']
Get the full path name of a type of file into all subdirectories with walk
我发现这对在许多目录中查找内容非常有用,它帮助我找到了一个我不记得名字的文件:
import os
# Getting the current work directory (cwd)
thisdir = os.getcwd()
# r=root, d=directories, f = files
for r, d, f in os.walk(thisdir):
for file in f:
if ".docx" in file:
print(os.path.join(r, file))
os.listdir(): get files in the current directory (Python 2)
Get files of a particular subdirectory with os.listdir()
import os
x = os.listdir("./content")
os.walk('.') - current directory
import os
arr = next(os.walk('.'))[2]
print(arr)
>>> ['5bs_Turismo1.pdf', '5bs_Turismo1.pptx', 'esperienza.txt']
next(os.walk('.')) and os.path.join('dir', 'file')
import os
# next(os.walk(...)) is ONE (root, dirs, files) tuple for the top directory,
# so it is unpacked once rather than looped over.
root, _dirs, files = next(os.walk("F:\\_python"))
arr = [os.path.join(root, file) for file in files]
for f in arr:
    print(f)
>>> F:\\_python\\dict_class.py
>>> F:\\_python\\programmi.txt
next(os.walk('F:\\') - get the full path - list comprehension
# next(os.walk(...)) is a single (root, dirs, files) tuple; wrapping it in a
# list lets the comprehension unpack it exactly once.
[os.path.join(r, file) for r, d, f in [next(os.walk("F:\\_python"))] for file in f]
>>> ['F:\\_python\\dict_class.py', 'F:\\_python\\programmi.txt']
os.walk - get full path - all files in sub dirs**
x = [os.path.join(r,file) for r,d,f in os.walk("F:\\_python") for file in f]
print(x)
>>> ['F:\\_python\\dict.py', 'F:\\_python\\progr.txt', 'F:\\_python\\readl.py']
os.listdir() - get only txt files
arr_txt = [x for x in os.listdir() if x.endswith(".txt")]
print(arr_txt)
>>> ['work.txt', '3ebooks.txt']
Using glob to get the full path of the files
如果我需要文件的绝对路径:
from path import path
from glob import glob
x = [path(f).abspath() for f in glob("F:\\*.txt")]
for f in x:
print(f)
>>> F:\acquistionline.txt
>>> F:\acquisti_2018.txt
>>> F:\bootstrap_jquery_ecc.txt
Using os.path.isfile to avoid directories in the list
import os.path
listOfFiles = [f for f in os.listdir() if os.path.isfile(f)]
print(listOfFiles)
>>> ['a simple game.py', 'data.txt', 'decorator.py']
Using pathlib from Python 3.4
import pathlib
flist = []
for p in pathlib.Path('.').iterdir():
if p.is_file():
print(p)
flist.append(p)
>>> error.PNG
>>> exemaker.bat
>>> guiprova.mp3
>>> setup.py
>>> speak_gui2.py
>>> thumb.PNG
使用list comprehension:
flist = [p for p in pathlib.Path('.').iterdir() if p.is_file()]
或者,使用pathlib.Path()而不是pathlib.Path(".")
Use glob method in pathlib.Path()
import pathlib
py = pathlib.Path().glob("*.py")
for file in py:
print(file)
>>> stack_overflow_list.py
>>> stack_overflow_list_tkinter.py
Get all and only files with os.walk
import os
x = [i[2] for i in os.walk('.')]
y=[]
for t in x:
for f in t:
y.append(f)
print(y)
>>> ['append_to_list.py', 'data.txt', 'data1.txt', 'data2.txt', 'data_180617', 'os_walk.py', 'READ2.py', 'read_data.py', 'somma_defaltdic.py', 'substitute_words.py', 'sum_data.py', 'data.txt', 'data1.txt', 'data_180617']
Get only files with next and walk in a directory
import os
x = next(os.walk('F://python'))[2]
print(x)
>>> ['calculator.bat','calculator.py']
Get only directories with next and walk in a directory
import os
next(os.walk('F://python'))[1] # for the current dir use ('.')
>>> ['python3','others']
Get all the subdir names with walk
for r,d,f in os.walk("F:\\_python"):
for dirs in d:
print(dirs)
>>> .vscode
>>> pyexcel
>>> pyschool.py
>>> subtitles
>>> _metaprogramming
>>> .ipynb_checkpoints
os.scandir() from Python 3.5 and greater
import os
x = [f.name for f in os.scandir() if f.is_file()]
print(x)
>>> ['calculator.bat','calculator.py']
# Another example with scandir (a little variation from docs.python.org)
# This one is more efficient than os.listdir.
# In this case, it shows the files only in the current directory
# where the script is executed.
import os
with os.scandir() as i:
for entry in i:
if entry.is_file():
print(entry.name)
>>> ebookmaker.py
>>> error.PNG
>>> exemaker.bat
>>> guiprova.mp3
>>> setup.py
>>> speakgui4.py
>>> speak_gui2.py
>>> speak_gui3.py
>>> thumb.PNG
Examples:
Ex. 1: How many files are there in the subdirectories?
在本例中,我们查找包含在所有目录及其子目录中的文件数。
import os
def count(dir, counter=0):
    """Return a summary string with the number of files in dir and subdirs.

    Args:
        dir: Directory whose tree is walked with ``os.walk``.
        counter: Starting value added to the number of files found.

    Returns:
        A string of the form ``"<dir> : <n> files"``.
    """
    for _root, _dirs, files in os.walk(dir):
        counter += len(files)
    # The original concatenated "files" without the leading space.
    return dir + " : " + str(counter) + " files"
print(count("F:\\python"))
>>> 'F:\\\python' : 12057 files'
Ex.2: How to copy all files from a directory to another?
在计算机中排序的脚本,用于查找某一类型(默认值:pptx)的所有文件并将其复制到新文件夹中。
import os
import shutil
from path import path
destination = "F:\\file_copied"
# os.makedirs(destination)
def copyfile(dir, filetype='pptx', counter=0):
    """Search for pptx (or other - pptx is the default) files and copy them.

    Walks ``dir`` recursively, prints every file whose name ends with
    ``filetype`` and copies it to the module-level ``destination``.
    When at least one file was copied, a summary is printed.

    Args:
        dir: Directory tree to search.
        filetype: Suffix a file name must end with to be copied.
        counter: Starting value for the number of copied files.
    """
    # NOTE(review): presumably `destination` must already exist as a
    # directory (its os.makedirs call is commented out) - confirm.
    for root, _dirs, files in os.walk(dir):
        for name in files:
            if name.endswith(filetype):
                # os.path.join instead of a hard-coded "\\" keeps this portable.
                fullpath = os.path.join(root, name)
                print(fullpath)
                shutil.copy(fullpath, destination)
                counter += 1
    if counter > 0:
        print('-' * 30)
        print("\t==> Found in: `" + dir + "` : " + str(counter) + " files\n")
for dir in os.listdir():
"searches for folders that starts with `_`"
if dir[0] == '_':
# copyfile(dir, filetype='pdf')
copyfile(dir, filetype='txt')
>>> _compiti18\Compito Contabilità 1\conti.txt
>>> _compiti18\Compito Contabilità 1\modula4.txt
>>> _compiti18\Compito Contabilità 1\moduloa4.txt
>>> ------------------------
>>> ==> Found in: `_compiti18` : 3 files
Ex. 3: How to get all the files in a txt file
如果要创建包含所有文件名的txt文件:
import os
mylist = ""
with open("filelist.txt", "w", encoding="utf-8") as file:
for eachfile in os.listdir():
mylist += eachfile + "\n"
file.write(mylist)
Example: txt with all the files of an hard drive
"""
We are going to save a txt file with all the files in your directory.
We will use the function walk()
"""
import os
# see all the methods of os
# print(*dir(os), sep=", ")
listafile = []
percorso = []
with open("lista_file.txt", "w", encoding='utf-8') as testo:
for root, dirs, files in os.walk("D:\\"):
for file in files:
listafile.append(file)
percorso.append(root + "\\" + file)
testo.write(file + "\n")
listafile.sort()
print("N. of files", len(listafile))
with open("lista_file_ordinata.txt", "w", encoding="utf-8") as testo_ordinato:
for file in listafile:
testo_ordinato.write(file + "\n")
with open("percorso.txt", "w", encoding="utf-8") as file_percorso:
for file in percorso:
file_percorso.write(file + "\n")
os.system("lista_file.txt")
os.system("lista_file_ordinata.txt")
os.system("percorso.txt")
All the file of C:\ in one text file
This is a shorter version of the previous code. Change the starting folder if you need to search from another location. On my computer this code generated a text file of about 50 MB, with slightly fewer than 500,000 lines, each containing the complete path of a file.
import os
with open("file.txt", "w", encoding="utf-8") as filewrite:
    for r, d, f in os.walk("C:\\"):
        for file in f:
            # Plain `r + file` glues the directory and file name together
            # without a separator; os.path.join inserts it.
            filewrite.write(f"{os.path.join(r, file)}\n")
How to write a file with all paths in a folder of a type
import os
def searchfiles(extension='.ttf', folder='H:\\'):
    """Create a txt file listing the full path of every file of a type.

    The output file is written to the current directory and is named
    after the extension without its first character plus ``file.txt``
    (for example ``.png`` -> ``pngfile.txt``), one path per line.

    Args:
        extension: Suffix a file name must end with to be listed.
        folder: Root of the directory tree that is walked.
    """
    with open(extension[1:] + "file.txt", "w", encoding="utf-8") as filewrite:
        for root, _dirs, files in os.walk(folder):
            for name in files:
                if name.endswith(extension):
                    # `root + name` would drop the separator between the
                    # directory and the file name.
                    filewrite.write(os.path.join(root, name) + "\n")
# looking for png file (fonts) in the hard disk H:\
searchfiles('.png', 'H:\\')
>>> H:\4bs_18\Dolphins5.png
>>> H:\4bs_18\Dolphins6.png
>>> H:\4bs_18\Dolphins7.png
>>> H:\5_18\marketing html\assets\imageslogo2.png
>>> H:\7z001.png
>>> H:\7z002.png
(New) Find all files and open them with tkinter GUI
In 2019 I wanted to add a little app that searches for all files in a directory and lets you open any of them by double-clicking its name in the list.
import tkinter as tk
import os
def searchfiles(extension='.txt', folder='H:\\'):
    """Insert the path of every matching file into the listbox ``lb``.

    Walks ``folder`` recursively and inserts, at index 0 of the
    module-level listbox, the path of each file whose name ends with
    ``extension``.
    """
    for current_dir, _dirs, files in os.walk(folder):
        for name in files:
            if name.endswith(extension):
                # os.path.join avoids the doubled/hard-coded "\\" separator.
                lb.insert(0, os.path.join(current_dir, name))
def open_file():
    """Open the listbox entry that is currently selected."""
    selection = lb.curselection()
    if not selection:
        # With nothing selected curselection() is empty and indexing it
        # would raise IndexError inside the Tk callback.
        return
    # NOTE: os.startfile is only available on Windows.
    os.startfile(lb.get(selection[0]))
root = tk.Tk()
root.geometry("400x400")
bt = tk.Button(root, text="Search", command=lambda:searchfiles('.png', 'H:\\'))
bt.pack()
lb = tk.Listbox(root)
lb.pack(fill="both", expand=1)
lb.bind("<Double-Button>", lambda x: open_file())
root.mainloop()
from glob import glob
# Return everything under C:\Users\admin that contains a folder called wlp.
# Raw string: in a normal literal '\U' starts a Unicode escape, which is a
# SyntaxError in Python 3.
glob(r'C:\Users\admin\*\wlp')
上面的写法很糟糕——路径是硬编码的,而且由于驱动器名和 \ 分隔符都被硬编码进了路径,它只能在 Windows 上工作。
from glob import glob
from os.path import join
# Return everything under Users, admin, that contains a folder called wlp.
glob(join('Users', 'admin', '*', 'wlp'))
from glob import glob
from os.path import expanduser, join
# Return everything under the user directory that contains a folder called wlp.
glob(join(expanduser('~'), '*', 'wlp'))
这在所有平台上都非常有效。
另一个很好的例子,它可以完美地跨平台工作,并且可以做一些不同的事情:
from glob import glob
from os import getcwd
from os.path import join
# Return everything under the current directory that contains a folder called wlp.
glob(join(getcwd(), '*', 'wlp'))
import os
def get_filepaths(directory):
    """Return a list with the full path of every file under ``directory``.

    The tree rooted at ``directory`` is traversed with ``os.walk`` and
    each file name is joined with the directory it was found in.
    Directories themselves are not included in the result.
    """
    file_paths = []  # Collects the full path of each file found.
    for root, _directories, files in os.walk(directory):
        file_paths.extend(os.path.join(root, filename) for filename in files)
    return file_paths
# Run the above function and store its results in a variable.
full_file_paths = get_filepaths("/Users/johnny/Desktop/TEST")
import os
import fnmatch
def list_paths(folder='.', pattern='*', case_sensitive=False, subfolders=False):
    """Return a list of the file paths matching the pattern in the specified
    folder, optionally including files inside subfolders.

    Args:
        folder: Directory to search.
        pattern: Shell-style pattern (``fnmatch`` syntax) applied to file names.
        case_sensitive: When false, names and pattern are compared
            case-insensitively on every platform.
        subfolders: When true, the whole tree below ``folder`` is searched.

    Returns:
        List of ``os.path.join(root, name)`` for each matching file; empty
        when ``os.walk`` yields nothing for ``folder``.
    """
    if case_sensitive:
        def match(name):
            return fnmatch.fnmatchcase(name, pattern)
    else:
        # fnmatch.fnmatch only ignores case on case-insensitive platforms,
        # so fold both sides explicitly to behave the same everywhere.
        folded_pattern = pattern.lower()

        def match(name):
            return fnmatch.fnmatchcase(name.lower(), folded_pattern)

    walked = os.walk(folder)
    if not subfolders:
        # Keep only the top-level entry. next() with a default avoids a
        # StopIteration when os.walk yields nothing (e.g. missing folder).
        top = next(walked, None)
        walked = [] if top is None else [top]
    return [os.path.join(root, f)
            for root, dirnames, filenames in walked
            for f in filenames if match(f)]
>>> import sys
>>> sys.version
'2.7.10 (default, Mar 8 2016, 15:02:46) [MSC v.1600 64 bit (AMD64)]'
>>> m = map(lambda x: x, [1, 2, 3]) # Just a dummy lambda function
>>> m, type(m)
([1, 2, 3], <type 'list'>)
>>> len(m)
3
>>> import sys
>>> sys.version
'3.5.4 (v3.5.4:3f56838, Aug 8 2017, 02:17:05) [MSC v.1900 64 bit (AMD64)]'
>>> m = map(lambda x: x, [1, 2, 3])
>>> m, type(m)
(<map object at 0x000001B4257342B0>, <class 'map'>)
>>> len(m)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: object of type 'map' has no len()
>>> lm0 = list(m) # Build a list from the generator
>>> lm0, type(lm0)
([1, 2, 3], <class 'list'>)
>>>
>>> lm1 = list(m) # Build a list from the same generator
>>> lm1, type(lm1) # Empty list now - generator already consumed
([], <class 'list'>)
Return a list containing the names of the entries in the directory given by path. The list is in arbitrary order, and does not include the special entries '.' and '..' ...
>>> import os
>>> root_dir = "root_dir" # Path relative to current dir (os.getcwd())
>>>
>>> os.listdir(root_dir) # List all the items in root_dir
['dir0', 'dir1', 'dir2', 'dir3', 'file0', 'file1']
>>>
>>> [item for item in os.listdir(root_dir) if os.path.isfile(os.path.join(root_dir, item))] # Filter items and only keep files (strip out directories)
['file0', 'file1']
一个更详细的示例(code_os_listdir.py):
import os
from pprint import pformat
def _get_dir_content(path, include_folders, recursive):
    """Yield ``path`` joined with each entry found by ``os.listdir``.

    Non-directories are always yielded. Directories are yielded only when
    ``include_folders`` is true and are descended into only when
    ``recursive`` is true.
    """
    for entry in os.listdir(path):
        entry_with_path = os.path.join(path, entry)
        if not os.path.isdir(entry_with_path):
            yield entry_with_path
            continue
        if include_folders:
            yield entry_with_path
        if recursive:
            for sub_entry in _get_dir_content(entry_with_path, include_folders, recursive):
                yield sub_entry


def get_dir_content(path, include_folders=True, recursive=True, prepend_folder_name=True):
    """Yield the content of ``path``, optionally relative to ``path``.

    Args:
        path: Directory to list.
        include_folders: Also yield directory entries.
        recursive: Descend into subdirectories.
        prepend_folder_name: When false, the leading ``path`` prefix is
            stripped from every yielded item.
    """
    # Measure the prefix os.path.join actually produces: it adds a separator
    # only when `path` does not already end with one, so a fixed
    # len(path) + len(sep) would cut one character too many in that case.
    prefix_len = len(os.path.join(path, ""))
    for item in _get_dir_content(path, include_folders, recursive):
        yield item if prepend_folder_name else item[prefix_len:]
def _get_dir_content_old(path, include_folders, recursive):
    """Return a list of ``path`` joined with each entry from ``os.listdir``.

    Non-directories are always included. Directories are included only when
    ``include_folders`` is true and are descended into only when
    ``recursive`` is true.
    """
    ret = []
    for entry in os.listdir(path):
        entry_with_path = os.path.join(path, entry)
        if not os.path.isdir(entry_with_path):
            ret.append(entry_with_path)
            continue
        if include_folders:
            ret.append(entry_with_path)
        if recursive:
            ret.extend(_get_dir_content_old(entry_with_path, include_folders, recursive))
    return ret


def get_dir_content_old(path, include_folders=True, recursive=True, prepend_folder_name=True):
    """Return the content of ``path`` as a list, optionally relative to it.

    Args:
        path: Directory to list.
        include_folders: Also include directory entries.
        recursive: Descend into subdirectories.
        prepend_folder_name: When false, the leading ``path`` prefix is
            stripped from every item.
    """
    items = _get_dir_content_old(path, include_folders, recursive)
    if prepend_folder_name:
        return items
    # Measure the prefix os.path.join actually produces, so a `path` that
    # already ends with a separator does not lose an extra character.
    prefix_len = len(os.path.join(path, ""))
    return [item[prefix_len:] for item in items]
def main():
    """Demo: list ``root_dir`` with the generator and the list variants."""
    root_dir = "root_dir"
    ret0 = get_dir_content(root_dir, include_folders=True, recursive=True, prepend_folder_name=True)
    # ret0 is a generator, so it is materialised once to get a length;
    # printing ret0 itself shows the generator object, not its items.
    lret0 = list(ret0)
    print(ret0, len(lret0), pformat(lret0))
    ret1 = get_dir_content_old(root_dir, include_folders=False, recursive=True, prepend_folder_name=False)
    print(len(ret1), pformat(ret1))
if __name__ == "__main__":
main()
Return an iterator of os.DirEntry objects corresponding to the entries in the directory given by path. The entries are yielded in arbitrary order, and the special entries '.' and '..' are not included.
Using scandir() instead of listdir() can significantly increase the performance of code that also needs file type or file attribute information, because os.DirEntry objects expose this information if the operating system provides it when scanning a directory. All os.DirEntry methods may perform a system call, but is_dir() and is_file() usually only require a system call for symbolic links; os.DirEntry.stat() always requires a system call on Unix but only requires one for symbolic links on Windows.
>>> import os
>>> root_dir = os.path.join(".", "root_dir") # Explicitly prepending current directory
>>> root_dir
'.\\root_dir'
>>>
>>> scandir_iterator = os.scandir(root_dir)
>>> scandir_iterator
<nt.ScandirIterator object at 0x00000268CF4BC140>
>>> [item.path for item in scandir_iterator]
['.\\root_dir\\dir0', '.\\root_dir\\dir1', '.\\root_dir\\dir2', '.\\root_dir\\dir3', '.\\root_dir\\file0', '.\\root_dir\\file1']
>>>
>>> [item.path for item in scandir_iterator] # Will yield an empty list as it was consumed by previous iteration (automatically performed by the list comprehension)
[]
>>>
>>> scandir_iterator = os.scandir(root_dir) # Reinitialize the generator
>>> for item in scandir_iterator :
... if os.path.isfile(item.path):
... print(item.name)
...
file0
file1
Generate the file names in a directory tree by walking the tree either top-down or bottom-up. For each directory in the tree rooted at directory top (including top itself), it yields a 3-tuple (dirpath, dirnames, filenames).
>>> import os
>>> root_dir = os.path.join(os.getcwd(), "root_dir") # Specify the full path
>>> root_dir
'E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir'
>>>
>>> walk_generator = os.walk(root_dir)
>>> root_dir_entry = next(walk_generator) # First entry corresponds to the root dir (passed as an argument)
>>> root_dir_entry
('E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir', ['dir0', 'dir1', 'dir2', 'dir3'], ['file0', 'file1'])
>>>
>>> root_dir_entry[1] + root_dir_entry[2] # Display dirs and files (direct descendants) in a single list
['dir0', 'dir1', 'dir2', 'dir3', 'file0', 'file1']
>>>
>>> [os.path.join(root_dir_entry[0], item) for item in root_dir_entry[1] + root_dir_entry[2]] # Display all the entries in the previous list by their full path
['E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir0', 'E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir1', 'E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir2', 'E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir3', 'E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\file0', 'E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\file1']
>>>
>>> for entry in walk_generator: # Display the rest of the elements (corresponding to every subdir)
... print(entry)
...
('E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir0', ['dir00', 'dir01', 'dir02'], [])
('E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir0\\dir00', ['dir000'], ['file000'])
('E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir0\\dir00\\dir000', [], ['file0000'])
('E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir0\\dir01', [], ['file010', 'file011'])
('E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir0\\dir02', ['dir020'], [])
('E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir0\\dir02\\dir020', ['dir0200'], [])
('E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir0\\dir02\\dir020\\dir0200', [], [])
('E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir1', [], ['file10', 'file11', 'file12'])
('E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir2', ['dir20'], ['file20'])
('E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir2\\dir20', [], ['file200'])
('E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir3', [], [])
Return a possibly-empty list of path names that match pathname, which must be a string containing a path specification. pathname can be either absolute (like /usr/src/Python-1.5/Makefile) or relative (like ../../Tools/*/*.gif), and can contain shell-style wildcards. Broken symlinks are included in the results (as in the shell). ... Changed in version 3.5: Support for recursive globs using “**”.
>>> import glob, os
>>> wildcard_pattern = "*"
>>> root_dir = os.path.join("root_dir", wildcard_pattern) # Match every file/dir name
>>> root_dir
'root_dir\\*'
>>>
>>> glob_list = glob.glob(root_dir)
>>> glob_list
['root_dir\\dir0', 'root_dir\\dir1', 'root_dir\\dir2', 'root_dir\\dir3', 'root_dir\\file0', 'root_dir\\file1']
>>>
>>> [item.replace("root_dir" + os.path.sep, "") for item in glob_list] # Strip the dir name and the path separator from begining
['dir0', 'dir1', 'dir2', 'dir3', 'file0', 'file1']
>>>
>>> for entry in glob.iglob(root_dir + "*", recursive=True):
... print(entry)
...
root_dir\
root_dir\dir0
root_dir\dir0\dir00
root_dir\dir0\dir00\dir000
root_dir\dir0\dir00\dir000\file0000
root_dir\dir0\dir00\file000
root_dir\dir0\dir01
root_dir\dir0\dir01\file010
root_dir\dir0\dir01\file011
root_dir\dir0\dir02
root_dir\dir0\dir02\dir020
root_dir\dir0\dir02\dir020\dir0200
root_dir\dir1
root_dir\dir1\file10
root_dir\dir1\file11
root_dir\dir1\file12
root_dir\dir2
root_dir\dir2\dir20
root_dir\dir2\dir20\file200
root_dir\dir2\file20
root_dir\dir3
root_dir\file0
root_dir\file1
def listdir(path):
    """List directory contents, using cache.

    The module-level ``cache`` maps a path to ``(mtime, entries)``. The
    directory is re-read and sorted only when its current modification
    time differs from the cached one; the cache entry is then rewritten.
    """
    try:
        cached_mtime, entries = cache[path]
        del cache[path]
    except KeyError:
        cached_mtime, entries = -1, []
    mtime = os.stat(path).st_mtime
    if mtime != cached_mtime:
        # `entries` rather than `list`, so the builtin is not shadowed.
        entries = os.listdir(path)
        entries.sort()
    cache[path] = mtime, entries
    return entries
ctypes is a foreign function library for Python. It provides C compatible data types, and allows calling functions in DLLs or shared libraries. It can be used to wrap these libraries in pure Python.
code_ctypes.py:
#!/usr/bin/env python3
import sys
from ctypes import Structure, \
c_ulonglong, c_longlong, c_ushort, c_ubyte, c_char, c_int, \
CDLL, POINTER, \
create_string_buffer, get_errno, set_errno, cast
DT_DIR = 4
DT_REG = 8
char256 = c_char * 256
class LinuxDirent64(Structure):
    """ctypes structure used to interpret the records returned by ``readdir``.

    NOTE(review): the field order and sizes presumably mirror the 64-bit
    Linux ``struct dirent`` - confirm against the target libc headers.
    """
    _fields_ = [
        ("d_ino", c_ulonglong),
        ("d_off", c_longlong),
        ("d_reclen", c_ushort),
        ("d_type", c_ubyte),
        ("d_name", char256),
    ]
LinuxDirent64Ptr = POINTER(LinuxDirent64)
from ctypes import c_char_p, c_void_p  # used only by the prototypes below

libc_dll = this_process = CDLL(None, use_errno=True)
# ALWAYS set argtypes and restype for functions, otherwise it's UB!!!
# Without them ctypes assumes int results, which truncates the DIR* and
# dirent pointers on 64-bit platforms.
opendir = libc_dll.opendir
opendir.argtypes = [c_char_p]
opendir.restype = c_void_p
readdir = libc_dll.readdir
readdir.argtypes = [c_void_p]
readdir.restype = LinuxDirent64Ptr
closedir = libc_dll.closedir
closedir.argtypes = [c_void_p]
closedir.restype = c_int


def get_dir_content(path):
    """Return ``[path, dir_names, file_names]`` for the entries of ``path``.

    The directory is read through libc's opendir/readdir/closedir.
    Entries typed as directories (except ``.`` and ``..``) go into the
    second item, entries typed as regular files into the third; every
    other entry type is ignored. The listing is not recursive. Errors are
    reported with ``print`` and the (possibly partial) result is returned.
    """
    ret = [path, list(), list()]
    dir_stream = opendir(path.encode())
    if not dir_stream:  # NULL comes back as None with a c_void_p restype
        print("opendir returned NULL (errno: {:d})".format(get_errno()))
        return ret
    try:
        # readdir returns NULL both at end-of-directory and on error; errno
        # is cleared first so the two cases can be told apart afterwards.
        set_errno(0)
        dirent_ptr = readdir(dir_stream)
        while dirent_ptr:  # a NULL ctypes pointer is falsy
            dirent = dirent_ptr.contents
            name = dirent.d_name.decode()
            # d_type holds one enumerated value, not a bit mask, so it is
            # compared with ==; a bitwise & also matches unrelated types
            # whose numeric value shares a bit with DT_DIR / DT_REG.
            if dirent.d_type == DT_DIR:
                if name not in (".", ".."):
                    ret[1].append(name)
            elif dirent.d_type == DT_REG:
                ret[2].append(name)
            dirent_ptr = readdir(dir_stream)
        if get_errno():
            print("readdir returned NULL (errno: {:d})".format(get_errno()))
    finally:
        # Release the directory stream even if decoding a name raised.
        closedir(dir_stream)
    return ret
def main():
    """Demo: print the interpreter/platform and the content of ``root_dir``."""
    print("{:s} on {:s}\n".format(sys.version, sys.platform))
    root_dir = "root_dir"
    entries = get_dir_content(root_dir)
    print(entries)
if __name__ == "__main__":
main()
Retrieves a list of matching filenames, using the Windows Unicode API. An interface to the API FindFirstFileW/FindNextFileW/Find close functions.
>>> import os, win32file, win32con
>>> root_dir = "root_dir"
>>> wildcard = "*"
>>> root_dir_wildcard = os.path.join(root_dir, wildcard)
>>> entry_list = win32file.FindFilesW(root_dir_wildcard)
>>> len(entry_list) # Don't display the whole content as it's too long
8
>>> [entry[-2] for entry in entry_list] # Only display the entry names
['.', '..', 'dir0', 'dir1', 'dir2', 'dir3', 'file0', 'file1']
>>>
>>> [entry[-2] for entry in entry_list if entry[0] & win32con.FILE_ATTRIBUTE_DIRECTORY and entry[-2] not in (".", "..")] # Filter entries and only display dir names (except self and parent)
['dir0', 'dir1', 'dir2', 'dir3']
>>>
>>> [os.path.join(root_dir, entry[-2]) for entry in entry_list if entry[0] & (win32con.FILE_ATTRIBUTE_NORMAL | win32con.FILE_ATTRIBUTE_ARCHIVE)] # Only display file "full" names
['root_dir\\file0', 'root_dir\\file1']
将返回“somedirectory”中所有文件和目录的列表。
从 3.4 版开始,有了内置的迭代器,比 os.listdir() 更高效:
pathlib:在 3.4 版中新增。
根据 PEP 428,pathlib 库的目标是提供一个简单的类层次结构来处理文件系统路径和用户对它们执行的常见操作。
os.scandir():在 3.5 版中新增。
注意,从 3.5 版起 os.walk() 使用 os.scandir() 而不是 os.listdir(),根据 PEP 471,其速度提高了 2-20 倍。
我也推荐你阅读下面 ShadowRanger 的评论。
使用生成器
为了获得更好的结果,可以将
os
模块的listdir()
方法与生成器一起使用(生成器是保持其状态的强大迭代器,记得吗?)。以下代码对这两个版本都适用:Python 2和Python 3。这里有一个代码:
listdir()
方法返回给定目录的条目列表。如果给定项是文件,则方法os.path.isfile()
返回True
。并且yield
运算符退出func,但保持其当前状态,并且只返回作为文件检测的条目的名称。所有这些都允许我们在生成器函数上循环。一位聪明的老师曾经告诉我:
因此,我将为这个问题的一个子集添加一个解决方案:通常,我们只想检查一个文件是否匹配开始字符串和结束字符串,而不想进入子目录。因此,我们需要一个返回文件名列表的函数,例如:
如果希望首先声明两个函数,可以执行以下操作:
这个解决方案可以很容易地用正则表达式进行泛化(如果不希望模式总是停留在文件名的开头或结尾,您可能需要添加一个
pattern
参数)。返回绝对文件路径列表,不会递归到子目录中
我将提供一个示例一行程序,其中可以提供sourcepath和文件类型作为输入。代码返回一个扩展名为csv的文件名列表。使用。以防需要返回所有文件。这也将递归地扫描子目录。
[y for x in os.walk(sourcePath) for y in glob(os.path.join(x[0], '*.csv'))]
根据需要修改文件扩展名和源路径。
如果您正在寻找find的Python实现,这是我经常使用的一个方法:
所以我用它做了一个PyPIpackage,还有一个GitHub repository。我希望有人发现它可能对这段代码有用。
只获取文件列表的单行解决方案(无子目录):
或绝对路径名:
如何获取当前目录中的所有文件(和目录)(Python3)
下面是在Python 3中使用
os
和listdir()
函数仅检索当前目录中文件的简单方法。进一步的探索,将演示如何返回目录中的文件夹,但子目录中没有该文件,因此可以使用walk-discusted稍后)。我发现glob更容易选择同一类型的文件或有一些共同点的文件。看下面的例子:
函数现在返回与作为参数传递的字符串匹配的文件列表
正如您注意到的,在上面的代码中没有文件的完整路径。如果需要绝对路径,可以使用
os.path
模块的另一个名为_getfullpathname
的函数,将从os.listdir()
获取的文件作为参数。还有其他方法可以获得完整路径,我们稍后会检查(我按照mexmex的建议,将getfullpathname替换为abspath
)。我发现这对在许多目录中查找内容非常有用,它帮助我找到了一个我不记得名字的文件:
在Python2中,如果想要当前目录中的文件列表,则必须在os.listdir方法中将参数指定为“.”或os.getcwd()。
如果我需要文件的绝对路径:
使用
list comprehension
:或者,使用
pathlib.Path()
而不是pathlib.Path(".")
在本例中,我们查找包含在所有目录及其子目录中的文件数。
在计算机中排序的脚本,用于查找某一类型(默认值:pptx)的所有文件并将其复制到新文件夹中。
如果要创建包含所有文件名的txt文件:
使用此函数,您可以创建一个txt文件,该文件将具有您要查找的文件类型(例如pngfile.txt)的名称以及该类型所有文件的完整路径。我想有时候它会有用的。
我真的很喜欢adamk's answer,建议您使用来自同名模块的
glob()
。这允许您使用与*
s匹配的模式但正如其他人在评论中指出的,
glob()
可能会被不一致的斜杠方向绊倒。为此,我建议您使用join()
模块中的expanduser()
和os.path
函数,也可以使用os
模块中的getcwd()
函数。例如:
上面的情况很糟糕-路径已经被硬编码,并且只能在驱动器名和被硬编码到路径中的
\
之间的窗口上工作。上面的方法工作得更好,但是它依赖于文件夹名
Users
,这个文件夹名通常在Windows上找到,而在其他OSs上却很少找到。它还依赖于具有特定名称admin
的用户。这在所有平台上都非常有效。
另一个很好的例子,它可以完美地跨平台工作,并且可以做一些不同的事情:
希望这些示例能帮助您了解一些在标准Python库模块中可以找到的函数的强大功能。
dircache是“自2.6版以来就不推荐使用的:dircache模块在Python 3.0中已被删除。”
从目录及其所有子目录获取完整文件路径
将打印列表的
print full_file_paths
:['/Users/johnny/Desktop/TEST/file1.txt', '/Users/johnny/Desktop/TEST/file2.txt', '/Users/johnny/Desktop/TEST/SUBFOLDER/file3.dat']
如果愿意,可以打开并读取内容,或者只关注扩展名为“.dat”的文件,如下面的代码所示:
/Users/johnny/Desktop/TEST/SUBFOLDER/file3.dat
这是我的通用函数。它返回一个文件路径列表,而不是文件名,因为我发现这更有用。它有一些可选的参数,使其通用。例如,我经常将它与诸如
pattern='*.txt'
或subfolders=True
之类的参数一起使用。我更喜欢使用 fnmatch 模块,因为它进行模式匹配和扩展。
它将返回包含查询文件的列表:
这里我使用递归结构。
初步说明
在提出这个问题的时候,我想 Python 2 还是 LTS 版本,不过这里的代码示例将用 Python 3(.5) 运行(我会尽量让它们与 Python 2 兼容;另外,我贴出的任何属于 Python 的代码都来自 v3.5.4,除非另有说明)。这会影响到问题中的另一个关键词:“把它们添加到列表中”:
这些示例将基于一个名为root\u dir的目录,该目录具有以下结构(此示例用于Win,但我在Lnx上使用相同的树):
解决方案
程序化方法:
[Python 3]: os.listdir(path='.')
一个更详细的示例(code\u os\u listdir.py):
注意:
输出:
[Python 3]: os.scandir(path='.')(Python3.5+,后台端口:[PyPI]: scandir)
注意:
os.listdir
[Python 3]: os.walk(top, topdown=True, onerror=None, followlinks=False)
注意:
os.scandir
(在旧版本上使用os.listdir
)[Python 3]: glob.glob(pathname, *, recursive=False)([Python 3]: glob.iglob(pathname, *, recursive=False))
注意:
os.listdir
[Python 3]: class pathlib.Path(*pathsegments)(Python3.4+,后台端口:[PyPI]: pathlib2)
注意:
[Python 2]: dircache.listdir(path)(仅限Python2)
os.listdir
上的(薄)包装,带有缓存[man7]: OPENDIR(3)/[man7]: READDIR(3)/[man7]: CLOSEDIR(3)经由[Python 3]: ctypes - A foreign function library for Python(POSIX特定)
代码类型.py:
注意:
os.walk
的格式返回数据。我不想让它递归,但是从现有的代码开始,这将是一个相当简单的任务输出:
[ActiveState.Docs]: win32file.FindFilesW(Win特定)
注意:
win32file.FindFilesW
是[GitHub]: mhammond/pywin32 - Python for Windows (pywin32) Extensions的一部分,它是WINAPIs上的一个Python包装器注意:
代码应该是可移植的(除了针对特定区域的位置-已标记)或交叉的:
在上述变体中使用了多种路径样式(绝对路径、相对路径),以说明所使用的“工具”在这个方向上是灵活的
os.listdir
和os.scandir
使用opendir/readdir/closedir([MS.Docs]: FindFirstFileW function/[MS.Docs]: FindNextFileW function/[MS.Docs]: FindClose function)(通过[GitHub]: python/cpython - (master) cpython/Modules/posixmodule.c)win32file.FindFilesW
也使用那些(Win特定的)函数(通过[GitHub]: mhammond/pywin32 - (master) pywin32/win32/src/win32file.i)获取目录内容(从点1开始)。)可以使用这些方法中的任何一种来实现(有些需要更多的工作,有些则需要更少的工作)
if not filter_func(entry_with_path): continue
(如果函数在一个条目上失败,它将被跳过),但是代码越复杂,执行所需的时间就越长不是贝尼!由于使用递归,我必须提到,我在笔记本电脑上做了一些测试(Win 10 x64),与此问题完全无关,并且当递归级别在(990)中的某个地方达到值时。。1000)范围(递归限制-1000(默认值)),得到StackOverflow:)。如果目录树超过了这个限制(我不是FS专家,所以我不知道这是否可能),那可能是个问题。
我还必须提到,我没有试图增加递归极限,因为我在这个领域没有经验(在增加堆栈之前,我可以增加多少),但理论上总是有失败的可能性,如果dir深度大于最大可能的递归极限(在该机器上)
代码示例仅用于演示目的。这意味着我没有考虑错误处理(我认为除了最后的块之外,没有任何>尝试/其他/方法),所以代码不健壮(原因是:尽可能简单和简短)。对于生产,还应添加错误处理
其他方法:
仅将Python用作包装
我所知道的最著名的风格是我所称的系统管理员方法:
grep
/findstr
)或输出格式可以在两边都做,但我不会坚持这样做。另外,我故意使用os.system
,而不是subprocess.Popen
。一般来说,这种方法是要避免的,因为如果OS版本/风格之间的某些命令输出格式稍有不同,那么解析代码也应该进行调整;更不用说语言环境之间的差异了。
Python 3.4+的另一个非常可读的变体是使用pathlib.Path.glob:
更具体一点很简单,例如只在所有子目录中查找不是符号链接的Python源文件:
os.listdir() 将获得目录中的所有内容——文件和目录。
如果您只需要文件,可以使用 os.path(例如 os.path.isfile())来过滤:
或者您可以使用 os.walk(),它会为访问的每个目录生成两个列表——替您把内容拆分成文件和目录。如果只想要最顶层的目录,可以在它第一次产生结果时就 break:
对于 Python 2:pip install rglob
相关问题 更多 >
编程相关推荐