How do I create a zipfile in S3 using Boto3 and Python?

Posted 2024-04-18 07:44:45


I am trying to create a zipfile from several files that sit in a subfolder of an S3 bucket, and then save that zipfile to another subfolder in the same bucket.

When I run the Flask app locally I can create the ZipFile from the S3 bucket subfolder, but not on Heroku, since it does not persist anything to disk.

I was looking at this example, but it seems outdated and works with local files: https://www.botreetechnologies.com/blog/create-and-download-zip-file-in-django-via-amazon-s3

Below is the snippet of code I am working with.

from flask import Response
import io, boto3, zipfile, os

AWS_ACCESS_KEY_ID = "some access key"
AWS_SECRET_ACCESS_KEY = "some secret key"
AWS_STORAGE_BUCKET_NAME = "some bucket"

aws_session = boto3.Session(aws_access_key_id = AWS_ACCESS_KEY_ID,
                   aws_secret_access_key = AWS_SECRET_ACCESS_KEY)

s3 = boto3.client("s3", region_name = "some region")
s3_resource = boto3.resource("s3")
blog_folder = "blog_1"

paginator = s3.get_paginator("list_objects")

# keep every non-empty object whose key contains the blog folder
file_list = [page for page in paginator.paginate(Bucket=AWS_STORAGE_BUCKET_NAME)\
            .search("Contents[?Size > `0`][]")
            if blog_folder in page["Key"]]


# build the archive in memory instead of on disk
zip_buffer = io.BytesIO()
zf = zipfile.ZipFile(zip_buffer, "w")
zipped_files = []

zip_filename = "download_files.zip"

for key in file_list:

    file_name = key["Key"].split("/")[-1]

    my_bucket = s3_resource.Bucket(AWS_STORAGE_BUCKET_NAME)

    file_obj = my_bucket.Object(key["Key"]).get()

    # the file bodies end up in this list, but they never make it into the
    # zip archive or back into the bucket
    zipped_files.append(file_obj["Body"].read())

Any ideas on how I can solve this? It is much more convenient for users to download a single zipfile than to download each file individually. Any help is greatly appreciated.


Tags: key, in, folder, aws, s3, bucket, access, blog
2 Answers

Python's in-memory zip handling is well suited to this. Here is an example from one of my projects:

import io
import zipfile

# build the zip entirely in memory
zip_buffer = io.BytesIO()
with zipfile.ZipFile(zip_buffer, "a", zipfile.ZIP_DEFLATED, False) as zipper:
    # download the source object and add its bytes to the archive
    infile_object = s3.get_object(Bucket=bucket, Key=object_key)
    infile_content = infile_object['Body'].read()
    zipper.writestr(file_name, infile_content)

# upload the finished archive back to the bucket
s3.put_object(Bucket=bucket, Key=PREFIX + zip_name, Body=zip_buffer.getvalue())
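
Combining this with the bucket listing from the question, a minimal end-to-end sketch could look like the following; the bucket name, prefixes and region are placeholders, and the zipped/ target key is an assumption about the desired layout:

import io
import zipfile
import boto3

s3 = boto3.client("s3", region_name="some region")

bucket = "some bucket"
source_prefix = "blog_1/resources/"                # subfolder holding the files to zip
target_key = "blog_1/zipped/download_files.zip"    # where the finished zip will live

# build the archive entirely in memory
zip_buffer = io.BytesIO()
with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zipper:
    paginator = s3.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=bucket, Prefix=source_prefix):
        for obj in page.get("Contents", []):
            if obj["Size"] == 0:          # skip "folder" placeholder keys
                continue
            body = s3.get_object(Bucket=bucket, Key=obj["Key"])["Body"].read()
            # store each file under its bare name, without the prefix
            zipper.writestr(obj["Key"].split("/")[-1], body)

# write the finished archive to the target subfolder of the same bucket
s3.put_object(Bucket=bucket, Key=target_key, Body=zip_buffer.getvalue())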

So I managed to get this working in my Heroku Flask app; hopefully it helps anyone struggling with the same thing. PS: subfolder = blog folder, so the structure is Bucket/blog_folder/resources and Bucket/blog_folder/zipped.

import tempfile, zipfile, os, shutil, boto3
from flask import send_from_directory

AWS_ACCESS_KEY_ID = "some access key"
AWS_SECRET_ACCESS_KEY = "some secret key"
AWS_STORAGE_BUCKET_NAME = "some bucket"
REGION_NAME = "some region"
# BLOG_FOLDER (the local folder the resources are downloaded into) is defined
# elsewhere in the app



def make_zipfile(output_filename, source_dir):
    relroot = os.path.abspath(os.path.join(source_dir, os.pardir))
    with zipfile.ZipFile(output_filename, "w", zipfile.ZIP_DEFLATED) as zipf:
        for root, dirs, files in os.walk(source_dir):
            # add directory (needed for empty dirs)
            zipf.write(root, os.path.relpath(root, relroot))
            for file in files:
                filename = os.path.join(root, file)
                if os.path.isfile(filename):  # regular files only
                    arcname = os.path.join(os.path.relpath(root, relroot), file)
                    zipf.write(filename, arcname)

aws_session = boto3.Session(aws_access_key_id = AWS_ACCESS_KEY_ID,
                   aws_secret_access_key = AWS_SECRET_ACCESS_KEY)

s3 = aws_session.resource("s3")

current_path = os.getcwd()
temp = tempfile.TemporaryDirectory(suffix="_tmp",  prefix="basic_", dir=current_path)

### AT TOP OF YOUR APP.PY file ^^^^^^^^^^

@app_blog.route("/download_blog_res_zipfile/<int:blog_id>", methods = ["GET", "POST"])
def download_blog_res_zipfile(blog_id):
    
    current_path = os.getcwd()
    
    blog = Blog.query.filter_by(id = blog_id).first()
    print(blog)
    print("DOWNLOAD COUNT!!!")
    print(blog.download_count)
    blog.download_count += 1
    db.session.commit()
    
    
    # clear out any zipped folders left over from previous downloads
    del_folders = os.listdir(os.getcwd() + "/BLOG_ZIPPED_FOLDER")


    for folder in del_folders:

        zipp_path = os.getcwd() + "/BLOG_ZIPPED_FOLDER/" + folder

        print(folder)
        print("DELETING ZIPPING!")

        shutil.rmtree(zipp_path)
        
        
    temp_zipp = tempfile.TemporaryDirectory(suffix="_tmp", prefix="zipping_",
                                            dir=current_path + "/BLOG_ZIPPED_FOLDER")
    
    
    s3 = boto3.client("s3", region_name = REGION_NAME)
    s3_resource = boto3.resource("s3")
    my_bucket = s3_resource.Bucket(AWS_STORAGE_BUCKET_NAME)
    
    
    paginator = s3.get_paginator("list_objects")
    
    folder = "blogs/blog_{}/resources".format(blog.id)
    
    
    # list every non-empty object under the blog's resources folder
    file_list = [page for page in paginator.paginate(Bucket = AWS_STORAGE_BUCKET_NAME)\
                 .search("Contents[?Size > `0`][]")
                 if folder in page["Key"]]
    
    
    # download each resource into the local blog folder before zipping
    for key in file_list:
        
        
        file_name = key["Key"].split("/")[-1]
        
        print(file_name)
        
        file_obj = my_bucket.Object(key["Key"]).get()["Body"]
        
        with open(os.getcwd() + "/" + BLOG_FOLDER + "/" + file_name, "wb") as w:
            
            w.write(file_obj.read())
            
            
    make_zipfile(temp_zipp.name + "/blog_res_{}.zip".format(blog_id),
                 current_path + "/" + BLOG_FOLDER)
    
    # delete the downloaded resource files now that they are in the zip
    try:
        
        for key in file_list:
            
            file_name = key["Key"].split("/")[-1]
            
            file_path = current_path + "/" + BLOG_FOLDER +"/" + file_name
            os.remove(file_path)
            print("TRYY!!")
            print("REMOVED!!!")
            
            
            
    except:
        
        for key in file_list:
            
            file_name = key["Key"].split("/")[-1]
            
            file_path = current_path + "/" + BLOG_FOLDER + "/" + file_name
            os.remove(file_path)
            print("EXCEPT!!!")
            print("REMOVED!!!")

    
    return send_from_directory(temp_zipp.name, "blog_res_{}.zip".format(blog_id),
                               as_attachment = True)
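
The question also asked for the zip to be saved back into a zipped subfolder of the same bucket. If that is needed, the generated file could be uploaded just before the return statement; a minimal sketch, where the target key is assumed from the folder layout described above:

    # optional: push the finished archive back into the bucket's "zipped" subfolder
    zip_path = temp_zipp.name + "/blog_res_{}.zip".format(blog_id)
    s3.upload_file(zip_path, AWS_STORAGE_BUCKET_NAME,
                   "blogs/blog_{}/zipped/blog_res_{}.zip".format(blog_id, blog_id))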
