使用 boto 和 pandas 从 AWS S3 读取 CSV 文件

srcFileName = "gossips.csv"


def on_session_started():
    """Print the contents of ``srcFileName`` from the ``randomdatagossip`` bucket.

    Opens an S3 connection, looks the bucket up without validating it,
    reads the whole key and prints it. The key and the connection are
    closed even when one of the S3 calls raises.
    """
    print("Starting new session.")
    conn = S3Connection()
    try:
        my_bucket = conn.get_bucket("randomdatagossip", validate=False)
        print("Bucket Identified")
        print(my_bucket)
        key = Key(my_bucket, srcFileName)
        key.open()
        try:
            print(key.read())
        finally:
            # Release the HTTP response held by the key.
            key.close()
    finally:
        conn.close()


on_session_started()

def on_session_started2():
    """Stream ``gossips.csv`` from the ``randomdatagossip`` bucket and print each line.

    The object is read through ``smart_open`` using an ``s3://`` URL built
    from the bucket that was looked up, and the S3 connection is closed
    when the function finishes, whether or not reading succeeded.
    """
    print("Starting Second new session.")
    conn = S3Connection()
    try:
        my_bucket = conn.get_bucket("randomdatagossip", validate=False)
        # url = "https://s3.amazonaws.com/randomdatagossip/gossips.csv"
        # urllib2.urlopen(url)

        # Build the URL from the real bucket name: the literal text
        # "my_bucket" is the variable's name, not the bucket's.
        s3_url = "s3://{}/gossips.csv".format(my_bucket.name)
        for line in smart_open.smart_open(s3_url):
            # Function-call form works on both Python 2 and Python 3.
            print(line)
        # data = pd.read_csv(url)
        # print(data)
    finally:
        conn.close()


on_session_started2()

2条回答

网友

1楼 · 编辑于 2024-04-18 04:09:16

这对我有效。

import pandas as pd
import boto3
import io

# Where the CSV object lives in S3.
bucket = 'data-bucket'
s3_file_key = 'data/test.csv'

# Fetch the object through an S3 client.
s3 = boto3.client('s3')
obj = s3.get_object(Bucket=bucket, Key=s3_file_key)

# Read the whole body into memory, then give pandas an in-memory
# bytes buffer to parse.
raw_bytes = obj['Body'].read()
initial_df = pd.read_csv(io.BytesIO(raw_bytes))

网友

2楼 · 编辑于 2024-04-18 04:09:16

下面是我成功从 S3 上的 CSV 文件读取 DataFrame 的做法。

import pandas as pd
import boto3

bucket = "yourbucket"
file_name = "your_file.csv"

# 's3' selects the S3 service; the client is created with the default
# configuration.
s3 = boto3.client('s3')

# Look up the object by bucket name and key (the file name).
obj = s3.get_object(Bucket=bucket, Key=file_name)

# The object's content is under the 'Body' entry of the response;
# pass it straight to pandas.
body = obj['Body']
initial_df = pd.read_csv(body)

相关问题更多 >

编程相关推荐

热门问题

热门文章