FileNotFoundError when trying to upload a local file to S3 via SageMaker
I'd like to start by saying I'm fairly new to AWS. As the title says, I want to upload a folder from my local machine to Amazon S3, using JupyterLab inside SageMaker Studio. I can do this manually by clicking the upload icon in JupyterLab, but I'd like to do it with the following code:
import sagemaker
from sagemaker.tuner import (
    IntegerParameter,
    CategoricalParameter,
    ContinuousParameter,
    HyperparameterTuner,
)

sagemaker_session = sagemaker.Session()
region = sagemaker_session.boto_region_name
bucket = sagemaker_session.default_bucket()
prefix = "sagemaker/my-first-proj"
role = sagemaker.get_execution_role()

local_dir = "/Users/tomi/DevProjects/WeThePeople/datasets"
inputs = sagemaker_session.upload_data(path=local_dir, bucket=bucket, key_prefix=prefix)
When I run the code above, I get this error:
FileNotFoundError Traceback (most recent call last)
Cell In[2], line 2
1 local_dir = "/Users/tomi/DevProjects/WeThePeople/datasets"
----> 2 inputs = sagemaker_session.upload_data(path=local_dir, bucket=bucket, key_prefix=prefix)
3 print("input spec (in this case, just an S3 path): {}".format(inputs))
File /opt/conda/lib/python3.10/site-packages/sagemaker/session.py:400, in Session.upload_data(self, path, bucket, key_prefix, extra_args)
397 s3 = self.s3_resource
399 for local_path, s3_key in files:
--> 400 s3.Object(bucket, s3_key).upload_file(local_path, ExtraArgs=extra_args)
402 s3_uri = "s3://{}/{}".format(bucket, key_prefix)
403 # If a specific file was used as input (instead of a directory), we return the full S3 key
404 # of the uploaded object. This prevents unintentionally using other files under the same
405 # prefix during training.
File /opt/conda/lib/python3.10/site-packages/boto3/s3/inject.py:318, in object_upload_file(self, Filename, ExtraArgs, Callback, Config)
287 def object_upload_file(
288 self, Filename, ExtraArgs=None, Callback=None, Config=None
289 ):
290 """Upload a file to an S3 object.
291
292 Usage::
(...)
316 transfer.
317 """
--> 318 return self.meta.client.upload_file(
319 Filename=Filename,
320 Bucket=self.bucket_name,
321 Key=self.key,
322 ExtraArgs=ExtraArgs,
323 Callback=Callback,
324 Config=Config,
325 )
File /opt/conda/lib/python3.10/site-packages/boto3/s3/inject.py:143, in upload_file(self, Filename, Bucket, Key, ExtraArgs, Callback, Config)
108 """Upload a file to an S3 object.
109
110 Usage::
(...)
140 transfer.
141 """
142 with S3Transfer(self, Config) as transfer:
--> 143 return transfer.upload_file(
144 filename=Filename,
145 bucket=Bucket,
146 key=Key,
147 extra_args=ExtraArgs,
148 callback=Callback,
149 )
File /opt/conda/lib/python3.10/site-packages/boto3/s3/transfer.py:292, in S3Transfer.upload_file(self, filename, bucket, key, callback, extra_args)
288 future = self._manager.upload(
289 filename, bucket, key, extra_args, subscribers
290 )
291 try:
--> 292 future.result()
293 # If a client error was raised, add the backwards compatibility layer
294 # that raises a S3UploadFailedError. These specific errors were only
295 # ever thrown for upload_parts but now can be thrown for any related
296 # client error.
297 except ClientError as e:
File /opt/conda/lib/python3.10/site-packages/s3transfer/futures.py:103, in TransferFuture.result(self)
98 def result(self):
99 try:
100 # Usually the result() method blocks until the transfer is done,
101 # however if a KeyboardInterrupt is raised we want want to exit
102 # out of this and propagate the exception.
--> 103 return self._coordinator.result()
104 except KeyboardInterrupt as e:
105 self.cancel()
File /opt/conda/lib/python3.10/site-packages/s3transfer/futures.py:266, in TransferCoordinator.result(self)
263 # Once done waiting, raise an exception if present or return the
264 # final result.
265 if self._exception:
--> 266 raise self._exception
267 return self._result
File /opt/conda/lib/python3.10/site-packages/s3transfer/tasks.py:269, in SubmissionTask._main(self, transfer_future, **kwargs)
265 self._transfer_coordinator.set_status_to_running()
267 # Call the submit method to start submitting tasks to execute the
268 # transfer.
--> 269 self._submit(transfer_future=transfer_future, **kwargs)
270 except BaseException as e:
271 # If there was an exception raised during the submission of task
272 # there is a chance that the final task that signals if a transfer
(...)
281
282 # Set the exception, that caused the process to fail.
283 self._log_and_set_exception(e)
File /opt/conda/lib/python3.10/site-packages/s3transfer/upload.py:591, in UploadSubmissionTask._submit(self, client, config, osutil, request_executor, transfer_future, bandwidth_limiter)
589 # Determine the size if it was not provided
590 if transfer_future.meta.size is None:
--> 591 upload_input_manager.provide_transfer_size(transfer_future)
593 # Do a multipart upload if needed, otherwise do a regular put object.
594 if not upload_input_manager.requires_multipart_upload(
595 transfer_future, config
596 ):
File /opt/conda/lib/python3.10/site-packages/s3transfer/upload.py:244, in UploadFilenameInputManager.provide_transfer_size(self, transfer_future)
242 def provide_transfer_size(self, transfer_future):
243 transfer_future.meta.provide_transfer_size(
--> 244 self._osutil.get_file_size(transfer_future.meta.call_args.fileobj)
245 )
File /opt/conda/lib/python3.10/site-packages/s3transfer/utils.py:247, in OSUtils.get_file_size(self, filename)
246 def get_file_size(self, filename):
--> 247 return os.path.getsize(filename)
File /opt/conda/lib/python3.10/genericpath.py:50, in getsize(filename)
48 def getsize(filename):
49 """Return the size of a file, reported by os.stat()."""
---> 50 return os.stat(filename).st_size
FileNotFoundError: [Errno 2] No such file or directory: '/Users/tomi/DevProjects/WeThePeople/datasets'
However, I'm quite sure this path exists on my machine. If I open a terminal, I can access the directory just fine, as shown below:
>>> (WeThePeople) tomi@MacBook-Pro-4 datasets % pwd
/Users/tomi/DevProjects/WeThePeople/datasets
I thought this might be an IAM permissions issue on AWS, but the SageMaker user profile I'm using already has the AmazonS3FullAccess and AmazonSageMakerFullAccess policies attached. I'm not sure whether this is relevant, but I thought it was worth mentioning.
My question is: what could be causing this, and how do I fix it? Could it be some other permission setting? Is there anything else I haven't checked?
1 Answer
Your Studio notebook is not the same machine as your local computer.
Looking at the command you shared:
>>> (WeThePeople) tomi@MacBook-Pro-4 datasets % pwd
/Users/tomi/DevProjects/WeThePeople/datasets
that looks like it was run on your Mac. Studio cannot access files that live on your local Mac. If you like, you can use the upload button you mentioned to copy the files into Studio first, and then upload them from SageMaker Studio to S3, making sure the code references the correct path inside Studio.
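As a rough sketch, assuming you used the upload button to place the folder in your Studio home directory as datasets/ (that path is an assumption; use whatever path the upload actually created), the call would look something like this:

import sagemaker

sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()
prefix = "sagemaker/my-first-proj"

# Path on the Studio (JupyterLab) filesystem, not on your Mac.
# Assumes the folder was uploaded next to the notebook's working directory.
local_dir = "datasets"

inputs = sagemaker_session.upload_data(path=local_dir, bucket=bucket, key_prefix=prefix)
print(inputs)  # prints the S3 URI, e.g. s3://<bucket>/sagemaker/my-first-proj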