Hello, I'm running into the same issue. I started from the "Documentation" link (https://archive.podaac.earthdata.nasa.gov/s3credentialsREADME) under the Direct S3-Access option for my data product of interest (https://podaac.jpl.nasa.gov/dataset/MITgcm_LLC4320_Pre-SWOT_JPL_L4_ACC_SMST_v1.0), and wrote the following script:
```
import argparse
import base64
import json
import os
from getpass import getpass
import boto3
import requests
from botocore.exceptions import NoCredentialsError
# Module-level defaults and service endpoint used by the script below.
# Bucket used when --bucket is not given on the command line.
DEFAULT_BUCKET = "podaac-ops-cumulus-public"
# Key prefix used when --prefix is not given on the command line.
DEFAULT_OBJ_PREFIX = "MITgcm_LLC4320_Pre-SWOT_JPL_L4_ACC_SMST_v1.0"
# Region name passed to boto3.client() when the S3 client is created.
DEFAULT_REGION = "us-west-2"
# URL requested first to obtain the EDL login redirect and again, with the
# accessToken cookie, to fetch the temporary credentials.
S3_CREDENTIALS_ENDPOINT = "https://archive.podaac.earthdata.nasa.gov/s3credentials"
def main(bucket_name, object_prefix, local_download_path):
    """Download every object under a key prefix of an S3 bucket.

    Temporary credentials are obtained from ``retrieve_credentials()`` and
    used to build a boto3 S3 client in ``DEFAULT_REGION``. Each object is
    saved inside ``local_download_path`` under the last path component of
    its key.

    Args:
        bucket_name: Name of the S3 bucket to read from.
        object_prefix: Key prefix selecting the objects to download.
        local_download_path: Local directory that receives the files.

    Errors raised while listing are reported on stdout rather than raised.
    """
    creds = retrieve_credentials()
    # Create an S3 client with the temporary credentials and default region
    s3_client = boto3.client(
        "s3",
        region_name=DEFAULT_REGION,
        aws_access_key_id=creds["accessKeyId"],
        aws_secret_access_key=creds["secretAccessKey"],
        aws_session_token=creds["sessionToken"],
    )
    try:
        # A single list call returns at most 1000 keys; paginate so that
        # larger prefixes are downloaded completely.
        paginator = s3_client.get_paginator("list_objects_v2")
        found_any = False
        for page in paginator.paginate(Bucket=bucket_name, Prefix=object_prefix):
            for item in page.get("Contents", []):
                key = item["Key"]
                file_name = key.split("/")[-1]
                if not file_name:
                    # Keys ending in "/" are folder placeholders: there is
                    # no file name to write to.
                    continue
                found_any = True
                download_file(
                    s3_client,
                    bucket_name,
                    key,
                    os.path.join(local_download_path, file_name),
                )
        if not found_any:
            print("No objects found.")
    except Exception as e:
        print(f"Error listing objects in bucket {bucket_name}: {e}")
def retrieve_credentials():
    """Authenticate with EarthData and return a set of temporary S3 credentials.

    The username and password are read from the ``EARTHDATA_USERNAME`` and
    ``EARTHDATA_PASSWORD`` environment variables. The user is prompted only
    for a value whose variable is not set.

    Returns:
        The value returned by ``get_temporary_aws_credentials()``.
    """
    # Do not pass input()/getpass() as the default of os.environ.get():
    # the default expression is evaluated before the lookup, so the prompt
    # would appear even when the variable is already set.
    username = os.environ.get("EARTHDATA_USERNAME")
    if username is None:
        username = input("Enter EDL username: ")
    password = os.environ.get("EARTHDATA_PASSWORD")
    if password is None:
        password = getpass("Enter EDL password: ")
    edl_login_url = get_edl_login_url()
    s3_cred_url = authenticate_with_edl(
        encode_edl_creds(username, password), edl_login_url
    )
    return get_temporary_aws_credentials(s3_cred_url)
def get_edl_login_url():
    """Return the EDL login form URL announced by the S3 credentials service.

    Sends a GET to ``S3_CREDENTIALS_ENDPOINT`` without following redirects,
    raises on an HTTP error status, and returns the ``location`` header of
    the response.
    """
    redirect = requests.get(S3_CREDENTIALS_ENDPOINT, allow_redirects=False)
    redirect.raise_for_status()
    login_url = redirect.headers["location"]
    return login_url
def encode_edl_creds(username, password):
    """Return ``username:password`` Base64-encoded as an ASCII string."""
    raw_pair = "{}:{}".format(username, password).encode("ascii")
    encoded = base64.b64encode(raw_pair)
    return encoded.decode("ascii")
def authenticate_with_edl(encoded_credentials, edl_login_url):
    """Post the encoded credentials to the EDL login form.

    The request carries ``S3_CREDENTIALS_ENDPOINT`` as its ``Origin`` header
    and does not follow redirects. Returns the ``location`` header of the
    response, i.e. the S3 credentials URL.
    """
    form_fields = {"credentials": encoded_credentials}
    extra_headers = {"Origin": S3_CREDENTIALS_ENDPOINT}
    login_response = requests.post(
        edl_login_url,
        data=form_fields,
        headers=extra_headers,
        allow_redirects=False,
    )
    login_response.raise_for_status()
    return login_response.headers["location"]
def get_temporary_aws_credentials(s3_cred_url):
    """Make authenticated GET request and return temporary AWS credentials.

    Args:
        s3_cred_url: URL returned by the EDL login step.

    Returns:
        The JSON body of the credentials response, decoded with
        ``json.loads``.

    Raises:
        KeyError: If the response for ``s3_cred_url`` carries no
            ``accessToken`` cookie.
    """
    print(f"Obtaining S3 access token from {s3_cred_url}")
    response = requests.get(s3_cred_url, allow_redirects=False)
    # Check the status here as the other request helpers do, so an HTTP
    # error is reported as such instead of as a missing cookie.
    response.raise_for_status()
    try:
        access_token = response.cookies["accessToken"]
    except KeyError as err:
        # Same exception type as before, with a message that says what
        # went wrong.
        raise KeyError(
            f"No accessToken cookie in the response from {s3_cred_url}; "
            "the EDL login may have failed"
        ) from err
    final_response = requests.get(
        S3_CREDENTIALS_ENDPOINT,
        cookies={"accessToken": access_token},
    )
    final_response.raise_for_status()
    return json.loads(final_response.content)
def download_file(s3_client, bucket, key, download_path):
    """Fetch one object from S3 and report the outcome on stdout.

    Calls ``s3_client.download_file(bucket, key, download_path)``. Failures
    are printed, not raised.
    """
    try:
        s3_client.download_file(bucket, key, download_path)
        outcome = f"File {key} downloaded to {download_path}"
    except NoCredentialsError:
        outcome = "Credentials are not available or invalid"
    except Exception as err:
        outcome = f"Failed to download {key}: {err}"
    print(outcome)
if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to main().
    cli = argparse.ArgumentParser(
        description="Download files from AWS S3 with temporary credentials"
    )
    # (flag, keyword arguments) pairs, registered in this order.
    option_specs = (
        (
            "--bucket",
            {
                "default": DEFAULT_BUCKET,
                "help": "S3 bucket name, defaults to a specific PODAAC dataset",
            },
        ),
        (
            "--prefix",
            {
                "help": "Object prefix for files to download",
                "default": DEFAULT_OBJ_PREFIX,
            },
        ),
        (
            "--download-path",
            {"required": True, "help": "Local path to download files"},
        ),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    options = cli.parse_args()
    main(options.bucket, options.prefix, options.download_path)
```
The following is a sample of my Conda environment:
```
python 3.10.12 hd12c33a_0_cpython conda-forge
boto3 1.29.1 py310h06a4308_0 anaconda
aws-c-auth 0.6.26 h987a71b_2 conda-forge
aws-c-cal 0.5.21 h48707d8_2 conda-forge
aws-c-common 0.8.14 h0b41bf4_0 conda-forge
aws-c-compression 0.2.16 h03acc5a_5 conda-forge
aws-c-event-stream 0.2.20 h00877a2_4 conda-forge
aws-c-http 0.7.6 hf342b9f_0 conda-forge
aws-c-io 0.13.19 h5b20300_3 conda-forge
aws-c-mqtt 0.8.6 hc4349f7_12 conda-forge
aws-c-s3 0.2.7 h909e904_1 conda-forge
aws-c-sdkutils 0.1.9 h03acc5a_0 conda-forge
aws-checksums 0.1.14 h03acc5a_5 conda-forge
aws-crt-cpp 0.19.8 hf7fbfca_12 conda-forge
aws-sdk-cpp 1.10.57 h17c43bd_8 conda-forge
requests 2.31.0 py310h06a4308_0 anaconda
requests-oauthlib 1.3.0 py_0 anaconda
s3transfer 0.7.0 py310h06a4308_0 anaconda
```
```
import argparse
import base64
import json
import os
from getpass import getpass
import boto3
import requests
from botocore.exceptions import NoCredentialsError
# Module-level defaults and service endpoint used by the script below.
# Bucket used when --bucket is not given on the command line.
DEFAULT_BUCKET = "podaac-ops-cumulus-public"
# Key prefix used when --prefix is not given on the command line.
DEFAULT_OBJ_PREFIX = "MITgcm_LLC4320_Pre-SWOT_JPL_L4_ACC_SMST_v1.0"
# Region name passed to boto3.client() when the S3 client is created.
DEFAULT_REGION = "us-west-2"
# URL requested first to obtain the EDL login redirect and again, with the
# accessToken cookie, to fetch the temporary credentials.
S3_CREDENTIALS_ENDPOINT = "https://archive.podaac.earthdata.nasa.gov/s3credentials"
def main(bucket_name, object_prefix, local_download_path):
    """Download every object under a key prefix of an S3 bucket.

    Temporary credentials are obtained from ``retrieve_credentials()`` and
    used to build a boto3 S3 client in ``DEFAULT_REGION``. Each object is
    saved inside ``local_download_path`` under the last path component of
    its key.

    Args:
        bucket_name: Name of the S3 bucket to read from.
        object_prefix: Key prefix selecting the objects to download.
        local_download_path: Local directory that receives the files.

    Errors raised while listing are reported on stdout rather than raised.
    """
    creds = retrieve_credentials()
    # Create an S3 client with the temporary credentials and default region
    s3_client = boto3.client(
        "s3",
        region_name=DEFAULT_REGION,
        aws_access_key_id=creds["accessKeyId"],
        aws_secret_access_key=creds["secretAccessKey"],
        aws_session_token=creds["sessionToken"],
    )
    try:
        # A single list call returns at most 1000 keys; paginate so that
        # larger prefixes are downloaded completely.
        paginator = s3_client.get_paginator("list_objects_v2")
        found_any = False
        for page in paginator.paginate(Bucket=bucket_name, Prefix=object_prefix):
            for item in page.get("Contents", []):
                key = item["Key"]
                file_name = key.split("/")[-1]
                if not file_name:
                    # Keys ending in "/" are folder placeholders: there is
                    # no file name to write to.
                    continue
                found_any = True
                download_file(
                    s3_client,
                    bucket_name,
                    key,
                    os.path.join(local_download_path, file_name),
                )
        if not found_any:
            print("No objects found.")
    except Exception as e:
        print(f"Error listing objects in bucket {bucket_name}: {e}")
def retrieve_credentials():
    """Authenticate with EarthData and return a set of temporary S3 credentials.

    The username and password are read from the ``EARTHDATA_USERNAME`` and
    ``EARTHDATA_PASSWORD`` environment variables. The user is prompted only
    for a value whose variable is not set.

    Returns:
        The value returned by ``get_temporary_aws_credentials()``.
    """
    # Do not pass input()/getpass() as the default of os.environ.get():
    # the default expression is evaluated before the lookup, so the prompt
    # would appear even when the variable is already set.
    username = os.environ.get("EARTHDATA_USERNAME")
    if username is None:
        username = input("Enter EDL username: ")
    password = os.environ.get("EARTHDATA_PASSWORD")
    if password is None:
        password = getpass("Enter EDL password: ")
    edl_login_url = get_edl_login_url()
    s3_cred_url = authenticate_with_edl(
        encode_edl_creds(username, password), edl_login_url
    )
    return get_temporary_aws_credentials(s3_cred_url)
def get_edl_login_url():
    """Return the EDL login form URL announced by the S3 credentials service.

    Sends a GET to ``S3_CREDENTIALS_ENDPOINT`` without following redirects,
    raises on an HTTP error status, and returns the ``location`` header of
    the response.
    """
    redirect = requests.get(S3_CREDENTIALS_ENDPOINT, allow_redirects=False)
    redirect.raise_for_status()
    login_url = redirect.headers["location"]
    return login_url
def encode_edl_creds(username, password):
    """Return ``username:password`` Base64-encoded as an ASCII string."""
    raw_pair = "{}:{}".format(username, password).encode("ascii")
    encoded = base64.b64encode(raw_pair)
    return encoded.decode("ascii")
def authenticate_with_edl(encoded_credentials, edl_login_url):
    """Post the encoded credentials to the EDL login form.

    The request carries ``S3_CREDENTIALS_ENDPOINT`` as its ``Origin`` header
    and does not follow redirects. Returns the ``location`` header of the
    response, i.e. the S3 credentials URL.
    """
    form_fields = {"credentials": encoded_credentials}
    extra_headers = {"Origin": S3_CREDENTIALS_ENDPOINT}
    login_response = requests.post(
        edl_login_url,
        data=form_fields,
        headers=extra_headers,
        allow_redirects=False,
    )
    login_response.raise_for_status()
    return login_response.headers["location"]
def get_temporary_aws_credentials(s3_cred_url):
    """Make authenticated GET request and return temporary AWS credentials.

    Args:
        s3_cred_url: URL returned by the EDL login step.

    Returns:
        The JSON body of the credentials response, decoded with
        ``json.loads``.

    Raises:
        KeyError: If the response for ``s3_cred_url`` carries no
            ``accessToken`` cookie.
    """
    print(f"Obtaining S3 access token from {s3_cred_url}")
    response = requests.get(s3_cred_url, allow_redirects=False)
    # Check the status here as the other request helpers do, so an HTTP
    # error is reported as such instead of as a missing cookie.
    response.raise_for_status()
    try:
        access_token = response.cookies["accessToken"]
    except KeyError as err:
        # Same exception type as before, with a message that says what
        # went wrong.
        raise KeyError(
            f"No accessToken cookie in the response from {s3_cred_url}; "
            "the EDL login may have failed"
        ) from err
    final_response = requests.get(
        S3_CREDENTIALS_ENDPOINT,
        cookies={"accessToken": access_token},
    )
    final_response.raise_for_status()
    return json.loads(final_response.content)
def download_file(s3_client, bucket, key, download_path):
    """Fetch one object from S3 and report the outcome on stdout.

    Calls ``s3_client.download_file(bucket, key, download_path)``. Failures
    are printed, not raised.
    """
    try:
        s3_client.download_file(bucket, key, download_path)
        outcome = f"File {key} downloaded to {download_path}"
    except NoCredentialsError:
        outcome = "Credentials are not available or invalid"
    except Exception as err:
        outcome = f"Failed to download {key}: {err}"
    print(outcome)
if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to main().
    cli = argparse.ArgumentParser(
        description="Download files from AWS S3 with temporary credentials"
    )
    # (flag, keyword arguments) pairs, registered in this order.
    option_specs = (
        (
            "--bucket",
            {
                "default": DEFAULT_BUCKET,
                "help": "S3 bucket name, defaults to a specific PODAAC dataset",
            },
        ),
        (
            "--prefix",
            {
                "help": "Object prefix for files to download",
                "default": DEFAULT_OBJ_PREFIX,
            },
        ),
        (
            "--download-path",
            {"required": True, "help": "Local path to download files"},
        ),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    options = cli.parse_args()
    main(options.bucket, options.prefix, options.download_path)
```
The following is a sample of my Conda environment:
```
python 3.10.12 hd12c33a_0_cpython conda-forge
boto3 1.29.1 py310h06a4308_0 anaconda
aws-c-auth 0.6.26 h987a71b_2 conda-forge
aws-c-cal 0.5.21 h48707d8_2 conda-forge
aws-c-common 0.8.14 h0b41bf4_0 conda-forge
aws-c-compression 0.2.16 h03acc5a_5 conda-forge
aws-c-event-stream 0.2.20 h00877a2_4 conda-forge
aws-c-http 0.7.6 hf342b9f_0 conda-forge
aws-c-io 0.13.19 h5b20300_3 conda-forge
aws-c-mqtt 0.8.6 hc4349f7_12 conda-forge
aws-c-s3 0.2.7 h909e904_1 conda-forge
aws-c-sdkutils 0.1.9 h03acc5a_0 conda-forge
aws-checksums 0.1.14 h03acc5a_5 conda-forge
aws-crt-cpp 0.19.8 hf7fbfca_12 conda-forge
aws-sdk-cpp 1.10.57 h17c43bd_8 conda-forge
requests 2.31.0 py310h06a4308_0 anaconda
requests-oauthlib 1.3.0 py_0 anaconda
s3transfer 0.7.0 py310h06a4308_0 anaconda
```
Statistics: Posted by egoh — Tue Feb 13, 2024 10:41 pm America/New_York