1

I am trying to achieve this through a Python program. I am able to pull logs from API to a local drive. However I am struggling to copy them to AWS S3. I appreciate your help on this. I am using the code below to copy files to a local drive

''''''
import requests
import requests.auth
from bs4 import BeautifulSoup
import os.path
from pathlib import Path
import os
import boto3

CLIENT_ID = "xxxxxxxx"
CLIENT_SECRET = "xxxxxxxx"
TOKEN_URL = "https://xxxxxxxx/dwsso/oauth2/access_token"
REDIRECT_URI = "https://xxxxxxxx/dwsso/oauth2/callback"


def get_token(code=None):
    """Fetch an OAuth2 access token via the client_credentials grant.

    Parameters
    ----------
    code : str, optional
        Authorization code; unused by the client_credentials grant but
        forwarded unchanged for compatibility with other grant types.

    Returns
    -------
    str
        The bearer access token from the token endpoint's JSON response.

    Raises
    ------
    requests.HTTPError
        If the token endpoint returns a non-2xx status.
    """
    client_auth = requests.auth.HTTPBasicAuth(CLIENT_ID, CLIENT_SECRET)
    post_data = {"grant_type": "client_credentials",
                 "code": code,
                 "redirect_uri": REDIRECT_URI}
    # timeout prevents hanging forever on an unresponsive endpoint.
    response = requests.post(TOKEN_URL,
                             auth=client_auth,
                             data=post_data,
                             timeout=30)
    # Fail fast with a clear HTTP error instead of an opaque KeyError
    # from the json lookup below when authentication is rejected.
    response.raise_for_status()

    token_json = response.json()
    return token_json["access_token"]

# NOTE(review): runs at import time — loading this module performs a
# live network round-trip to the token endpoint.
my_token=get_token()



# Headers sent with every API request. The bearer token is fetched once
# and never refreshed; long runs may fail after it expires — TODO confirm
# token lifetime.
headersAPI = {
    'accept': 'application/json',
    'Authorization': 'Bearer '+ my_token,
}

# Paging query-string parameters applied to every listing/download request.
params = (
    ('offset', '0'),
    ('limit', '20'),
)

def download_files(urls):
    """Download every log file linked from the current listing page.

    Relies on module-level state: ``soup`` (a BeautifulSoup parse of the
    directory listing, set by the caller loop), plus ``headersAPI`` and
    ``params`` for each request.

    Parameters
    ----------
    urls : str
        Base URL ("https://<host>") each relative href is appended to.
    """
    for a in soup.find_all('a', href=True):
        url = urls
        a = a['href']
        logs_url = url + a
        r = requests.get(logs_url, headers=headersAPI, params=params, verify=True, stream=True)
        # Directory name derived from the URL's host component,
        # e.g. "https://host.example.com/..." -> "downloadhost".
        save_path = "download" + ((url.split('/')[2]).split('.')[0])
        Path(save_path).mkdir(parents=True, exist_ok=True)
        lname = logs_url.split('/')[-1]
        completeName = os.path.join(save_path, lname)
        # BUG FIX: the original print() was missing its closing
        # parenthesis (SyntaxError); message typo corrected as well.
        print("Downloading file from %s domain: %s" % (save_path, lname))
        # Stream the body to disk in chunks — the original read the whole
        # response into memory via r.content (defeating stream=True) and
        # never closed the file handle.
        with open(completeName, 'wb') as fh:
            for chunk in r.iter_content(chunk_size=8192):
                fh.write(chunk)


# WebDAV directory listings to crawl for downloadable log files.
url_lst = [ "https://xxxxxxxx/webdav/Sites/Logs/",
            "https://xxxxxxxx/webdav/Sites/Logs/",
            "https://xxxxxxxx/Sites/Logs/" ]

for i in url_lst:
    # BUG FIX: the original referenced an undefined name ``myUrl``
    # (NameError); the listing being fetched is the current entry ``i``.
    response = requests.get(i, headers=headersAPI, params=params, verify=True, stream=True)
    # download_files() reads this module-level ``soup`` to find hrefs.
    soup = BeautifulSoup(response.content, 'html.parser')
    f_url = i.split('/')[2]        # host component, e.g. "xxxxxxxx"
    url = "https://" + f_url
    download_files(url)

''''''

kris
  • 11
  • 2

1 Answer

1

You can upload to S3 buckets using Boto3:

import boto3
s3 = boto3.resource('s3')
s3.Bucket('mybucket').upload_file('/tmp/hello.txt', 'hello.txt')

See Upload to S3 Bucket documentation here.

And here for installation and config of Boto3.

Thaer A
  • 1,570
  • 1
  • 5
  • 10