I'm dynamically generating queries from 11 different tables in SQL Server and storing that into S3 CSV file.
However, when a column contains NULLs, pandas converts the integer values to float (NaN forces the upcast), so the CSV contains values like "123.0" and the COPY command rejects them.
I really need to avoid that. Is there an option for that?
# Export each configured table from SQL Server to a dated CSV object in S3.
# NOTE(review): loop-body indentation was restored; the original paste was flat.
for table in table_list:
    # Pick the full-load or incremental ("active") query for this table.
    if args.load_type == "full":
        query_load = table["query_full"]
    else:
        query_load = table["query_active"]

    df = pd.read_sql_query(query_load, sql_server_conn)

    # FIX: integer columns containing NULL come back as float64 (NaN forces
    # the upcast), so to_csv writes "123.0" and the downstream COPY fails.
    # convert_dtypes() moves them to pandas' nullable Int64 dtype, which
    # serializes as plain integers with an empty cell for NULL.
    df = df.convert_dtypes()

    # Scrub characters that would break the unquoted CSV layout.
    # (Numeric columns are unaffected; replace only touches string cells.)
    df = df.replace(",", " ", regex=True)
    df = df.replace("\n", " ", regex=True)

    # Stamp every row with the extraction timestamp for lineage.
    df = df.assign(extraction_dttm=currentdate)

    csv_buffer = StringIO()
    df.to_csv(csv_buffer, index=False)

    # Object key: <DATALAKE_PATH>/<lake_table_name>/<lake_table_name>_YYYYMMDD.csv
    folder_name = "{}".format(table["lake_table_name"])
    file_name = "{}_{}.csv".format(table["lake_table_name"], currentdate.strftime("%Y%m%d"))
    full_path_to_file = DATALAKE_PATH + "/" + folder_name + "/" + file_name

    s3_resource.Object(datalake_bucket, full_path_to_file).put(Body=csv_buffer.getvalue())