Setup: MyISAM table with 8 columns... and there's 1 index on a column. No foreign keys.
Data: Inserting 500m+ records via Pymysql / LOAD data
Server: Mariadb 10.08 on a VPS with 1gb RAM (yeah I know.. client's server, not mine)
Schema:
drop table if exists profiles;
create table profiles (
id bigint auto_increment primary key,
first_name varchar(128) NULL,
last_name varchar(128) NULL,
liid varchar(256) NOT NULL,
city varchar(128) NULL,
state varchar(128) NULL,
country varchar(128) NULL,
email varchar(512) null,
phone varchar(512) null
) engine=MyISAM auto_increment=0 default CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
create index liid_idx on profiles(liid);
I'm loading 1m records at a time. Here's some code:
connection = None
cursor = None
def execute_sql( sql ):
global cursor
cursor.execute( sql )
def open_db(mysql_user, mysql_passwd, mysql_host, mysql_db):
global connection, cursor
connection = MySQLdb.Connect(host=mysql_host, user=mysql_user, passwd=mysql_passwd, db=mysql_db, local_infile=True)
cursor = connection.cursor()
########################################################################################
# https://stackoverflow.com/questions/2463602/mysql-load-data-infile-acceleration
#
# note: this must be run as root
########################################################################################
cursor.execute("set autocommit = 0;");
cursor.execute("set unique_checks = 0;");
cursor.execute("set foreign_key_checks = 0;");
cursor.execute("set sql_log_bin = 0;");
def close_db():
global connection
connection.commit()
open_db(
mysql_user=mysql_user,
mysql_passwd=mysql_passwd,
mysql_host=mysql_host,
mysql_db=mysql_db
)
for file in [....]:
start = time.time()
execute_sql(sql='''
LOAD DATA LOCAL INFILE '{}' INTO TABLE profile FIELDS TERMINATED by '\t' ENCLOSED BY '"' (@first_name, @last_name, liid, @city, @state, @country, @email, @phone) SET first_name = NULLIF(@first_name,''), first_name = NULLIF(@first_name,''), last_name = NULLIF(@last_name,''), city = NULLIF(@city,''), state = NULLIF(@state,''), country = NULLIF(@country,''), email = NULLIF(@email,''), phone = NULLIF(@phone,'');
'''.format(file))
end = time.time()
duration= end-start
print("[mysql] duration: {} seconds".format(int(duration))
close_db()
It's getting slower and slower.
Any ideas?
Results:
[mysql] duration:26 seconds
[mysql] duration:63 seconds
[mysql] duration:73 seconds
[mysql] duration:75 seconds
[mysql] duration:81 seconds
[mysql] duration:91 seconds
[mysql] duration:85 seconds
[mysql] duration:94 seconds
[mysql] duration:98 seconds
[mysql] duration:96 seconds
[mysql] duration:107 seconds
[mysql] duration:122 seconds
[mysql] duration:141 seconds
[mysql] duration:169 seconds
[mysql] duration:255 seconds
[mysql] duration:356 seconds
[mysql] duration:822 seconds
[mysql] duration:1590 seconds
[mysql] duration:2454 seconds
[mysql] duration:3800 seconds
[mysql] duration:4080 seconds
[mysql] duration:4459 seconds
[mysql] duration:4879 seconds
[mysql] duration:4638 seconds
[mysql] duration:4994 seconds
[mysql] duration:5749 seconds
[mysql] duration:5047 seconds
[mysql] duration:5641 seconds
[mysql] duration:6207 seconds
[mysql] duration:6325 seconds
[mysql] duration:6772 seconds