# polardbxengine/mysql-test/suite/xengine_stress/t/load_generator.py
#
# 436 lines
# 14 KiB
# Python

import cStringIO
import hashlib
import MySQLdb
import os
import random
import signal
import sys
import threading
import time
import string
# Alphabet that generated messages are drawn from: ASCII letters and digits.
# ``string.ascii_letters`` is used rather than ``string.letters`` because the
# latter is locale-dependent (and only exists on Python 2); with the default
# "C" locale both spell the same 52 characters, so generated data is unchanged.
CHARS = string.ascii_letters + string.digits
def sha1(x):
    """Return the hexadecimal SHA-1 digest of ``str(x)``.

    Byte strings are hashed as-is; any other value is converted with
    ``str()`` first.  On interpreters where ``str`` is a text type the text
    is encoded as UTF-8 before hashing, because ``hashlib`` only accepts
    bytes.  On Python 2 (where ``str`` is ``bytes``) the result is exactly
    ``hashlib.sha1(str(x)).hexdigest()``.
    """
    data = x if isinstance(x, bytes) else str(x)
    if not isinstance(data, bytes):
        data = data.encode('utf-8')
    return hashlib.sha1(data).hexdigest()
# Should be deterministic given an idx
def get_msg(do_blob, idx):
    """Return the pseudo-random message associated with row index ``idx``.

    The message length is drawn from 1..24000 when ``do_blob`` is true and
    from 1..255 otherwise.  About half of the messages are made of
    independently chosen characters from ``CHARS``; the other half repeat a
    single character (highly compressible).

    A private ``random.Random(idx)`` is used instead of reseeding the
    module-level generator: this function is called from several threads at
    once, and sharing one generator would let another thread's draws
    interleave with ours, breaking the "same idx -> same message" contract.
    ``random.Random(idx)`` yields the same sequence as ``random.seed(idx)``
    followed by the module-level functions.
    """
    rng = random.Random(idx)
    if do_blob:
        blob_length = rng.randint(1, 24000)
    else:
        blob_length = rng.randint(1, 255)
    if rng.randint(1, 2) == 1:
        # blob that cannot be compressed (well, compresses to 85% of original size)
        return ''.join([rng.choice(CHARS) for _ in range(blob_length)])
    # blob that can be compressed
    return rng.choice(CHARS) * blob_length
class PopulateWorker(threading.Thread):
    """Thread that inserts the rows for indexes ``start_id .. end_id - 1`` into t1.

    Row ``i`` is stored with ``id = i + 1`` and the message produced by
    ``get_msg(do_blob, i)``; ``do_blob`` is read from the module globals set
    up by the ``__main__`` section.  The thread starts itself from the
    constructor.  After it finishes, ``exception`` holds the exception that
    aborted the run, or ``None`` on success.
    """

    def __init__(self, con, start_id, end_id, i):
        """Store the connection and id range, open the log and start the thread.

        con      -- MySQLdb connection owned (and eventually closed) by this thread
        start_id -- first row index to insert (inclusive)
        end_id   -- last row index to insert (exclusive)
        i        -- worker number, used in the log file name and messages
        """
        threading.Thread.__init__(self)
        self.con = con
        con.autocommit(False)
        self.log = open('/%s/populate-%d.log' % (LG_TMP_DIR, i), 'a')
        self.num = i
        self.start_id = start_id
        self.end_id = end_id
        self.exception = None
        self.start_time = time.time()
        # Must stay last: run() relies on every attribute assigned above.
        self.start()

    def run(self):
        """Thread body: populate, record any failure, then always clean up."""
        try:
            self.runme()
            self.log.write("ok\n")
        except Exception as e:
            self.exception = e
            try:
                cursor = self.con.cursor()
                # Pass the text as a query parameter so the driver quotes it;
                # error messages routinely contain quote characters, which
                # would break a statement built by string formatting.
                cursor.execute("INSERT INTO errors VALUES(%s)", (str(e),))
            except MySQLdb.Error as e2:
                self.log.write("caught while inserting error (%s)\n" % e2)
            self.log.write("caught (%s)\n" % e)
        finally:
            self.finish()

    def finish(self):
        """Log the elapsed time, close the log, commit and close the connection."""
        self.log.write("total time: %.2f s\n" % (time.time() - self.start_time))
        self.log.close()
        try:
            # Commits the tail of the inserts (and the error row, if any).
            self.con.commit()
        finally:
            # Release the connection even when the final commit fails.
            self.con.close()

    def runme(self):
        """Insert every row of the assigned range, committing periodically."""
        self.log.write("populate thread-%d started\n" % self.num)
        cur = self.con.cursor()
        i = self.start_id
        while i < self.end_id:
            msg = get_msg(do_blob, i)
            stmt = (
                "\nINSERT INTO t1(id,msg_prefix,msg,msg_length,msg_checksum)"
                " VALUES (%d,'%s','%s',%d,'%s')\n"
            ) % (i + 1, msg[0:255], msg, len(msg), sha1(msg))
            cur.execute(stmt)
            # Commit whenever the row index is a multiple of 100.
            if i % 100 == 0:
                self.con.commit()
            i += 1
def populate_table(con, num_records_before, do_blob, log):
    """Fill t1 with ``num_records_before`` rows using ten PopulateWorker threads.

    The row indexes are split into ten equal ranges, one per worker thread
    (each worker gets its own connection built from the module-level
    ``user``/``host``/``port``/``db`` settings).  The remainder left over by
    the integer division is inserted here through ``con``.

    con                -- MySQLdb connection used for the remainder rows
    num_records_before -- total number of rows to create
    do_blob            -- passed to get_msg() for the remainder rows
    log                -- open, writable log file

    Returns True when every worker finished without an exception, False
    otherwise.  All workers are joined before returning, and every failed
    worker is reported in ``log`` (not only the first one).
    """
    con.autocommit(False)
    cur = con.cursor()
    workers = []
    N = num_records_before // 10
    start_id = 0
    for i in range(10):
        w = PopulateWorker(MySQLdb.connect(user=user, host=host, port=port, db=db),
                           start_id, start_id + N, i)
        start_id += N
        workers.append(w)
    # Rows that did not divide evenly among the workers.
    for i in range(start_id, num_records_before):
        msg = get_msg(do_blob, i)
        stmt = (
            "\nINSERT INTO t1(id,msg_prefix,msg,msg_length,msg_checksum)"
            " VALUES (%d,'%s','%s',%d,'%s')\n"
        ) % (i + 1, msg[0:255], msg, len(msg), sha1(msg))
        cur.execute(stmt)
    con.commit()
    all_ok = True
    for w in workers:
        w.join()
        if w.exception:
            log.write("populater thead %d threw an exception\n" % w.num)
            all_ok = False
    return all_ok
def get_update(msg, idx):
    """Build the UPDATE statement that rewrites row ``idx`` of t1 from ``msg``.

    The statement sets the 255-character prefix, the full message, its
    length and its SHA-1 checksum.
    """
    prefix = msg[0:255]
    digest = sha1(msg)
    template = (
        "\nUPDATE t1 SET msg_prefix='%s',msg='%s',msg_length=%d,"
        "msg_checksum='%s' WHERE id=%d"
    )
    return template % (prefix, msg, len(msg), digest, idx)
def get_insert_on_dup(msg, idx):
    """Build an INSERT ... ON DUPLICATE KEY UPDATE statement for row ``idx``.

    The inserted columns are the 255-character prefix, the full message,
    its length, its SHA-1 checksum and the id; on a duplicate key every one
    of them is overwritten with the new value.
    """
    sql_lines = (
        "",
        "INSERT INTO t1 (msg_prefix,msg,msg_length,msg_checksum,id) VALUES ('%s','%s',%d,'%s',%d)",
        "ON DUPLICATE KEY UPDATE",
        "msg_prefix=VALUES(msg_prefix),",
        "msg=VALUES(msg),",
        "msg_length=VALUES(msg_length),",
        "msg_checksum=VALUES(msg_checksum),",
        "id=VALUES(id)",
    )
    template = "\n".join(sql_lines)
    values = (msg[0:255], msg, len(msg), sha1(msg), idx)
    return template % values
def get_insert(msg, idx):
    """Build a plain INSERT statement storing ``msg`` in t1 under id ``idx``."""
    prefix = msg[0:255]
    digest = sha1(msg)
    template = (
        "\nINSERT INTO t1 (msg_prefix,msg,msg_length,msg_checksum,id)"
        " VALUES ('%s','%s',%d,'%s',%d)"
    )
    return template % (prefix, msg, len(msg), digest, idx)
def get_insert_null(msg):
    """Build an INSERT statement storing ``msg`` in t1 with a NULL id."""
    prefix = msg[0:255]
    digest = sha1(msg)
    template = (
        "\nINSERT INTO t1 (msg_prefix,msg,msg_length,msg_checksum,id)"
        " VALUES ('%s','%s',%d,'%s',NULL)"
    )
    return template % (prefix, msg, len(msg), digest)
class ChecksumWorker(threading.Thread):
    """Thread that runs CHECKSUM TABLE on t1 and compares it with a given value.

    After the thread finishes, ``checksum`` holds the value computed by the
    server when it differed from the one passed in (otherwise it is left
    unchanged), so the caller can detect a mismatch by comparing it with
    the value it supplied.  The thread starts itself from the constructor.
    """

    def __init__(self, con, checksum):
        """Store the connection and expected checksum, open the log, start.

        con      -- MySQLdb connection owned (and eventually closed) by this thread
        checksum -- expected table checksum (integer)
        """
        threading.Thread.__init__(self)
        self.con = con
        con.autocommit(False)
        self.log = open('/%s/worker-checksum.log' % LG_TMP_DIR, 'a')
        self.checksum = checksum
        # Initialised here as well so finish() can always compute a duration,
        # even if runme() fails before resetting it.
        self.start_time = time.time()
        self.log.write("given checksum=%d\n" % checksum)
        self.start()

    def run(self):
        """Thread body: compute the checksum, record any failure, clean up."""
        try:
            self.runme()
            self.log.write("ok\n")
        except Exception as e:
            try:
                cursor = self.con.cursor()
                # Parameterized so quote characters in the error text cannot
                # break the statement.
                cursor.execute("INSERT INTO errors VALUES(%s)", (str(e),))
                # Commit on this thread's own connection (not on a
                # module-level ``con``, which may be closed or undefined).
                self.con.commit()
            except MySQLdb.Error as e2:
                self.log.write("caught while inserting error (%s)\n" % e2)
            self.log.write("caught (%s)\n" % e)
        finally:
            self.finish()

    def finish(self):
        """Log the elapsed time, then close the log and the connection."""
        self.log.write("total time: %.2f s\n" % (time.time() - self.start_time))
        self.log.close()
        self.con.close()

    def runme(self):
        """Ask the server for the checksum of t1 and compare it."""
        self.log.write("checksum thread started\n")
        self.start_time = time.time()
        cur = self.con.cursor()
        cur.execute("SET SESSION innodb_lra_size=16")
        cur.execute("CHECKSUM TABLE t1")
        # The checksum value is read from the second column of the result row.
        checksum = cur.fetchone()[1]
        self.con.commit()
        if checksum != self.checksum:
            self.log.write(
                "checksums do not match. given checksum=%d, calculated checksum=%d\n"
                % (self.checksum, checksum))
            # Publish the computed value so the main thread sees the mismatch.
            self.checksum = checksum
        else:
            self.log.write("checksums match! (both are %d)\n" % checksum)
class Worker(threading.Thread):
    """Thread that runs a random mix of reads and writes against t1.

    Every iteration picks a random id, reads the row (by primary key or by
    the msg_prefix secondary key), validates what it read, then inserts,
    updates or deletes that row and randomly commits, rolls back or leaves
    the transaction open.  Failures are logged and recorded in the
    ``errors`` table.  The thread starts itself from the constructor.
    """

    def __init__(self, num_xactions, xid, con, server_pid, do_blob, max_id, fake_changes, secondary_checks):
        """Store the settings, seed the private RNG, open the log and start.

        num_xactions     -- iterations to run; 0 means loop until the server dies
        xid              -- worker number (log file name, messages, RNG seed)
        con              -- MySQLdb connection used by this thread
        server_pid       -- pid of mysqld, mixed into the RNG seed
        do_blob          -- passed to get_msg() to choose the message size range
        max_id           -- upper bound (inclusive) of the random row ids
        fake_changes     -- when true, issue SET innodb_fake_changes=1
        secondary_checks -- when true, sometimes verify the secondary index
        """
        threading.Thread.__init__(self)
        self.do_blob = do_blob
        self.xid = xid
        con.autocommit(False)
        self.con = con
        self.num_xactions = num_xactions
        cur = self.con.cursor()
        # Private generator so each worker has its own reproducible stream.
        self.rand = random.Random()
        self.rand.seed(xid * server_pid)
        self.loop_num = 0
        self.max_id = max_id
        self.num_primary_select = 0
        self.num_secondary_select = 0
        self.num_secondary_only_select = 0
        self.num_inserts = 0
        self.num_deletes = 0
        self.num_updates = 0
        self.time_spent = 0
        self.log = open('/%s/worker%02d.log' % (LG_TMP_DIR, self.xid), 'a')
        if fake_changes:
            cur.execute("SET innodb_fake_changes=1")
        self.secondary_checks = secondary_checks
        # Must stay last: run() relies on every attribute assigned above.
        self.start()

    def finish(self):
        """Write the per-thread statistics to the log and close it."""
        self.log.write("loop_num:%d, total time: %.2f s\n" % (
            self.loop_num, time.time() - self.start_time + self.time_spent))
        self.log.write(
            "num_primary_select=%d,num_secondary_select=%d,num_secondary_only_select=%d\n"
            % (self.num_primary_select, self.num_secondary_select,
               self.num_secondary_only_select))
        self.log.write(
            "num_inserts=%d,num_updates=%d,num_deletes=%d,time_spent=%d\n"
            % (self.num_inserts, self.num_updates, self.num_deletes,
               self.time_spent))
        self.log.close()

    def validate_msg(self, msg_prefix, msg, msg_length, msg_checksum, idx):
        """Check that the columns of one row are consistent with each other.

        The stored prefix must equal the first 255 characters of ``msg``,
        the stored length must equal ``len(msg)`` and the stored checksum
        must equal ``sha1(msg)``.  On a mismatch the details are logged,
        recorded in the ``errors`` table (and committed), and an Exception
        is raised.
        """
        prefix_match = msg_prefix == msg[0:255]
        checksum = sha1(msg)
        checksum_match = checksum == msg_checksum
        len_match = len(msg) == msg_length
        if prefix_match and checksum_match and len_match:
            self.log.write("Validated for length(%d) and id(%d)\n" % (msg_length, idx))
            return
        errmsg = "id(%d), length(%s,%d,%d), checksum(%s,%s,%s) prefix(%s,%s,%s)" % (
            idx,
            len_match, len(msg), msg_length,
            checksum_match, checksum, msg_checksum,
            prefix_match, msg_prefix, msg[0:255])
        self.log.write(errmsg + "\n")
        cursor = self.con.cursor()
        # The message embeds row data that just failed validation, so it may
        # contain arbitrary characters: let the driver quote it.
        cursor.execute("INSERT INTO errors VALUES(%s)", (errmsg,))
        cursor.execute("COMMIT")
        raise Exception('validate_msg failed')

    # Check to see if the idx is in the first column of res_array
    def check_exists(self, res_array, idx):
        """Return True when some row of ``res_array`` has ``idx`` as first column."""
        return any(res[0] == idx for res in res_array)

    def run(self):
        """Thread body: run the workload, record any failure, write the stats."""
        try:
            self.runme()
            self.log.write("ok, with do_blob %s\n" % self.do_blob)
        except Exception as e:
            try:
                cursor = self.con.cursor()
                # Parameterized so quote characters in the error text cannot
                # break the statement.
                cursor.execute("INSERT INTO errors VALUES(%s)", (str(e),))
                cursor.execute("COMMIT")
            except MySQLdb.Error as e2:
                self.log.write("caught while inserting error (%s)\n" % e2)
            self.log.write("caught (%s)\n" % e)
        finally:
            self.finish()

    def runme(self):
        """Run the read/validate/write loop.

        Loops ``num_xactions`` times, or forever when ``num_xactions`` is 0.
        Returns early when the server reports error 2006 (connection gone).
        Other MySQL errors inside an iteration are logged and the loop
        continues; any other exception propagates to run().
        """
        self.start_time = time.time()
        cur = self.con.cursor()
        self.log.write("thread %d started, run from %d to %d\n" % (
            self.xid, self.loop_num, self.num_xactions))
        while not self.num_xactions or (self.loop_num < self.num_xactions):
            idx = self.rand.randint(0, self.max_id)
            # 0 selects a DELETE, 1..3 select an insert/update.
            insert_or_update = self.rand.randint(0, 3)
            self.loop_num += 1
            # Randomly toggle innodb_prefix_index_cluster_optimization 5% of the time
            if self.rand.randint(0, 20) == 0:
                cur.execute("SET GLOBAL innodb_prefix_index_cluster_optimization=1-@@innodb_prefix_index_cluster_optimization")
            try:
                stmt = None
                msg = get_msg(self.do_blob, idx)
                # Query primary key 70%, secondary key lookup 20%, secondary key only 10%
                r = self.rand.randint(1, 10)
                if r <= 7:
                    cur.execute("SELECT msg_prefix,msg,msg_length,msg_checksum FROM t1 WHERE id=%d" % idx)
                    res = cur.fetchone()
                    self.num_primary_select += 1
                elif r <= 9:
                    cur.execute("SELECT msg_prefix,msg,msg_length,msg_checksum FROM t1 WHERE msg_prefix='%s'" % msg[0:255])
                    res = cur.fetchone()
                    self.num_secondary_select += 1
                else:
                    # Query only the secondary index
                    cur.execute("SELECT id, msg_prefix FROM t1 WHERE msg_prefix='%s'" % msg[0:255])
                    res = cur.fetchall()
                    self.num_secondary_only_select += 1
                    # Nothing to validate or modify here because the other
                    # columns were not fetched: go to the next iteration.
                    continue
                if res:
                    self.validate_msg(res[0], res[1], res[2], res[3], idx)
                # True when the write below must leave (idx, prefix) visible
                # in the secondary index.
                insert_with_index = False
                if insert_or_update:
                    if res:
                        # Row exists: overwrite it one of two ways.
                        if self.rand.randint(0, 1):
                            stmt = get_update(msg, idx)
                        else:
                            stmt = get_insert_on_dup(msg, idx)
                            insert_with_index = True
                        self.num_updates += 1
                    else:
                        # Row missing: create it one of three ways.
                        r = self.rand.randint(0, 2)
                        if r == 0:
                            stmt = get_insert(msg, idx)
                            insert_with_index = True
                        elif r == 1:
                            stmt = get_insert_on_dup(msg, idx)
                            insert_with_index = True
                        else:
                            stmt = get_insert_null(msg)
                        self.num_inserts += 1
                else:
                    stmt = "DELETE FROM t1 WHERE id=%d" % idx
                    self.num_deletes += 1
                query_result = cur.execute(stmt)
                # 10% probability of checking to see the key exists in secondary index
                if self.secondary_checks and self.rand.randint(1, 10) == 1:
                    cur.execute("SELECT id, msg_prefix FROM t1 WHERE msg_prefix='%s'" % msg[0:255])
                    res_array = cur.fetchall()
                    if insert_or_update:
                        if insert_with_index:
                            if not self.check_exists(res_array, idx):
                                self.log.write("Error: Inserted row doesn't exist in secondary index\n")
                                raise Exception("Error: Inserted row doesn't exist in secondary index")
                    else:
                        if self.check_exists(res_array, idx):
                            self.log.write("Error: Deleted row still exists in secondary index\n")
                            raise Exception("Error: Deleted row still exists in secondary index")
                if (self.loop_num % 100) == 0:
                    self.log.write("Thread %d loop_num %d: result %d: %s\n" % (
                        self.xid, self.loop_num, query_result, stmt))
                # 30% commit, 10% rollback, 60% don't end the trx
                r = self.rand.randint(1, 10)
                if r < 4:
                    self.con.commit()
                elif r == 4:
                    self.con.rollback()
            except MySQLdb.Error as e:
                if e.args[0] == 2006:  # server is killed
                    self.log.write("mysqld down, transaction %d\n" % self.xid)
                    return
                else:
                    self.log.write("mysql error for stmt(%s) %s\n" % (stmt, e))
        # Close whatever transaction the last iterations left open.
        try:
            self.con.commit()
        except Exception as e:
            self.log.write("commit error %s\n" % e)
if __name__ == '__main__':
    # Command-line driver.  Fifteen positional arguments are expected, in
    # the order read below.  Several of them (user, host, port, db, do_blob,
    # LG_TMP_DIR) are deliberately module-level names because the functions
    # and thread classes above read them as globals.
    pid_file = sys.argv[1]
    kill_db_after = int(sys.argv[2])
    num_records_before = int(sys.argv[3])
    num_workers = int(sys.argv[4])
    num_xactions_per_worker = int(sys.argv[5])
    user = sys.argv[6]
    host = sys.argv[7]
    port = int(sys.argv[8])
    db = sys.argv[9]
    do_blob = int(sys.argv[10])
    max_id = int(sys.argv[11])
    LG_TMP_DIR = sys.argv[12]
    fake_changes = int(sys.argv[13])
    checksum = int(sys.argv[14])
    secondary_checks = int(sys.argv[15])
    checksum_worker = None
    workers = []
    # Read the server pid and release the file handle right away.
    with open(pid_file) as pid_fh:
        server_pid = int(pid_fh.read())
    log = open('/%s/main.log' % LG_TMP_DIR, 'a')
    try:
        if num_records_before:
            log.write("populate table do_blob is %d\n" % do_blob)
            con = MySQLdb.connect(user=user, host=host, port=port, db=db)
            try:
                if not populate_table(con, num_records_before, do_blob, log):
                    sys.exit(1)
            finally:
                # Close the connection on the failure path as well.
                con.close()
        if checksum:
            log.write("start the checksum thread\n")
            checksum_worker = ChecksumWorker(
                MySQLdb.connect(user=user, host=host, port=port, db=db), checksum)
            workers.append(checksum_worker)
        log.write("start %d threads\n" % num_workers)
        for i in range(num_workers):
            worker = Worker(num_xactions_per_worker, i,
                            MySQLdb.connect(user=user, host=host, port=port, db=db),
                            server_pid, do_blob, max_id, fake_changes, secondary_checks)
            workers.append(worker)
        if kill_db_after:
            # Let the workload run for kill_db_after seconds, then kill the
            # server hard while the workers are active.
            log.write("kill mysqld\n")
            time.sleep(kill_db_after)
            os.kill(server_pid, signal.SIGKILL)
        log.write("wait for threads\n")
        for w in workers:
            w.join()
        # ChecksumWorker overwrites its ``checksum`` attribute with the value
        # it computed when that differs from the expected one.
        if checksum_worker and checksum_worker.checksum != checksum:
            log.write(
                "checksums do not match. given checksum=%d, calculated checksum=%d\n"
                % (checksum, checksum_worker.checksum))
            sys.exit(1)
        log.write("all threads done\n")
    finally:
        # Runs on sys.exit() too, so the main log is always flushed and closed.
        log.close()