1 Star 0 Fork 0

DataManagement / Relation-autotuning

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
mysql_data_collection.py 12.51 KB
一键复制 编辑 原始数据 按行查看 历史
Whn 提交于 2019-09-30 19:36 . First commit
#!/usr/bin/env python
#coding=utf-8
import pymysql
import os
import numpy as np
import pandas as pd
import time
import json
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
import sys
reload(sys)
sys.setdefaultencoding( "utf-8" )
class mysql(object):
def __init__(self):
# tywan:修改处
self.host = '10.104.165.38'
self.user = 'root'
self.port = 3306
self.passwd = 'admin'
self.charset = 'utf8'
self.db = 'tpcc1'
#self.sudoPassword = '123315'
self.conn = None
# 默认配置
self.innodb_buffer_pool_size = '128M' # 可以通过set global innodb_buffer_pool_size=1024*1024*8设置
# {scope:'global', type:int, default:128M, min:5M, max:2^64-1}
self.innodb_thread_concurrency = '0' # 可以通过set global innodb_thread_concurrency=1024*1024*8设置
# {scope:'global', type:int, default:0, min:0, max:1000}
self.innodb_log_file_size = '48M' # 不可以通过set global,需要写配置文件重启
# {scope:'global', type:int, default:48M, min:4M, max:512GB/innodb_log_files_in_group=2~100}
self.thread_cache_size = '9' # 可以通过set global thread_cache_size=1024*1024*8设置
# {scope:'global', type:int, default:-1, min:0, max:16384}
self.innodb_buffer_pool_instances = '1' # 不可以通过set global,需要写配置文件重启
# {scope:'global', type:int, default:1, min:1, max:64}
# self.innodb_flush_method = 'O_DIRECT'
# variables配置查询:https://dev.mysql.com/doc/refman/8.0/en/server-option-variable-reference.html
# 获取数据
self.X_set = []
self.X = []
self.y = []
self.init = [128, 0, 5, 9, 1]
def restart_mysql(self):
'''
更改参数写入文件,并重启数据库
'''
if self.conn is not None:
self.conn.close()
# 更改参数
self.change_arg()
#sudoPassword = self.sudoPassword
#command = 'sudo service mysql restart'
command_start = 'kubectl create -f ../../mysql/hit-mysql.yaml '
command_stop = 'kubectl delete -f ../../mysql/hit-mysql.yaml '
#p = os.system('echo %s|sudo -S %s' % (sudoPassword, command))
p_stop = os.system(command_stop)
#是否停止
while p_stop:
p_stop = os.system(command_stop)
time.sleep(2)
p_start = os.system(command_start)
#是否开始
while p_start:
p_start = os.system(command_start)
time.sleep(2)
command_connect = 'mysql -h10.104.165.38 -uroot -P3306 -padmin -e"show databases"'
p_connect = os.system(command_connect)
#是否可连接
while p_connect:
p_connect = os.system(command_connect)
time.sleep(2)
def fetch_time(self, sql, times=1):
'''
获取执行某调sql语句执行时间
'''
if self.conn is None:
self.conn = pymysql.connect(host=self.host, port=self.port, user=self.user,
passwd=self.passwd, db=self.db, charset=self.charset)
# 创建游标
cursor = self.conn.cursor()
cursor.execute('show status like \'uptime\'')
time1 = int(cursor.fetchall()[0][1])
## add something here
for i in range(times):
cursor.execute(sql)
##
cursor.execute('show status like \'uptime\'')
time2 = int(cursor.fetchall()[0][1])
# 关闭游标
cursor.close()
self.conn.close()
self.conn = None
return time2 - time1
def change_arg(self):
'''
将配置参数写入文件
'''
txt = '# Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.\n\
#\n\
# This program is free software; you can redistribute it and/or modify\n\
# it under the terms of the GNU General Public License as published by\n\
# the Free Software Foundation; version 2 of the License.\n\
#\n\
# This program is distributed in the hope that it will be useful,\n\
# but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\
# GNU General Public License for more details.\n\
#\n\
# You should have received a copy of the GNU General Public License\n\
# along with this program; if not, write to the Free Software\n\
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA\n\
\n\
#\n\
# The MySQL Server configuration file.\n\
#\n\
# For explanations see\n\
# http://dev.mysql.com/doc/mysql/en/server-system-variables.html\n\
\n\
[mysqld]\n\
pid-file = /var/run/mysqld/mysqld.pid\n\
socket = /var/run/mysqld/mysqld.sock\n\
datadir = /var/lib/mysql\n\
secure-file-priv= NULL\n\
\n\
#innodb_buffer_pool_size = 128M\n\
innodb_buffer_pool_size = {}\n\
#innodb_buffer_pool_instances = 1\n\
innodb_buffer_pool_instances = {}\n\
#innodb_log_file_size = 5M\n\
innodb_log_file_size = {}\n\
#innodb_flush_method = O_DIRECT\n\
innodb_flush_method = O_DIRECT\n\
#thread_cache_size = 9\n\
thread_cache_size = {}\n\
#innodb_thread_concurrency=0\n\
innodb_thread_concurrency={}\n\
\n\
# Disabling symbolic-links is recommended to prevent assorted security risks\n\
symbolic-links=0\n\
\n\
# Custom config should go here\n\
!includedir /etc/mysql/conf.d/'.format(self.innodb_buffer_pool_size,
self.innodb_buffer_pool_instances,
self.innodb_log_file_size,
self.thread_cache_size,
self.innodb_thread_concurrency)
#sudoPassword = self.sudoPassword
#command = 'sudo sh -c \"echo \'{}\' > /etc/mysql/my.cnf\"'.format(txt)
#p = os.system('echo %s|sudo -S %s' % (sudoPassword, command))
print('now setting the conf file: my.cnf!')
print(txt)
command = 'echo \'{}\' > ../conf/mysql/my.cnf'.format(txt)
p = os.system(command)
def fetch_tpms(self):
'''
获得吞吐率
'''
os.system('./tpcc_start -h{} -P{} -d{} -u{} -p{} -w1 -c4 -r30 -l120 >> res_second.txt'.format(self.host,
self.port,
self.db,
self.user,
self.passwd))
fp = open('res_second.txt')
lines = fp.readlines()
## add something
self.y.append(float(lines[-1].split()[0]))
def fetch_sql_arg(self):
'''
获得配置参数
'''
self.conn = pymysql.connect(host=self.host, port=self.port, user=self.user,
passwd=self.passwd, db=self.db, charset=self.charset)
print(self.conn)
temp = []
cursor = self.conn.cursor()
cursor.execute('show variables like \'innodb_buffer_pool_size\'')
#tmp = cursor.fetchall()
#print(tmp[0][1])
temp.append(int(cursor.fetchall()[0][1]) / 1024 / 1024)
cursor.execute('show variables like \'innodb_thread_concurrency\'')
#tmp= cursor.fetchall()
#print(tmp)
temp.append(int(cursor.fetchall()[0][1]))
cursor.execute('show variables like \'innodb_log_file_size\'')
#tmp=cursor.fetchall()
#print(tmp[0][1])
temp.append(int(cursor.fetchall()[0][1]) / 1024 / 1024)
cursor.execute('show variables like \'thread_cache_size\'')
#tmp=cursor.fetchall()
#print(tmp[0][1])
temp.append(int(cursor.fetchall()[0][1]))
cursor.execute('show variables like \'innodb_buffer_pool_instances\'')
#tmp=cursor.fetchall()
#print(tmp[0][1])
temp.append(int(cursor.fetchall()[0][1]))
self.X.append(temp)
self.X_set.append([self.innodb_buffer_pool_size,
self.innodb_thread_concurrency,
self.innodb_log_file_size,
self.thread_cache_size,
self.innodb_buffer_pool_instances])
cursor.close()
self.conn.close()
self.conn = None
def fetch_X_y(self):
for pool_size in [128, 1024, 1024*8]: # 最小是5M
for thread_concurrency in [8, 32]:
for log_file_size in [8, 32, 64]:
for thread_cache in [16, 32]:
for pool_instance in [2, 4]: # 只有pool_size超过1G,这个参数才会有效
# 设置参数
self.innodb_buffer_pool_size = '{}M'.format(pool_size)
self.innodb_thread_concurrency = '{}'.format(thread_concurrency)
self.innodb_log_file_size = '{}M'.format(log_file_size)
self.thread_cache_size = '{}'.format(thread_cache)
self.innodb_buffer_pool_instances = '{}'.format(pool_instance)
# 更改参数并重启数据库
self.restart_mysql()
# 获取配置参数
self.fetch_sql_arg()
# 保存配置参数到文件
txt = '{{innodb_buffer_pool_size:{},innodb_buffer_pool_instances:{},innodb_log_file_size:{},\
thread_cache_size:{},innodb_thread_concurrency={}}}\n'.format(self.innodb_buffer_pool_size,
self.innodb_buffer_pool_instances,
self.innodb_log_file_size,
self.innodb_thread_concurrency,
self.thread_cache_size)
# print txt
command = 'echo \'{}\' >> txt/config_json_second.txt'.format(txt)
#p = os.system('echo %s|sudo -S %s' % ('password', command))
p = os.system(command)
# 获取tpms
self.fetch_tpms()
def writetocsv(self, data_path):
data_df = pd.DataFrame(self.X, columns=['innodb_buffer_pool_size', 'innodb_thread_concurrency',
'innodb_log_file_size', 'thread_cache_size',
'innodb_buffer_pool_instances'])
data_df['performance_tpms'] = self.y
data_df.to_csv(data_path, index=False)
def read_argv_and_exec(self, dict_or_path=None):
'''
读入参数,重写my.cnf,重启数据库,执行tpcc,获取结果
input:
-dict_or_path:dict 非web交互,执行
-dict_or_path:str web交互,读入json文件
output:tpmc
'''
# 学弟调用:python mysql_data_collection.py -json_path
# 设置参数
# 如果读入json文件
if isinstance(dict_or_path, str):
with open(dict_or_path, 'r') as f:
line = f.readline()
json_line = json.loads(line)
# 如果读入dict
if isinstance(dict_or_path, dict):
json_line = dict_or_path
pool_size = json_line['innodb_buffer_pool_size']
thread_concurrency = json_line['innodb_thread_concurrency']
log_file_size = json_line['innodb_log_file_size']
thread_cache = json_line['thread_cache_size']
pool_instance = json_line['innodb_buffer_pool_instances']
self.innodb_buffer_pool_size = '{}M'.format(pool_size)
self.innodb_thread_concurrency = '{}'.format(thread_concurrency)
self.innodb_log_file_size = '{}M'.format(log_file_size)
self.thread_cache_size = '{}'.format(thread_cache)
self.innodb_buffer_pool_instances = '{}'.format(pool_instance)
# 更改参数并重启数据库
self.restart_mysql()
# 获取配置参数/X(实际设置值)/X_set(设置参数值):验证是否设置生效
self.fetch_sql_arg()
# 获取tpms
self.fetch_tpms()
if __name__ == '__main__':
sql = mysql()
# 获取训练数据
if len(sys.argv)==1:
sql.fetch_X_y()
sql.writetocsv('train_csv/mysql_data_second_2.csv')
# web交互:获取参数并执行,并将结果写入json文件
if len(sys.argv)==2:
sql.read_argv_and_exec(sys.argv[1])
# 设置配置X_set
# 查询配置X
# tpms执行结果y
print sql.X, sql.X_set, sql.y
fp = open('json/tpcc_conf_set_res.json', 'w')
s = json.dumps({'tpms':sql.y[0]})
fp.write(s)
fp.close()
1
https://gitee.com/HITMassiveData/Relation-autotuning.git
git@gitee.com:HITMassiveData/Relation-autotuning.git
HITMassiveData
Relation-autotuning
Relation-autotuning
master

搜索帮助