千家信息网

重写慢日志解析程序,实现打印慢SQL信息及其所属数据库

发表于:2025-01-21 作者:千家信息网编辑
千家信息网最后更新 2025年01月21日,分组自研的审计平台最近推出慢SQL优化的功能,topN慢SQL可以通过mysqldumpslow拿到,但由于mysqldumpslow输出的信息不包含数据库,这让程序后续的自动优化变得有些棘手。在观察
千家信息网最后更新 2025年01月21日重写慢日志解析程序,实现打印慢SQL信息及其所属数据库

分组自研的审计平台最近推出慢SQL优化的功能,topN慢SQL可以通过mysqldumpslow拿到,但由于mysqldumpslow输出的信息不包含数据库,这让程序后续的自动优化变得有些棘手。在观察了MySQL慢日志结构后,决定自己写一个python解析程序,返回的结果比mysqldumpslow解析结果上多出数据库名称这一列:

Count: 15  Time=0.002961s (0.034505s)  Lock=8.8e-05s (0.000767s)  Rows=446 (6690), dbmgr[dbmgr]@10.33.46.179  mysql  --SQL所属数据库  SHOW GLOBAL VARIABLES;

python版本:2.7
文件名称:slowParse.py --目前仅支持按query time取topN
代码内容:

import osimport sysdef get_sql(slowlog, topN):    #Slow log 所在目录,请自行替换        f1 = open("/var/mysql/data3306/" + slowlog, "r")    flag1 = 1    flag2 = 0    sqltext = ""    slow_sql_all = {}    sql_info = []    queryTime_list = []    locksTime_list = []    rows_list = []    logonInfo_list = []    db = "None"    rownum = 0    while 1:        line = f1.readline()        rownum += 1        if not line:            break        elif "use " in line and len(line) < 30:            db = getDB(line.strip())        elif "# User@Host" in line and flag1 == 1:            flag2 = 1            userAndHost = getUserAndHost(line.strip())            logonInfo = userAndHost[0] + "[" + userAndHost[0] + "]@" + userAndHost[1]            logonInfo_list.append(logonInfo)        elif "# Query_time" in line and flag1 == 1 and flag2 == 1:            execInfo = getExecInfo(line.strip())            queryTime_list.append(execInfo[0])            locksTime_list.append(execInfo[1])            rows_list.append(execInfo[2])        elif ";" in line and "SET timestamp" not in line and flag1 == 1 and flag2 == 1:            flag2 = 0            sqltext = sqltext + line.strip()            if slow_sql_all.has_key(sqltext):                tmp = slow_sql_all[sqltext]                tmp[0].append(queryTime_list[0])                tmp[1].append(locksTime_list[0])                tmp[2].append(rows_list[0])                tmp[3].append(logonInfo_list[0])            else:                # count = 1                # sql_info.append(count)                sql_info.append(queryTime_list)                sql_info.append(locksTime_list)                sql_info.append(rows_list)                sql_info.append(logonInfo_list)                sql_info.append(db)                slow_sql_all[sqltext] = sql_info            queryTime_list = []            locksTime_list = []            rows_list = []            logonInfo_list = []            sqltext = ""            sql_info = []        elif flag1 == 1 and flag2 == 1 and "# User@Host" not in line and "# Query_time" not in line and "# Time" not in line and "SET timestamp" not in line:            sqltext = sqltext + line.strip() + " "    f1.close()    sqlCombined = {}    sqlTmp = {}    for i in slow_sql_all:        # print i,slow_sql_all[i]        count = len(slow_sql_all[i][0])        totalQueryTime = 0        totalLocksTime = 0        totalRows = 0        for j in slow_sql_all[i][0]:            totalQueryTime += float(j)        maxQueryTime = float(max(slow_sql_all[i][0]))        for k in slow_sql_all[i][1]:            totalLocksTime += float(k)        maxLocksTime = float(max(slow_sql_all[i][1]))        for l in slow_sql_all[i][2]:            totalRows += int(l)        maxRows = int(max(slow_sql_all[i][2]))        logonInfo = slow_sql_all[i][3][0]        db = slow_sql_all[i][4]        sqlCombined[i] = (            count, maxQueryTime, totalQueryTime, maxLocksTime, totalLocksTime, maxRows, totalRows, logonInfo, db)        sqlTmp[i] = maxQueryTime    sqlTopN = sorted(sqlTmp.items(), key=lambda x: x[1], reverse=True)[:topN]    #请自行替换生成文件的所在目录        f2 = open("/var/mysql/data3306/" + slowlog[:-4] + "-top" + str(topN) + ".txt", "w")    for i in sqlTopN:        sqltext = i[0]        count_str = "Count: " + str(sqlCombined[sqltext][0])        queryTime_str = "Time=" + str(sqlCombined[sqltext][1]) + "s (" + str(sqlCombined[sqltext][2]) + "s)"        locksTime_str = "Lock=" + str(sqlCombined[sqltext][3]) + "s (" + str(sqlCombined[sqltext][4]) + "s)"        rows_str = "Rows=" + str(sqlCombined[sqltext][5]) + " (" + str(sqlCombined[sqltext][6]) + "),"        logonInfo_str = sqlCombined[sqltext][7]        db_str = sqlCombined[sqltext][8]        f2.write(            count_str + "  " + queryTime_str + "  " + locksTime_str + "  " + rows_str + " " + logonInfo_str + "  " + db_str + "\n  " + sqltext + "\n")    f2.close()def getDB(line):    info = line.split(" ")    db = info[1][:-1]    return dbdef getUserAndHost(line):    info_list = line.split(" ")    User = info_list[2].split("[")[0]    idx = info_list.index("@")    hostInfo = info_list[idx + 2]    if hostInfo == "[]":        Host = "localhost"    else:        Host = hostInfo[1:-1]    return User, Hostdef getExecInfo(line):    info_list = line.split("  ")    Query_time = info_list[0].split(" ")[2]    Lock_time = info_list[1].split(" ")[1]    Rows_sent = info_list[1].split(" ")[3]    return Query_time, Lock_time, Rows_sentif __name__ == '__main__':    filename = str(sys.argv[1])    topN = int(sys.argv[2])    get_sql(filename, topN)

使用:

python slowParse.py slow.log 5        --取top 5
0