Online MySQL data deletion script



余二五 2017-11-16 22:59:00

I. Requirements

    Several production MySQL instances keep triggering disk-usage alerts. Until now the old data was deleted by hand each time, which was far too inefficient.

II. Solution

   Log in to the jump server and run a script to do the deletion. Before anything is deleted, the binary logs and the table data are dumped, and the resulting backups are uploaded to HDFS at the end.
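For orientation, a typical run from the jump server looks roughly like the sketch below. The working directory is an assumption based on the LOCAL_FILES path in backup.conf; both scripts are interactive and prompt for the instance and for credentials.

# Hypothetical session on the jump server (paths are assumptions)
cd /home/yaolihong/yao
sh clean_data0.sh    # dump expired table data, delete it, upload the archive to HDFS
sh clean_data1.sh    # copy old binlogs off the host, upload them, then purge them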

III. Scripts

  1. Configuration files

 1.1 mysql.conf: which instances and which tables to clean

# Path to the mysql client binary
MYSQL_HOME=/usr/local/mysql/bin/mysql
# Instances to clean, as host:port pairs
INCLUDE_MYSQL=(mysql4:5507 mysql5:5508 mysql6:5509 mysql7:5510 mysql7:5511)
# Game Analytics 1.0 databases that must never be cleaned
EXCLUDE_DB_GA10=(dc_15 dc_17 dc_44 dc_49 dc_88 dc_183 dc_279 dc_490 dc_624 dc_643 dc_903 dc_906 dc_908 dc_954 dc_1099 dc_1100 dc_1167 dc_1214 dc_1463 dc_1464 dc_2444 dc_2445 dc_2695)
# Regular expression matching a 32- or 33-character appid
APPID_REG=[0-9A-Za-z]{32,33}
# Table-name patterns (built from APPID_REG) whose history is eligible for deletion
DELETE_TABLE_GA10=(dc_datacenter_cache dc_day_report_cache_basic dc_day_report_cache_income dc_day_report_cache_cumu dc_day_report_cache_channel ${APPID_REG}_dc_distributed_everyday front_${APPID_REG}_dc_everyday2 ${APPID_REG}_dc_everyhour ${APPID_REG}_dc_everyday ${APPID_REG}_dc_distributed_everyday2 ${APPID_REG}_dc_distributedweek_everyweek2 ${APPID_REG}_dc_distributedmonth_everymonth2 ${APPID_REG}_dc_pay_distributed_everyday ${APPID_REG}_dc_custom_retain_by_day front_${APPID_REG}_dc_custom_retain_by_day ${APPID_REG}_dc_distributedmonth_everymonth2 ${APPID_REG}_dc_equipment_distributed_by_day ${APPID_REG}_dc_fp_distributed_everyday ${APPID_REG}_dc_event_by_day ${APPID_REG}_dc_event_attr_by_day ${APPID_REG}_dc_error_report_detail ${APPID_REG}_dc_error_report_dist_hour ${APPID_REG}_dc_player_30dayvalue front_${APPID_REG}_dc_player_30dayvalue ${APPID_REG}_dc_player_30day_arpu front_${APPID_REG}_dc_player_30day_arpu ${APPID_REG}_dc_whale_player_day ${APPID_REG}_dc_uid_retain_by_day front_${APPID_REG}_dc_uid_retain_by_day ${APPID_REG}_dc_task_everyday ${APPID_REG}_dc_level_everyday ${APPID_REG}_dc_app_ring ${APPID_REG}_dc_error_report_dist_sys ${APPID_REG}_dc_error_report_detail_sys ${APPID_REG}_dc_error_report_dist_user ${APPID_REG}_dc_error_report_detail_user ${APPID_REG}_dc_rollserver_player_by_day ${APPID_REG}_dc_rollserver_income_by_day ${APPID_REG}_dc_rollserver_retain front_${APPID_REG}_dc_rollserver_retain ${APPID_REG}_dc_tag_everyday ${APPID_REG}_dc_tag_level_outflow ${APPID_REG}_dc_tag_retain_by_day ${APPID_REG}_dc_everyweek ${APPID_REG}_dc_everymonth ${APPID_REG}_dc_distributed_everyweek ${APPID_REG}_dc_distributed_everyweek2 ${APPID_REG}_dc_distributed_everymonth ${APPID_REG}_dc_distributed_everymonth2 ${APPID_REG}_dc_custom_retain_by_week ${APPID_REG}_dc_custom_retain_by_month front_${APPID_REG}_dc_custom_retain_by_week front_${APPID_REG}_dc_custom_retain_by_month)
 
# Keep this many days of binary logs (used by clean_data1.sh)
EXPIRE_LOGS_DAYS=21
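Note that APPID_REG is expanded when this file is sourced, so every entry of DELETE_TABLE_GA10 is already a plain regular expression by the time clean_data0.sh matches table names against it. A minimal sketch of that matching, with a made-up table name:

# "${APPID_REG}_dc_everyday" expands to "[0-9A-Za-z]{32,33}_dc_everyday"
PATTERN="^[0-9A-Za-z]{32,33}_dc_everyday$"
TABLE="0123456789abcdef0123456789abcdef_dc_everyday"   # hypothetical appid
if [[ ${TABLE} =~ ${PATTERN} ]]; then
    echo "matched, eligible for cleanup"
fi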

 1.2 backup.conf

# JDK used by the bundled hadoop client
JAVA_HOME=/usr/java/jdk1.7.0_25
# Local archive(s) that backup_tools.sh uploads to HDFS
LOCAL_FILES=(/home/yaolihong/yao/backup/bak.tar.gz)
# Components of the HDFS target path: /backup/$BUSINESS_TYPE/$CUSTOM_SUB_DIR/...
BUSINESS_TYPE=ga
CUSTOM_SUB_DIR=mysql
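backup_tools.sh uses these values to build the target path on HDFS. With the defaults above, an archive uploaded on 2017-11-16 would land roughly here (date, hostname and timestamp are only examples):

/backup/ga/mysql/2017/11/16/bak.tar.gz_<hostname>_<timestamp>        # default date-based path
/backup/ga/mysql/<sub-dir>/bak.tar.gz_<hostname>_<timestamp>         # when a sub-directory argument is passed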

2. The backup_tools.sh script

   This script uploads the compressed backup archive to HDFS; it is called at the end of both clean_data0.sh and clean_data1.sh.

#!/bin/bash
#set -x
 
#check user
WHO_AM_I=`whoami`
if "hadoop" == $WHO_AM_I ]; then
        echo "Error: you should not use hadoop user to back files, use another user instead please..."
        exit 1
fi
 
shellLocation=`dirname $0`
shellLocation=`cd "$shellLocation" ; pwd`
. $shellLocation/conf/backup.conf
HADOOP_CMD="$shellLocation/bin/hadoop-1.0.4/bin/hadoop"
 
#check that the required config values are not empty
REQUIRED_PARAMS=(JAVA_HOME LOCAL_FILES BUSINESS_TYPE CUSTOM_SUB_DIR)
for PARAM in ${REQUIRED_PARAMS[@]}
do
        if [ "X${!PARAM}" == "X" ]; then
                echo "Error: ${PARAM} is not set..."
                exit 1
        fi
done
export JAVA_HOME
 
CURRENT_TIME=`date +%Y%m%d-%H_%M_%S`
CURRENT_LOG_FILE=$shellLocation/logs/backup.$CURRENT_TIME.log
 
#get time
CURRENT_YEAR=`date +%Y -d '1 hours ago'`
CURRENT_MONTH=`date +%m -d '1 hours ago'`
CURRENT_DAY=`date +%d -d '1 hours ago'`
LAST_HOUR=`date +%H -d '1 hours ago'`
HOUR_STR_FOR_HDFS="$CURRENT_YEAR/$CURRENT_MONTH/$CURRENT_DAY/$LAST_HOUR"
 
BACKUP_BASIC_PATH="/backup/$BUSINESS_TYPE/$CUSTOM_SUB_DIR"
BACKUP_DAY_PATH="$BACKUP_BASIC_PATH/$CURRENT_YEAR/$CURRENT_MONTH/$CURRENT_DAY"
 
# If a directory argument was passed in, use it as the HDFS sub-directory instead of the date-based one
if [ $# = 1 ]; then
    BACKUP_DAY_PATH="/backup/$BUSINESS_TYPE/$CUSTOM_SUB_DIR/$1"
fi
 
#check if hdfs dir exist and mkdir
$HADOOP_CMD fs -test -e $BACKUP_DAY_PATH
if [ $? -ne 0 ]; then
        $HADOOP_CMD fs -mkdir $BACKUP_DAY_PATH >> $CURRENT_LOG_FILE 2>&1
fi
HOST_NAME=`/bin/hostname`
 
IS_ANY_FAILED="N"
for LOCAL_FILE in ${LOCAL_FILES[@]}
do
        #check if local file exist
        if [ ! -f $LOCAL_FILE ]; then
                IS_ANY_FAILED="Y"
                echo "Error: Local file not exist : $LOCAL_FILE" >> $CURRENT_LOG_FILE
                continue
        fi
 
        #put local file to hdfs
        LOCL_FILE_NAME=`/bin/basename $LOCAL_FILE`
        HDFS_FILE_NAME=${LOCL_FILE_NAME}_${HOST_NAME}_${CURRENT_TIME}
        $HADOOP_CMD fs -put $LOCAL_FILE $BACKUP_DAY_PATH/$HDFS_FILE_NAME >> $CURRENT_LOG_FILE 2>&1
 
        #if put failed, we should not touch a done file
        if [ $? -ne 0 ]; then
                IS_ANY_FAILED="Y"
                echo "Error: put file to hdfs failed : $LOCAL_FILE" >> $CURRENT_LOG_FILE
                continue
        fi
done
 
#delete local logs older than 10 days
find $shellLocation/logs/ -mtime +10 -delete
 
if [ $IS_ANY_FAILED = "Y" ]; then
        exit 1
fi
#set +x
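The cleanup scripts call this with an explicit sub-directory, but it can also be run by hand. A sketch of both forms (the sub-directory below is only an example):

# Upload the archive listed in LOCAL_FILES under /backup/ga/mysql/<sub-dir>
sh backup_tools.sh "data/mysql4/5507/dc_100/20171116"
# Without an argument it uploads under /backup/ga/mysql/YYYY/MM/DD (previous hour's date)
sh backup_tools.sh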


  

3. Database cleanup script: clean_data0.sh

#!/bin/bash
 
# Load the configuration files
source ./conf/mysql.conf
source ./conf/backup.conf
SHELLLOCATION=`dirname $0`
SHELLLOCATION=`cd "${SHELLLOCATION}" ; pwd`
HADOOP_CMD="${SHELLLOCATION}/bin/hadoop-1.0.4/bin/hadoop"
 
 
echo "选择要清理的数据库实例:"  
select MYSQL in ${INCLUDE_MYSQL[@]};
do
    break
done
echo "您选择要清理的实例是:$MYSQL"
 
MYSQL_HOST=`echo $MYSQL | awk -F':' '{print $1}'`
MYSQL_PORT=`echo $MYSQL | awk -F':' '{print $2}'`
 
read -p "数据库账号:" MYSQL_USER
read -s -p "数据库密码:" MYSQL_PWD
 
TODAY=`date +"%Y%m%d"`
if [ ! -d ./logs/${MYSQL_HOST}/${MYSQL_PORT} ]; then
    mkdir -p ./logs/${MYSQL_HOST}/${MYSQL_PORT}
fi
 
# Helper that runs a SQL statement against the selected instance
query_mysql()
{
    mysql -u${MYSQL_USER} -p${MYSQL_PWD} -h${MYSQL_HOST} -P${MYSQL_PORT} --default-character-set=utf8 -N -e "$*" | sed "s/^//;s/$//"
}
 
# Helper that prints a message and appends it to the scan log
printlog()
{
    echo -e $*
    echo -e $* >> ./logs/${MYSQL_HOST}/${MYSQL_PORT}/mysql.${TODAY}.log
}
 
# Helper that prints a message and appends it to the deletion log
print_delete_log()
{
    echo -e $*
    echo -e $* >> ./logs/${MYSQL_HOST}/${MYSQL_PORT}/delete.${TODAY}.log
}
 
DB_SET=`query_mysql "show databases" | egrep -E "dc_[0-9]|ga_[0-9]"`
FAIL_BAK_DB=()
for DB in ${DB_SET}; do
    # If this is a Game Analytics 1.0 database and it is not in the exclude list, start cleaning it
    if [[ "${DB}" =~ ^dc_[0-9]*$ && ! " ${EXCLUDE_DB_GA10[@]} " =~ " ${DB} " ]]; then
        DIR=data/${MYSQL_HOST}/${MYSQL_PORT}/${DB}/${TODAY}
        if [ ! -d ./backup/${DIR} ]; then
            mkdir -p ./backup/${DIR}
        fi
        printlog "Scanning ${DB}"
        WAIT_DELETE_TABLE=()
        for TABLE in `query_mysql "use ${DB}; show tables"`; do
            # Check the table name against every pattern that is eligible for cleanup
            for INCLUDE_TABLE in ${DELETE_TABLE_GA10[@]}; do
                # Anchor the pattern so the whole table name has to match
                INCLUDE_TABLE=`echo ${INCLUDE_TABLE} | sed 's/^/\^/;s/$/\$/'`
                if [[ ${TABLE} =~ ${INCLUDE_TABLE} ]]; then
                    TOTAL_ROW=`query_mysql "select count(*) from ${DB}.${TABLE}"`
                    DELETE_ROW=0
                    DELETE_COLUMN=""
                    # Daily tables have a StatiTime column
                    if [[ ! `query_mysql "desc ${DB}.${TABLE} StatiTime"` == "" ]]; then
                        DELETE_COLUMN="StatiTime"
                        DELETE_ROW=`query_mysql "select count(*) from ${DB}.${TABLE} where StatiTime < unix_timestamp(date_add(now(), interval -6 month))"`
 
                    # Weekly and monthly tables have an EndDate column
                    elif [[  ! `query_mysql "desc ${DB}.${TABLE} EndDate"` == "" ]]; then
                        DELETE_COLUMN="EndDate"
                        DELETE_ROW=`query_mysql "select count(*) from ${DB}.${TABLE} where EndDate < unix_timestamp(date_add(now(), interval -6 month))"`
                    fi
 
                    if [ ${DELETE_ROW} -gt 0 ]; then
                        WAIT_DELETE_TABLE+=(${TABLE}.${DELETE_COLUMN})
                        printlog "${DB}.${TABLE} \t ${TOTAL_ROW} \t ${DELETE_ROW}"
                    fi
 
                    TOTAL_ROW=0
                    DELETE_ROW=0
                    DELETE_COLUMN=""
                    break
                fi
            done
        done
 
        if [ ${#WAIT_DELETE_TABLE[@]} -gt 0 ]; then
            read -p "是否清理历史数据:[Yes/No]" SURE
            if [ ${SURE} = "Yes" ]; then
                print_delete_log "开始清除${DB}"
                for WAIT_DELETE in ${WAIT_DELETE_TABLE[@]}; do
                    DEL_TABLE=`echo ${WAIT_DELETE} | awk -F'.' '{print $1}'`
                    DEL_COLUMN=`echo ${WAIT_DELETE} | awk -F'.' '{print $2}'`
                    mysqldump -u${MYSQL_USER} -p${MYSQL_PWD} -h${MYSQL_HOST} -P${MYSQL_PORT} ${DB} ${DEL_TABLE} -w"${DEL_COLUMN} < unix_timestamp(date_add(now(), interval -6 month))" > ./backup/${DIR}/${DEL_TABLE}.sql
                    DEL_ROW=`query_mysql "set sql_log_bin=0; delete from ${DB}.${DEL_TABLE} where ${DEL_COLUMN} < unix_timestamp(date_add(now(), interval -6 month))"`
                    print_delete_log "${DEL_TABLE} \t ${DEL_ROW}"
                done
 
                echo "开始打包导出的sql文件!"
                tar -zcf ./backup/bak.tar.gz ./backup/${DIR}/*.sql
                echo "开始发送打包文件到hdfs,可能需要几分钟,请耐心等候!"
                sh backup_tools.sh "${DIR}"
                if [[ $? -ne 0 || `${HADOOP_CMD} fs -ls /backup/${BUSINESS_TYPE}/${CUSTOM_SUB_DIR}/${DIR}/ | grep bak.tar.gz` = "" ]]; then
                    print_delete_log "==========${DB}备份失败,请手动备份!=========="
                    FAIL_BAK_DB+=(${MYSQL}.${DB})
                fi
            fi
        fi
        print_delete_log "==========备份失败的数据库有:${FAIL_BAK_DB[@]}=========="
        unset WAIT_DELETE_TABLE
    fi
done
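Two points worth noting. The deletes run with sql_log_bin=0, so they are not written to the binary log and will not replicate to any slaves. And because every table is dumped with mysqldump before the DELETE, old rows can be restored from the archive if needed; a rough sketch with placeholder names follows. Keep in mind that a default mysqldump file contains DROP TABLE/CREATE TABLE statements, so it is safer to load it into a scratch database first.

# Restore a dumped table from the archive (dc_100/some_table and the credentials are placeholders)
tar -zxf bak.tar.gz
mysql -u<user> -p -h mysql4 -P 5507 scratch_db < ./backup/data/mysql4/5507/dc_100/20171116/some_table.sql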

4. Binary log cleanup script: clean_data1.sh

#!/bin/bash
# Load the configuration files
source ./conf/mysql.conf
source ./conf/backup.conf
SHELLLOCATION=`dirname $0`
SHELLLOCATION=`cd "${SHELLLOCATION}" ; pwd`
HADOOP_CMD="${SHELLLOCATION}/bin/hadoop-1.0.4/bin/hadoop"
 
echo "选择要清理的数据库实例:"  
select MYSQL in ${INCLUDE_MYSQL[@]};
do
    break
done
echo "您选择要清理的实例是:${MYSQL}"
 
MYSQL_HOST=`echo ${MYSQL} | awk -F':' '{print $1}'`
MYSQL_PORT=`echo ${MYSQL} | awk -F':' '{print $2}'`
 
read -p "主机账号:" USER
read -s -p "主机密码:" PWD
 
TARGET_DAY=`date -d"-${EXPIRE_LOGS_DAYS} day" +%Y-%m-%d`
MTIME=`expr ${EXPIRE_LOGS_DAYS} - 1`
TODAY=`date +%Y%m%d`
DIR=binlog/${MYSQL_HOST}/${MYSQL_PORT}/${TODAY}
if [ ! -d ./backup/${DIR} ]; then
    mkdir -p ./backup/${DIR}
fi
# Wipe known_hosts so the expect block can answer the host-key confirmation prompt
rm -f ~/.ssh/known_hosts
for LOG in `expect << EOF
    spawn ssh ${HOST_USER}@${MYSQL_HOST} "find /data1/mysql/log${MYSQL_PORT}/mysql-bin.* -mtime +${MTIME} | xargs ls --full-time | grep -v ${TARGET_DAY}"
    expect "no)?"
    send "yes\r"
    expect "password:"
    send "${PWD}\r"
    set timeout -1
    expect eof
EOF`; do
if [[ `echo ${LOG} | egrep -E "mysql-bin\.[0-9]{1,}"` != "" ]]; then
rm -f ~/.ssh/known_hosts
expect << EOF
    spawn scp ${HOST_USER}@${MYSQL_HOST}:${LOG} backup/${DIR}
    expect "no)?"
    send "yes\r"
    expect "password:"
    send "${PWD}\r"
    set timeout -1
    expect eof
EOF
fi
done
 
echo "开始打包导出的sql文件!"
tar -zcf ./backup/bak.tar.gz ./backup/${DIR}/mysql-bin.*
echo "开始发送打包文件到hdfs,可能需要几分钟,请耐心等候!"
sh backup_tools.sh "${DIR}"
if [[ $? -ne 0 || `${HADOOP_CMD} fs -ls /backup/${BUSINESS_TYPE}/${CUSTOM_SUB_DIR}/${DIR}/ | grep bak.tar.gz` = "" ]]; then
    echo "==========备份失败,请手动备份!=========="
else
    read -p "数据库账号:" MYSQL_USER
    read -s -p "数据库密码:" MYSQL_PWD
    mysql -u${MYSQL_USER} -p${MYSQL_PWD} -h${MYSQL_HOST} -P${MYSQL_PORT} -e "PURGE BINARY LOGS BEFORE CURRENT_DATE -INTERVAL ${EXPIRE_LOGS_DAYS} DAY"
    echo "备份成功"
fi
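After the purge it is worth confirming that only the expected binlogs remain on the instance; a quick check (credentials and host are placeholders):

# List the binlogs still registered on the server after PURGE BINARY LOGS
mysql -u<user> -p -h mysql4 -P 5507 -e "SHOW BINARY LOGS"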









This article was reposted from zouqingyun's 51CTO blog. Original link: http://blog.51cto.com/zouqingyun/1739972. Please contact the original author before reprinting.
