1. 背景
系统运维过程当中会遇到各类场景,需要编写运维脚本来处理频繁的人工操作。本文主要描述在系统运维实战中,针对遇到的场景和问题所编写的运维脚本。
2. 开机系统自动预设脚本
#!/bin/bash
#
# 1. First-boot machine initialization.
#
# rc.local downloads and runs this script on boot (see the rc.local listing
# at the end of this file). When /root/FirstRun exists, the full first-boot
# provisioning runs (disk, users, directories, DNS, swap, limits, cron jobs);
# otherwise only the DNS watchdog is re-applied.
#
# Must be run as root: it formats disks and edits files under /etc.

readonly CHJ_ROOT="/chj"
readonly LOG_DIR="/chj/logs/"
readonly LOG_FILE="/chj/logs/init.log"
readonly DEVOPS_DIR="/chj/devops"
readonly DNS_SCRIPT="/chj/devops/init_fluash.sh"
readonly DEL_LOG_SCRIPT="/chj/devops/del_log.sh"
readonly FIRST_RUN_FILE="/root/FirstRun"

#######################################
# 01. Print the current time as "YYYY-mm-dd HH:MM:SS".
# Outputs: the timestamp on stdout
#######################################
current_date() {
  date "+%Y-%m-%d %H:%M:%S"
}

#######################################
# 02. Make sure the init log file exists.
# Outputs: the log file path on stdout (callers capture it with $(...))
#######################################
init_logs() {
  mkdir -p "${LOG_DIR}"
  touch "${LOG_FILE}"
  echo "${LOG_FILE}"
}

#######################################
# Format one block device as XFS, mount it on /chj and persist the mount.
# Arguments: $1 - block device path
# Returns:   0 on success, 1 if mkfs or mount failed
#######################################
_format_and_mount() {
  local device=$1

  mkfs.xfs "${device}" || return 1
  mkdir -p "${CHJ_ROOT}"
  mount "${device}" "${CHJ_ROOT}" || return 1
  chmod -R 777 "${CHJ_ROOT}"

  # Do not stack duplicate fstab entries if this ever runs twice.
  if ! grep -q "^${device} " /etc/fstab; then
    echo "${device} ${CHJ_ROOT} xfs defaults 0 0 " >> /etc/fstab
  fi
}

#######################################
# 03. Initialize the data disk: /dev/vdb (cloud) or /dev/sdb (local).
# Returns: 0 on success or when no data disk is present, 1 on failure
#######################################
init_disk() {
  local log run_date device
  log=$(init_logs)
  run_date=$(current_date)
  echo "start mkfs.disk $run_date" >> "$log"

  for device in /dev/vdb /dev/sdb; do
    [[ -b "${device}" ]] || continue
    if _format_and_mount "${device}"; then
      # The new filesystem now covers /chj, hiding the log created above;
      # recreate it so the success message is not lost.
      log=$(init_logs)
      echo "disk mount success" >> "$log"
      return 0
    fi
    echo "disk mount failed: ${device}" >> "$log"
    return 1
  done

  echo "/dev/vdb nonentity " >> "$log"
}

#######################################
# 04. Create the standard directory layout under /chj, owned by "work".
#######################################
project_dir() {
  local create_log runtime_date

  mkdir -p {/chj/data/log/,/chj/app/,/chj/devops/build,/chj/app/download,/chj/app/module,/chj/data/cache/,/chj/backup/build,/chj/certs/}
  chown -R work:work /chj/
  chown -R work:work /chj/data/

  create_log=$(init_logs)
  runtime_date=$(current_date)
  echo "mkdir dirctory sucess $runtime_date" >> "$create_log"
}

#######################################
# 05. Run the generated DNS script once and schedule it every minute.
#######################################
dnsCron() {
  chmod +x "${DNS_SCRIPT}"
  /usr/bin/bash "${DNS_SCRIPT}"
  if ! grep -q init_fluash.sh /etc/cron.d/watchdog 2>/dev/null; then
    echo '*/01 * * * * root /chj/devops/init_fluash.sh' >> /etc/cron.d/watchdog
  fi
}

#######################################
# 06. Generate the DNS script for cloud hosts.
# The generated script rewrites /etc/resolv.conf whenever 172.20.240.28
# is missing from it.
#######################################
defind_bcc_dns() {
  mkdir -p "${DEVOPS_DIR}"
  cat > "${DNS_SCRIPT}" <<'EOF'
#!/bin/bash
if ! grep -qF '172.20.240.28' /etc/resolv.conf; then
  echo 'options timeout:1 attempts:2
nameserver 172.21.242.30
nameserver 172.20.240.28
nameserver 172.21.242.49' > /etc/resolv.conf
fi
EOF
}

#######################################
# 07. Generate the DNS script for local virtual machines.
# The generated script rewrites /etc/resolv.conf whenever 192.168.67.231
# is missing from it.
#######################################
defind_vm_dns() {
  mkdir -p "${DEVOPS_DIR}"
  cat > "${DNS_SCRIPT}" <<'EOF'
#!/bin/bash
if ! grep -qF '192.168.67.231' /etc/resolv.conf; then
  echo 'options timeout:1 attempts:2
nameserver 192.168.4.231
nameserver 192.168.67.231
nameserver 172.21.242.30'> /etc/resolv.conf
fi
EOF
}

#######################################
# 08. Pick the DNS profile from the host address and apply it.
# Hosts with a 192.168.x address get the VM profile, all others the cloud
# profile. The generated script is then executed and scheduled via dnsCron;
# without that call the script would be written but never take effect.
#######################################
ifdns() {
  if hostname -i | grep -qF '192.168.'; then
    defind_vm_dns
  else
    defind_bcc_dns
  fi
  dnsCron
}

#######################################
# 09. Back up /etc/resolv.conf to /etc/resolv.conf.bak.
#######################################
bak_dns() {
  local edit_dns_log dns_runtime
  local dns_config_file="/etc/resolv.conf"
  edit_dns_log=$(init_logs)
  dns_runtime=$(current_date)

  if [[ -f "${dns_config_file}" ]]; then
    cp -rpf "${dns_config_file}" "${dns_config_file}.bak"
  else
    echo "dns 文件丢失 ${dns_runtime}" >> "${edit_dns_log}"
  fi
}

#######################################
# 10. Add an 8G swap file at /chj/.swap when the host has no swap.
#######################################
add_swap() {
  local add_swap_log add_swap_runtime swap_total
  add_swap_log=$(init_logs)
  add_swap_runtime=$(current_date)
  echo "开始新增swap ${add_swap_runtime}" >> "${add_swap_log}"

  swap_total=$(free -m | awk '/Swap/ {print $2}')
  if [[ "${swap_total}" == "0" ]]; then
    dd if=/dev/zero of=/chj/.swap bs=1G count=8
    # Swap files should not be readable by other users.
    chmod 600 /chj/.swap
    mkswap /chj/.swap
    swapon /chj/.swap
    sed -i '$a/chj/.swap swap swap defaults 0 0' /etc/fstab
    echo "swap 添加完成 ${add_swap_runtime}" >> "${add_swap_log}"
  else
    echo "swap 内存已经存在 ${add_swap_runtime}" >> "${add_swap_log}"
  fi
}

#######################################
# 11. Drop the lines containing 65535 from /etc/profile (the ulimit
# setting that the cloud image resets automatically).
#######################################
baidu_unlimit_file() {
  sed -i "/65535/d" /etc/profile
}

#######################################
# 12. Raise the open-file limits from 1024 to 3000000.
# Backs up limits.conf and sysctl.conf before editing them.
#######################################
system_limit() {
  local ansibe_log runtime_ansible backup_stamp
  local file_limit="/etc/security/limits.conf"
  local file_sysctl="/etc/sysctl.conf"
  ansibe_log=$(init_logs)
  runtime_ansible=$(current_date)
  # Filename-safe stamp: the log timestamp contains a space and colons.
  backup_stamp=$(date "+%Y%m%d%H%M%S")

  if [[ -f "${file_limit}" && -f "${file_sysctl}" ]]; then
    baidu_unlimit_file
    cp -rpf "${file_limit}" "${file_limit}.${backup_stamp}.bak"
    cp -rpf "${file_sysctl}" "${file_sysctl}.${backup_stamp}.bak"
    sed -i "s/1024/3000000/g" "${file_limit}"
    sed -i "s/fs.nr_open = 1024/fs.nr_open = 3000000/g" "${file_sysctl}"
    sed -i "s/fs.file-max = 1024/fs.file-max = 3000000/g" "${file_sysctl}"
    echo "内核文件打开数 已经替换 ${runtime_ansible}" >> "${ansibe_log}"
  else
    echo "内核文件和用户文件打开时不存在,${runtime_ansible}" >> "${ansibe_log}"
  fi
}

#######################################
# 14. Write the log clean-up script to /chj/devops/del_log.sh.
# The heredoc delimiter is quoted so that $variables and $(date) are
# evaluated when del_log.sh runs, not while it is being generated.
#######################################
_write_del_log_script() {
  mkdir -p "${DEVOPS_DIR}"
  cat > "${DEL_LOG_SCRIPT}" <<'EOF'
#!/bin/bash
# Delete dated log files older than ${delDay} days and record what was removed.
delogtime=$(date "+%Y-%m-%d %T")
delDay="7"
dirList="
/chj/data/log/
/chj/data/logs/
/data/logs/
/data/log/
/chj/logs/logdelrecord/
"
logDir="/chj/logs/logdelrecord/logdelrecord-$(date "+%Y%m%d").log"
if [ ! -d /chj/logs/logdelrecord/ ]; then
  mkdir -p /chj/logs/logdelrecord/
fi
for dir in $dirList
do
  [ -d "$dir" ] || continue
  find "$dir" -type f -mtime +"${delDay}" \
    | grep -E '(201|202|203|204|205)' | grep log \
    | xargs -r -d '\n' rm -fv \
    | sed "s#^#${delogtime} : delete log record : #g" >> "$logDir"
  echo "${delogtime} : delete log record : clear ${dir}" >> "$logDir"
done
EOF
  chmod +x "${DEL_LOG_SCRIPT}"
}

#######################################
# 13. Install the nightly (23:50) log clean-up cron job and its script.
#######################################
DelAppLOG() {
  _write_del_log_script
  if ! grep -q del_log.sh /etc/cron.d/Dellog 2>/dev/null; then
    echo '50 23 * * * root /bin/bash /chj/devops/del_log.sh >/dev/null 2>&1' > /etc/cron.d/Dellog
  fi
}

#######################################
# 15. Set net.core.somaxconn (listen queue length for AF_INET sockets)
# to 2048 and reload sysctl.
#######################################
addkerner() {
  local log runTime_date
  log=$(init_logs)
  runTime_date=$(current_date)

  if ! grep -qF 'net.core.somaxconn = 2048' /etc/sysctl.conf; then
    /bin/sed -i '$i net.core.somaxconn = 2048' /etc/sysctl.conf
  fi
  /sbin/sysctl -p
  echo "初始化最后执行时间:$runTime_date" >> "$log"
}

#######################################
# 16. Schedule ntpdate every 5 minutes in root's crontab.
# Appends instead of overwriting so existing jobs survive, and also works
# when the crontab file does not exist yet.
#######################################
DevopsEditNtp() {
  local runTime_date log
  local CrontFile="/var/spool/cron/root"
  local ntp_job='*/5 * * * * /usr/sbin/ntpdate ntp.chj.cloud >/dev/null 2>&1'
  runTime_date=$(current_date)
  log=$(init_logs)

  if grep -qF '/usr/sbin/ntpdate' "${CrontFile}" 2>/dev/null; then
    echo "ntp服务存在:$runTime_date" >> "$log"
  else
    echo "${ntp_job}" >> "${CrontFile}"
    chmod 600 "${CrontFile}"
  fi
}

#######################################
# 17. Remove the first-run marker so provisioning is not repeated.
#######################################
cleanFirstRun() {
  local log runTime_date
  log=$(init_logs)
  runTime_date=$(current_date)

  if [[ -f "${FIRST_RUN_FILE}" ]]; then
    /bin/rm -f "${FIRST_RUN_FILE}"
    echo "初始化最后执行时间:$runTime_date" >> "$log"
  fi
}

#######################################
# 18. Create the "work" application user and install its SSH public key.
# NOTE(review): the key is fetched over plain HTTP; confirm that this
# network path is trusted.
#######################################
add_work() {
  local ssh_dir="/home/work/.ssh"

  id work >/dev/null 2>&1 || useradd work
  mkdir -p "${ssh_dir}"
  # Public key location.
  wget http://ops.init.com:9090/tmp/work_authorized_keys -O "${ssh_dir}/authorized_keys"
  chown -R work:work "${ssh_dir}/"
  chmod 700 "${ssh_dir}"
  chmod 600 "${ssh_dir}/authorized_keys"
  sed -i "s/Defaults requiretty/#Defaults requiretty/g" /etc/sudoers
  sed -i "s/# %wheel/%wheel/g" /etc/sudoers
}

#######################################
# 19. Entry point.
# First boot (marker file present): full provisioning.
# Later boots: only re-apply DNS, which the cloud platform resets.
#######################################
main() {
  local boot_time

  if (( EUID != 0 )); then
    echo "this script must be run as root" >&2
    return 1
  fi

  if [[ -f "${FIRST_RUN_FILE}" ]]; then
    # Format and mount the data disk.
    init_disk
    sleep 3
    # Create the work user before project_dir chowns /chj to it.
    add_work
    # Create the planned directory layout.
    project_dir
    # Back up, then configure DNS.
    bak_dns
    ifdns
    add_swap
    system_limit
    DelAppLOG
    DevopsEditNtp
    addkerner
    cleanFirstRun
    # Remove the downloaded init script.
    rm -f /root/ops_init.sh
  else
    # The cloud platform resets DNS automatically; re-apply it.
    ifdns
  fi

  boot_time=$(current_date)
  mkdir -p "${LOG_DIR}"
  # Quoted: the timestamp contains a space.
  echo "init bcc time ${boot_time}" > "${LOG_DIR}${boot_time}.log"
  rm -f /root/baidu_init.sh
}

main "$@"

# ---------------------------------------------------------------------------
# 19. rc.local: make it executable and have it fetch and run this script.
# The lines below are the contents of a SEPARATE file, /etc/rc.d/rc.local;
# they are listed here for reference and are not part of this script.
#
#   chmod +x /etc/rc.d/rc.local
#   cat /etc/rc.d/rc.local
#
#   #!/bin/bash
#   wget -o /var/log/wget.log -P /root http://ops.init.com:9090/boot/ops_init.sh
#   cd /root
#   chmod +x ops_init.sh
#   bash ops_init.sh >> /var/log/init.log 2>&1
#   rm -f ops_init.sh
# ---------------------------------------------------------------------------
3. 读取应用列表并写入开机启动脚本
#!/bin/bash
#
# Distributed with the "Chang'e" release tooling: starts the monitor script
# that restarts the application when it fails, and registers the application
# in rc.local so it starts on boot.

# Directory that holds one sub-directory per deployed application.
# Overridable from the environment; defaults to the production location.
APP_ROOT=${APP_ROOT:-/chj/app}

#######################################
# Print the name of the most recently modified directory under APP_ROOT.
# Uses shell globbing and -nt instead of parsing `ls -l`, so an empty
# directory or a name containing spaces cannot produce a bogus result.
# Outputs: the directory name (without path) on stdout
# Returns: 1 when APP_ROOT contains no directory
#######################################
_newestAppName() {
  local newest='' entry

  for entry in "${APP_ROOT}"/*/; do
    [[ -d "${entry}" ]] || continue   # unmatched glob stays literal
    entry=${entry%/}
    if [[ -z "${newest}" || "${entry}" -nt "${newest}" ]]; then
      newest=${entry}
    fi
  done

  [[ -n "${newest}" ]] || return 1
  printf '%s\n' "${newest##*/}"
}

#######################################
# Start the monitor script of one application as user "work".
# Arguments: $1 - application name (directory under APP_ROOT)
# Returns:   non-zero when the name is empty or its bin/ directory is missing
#######################################
startMonitorScript() {
  local appname=$1

  if [[ -z "${appname}" ]]; then
    echo "startMonitorScript: missing application name" >&2
    return 1
  fi
  cd "${APP_ROOT}/${appname}/bin/" \
    && su work -c "nohup ./script/monitor.sh ${appname} >/dev/null 2>&1 &"
}

#######################################
# Look up the newest application and start its monitor script.
#######################################
getAppName() {
  local appName

  if ! appName=$(_newestAppName); then
    echo "getAppName: no application directory under ${APP_ROOT}" >&2
    return 1
  fi
  startMonitorScript "${appName}"
}

#######################################
# Append the application start commands to an rc.local style file.
# Writes two separate lines (profile source + start command) and skips
# the write when the start command is already present.
# Arguments: $1 - file to append to
#######################################
fileCenton() {
  local file=$1
  local appName filepath start_cmd

  if [[ -z "${file}" ]]; then
    echo "fileCenton: missing target file" >&2
    return 1
  fi
  if ! appName=$(_newestAppName); then
    echo "fileCenton: no application directory under ${APP_ROOT}" >&2
    return 1
  fi

  filepath="${APP_ROOT}/${appName}/bin/console start"
  start_cmd="su work -c '${filepath}'"
  if grep -qF -- "${start_cmd}" "${file}" 2>/dev/null; then
    return 0
  fi
  printf '%s\n' "source /etc/profile.d/java.sh" "${start_cmd}" >> "${file}"
}

#######################################
# Make rc.local executable if needed, then register the application in it.
#######################################
writeRcLocal() {
  local rcfile="/etc/rc.d/rc.local"

  [[ -x "${rcfile}" ]] || chmod +x "${rcfile}"
  fileCenton "${rcfile}"
}

main() {
  getAppName
  writeRcLocal
}

main "$@"
4. Python 分发文件脚本到指定应用目录
"""Copy the service pull-up scripts to every "ptest" application host.

The host list comes from an Excel sheet; the files are pushed with the
ansible ``copy`` module. Everything is logged to ``scriptRun.log``.
"""
import json
import logging
import os
import time

try:
    # Imported by the original script but not used by anything below;
    # kept optional so a host without it can still run the distribution.
    import requests
except ImportError:
    requests = None

logging.basicConfig(filename='scriptRun.log',
                    filemode="a+",
                    level=logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

APP_SHEET_PATH = r'/chj/devops/SRE/file/app.xls'
PULL_FILE_DIR = r"/chj/devops/SRE/ServicePullUp"
TARGET_ENV = "ptest"


def getAppData():
    """Read the application sheet and return the rows of the target environment.

    Rows are read from the first worksheet starting at row index 2 (the first
    two rows are skipped, presumably headers). A row is kept when its third
    column equals "ptest". ``main`` uses column 0 as the application name and
    column 1 as the host IP.

    Returns:
        list: the matching rows (each a list of cell values); an empty list
        when the sheet cannot be read, so callers can always iterate.
    """
    try:
        # Imported lazily and inside the try block: a missing xlrd is
        # reported through the log like any other read failure.
        import xlrd
        workbook = xlrd.open_workbook(APP_SHEET_PATH)
        sheet = workbook.sheet_by_index(0)
        rows = (sheet.row_values(i) for i in range(2, sheet.nrows))
        return [row for row in rows if row[2] == TARGET_ENV]
    except Exception as e:
        msg = 'get date error' + str(e)
        logger.error(msg)
        return []


def getPullFile(rootDir=PULL_FILE_DIR):
    """Return the paths of all files below ``rootDir`` (searched recursively).

    Args:
        rootDir: directory holding the files to distribute; defaults to the
            ansible source directory used in production.

    Returns:
        list: full file paths; empty when the directory is missing or empty.
    """
    fileList = []
    try:
        for root, _dirs, files in os.walk(rootDir):
            fileList.extend(os.path.join(root, name) for name in files)
    except Exception as e:
        msg = '推送文件获取失败' + str(e)
        logger.error(msg)
        return []
    return fileList


def main(Data, FileName):
    """Push every file in ``FileName`` to every host row in ``Data``.

    Args:
        Data: rows as returned by ``getAppData`` (column 0 = application
            name, column 1 = host IP); ``None`` is treated as no rows.
        FileName: list of source file paths as returned by ``getPullFile``.
    """
    try:
        rows = Data or []
        if rows and not FileName:
            msg = "获取文件列表失败,文件为空"
            logger.error(msg)
            return
        for row in rows:
            for srcfile in FileName:
                publicAnsibleCmd(srcfile, row[0], row[1])
    except Exception as e:
        msg = '获取数据失败' + str(e)
        logger.error(msg)


def _buildAnsibleArgv(filename, appname, ip):
    """Build the ansible ad-hoc command as an argument list (no shell involved)."""
    moduleArgs = ('src={srcFile} dest=/chj/app/{appDir}/bin/script/ '
                  'owner="work" group="work" mode="0755" backup="yes"'
                  ).format(srcFile=filename, appDir=appname)
    # "<ip>," with the trailing comma is ansible's inline single-host inventory.
    return ['ansible', '-i', '{Ip},'.format(Ip=ip), 'all',
            '-m', 'copy', '-a', moduleArgs, '-b']


def publicAnsibleCmd(filename, appname, ip):
    """Copy one file into an application's script directory via ansible.

    The command is executed without a shell, so values coming from the
    spreadsheet cannot be interpreted as shell syntax. A non-zero exit
    status is logged as an error.

    Args:
        filename: source file path on the control machine.
        appname: application directory name under /chj/app on the target.
        ip: address of the target host.
    """
    import subprocess
    try:
        argv = _buildAnsibleArgv(filename, appname, ip)
        logger.info(' '.join(argv))
        result = subprocess.run(argv)
        if result.returncode != 0:
            logger.error('ansible exited with status %s for %s', result.returncode, ip)
        # Timestamp taken after the copy so it really marks the end.
        date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        print("结束拷贝文件" + date)
    except Exception as e:
        msg = 'ansible command error' + str(e)
        logger.error(msg)


if __name__ == '__main__':
    main(getAppData(), getPullFile())