巡检项:
cpu使用率,内存使用率,文件系统使用率,日志文件大小,oracle后台日志(有无异常),oracle连接数使用情况,oracle表空间使用率,asm存储使用率,失效索引,rman备份日志(有无异常)
主巡检脚本:Oracheck.sh
[oracle@orcl scripts]$ cat oracheck.sh #!/bin/sh # Get Hostname DB_CONN_STR=system/oracle source /home/oracle/.bash_profile logfile=oracheck_`date +%y%m%d`.log hostinfodir=/home/oracle/oracheck DB_UNIQUE_NAME=orcl if [[ ! -d $hostinfodir ]] then mkdir -p $hostinfodir fi echo -e "==== Oracheck started at $(date +"%y-%m-%d %H:%M:%S") ===\n" >> $hostinfodir/$logfile echo -e "*** step1.CPU使用率检查 \n" >> $hostinfodir/$logfile sar 1 10 >> $hostinfodir/$logfile echo -e "\n" >> $hostinfodir/$logfile echo -e "*** step2.内存使用率检查 \n" >> $hostinfodir/$logfile free -g >> $hostinfodir/$logfile echo -e "\n" >> $hostinfodir/$logfile echo -e "*** step3.文件系统使用率检查 \n" >> $hostinfodir/$logfile df -h >>$hostinfodir/$logfile echo -e "\n" >> $hostinfodir/$logfile echo -e "*** step4.日志文件大小检查 \n" >> $hostinfodir/$logfile echo -e "trace目录(trc,trm,alert_sid):" >> $hostinfodir/$logfile du -sh $ORACLE_BASE/diag/rdbms/$DB_UNIQUE_NAME/$ORACLE_SID/trace >>$hostinfodir/$logfile echo -e "alert目录(log_*.xml):" >> $hostinfodir/$logfile du -sh $ORACLE_BASE/diag/rdbms/$DB_UNIQUE_NAME/$ORACLE_SID/alert >>$hostinfodir/$logfile echo -e "监听日志(listener.log):" >> $hostinfodir/$logfile du -sh $ORACLE_HOME/network/admin/listener.ora >>$hostinfodir/$logfile echo -e "审计目录(adump):" >> $hostinfodir/$logfile du -sh $ORACLE_BASE/admin/$DB_UNIQUE_NAME/adump >>$hostinfodir/$logfile echo -e "\n" >> $hostinfodir/$logfile echo -e "*** step5.oracle后台日志检查 \n" >> $hostinfodir/$logfile tail -3000 $ORACLE_BASE/diag/rdbms/$DB_UNIQUE_NAME/$ORACLE_SID/trace/alert_$ORACLE_SID.log |grep ORA- >> $hostinfodir/$logfile echo -e "\n" >> $hostinfodir/$logfile echo -e "*** step6.oracle连接数检查 \n" >> $hostinfodir/$logfile sh /home/oracle/oracheck/scripts/process_check.sh >> $hostinfodir/$logfile echo -e "\n" >> $hostinfodir/$logfile echo -e "*** step7.oracle表空间使用率检查 \n" >> $hostinfodir/$logfile sh /home/oracle/oracheck/scripts/tbs_check.sh >> $hostinfodir/$logfile echo -e "\n" >> $hostinfodir/$logfile echo -e "*** step8.asm存储使用率检查 \n" >> $hostinfodir/$logfile sh /home/oracle/oracheck/scripts/asm_check.sh >> $hostinfodir/$logfile echo -e "\n" >> $hostinfodir/$logfile echo -e "*** step9.失效索引检查 \n" >> $hostinfodir/$logfile sh /home/oracle/oracheck/scripts/invalid_index_check.sh >> $hostinfodir/$logfile echo -e "\n" >> $hostinfodir/$logfile echo -e "*** step10.rman备份检查 \n" >> $hostinfodir/$logfile echo -e "rman备份日志:" >> $hostinfodir/$logfile # cat /backup/logs/rman`date +%Y%m%d`.log |grep ORA- >> $hostinfodir/$logfile echo -e "rman备份集校验日志:" >> $hostinfodir/$logfile # cat /backup/logs/rman_validate`$date+%Y%m%d`.log |grep ORA- >> $hostinfodir/$logfile echo -e "\n" >> $hostinfodir/$logfile echo -e "==== Oracheck ended at $(date +"%y-%m-%d %H:%M:%S") ===\n" >> $hostinfodir/$logfile exit 0
process_check.sh
[oracle@orcl scripts]$ cat process_check.sh sqlplus -s $DB_CONN_STR <<EOF set linesize 300 col resource_name format a15 select resource_name,current_utilization,limit_value, trunc(current_utilization * 100 / limit_value) Result_Number from v\$resource_limit where resource_name in ('processes'); EOF exit
tbs_check.sh
[oracle@orcl scripts]$ cat tbs_check.sh sqlplus -s $DB_CONN_STR <<EOF set linesize 300 col tablespace_name format a20 SELECT DF.TABLESPACE_NAME, COUNT(*) DATAFILE_COUNT, ROUND(SUM(DF.BYTES) / 1048576 / 1024, 2) SIZE_GB, ROUND(SUM(FREE.BYTES) / 1048576 / 1024, 2) FREE_GB, ROUND(SUM(DF.BYTES) / 1048576 / 1024 - SUM(FREE.BYTES) / 1048576 / 1024, 2) USED_GB, ROUND(MAX(FREE.MAXBYTES) / 1048576 / 1024, 2) MAXFREE, 100 - ROUND(100.0 * SUM(FREE.BYTES) / SUM(DF.BYTES), 2) PCT_USED, ROUND(100.0 * SUM(FREE.BYTES) / SUM(DF.BYTES), 2) PCT_FREE FROM DBA_DATA_FILES DF, (SELECT TABLESPACE_NAME, FILE_ID, SUM(BYTES) BYTES, MAX(BYTES) MAXBYTES FROM DBA_FREE_SPACE WHERE BYTES > 1024 * 1024 GROUP BY TABLESPACE_NAME, FILE_ID) FREE WHERE DF.TABLESPACE_NAME = FREE.TABLESPACE_NAME(+) AND DF.FILE_ID = FREE.FILE_ID(+) GROUP BY DF.TABLESPACE_NAME ORDER BY 8; EOF exit
asm_check.sh
[oracle@orcl scripts]$ cat asm_check.sh sqlplus -s $DB_CONN_STR <<EOF select name,state,total_mb,free_mb,usable_file_mb from v\$asm_diskgroup; EOF exit
invalid_index_check.sh
[oracle@orcl scripts]$ cat invalid_index_check.sh sqlplus -s $DB_CONN_STR <<EOF select owner, index_name, status from dba_indexes where status not in ('VALID','N/A') order by 1, 2; select i.owner, i.index_name, p.partition_name, p.status from dba_ind_partitions p, dba_indexes i where p.index_name = i.index_name and p.index_owner=i.owner and p.status != 'USABLE' order by 1, 2, 3; select i.owner, i.index_name, s.subpartition_name, s.status from dba_ind_subpartitions s, dba_indexes i where s.index_name = i.index_name and s.index_name = i.index_name and s.status != 'USABLE' order by 1, 2, 3; EOF exit
rman_validate.sh(校验前一天的备份集)
#!/bin/bash # +-----------------------------------------------------------------------+ # | EXPORT ENVIRONMENT VARIABLE OF ORACLE USER | # +-----------------------------------------------------------------------+ source ~/.bash_profile; # +-----------------------------------------------------------------------+ # | GLOBAL VARIABLES ABOUT THE ABSOLUTE PATH OF THE SHELL COMMAND | # +-----------------------------------------------------------------------+ export AWK=`which awk` export DATE=`which date` export ECHO=`which echo` # +-----------------------------------------------------------------------+ # | GLOBAL VARIABLES ABOUT STRINGS AND BACKTICK EXECUTION RESULT OF SHELL | # +-----------------------------------------------------------------------+ export BACK_LOG=~/backup/logs export RMAN=$ORACLE_HOME/bin/rman export SQLPLUS=$ORACLE_HOME/bin/sqlplus export YESTERDAY=`$DATE +%Y-%m-%d -d yesterday` export DAY_OF_WEEK=`$DATE +%u` export BSKEY_LIST= export BSKEY_LIST_WITH_COMMA= # +-----------------------------------------------------------------------+ # | QUERY ALL OF BS_KEY VALUE OF RMAN BACKUPSET YESTERDAY INTO BSKEY_LIST | # +-----------------------------------------------------------------------+ BSKEY_LIST=` $SQLPLUS -S /nolog << EOF connect / as sysdba set echo off feedback off heading off underline off select bs_key from v\\$backup_set_details where device_type='DISK' and completion_time > to_date('$YESTERDAY','yyyy-mm-dd') order by 1; exit; EOF` # +-----------------------------------------------------------------------+ # | WITH AWK COMMAND TO PROCESS BSKEY_LIST SAVE TO BSKEY_LIST_WITH_COMMA | # +-----------------------------------------------------------------------+ BSKEY_LIST_WITH_COMMA=`$ECHO $BSKEY_LIST | $AWK -F' ' '{ for ( i=1; i<NF; i++ ) print $i","; print $NF }'` # +-----------------------------------------------------------------------+ # | VALIDATE RMAN BACKUPSET THAT IS GENERATED LAST NIGHT | # +-----------------------------------------------------------------------+ case $DAY_OF_WEEK in 6) $RMAN nocatalog log $BACK_LOG/rman_validate`$DATE +%Y%m%d`.log <<EOF connect target / run { allocate channel d1 type disk maxpiecesize 16g; allocate channel d2 type disk maxpiecesize 16g; allocate channel d3 type disk maxpiecesize 16g; allocate channel d4 type disk maxpiecesize 16g; validate backupset $BSKEY_LIST_WITH_COMMA check logical; release channel d4; release channel d3; release channel d2; release channel d1; } exit; EOF ;; 1|2|3|4|5|7) $RMAN nocatalog log $BACK_LOG/rman_validate`$DATE +%Y%m%d`.log <<EOF connect target / validate backupset $BSKEY_LIST_WITH_COMMA check logical; exit; EOF ;; esac
巡检结果:
==== Oracheck started at 17-11-18 14:15:20 === *** step1.CPU使用率检查 Linux 2.6.32-431.el6.x86_64 (orcl) 11/18/2017 _x86_64_ (1 CPU) 02:15:20 PM CPU %user %nice %system %iowait %steal %idle 02:15:21 PM all 0.00 0.00 1.01 1.01 0.00 97.98 02:15:22 PM all 0.00 0.00 1.02 0.00 0.00 98.98 02:15:23 PM all 0.00 0.00 1.03 0.00 0.00 98.97 02:15:24 PM all 1.03 0.00 1.03 0.00 0.00 97.94 02:15:25 PM all 0.00 0.00 3.03 0.00 0.00 96.97 02:15:26 PM all 0.00 0.00 2.08 0.00 0.00 97.92 02:15:27 PM all 0.00 0.00 3.09 1.03 0.00 95.88 02:15:28 PM all 1.02 0.00 3.06 0.00 0.00 95.92 02:15:29 PM all 0.00 0.00 2.08 0.00 0.00 97.92 02:15:30 PM all 1.02 0.00 3.06 0.00 0.00 95.92 Average: all 0.31 0.00 2.05 0.21 0.00 97.44 *** step2.内存使用率检查 total used free shared buffers cached Mem: 1 1 0 0 0 1 -/+ buffers/cache: 0 1 Swap: 3 0 3 *** step3.文件系统使用率检查 Filesystem Size Used Avail Use% Mounted on /dev/sda3 36G 22G 12G 65% / tmpfs 932M 266M 666M 29% /dev/shm /dev/sda1 194M 34M 151M 19% /boot *** step4.日志文件大小检查 trace目录(trc,trm,alert_sid): 1.6M /u01/app/oracle/diag/rdbms/orcl/orcl/trace alert目录(log_*.xml): 592K /u01/app/oracle/diag/rdbms/orcl/orcl/alert 监听日志(listener.log): 4.0K /u01/app/oracle/product/11.2.0/db_1/network/admin/listener.ora 审计目录(adump): 820K /u01/app/oracle/admin/orcl/adump *** step5.oracle后台日志检查 ORA-1109 signalled during: ALTER DATABASE CLOSE NORMAL... ORA-00313: open failed for members of log group 1 of thread 1 ORA-00312: online log 1 thread 1: '/u01/app/oracle/oradata/orcl/redo01.log' ORA-27037: unable to obtain file status ORA-00313: open failed for members of log group 1 of thread 1 ORA-00312: online log 1 thread 1: '/u01/app/oracle/oradata/orcl/redo01.log' ORA-27037: unable to obtain file status ORA-00313: open failed for members of log group 1 of thread 1 ORA-00312: online log 1 thread 1: '/u01/app/oracle/oradata/orcl/redo01.log' ORA-27037: unable to obtain file status ORA-00313: open failed for members of log group 2 of thread 1 ORA-00312: online log 2 thread 1: '/u01/app/oracle/oradata/orcl/redo02.log' ORA-27037: unable to obtain file status ORA-00313: open failed for members of log group 2 of thread 1 ORA-00312: online log 2 thread 1: '/u01/app/oracle/oradata/orcl/redo02.log' ORA-27037: unable to obtain file status ORA-00313: open failed for members of log group 2 of thread 1 ORA-00312: online log 2 thread 1: '/u01/app/oracle/oradata/orcl/redo02.log' ORA-27037: unable to obtain file status ORA-00313: open failed for members of log group 3 of thread 1 ORA-00312: online log 3 thread 1: '/u01/app/oracle/oradata/orcl/redo03.log' ORA-27037: unable to obtain file status ORA-00313: open failed for members of log group 3 of thread 1 ORA-00312: online log 3 thread 1: '/u01/app/oracle/oradata/orcl/redo03.log' ORA-27037: unable to obtain file status ORA-00313: open failed for members of log group 3 of thread 1 ORA-00312: online log 3 thread 1: '/u01/app/oracle/oradata/orcl/redo03.log' ORA-27037: unable to obtain file status *** step6.oracle连接数检查 RESOURCE_NAME CURRENT_UTILIZATION LIMIT_VALUE RESULT_NUMBER --------------- ------------------- -------------------- ------------- processes 34 150 22 *** step7.oracle表空间使用率检查 TABLESPACE_NAME DATAFILE_COUNT SIZE_GB FREE_GB USED_GB MAXFREE PCT_USED PCT_FREE -------------------- -------------- ---------- ---------- ---------- ---------- ---------- ---------- SYSTEM 1 .74 .01 .74 .01 99.08 .92 SYSAUX 1 .57 .03 .54 .03 94.83 5.17 USERS 1 0 0 0 0 26.25 73.75 UNDOTBS1 2 .17 .14 .03 .08 17.06 82.94 TBS_DATA 1 .2 .19 .01 .19 3 97 *** step8.asm存储使用率检查 no rows selected *** step9.失效索引检查 no rows selected no rows selected no rows selected *** step10.rman备份检查 rman备份日志: rman备份集校验日志: ==== Oracheck ended at 17-11-18 14:15:30 ===