Expect Output inconsistency - tcl

I want to automate the following interaction using Tcl/expect
[root#mgmt NAS]# ssh -q -p 8022 -l user 10.1.1.1
Password:
HP Network Automation Version 9.10.02
Type "HELP connect" to see how to connect to a device.
Type "HELP" to view a list of available commands.
NA>connect 10.1.1.2
WARNING: You do not have an approved reservation for this device at this time.
Attempting to connect to device bigip1.network.company.net (10.1.1.2).
Last login: Wed Sep 26 08:11:42 2012 from 10.2.1.1
Last login: Wed Sep 26 08:11:42 2012 from 10.2.1.1
[root#bigip1:Standby] config #
[root#bigip1:Standby] config #
[root#bigip1:Standby] config #
[root#bigip1:Standby] config # uname -a
Linux bigip1.network.company.net 2.6.18-164.11.1.el5.1.0.f5app #1 SMP Thu Apr 8 18:26:58 PDT 2010 i686 i686 i386 GNU/Linux
[root#bigip1:Standby] config # exit
logout
Disconnected from device bigip1.network.company.net (10.1.1.2).
NA>quit
Logging out of the NA Proxy Interface.
<Blank Line: couldn't show it with simple formatting>
The user input is essentially:
password
connect 10.1.1.2
uname -a
exit
quit
The script I wrote out, connect.exp, is as follows:
#!/usr/local/bin/expect
# connect.exp -- log in to a NAS proxy over SSH, hop to a device and run
# one command there.
#
# Positional arguments, in order:
#   nashost port user passw device cmd
set argNames {nashost port user passw device cmd}
set argIndex 0
foreach argName $argNames {
    # A missing argument yields an empty string from lindex.
    set $argName [lindex $argv $argIndex]
    incr argIndex
}

# Directory holding the ssh binary.
set binpath /usr/bin

# Do not echo the spawned session to stdout; give up on a pattern
# after 45 seconds.
log_user 0
set timeout 45

# Every argument is mandatory.
foreach argName $argNames {
    if {[set $argName] == ""} {
        puts "Usage: <nashost> <port> <user> <passw> <device> <command>\n"
        exit 1
    }
}

# Prompt patterns (regular expressions anchored at the end of the buffer).
set nasprompt "NA>$"
set prompt "config # $"

# State flags used by the expect blocks that follow.
set running 1
set count 0

# Open the SSH session to the NAS host; bail out if spawn itself fails.
set spawnFailed [catch {
    spawn $binpath/ssh -q -p $port -o "StrictHostKeyChecking no" -l $user $nashost
} spawnError]
if {$spawnFailed} {
    puts "Spawn: SSH failed: $spawnError"
    exit
}
expect {
    "assword: " {
        # Matches "Password: " as well as "password: ".
        send "$passw\r"
        incr count
        if {$count > 3} {
            puts "SSH failed on authentication after 3 tries"
            set running 0
        } else {
            # Keep waiting: either another password prompt or the NA prompt.
            exp_continue
        }
    }
    -re "$nasprompt" {
        # Logged in to the NAS proxy.
        set running 1
    }
    "Connection refused" {
        puts "$expect_out(buffer)"
        set running 0
    }
    "Offending key" {
        puts "Host key verification failed."
        set running 0
    }
    eof {
        puts -nonewline "Connection terminated unexpectedly:\n$expect_out(buffer)"
        set running 0
    }
    timeout {
        # The NAS host name lives in nashost; the script never defines a
        # variable called host, so referencing it here raised a Tcl error.
        puts "ssh: connect to NAS host $nashost: Connection timed out"
        set running 0
    }
}
# Only attempt the device hop when the NAS login succeeded.
if {$running == 1} {
    send "connect $device\r"
    expect {
        -re "$nasprompt" {
            # Back at the NA prompt while the command has not been sent yet
            # means the connect itself failed.
            if {$running > 0} {
                puts "connect to Device $device failed:\n$expect_out(buffer)"
            }
            send "quit\r"
        }
        -re "$prompt" {
            if {$running > 0} {
                # First device prompt: issue the command, then wait for the
                # prompt that follows its output.
                send "$cmd\r"
                set running 0
                exp_continue
            } else {
                # Prompt after the command ran: print what was collected.
                puts "$expect_out(buffer)"
                send "exit\r"
            }
        }
        full_buffer {
            # Flush what has accumulated and keep reading.
            puts "$expect_out(buffer)"
            exp_continue
        }
        eof {
            # nashost is the variable holding the host name; host is not
            # defined anywhere in this script.
            puts "ssh: Connection terminated unexpectedly during command execution: $nashost."
        }
        timeout {
            puts "ssh: Connection timed out during command execution: $nashost."
        }
    }
}
The issue I face is that the output I get for this interaction with this script is inconsistent.
I call the script as follows: expect connect.exp 10.1.1.1 8022 user 'pwd' 10.1.1.2 'uname -a'
Output one:
[root#bigip1:Standby] config #
[root#bigip1:Standby] config # uname -a
Linux bigip1.network.company.net 2.6.18-164.11.1.el5.1.0.f5app #1 SMP Thu Apr 8 18:26:58 PDT 2010 i686 i686 i386 GNU/Linux
[root#bigip1:Standby] config #
Output two:
<blank line>
<blank line>
u[root#bigip1:Standby] config #
[root#bigip1:Standby] config #
The u at the beginning of line 3 is part of the output, not a typo.
Other variations of output two exist as well.
The output I expected is:
Linux bigip1.network.company.net 2.6.18-164.11.1.el5.1.0.f5app #1 SMP Thu Apr 8 18:26:58 PDT 2010 i686 i686 i386 GNU/Linux
[root#bigip1:Standby] config #
What am I doing incorrectly in my script?

After you send the password, you don't actually wait for the NA prompt before sending the connect command. Change your first expect command to:
# Start pessimistic; only reaching the NA prompt marks the login as usable.
set running false
expect {
    "assword: " {
        incr count
        if {$count > 3} {
            puts "SSH failed on authentication after 3 tries"
        } else {
            # Answer the prompt, then keep waiting for the next pattern.
            send "$passw\r"
            exp_continue
        }
    }
    "Connection refused" {
        puts "$expect_out(buffer)"
    }
    "Offending key" {
        puts "Host key verification failed."
    }
    eof {
        puts -nonewline "Connection terminated unexpectedly:\n$expect_out(buffer)"
    }
    timeout {
        # Uses nashost: the script defines no variable named host.
        puts "ssh: connect to NAS host $nashost: Connection timed out"
    }
    -re "$nasprompt" {
        set running true
    }
}
if {$running} {
send "connect ...

Related

Expect: SSH into a remote host, run a command, and save its output to a variable

I'm trying to ssh into a server, run a command, and save its output to a variable, with no success.
# Start the user's login shell as the spawned process and wait for a
# prompt ending in "$ " before each command.
spawn $env(SHELL)
expect "\$ "
send "ls\r"
expect "\$ "
# Hop to the remote host inside the spawned shell.
send "ssh myserver1\r"
expect "\$ "
send "cd /tmp/remotedir1\r"
expect "\$ "
# This ls runs remotely; its output arrives through the spawned session.
send "ls\r"
expect "\$ "
# NOTE: exec starts a new local process from the Expect script itself; it
# does not go through the spawned session, so this lists the local
# working directory rather than /tmp/remotedir1 on myserver1.
set myvar1 [exec ls]
puts "The value of \$myvar1 is: "
puts $myvar1
# First exit leaves the ssh session, second exit ends the local shell.
send "exit\r"
expect "\$ "
send "exit\r"
expect eof
When I run it, I get:
spawn /bin/bash
$ ls
localfile1 localfile2 localfile3
$ ssh myserver1
Last login: Tue Sep 10 15:45:07 2017 from 192.168.0.100
myserver1$ cd /tmp/remotedir1
myserver1$ ls
remotefile1
myserver1$ The value of $myvar1 is:
localfile1
localfile2
localfile3
exit
logout
Connection to myserver1 closed.
bash-3.2$ exit
exit
Apparently, instead of setting $myvar1 to "remotefile1", the script sets it to the three files in the current working directory on the local host.
Thank you for your help in advance!
Using exec will execute the command locally.
Upon sending the ls command, you have to make use of the expect_out array to get the response.
# Regular expression with two groups: group 1 is everything received
# before the prompt character, group 2 is the prompt character itself
# (#, %, > or $), which must be followed by a space at the end of the buffer.
set prompt "(.*)(#|%|>|\\\$) $"
send "ls\r"
expect -re $prompt
# expect_out(1,string) holds the text matched by group 1.
puts $expect_out(1,string)

MySQL replication monitor - Seconds_Behind_Master

I'm using Nagios and the check_mysql_health plugin to monitor my MySQL databases. I need to keep an eye on my Seconds_Behind_Master values in my replicated databases, but I am unable to use SHOW SLAVE STATUS in a subquery to get at that value specifically. Does anyone know another way to get at the value of the Seconds_Behind_Master values of my slave databases as a single value? For the check_mysql_health plugin to work I need to return just a single numeric value that will be monitored.
#!/bin/bash
#########################################################################
# Script: check_mysql_slavestatus.sh #
# Author: Claudio Kuenzler www.claudiokuenzler.com #
# Purpose: Monitor MySQL Replication status with Nagios #
# Description: Connects to given MySQL hosts and checks for running #
# SLAVE state and delivers additional info #
# Original: This script is a modified version of #
# check mysql slave sql running written by dhirajt #
# Thanks to: Victor Balada Diaz for his ideas added on 20080930 #
# Soren Klintrup for stuff added on 20081015 #
# Marc Feret for Slave_IO_Running check 20111227 #
# Peter Lecki for his mods added on 20120803 #
# Serge Victor for his mods added on 20131223 #
# Omri Bahumi for his fix added on 20131230 #
# History: #
# 2008041700 Original Script modified #
# 2008041701 Added additional info if status OK #
# 2008041702 Added usage of script with params -H -u -p #
# 2008041703 Added bindir variable for multiple platforms #
# 2008041704 Added help because mankind needs help #
# 2008093000 Using /bin/sh instead of /bin/bash #
# 2008093001 Added port for MySQL server #
# 2008093002 Added mysqldir if mysql binary is elsewhere #
# 2008101501 Changed bindir/mysqldir to use PATH #
# 2008101501 Use $() instead of `` to avoid forks #
# 2008101501 Use ${} for variables to prevent problems #
# 2008101501 Check if required commands exist #
# 2008101501 Check if mysql connection works #
# 2008101501 Exit with unknown status at script end #
# 2008101501 Also display help if no option is given #
# 2008101501 Add warning/critical check to delay #
# 2011062200 Add perfdata #
# 2011122700 Checking Slave_IO_Running #
# 2012080300 Changed to use only one mysql query #
# 2012080301 Added warn and crit delay as optional args #
# 2012080302 Added standard -h option for syntax help #
# 2012080303 Added check for mandatory options passed in #
# 2012080304 Added error output from mysql #
# 2012080305 Changed from 'cut' to 'awk' (eliminate ws) #
# 2012111600 Do not show password in error output #
# 2013042800 Changed PATH to use existing PATH, too #
# 2013050800 Bugfix in PATH export #
# 2013092700 Bugfix in PATH export #
# 2013092701 Bugfix in getopts #
# 2013101600 Rewrite of threshold logic and handling #
# 2013101601 Optical clean up #
# 2013101602 Rewrite help output #
# 2013101700 Handle Slave IO in 'Connecting' state #
# 2013101701 Minor changes in output, handling UNKWNON situations now #
# 2013101702 Exit CRITICAL when Slave IO in Connecting state #
# 2013123000 Slave_SQL_Running also matched Slave_SQL_Running_State #
#########################################################################
# Usage: ./check_mysql_slavestatus.sh -H dbhost -P port -u dbuser -p dbpass -s connection -w integer -c integer
#########################################################################
# Help text printed with `echo -e` (hence the literal \n and \t escapes).
# The GRANT example uses the 'user'@'host' account syntax and the
# privilege name REPLICATION CLIENT.
help="\ncheck_mysql_slavestatus.sh (c) 2008-2014 GNU GPLv2 licence
Usage: check_mysql_slavestatus.sh -H host -P port -u username -p password [-s connection] [-w integer] [-c integer]\n
Options:\n-H Hostname or IP of slave server\n-P Port of slave server\n-u Username of DB-user\n-p Password of DB-user\n-s Connection name (optional, with multi-source replication)\n-w Delay in seconds for Warning status (optional)\n-c Delay in seconds for Critical status (optional)\n
Attention: The DB-user you type in must have REPLICATION CLIENT rights on the DB-server. Example:\n\tGRANT REPLICATION CLIENT on *.* TO 'nagios'@'%' IDENTIFIED BY 'secret';"

# Nagios plugin exit codes.
STATE_OK=0 # define the exit code if status is OK
STATE_WARNING=1 # define the exit code if status is Warning (not really used)
STATE_CRITICAL=2 # define the exit code if status is Critical
STATE_UNKNOWN=3 # define the exit code if status is Unknown

# Append common binary locations to the existing search path.
export PATH="${PATH}:/usr/local/bin:/usr/bin:/bin" # Set path

crit="No" # what is the answer of MySQL Slave_SQL_Running for a Critical status?
ok="Yes" # what is the answer of MySQL Slave_SQL_Running for an OK status?
# Abort with UNKNOWN when a required command cannot be found.
# `command -v` is a shell builtin that succeeds for executables on PATH
# as well as for builtins; the previous form executed the (empty) output
# of `which` through backticks and only worked by accident.
for cmd in mysql awk grep [
do
  if ! command -v "${cmd}" >/dev/null 2>&1
  then
    echo "UNKNOWN: This script requires the command '${cmd}' but it does not exist; please check if command exists and PATH is correct"
    exit ${STATE_UNKNOWN}
  fi
done
# Check for people who need help - aren't we all nice ;-)
#########################################################################
# Two separate tests joined with || replace the obsolescent `-o` operator,
# whose parsing is ambiguous when an operand looks like a test operator.
if [ "${1}" = "--help" ] || [ "${#}" = "0" ]; then
  echo -e "${help}"
  exit 1
fi

# Important given variables for the DB-Connect
#########################################################################
# -H host, -P port, -u user, -p password are mandatory (checked later);
# -s connection name, -w warning delay and -c critical delay are optional.
while getopts "H:P:u:p:s:w:c:h" Input; do
  case "${Input}" in
    H) host="${OPTARG}" ;;
    P) port="${OPTARG}" ;;
    u) user="${OPTARG}" ;;
    p) password="${OPTARG}" ;;
    # The connection name is stored wrapped in literal double quotes so it
    # can be interpolated directly into the SHOW SLAVE statement.
    s) connection="\"${OPTARG}\"" ;;
    w) warn_delay="${OPTARG}" ;;
    c) crit_delay="${OPTARG}" ;;
    h)
      echo -e "${help}"
      exit 1
      ;;
    \?)
      echo "Wrong option given. Please use options -H for host, -P for port, -u for user and -p for password"
      exit 1
      ;;
  esac
done
# Connect to the DB server and check for informations
#########################################################################
# Check whether all required arguments were passed in
if [ -z "${host}" ] || [ -z "${port}" ] || [ -z "${user}" ] || [ -z "${password}" ]; then
  echo -e "${help}"
  exit ${STATE_UNKNOWN}
fi

# Connect to the DB server and store output in vars.
# Every argument is quoted so that a password or user name containing
# spaces or glob characters reaches mysql as a single word. stderr is
# captured too, so a connection error ends up in ConnectionResult.
# NOTE(review): the password is still visible in the process list while
# mysql runs; consider a defaults file if that matters.
ConnectionResult=$(mysql -h "${host}" -P "${port}" -u "${user}" --password="${password}" -e "show slave ${connection} status\G" 2>&1)

# A usable reply always contains a Slave_IO_State line.
if ! printf '%s\n' "${ConnectionResult}" | grep -q Slave_IO_State; then
  echo -e "CRITICAL: Unable to connect to server ${host}:${port} with username '${user}' and given password"
  exit ${STATE_CRITICAL}
fi

# Pull the second whitespace-separated field of each line of interest.
# The trailing colon keeps e.g. Slave_SQL_Running_State from matching.
check=$(printf '%s\n' "${ConnectionResult}" | grep Slave_SQL_Running: | awk '{print $2}')
checkio=$(printf '%s\n' "${ConnectionResult}" | grep Slave_IO_Running: | awk '{print $2}')
masterinfo=$(printf '%s\n' "${ConnectionResult}" | grep Master_Host: | awk '{print $2}')
delayinfo=$(printf '%s\n' "${ConnectionResult}" | grep Seconds_Behind_Master: | awk '{print $2}')
# Output of different exit states
#########################################################################
# All operands of [ ... ] are quoted: with an empty ${check} or ${checkio}
# the unquoted form raised "unary operator expected" on every comparison
# before falling through to the UNKNOWN exit at the bottom.
if [ "${check}" = "NULL" ]; then
  echo "CRITICAL: Slave_SQL_Running is answering NULL"; exit ${STATE_CRITICAL};
fi

if [ "${check}" = "${crit}" ]; then
  echo "CRITICAL: ${host}:${port} Slave_SQL_Running: ${check}"; exit ${STATE_CRITICAL};
fi

if [ "${checkio}" = "${crit}" ]; then
  # Checking local node replication role
  # LOCAL_NODE=`hostname`
  # NOTE(review): this query uses a hard-coded account and password and
  # ignores -P/-u/-p; the credentials should be moved out of the script.
  # Left unchanged here because replacing the account would alter which
  # privileges the check runs with.
  ROLE=$(mysql -h "${host}" -u slave_user -p'ZAQ!2wsx' --execute="SHOW master STATUS\G;" | grep Binlog_Do_DB | cut -d ' ' -f 6)
  if [[ -n "$ROLE" ]]; then
    echo "OK: This node is Master"; exit ${STATE_OK};
  else
    echo "CRITICAL: ${host} Slave_IO_Running: ${checkio}"; exit ${STATE_CRITICAL};
  fi
fi

if [ "${checkio}" = "Connecting" ]; then
  echo "CRITICAL: ${host} Slave_IO_Running: ${checkio}"; exit ${STATE_CRITICAL};
fi

if [ "${check}" = "${ok}" ] && [ "${checkio}" = "${ok}" ]; then
  # Delay thresholds are set
  if [[ -n ${warn_delay} ]] && [[ -n ${crit_delay} ]]; then
    if ! [[ ${warn_delay} -gt 0 ]]; then echo "Warning threshold must be a valid integer greater than 0"; exit $STATE_UNKNOWN; fi
    # The message now names the critical threshold (it previously repeated
    # the warning text).
    if ! [[ ${crit_delay} -gt 0 ]]; then echo "Critical threshold must be a valid integer greater than 0"; exit $STATE_UNKNOWN; fi
    # The former "both thresholds must be set" test was unreachable inside
    # this branch (both are known to be non-empty here) and was removed.
    if [[ ${warn_delay} -gt ${crit_delay} ]]; then echo "Warning threshold cannot be greater than critical"; exit $STATE_UNKNOWN; fi

    if [[ ${delayinfo} -ge ${crit_delay} ]]; then
      echo "CRITICAL: Slave is ${delayinfo} seconds behind Master | delay=${delayinfo}s"; exit ${STATE_CRITICAL}
    elif [[ ${delayinfo} -ge ${warn_delay} ]]; then
      echo "WARNING: Slave is ${delayinfo} seconds behind Master | delay=${delayinfo}s"; exit ${STATE_WARNING}
    else
      echo "OK: Slave SQL running: ${check} Slave IO running: ${checkio} / master: ${masterinfo} / slave is ${delayinfo} seconds behind master | delay=${delayinfo}s"; exit ${STATE_OK};
    fi
  else
    # Without delay thresholds
    echo "OK: Slave SQL running: ${check} Slave IO running: ${checkio} / master: ${masterinfo} / slave is ${delayinfo} seconds behind master | delay=${delayinfo}s"
    exit ${STATE_OK};
  fi
fi

# No known combination matched: report it rather than guessing a state.
echo "UNKNOWN: should never reach this part (Slave_SQL_Running is ${check}, Slave_IO_Running is ${checkio})"
exit ${STATE_UNKNOWN}

Tcl Expect Keep SSH Spawn open

I want to open a spawned SSH connection and then query a MySQL server for new users (in a loop); if a new user is found, a command should be sent over this SSH connection via Expect.
I don't know if this is possible; up until now I always end up killing ssh.exe when I try the "send" command after the MySQL query.
I want the SSH connection to stay open because it takes 10 seconds to log in with Expect (the host is slow), and I don't want that pause every time I create a user.
How can I do this?
What i am doing:
...
set db [::mysql::connect -host 127.0.0.1 -user root -password **** -db test]

# ssh takes its target as user@host.
spawn ssh admin@192.168.1.2
expect {
    timeout { send_user "\nFalscher SSH User admin!\n"; exit 1 }
    "User:"
}
send "admin\r"
expect {
    timeout { send_user "\nFalscher SSH User admin!\n"; exit 1 }
    "Password:"
}
send "******\r"

# Tcl's set takes a name and a value, with no "=" in between; the former
# three-argument form raised a "wrong # args" error, which ended the
# script (and with it the spawned ssh) before the loop ever started.
set x 1

# Poll for users that have not been created yet and push one command per
# row through the SSH session that stays open across iterations.
while {$x>0} {
    set query [::mysql::query $db {SELECT username, passwort FROM users WHERE erstellt='0'}]
    while {[set row [::mysql::fetch $query]]!=""} {
        set username [lindex $row 0]
        set passwort [lindex $row 1]
        send "create user...;\r"
    }
    ::mysql::endquery $query
    # Wait two seconds before polling again.
    after 2000
}
I solved this by saving the expect output into a file.
After starting in the background, ssh wanted to verify my ssl fingerprint.
After accepting that, it worked fine.

pacemaker can't start my zabbix service when I stop zabbix service

I want to use corosync + pacemaker + zabbix to achieve high availability. The following is my configuration:
crm(live)configure# show
node zabbix1 \
attributes standby="off" timeout="60"
node zabbix2 \
attributes standby="off"
primitive httpd lsb:httpd \
op monitor interval="10s"
primitive vip ocf:heartbeat:IPaddr \
params ip="192.168.56.110" nic="eth0" cidr_netmask="24" \
op monitor interval="10s"
primitive zabbix-ha lsb:zabbix_server \
op monitor interval="30s" timeout="20s" \
op start interval="0s" timeout="40s" \
op stop interval="0s" timeout="60s"
group webservice vip httpd zabbix-ha
property $id="cib-bootstrap-options" \
dc-version="1.1.8-7.el6-394e906" \
cluster-infrastructure="classic openais (with plugin)" \
expected-quorum-votes="2" \
stonith-enabled="false" \
last-lrm-refresh="1377489711" \
no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
resource-stickiness="100"
and my crm_mon status is:
Last updated: Mon Aug 26 18:52:48 2013
Last change: Mon Aug 26 18:52:33 2013 via cibadmin on zabbix1
Stack: classic openais (with plugin)
Current DC: zabbix1 - partition with quorum
Version: 1.1.8-7.el6-394e906
2 Nodes configured, 2 expected votes
3 Resources configured.
Node zabbix1: online
httpd (lsb:httpd): Started
vip (ocf::heartbeat:IPaddr): Started
zabbix-ha (lsb:zabbix_server): Started
Node zabbix2: online
Now I stop the zabbix-ha service on zabbix1 and wait for 300 s, but pacemaker does not start my zabbix-ha service again:
[root#zabbix1 tmp]# ps -ef|grep zabbix
root 13287 31252 0 18:59 pts/2 00:00:00 grep zabbix
and my zabbix-ha script is
I can use crm resource stop/start zabbix-ha to stop and start zabbix-ha.
I am not using the default Zabbix init script (located at zabbix-2.0.6/misc/init.d/fedora/core/zabbix_server); I wrote the LSB script myself. The following is my script for zabbix_server (I put it in /etc/init.d):
#!/bin/bash
#
# zabbix: Control the zabbix Daemon
#
# author: Denglei
#
# blog: http://dl528888.blog.51cto.com/
# description: This is an init.d script for zabbix. Tested on CentOS6. \
# Change DAEMON and PIDFILE if necessary.
#
# Location of the zabbix binary. Change the path as necessary.
DAEMON=/usr/local/zabbix/sbin/zabbix_server
# Service name derived from the binary's file name.
NAME=`basename $DAEMON`
# Pid file of zabbix. NOTE(review): the original comment referred to the
# nginx config file; presumably this must match the pid file setting of
# the zabbix server configuration - confirm.
PIDFILE=/tmp/$NAME.pid
# Location of this script.
SCRIPTNAME=/etc/init.d/$NAME
# Only run if the binary can be found; otherwise exit successfully.
test -x $DAEMON || exit 0
# Exit status reported at the end of the script; set by the functions below.
RETVAL=0
# Announce and launch the daemon.
# Globals: NAME (read), DAEMON (read), RETVAL (written, always 0 - the
#          daemon's own exit status is not inspected).
start() {
  local banner=$"Starting $NAME"
  echo "$banner"
  ${DAEMON}
  RETVAL=0
}
# Ask the daemon to quit by sending SIGQUIT to the pid stored in PIDFILE.
# Globals: NAME (read), PIDFILE (read), RETVAL (written, always 0 whether
#          or not a signal was sent).
stop() {
  local banner=$"Graceful stopping $NAME"
  echo "$banner"
  if [ -s "$PIDFILE" ]; then
    kill -QUIT $(cat $PIDFILE)
  fi
  RETVAL=0
}
# Send SIGTERM to the pid stored in PIDFILE.
# Globals: NAME (read), PIDFILE (read), RETVAL (written: status of kill,
#          or 1 when the pid file is missing or empty).
forcestop() {
  local banner=$"Quick stopping $NAME"
  echo "$banner"
  if [ -s "$PIDFILE" ]; then
    kill -TERM $(cat $PIDFILE)
    RETVAL=$?
  else
    RETVAL=1
  fi
}
# Send SIGHUP to the pid stored in PIDFILE so the daemon re-reads its
# configuration.
# Globals: NAME (read), PIDFILE (read), RETVAL (written: status of kill,
#          or 1 when the pid file is missing or empty).
reload() {
  local banner=$"Graceful reloading $NAME configuration"
  echo "$banner"
  if [ -s "$PIDFILE" ]; then
    kill -HUP $(cat $PIDFILE)
    RETVAL=$?
  else
    RETVAL=1
  fi
}
# Report whether the daemon is running.
#
# A non-empty pid file alone is not proof of life: if the daemon died
# without removing it, the old check kept answering "running" and a
# monitor (init system, cluster manager) never noticed the failure. The
# pid read from the file must be numeric and must belong to an existing
# process.
#
# Globals: NAME (read), PIDFILE (read),
#          RETVAL (written: 0 = running, 3 = not running).
# Outputs: one status line on stdout.
status() {
  local pid=""

  if [ -s "$PIDFILE" ]; then
    # Only the first line is considered; read strips surrounding blanks.
    read -r pid < "$PIDFILE"
  fi

  # Discard anything that is not a plain decimal pid.
  case "$pid" in
    '' | *[!0-9]*) pid="" ;;
  esac

  # kill -0 probes for existence without sending a signal; the /proc test
  # covers a process that exists but may not be signalled by this user.
  if [ -n "$pid" ] && { kill -0 "$pid" 2>/dev/null || [ -d "/proc/$pid" ]; }; then
    echo $"$NAME is running."
    RETVAL=0
  else
    echo $"$NAME stopped."
    RETVAL=3
  fi
}
# Dispatch on the requested action (first command-line argument).
action=$1
case "$action" in
  start | stop | reload | status)
    # These actions share their name with the function implementing them.
    "$action"
    ;;
  force-stop)
    forcestop
    ;;
  restart)
    stop
    start
    ;;
  *)
    echo $"Usage: $0 {start|stop|force-stop|restart|reload|status}"
    exit 1
    ;;
esac

# Hand the status recorded by the action back to the caller.
exit $RETVAL
</pre>

Better script to restart mysql on Ubuntu 8.04

When I run sudo /etc/init.d/mysql restart on Ubuntu 8.04.2, sometimes a mysqld_safe process remains, eating 99% of the CPU and making the machine practically unusable.
Is there a better way to restart mysql? I thought about writing a script:
# Stop the server, give it time to shut down, remove any wrapper that is
# still around, then start again.
sudo /etc/init.d/mysql stop
sleep 10
# The wrapper script that supervises mysqld is named mysqld_safe; killall
# matches on the exact process name.
sudo killall mysqld_safe
sudo /etc/init.d/mysql start
But this would be an evil workaround. (And the script is just a quick shot.)
I googled and found that mysqld_safe is a wrapper script which starts mysqld and makes sure it gets restarted if it should die. So there should be a better way to restart the thing.
I googled that this is a common problem in this ubuntu version. Is Debian / Ubuntu doing it wrong at this point? The /etc/init.d script looks quite sophisticated, and it deals with mysql_safe also, but my skills are not good enough to understand it fully. But this would be the best place to improve. This is a paste of the version on my machine (which is untouched):
#!/bin/bash
#
### BEGIN INIT INFO
# Provides: mysql
# Required-Start: $remote_fs $syslog mysql-ndb
# Required-Stop: $remote_fs $syslog mysql-ndb
# Should-Start: $network $named $time
# Should-Stop: $network $named $time
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: Start and stop the mysql database server daemon
# Description: Controls the main MySQL database server daemon "mysqld"
# and its wrapper script "mysqld_safe".
### END INIT INFO
#
# Abort on the first unhandled failing command and on use of an unset
# variable.
set -e
set -u
# When DEBIAN_SCRIPT_DEBUG is set and non-empty this expands to
# "set -v -x" (verbose + trace); otherwise it expands to nothing.
${DEBIAN_SCRIPT_DEBUG:+ set -v -x}
# Nothing to do (and not an error) when the server binary is absent.
test -x /usr/sbin/mysqld || exit 0
# Pull in the log_* helper functions used below.
. /lib/lsb/init-functions
# Physical absolute path of this script, used for the restart action and
# the usage message.
SELF=$(cd $(dirname $0); pwd -P)/$(basename $0)
CONF=/etc/mysql/my.cnf
# mysqladmin invocation using the Debian maintenance defaults file.
MYADMIN="/usr/bin/mysqladmin --defaults-file=/etc/mysql/debian.cnf"
# priority can be overridden and "-s" adds output to stderr
ERR_LOGGER="logger -p daemon.err -t /etc/init.d/mysql -i"
# Safeguard (relative paths, core dumps..)
cd /
umask 077
# mysqladmin likes to read /root/.my.cnf. This is usually not what I want
# as many admins e.g. only store a password without a username there and
# so break my scripts.
export HOME=/etc/mysql/
## Look up one option in the defaults mysqld would be started with.
#
# Prints the text after the first "=" of the last argument reported by
# "mysqld --print-defaults" that contains "--<option>".
#
# Usage: void mysqld_get_param option
mysqld_get_param() {
  local wanted="--$1"
  /usr/sbin/mysqld --print-defaults | tr ' ' '\n' | grep -- "$wanted" | tail -n 1 | cut -d '=' -f 2
}
## Do some sanity checks before even trying to start mysqld.
#
# Warns (without aborting) when the configuration file is unreadable and
# exits with status 1 when the data directory's partition is nearly full.
sanity_checks() {
# check for config file
if [ ! -r /etc/mysql/my.cnf ]; then
log_warning_msg "$0: WARNING: /etc/mysql/my.cnf cannot be read. See README.Debian.gz"
echo "WARNING: /etc/mysql/my.cnf cannot be read. See README.Debian.gz" | $ERR_LOGGER
fi
# check for diskspace shortage
datadir=`mysqld_get_param datadir`
# The awk exit status is deliberately inverted: it exits 1 when the fourth
# column of the last df line is greater than 4096 and 0 otherwise, so the
# "if" branch runs only when that value is 4096 or less.
# NOTE(review): presumably column 4 is the available space in 1K blocks -
# confirm against the df version in use.
if LC_ALL=C BLOCKSIZE= df --portability $datadir/. | tail -n 1 | awk '{ exit ($4>4096) }'; then
log_failure_msg "$0: ERROR: The partition with $datadir is too full!"
echo "ERROR: The partition with $datadir is too full!" | $ERR_LOGGER
exit 1
fi
}
## Checks if there is a server running and if so if it is accessible.
#
# check_alive insists on a pingable server
# check_dead also fails if there is a lost mysqld in the process list
#
# Returns 0 when the requested condition holds and 1 otherwise; with
# "warn" as second argument a failed check is also logged at debug
# priority.
#
# Usage: boolean mysqld_status [check_alive|check_dead] [warn|nowarn]
mysqld_status () {
# ping_alive becomes 1 when "mysqladmin ping" succeeded and 0 otherwise;
# $? must be read immediately after the assignment.
# NOTE(review): every call site visible in this script is an "if"
# condition, where set -e is not applied to a failing ping.
ping_output=`$MYADMIN ping 2>&1`; ping_alive=$(( ! $? ))
ps_alive=0
pidfile=`mysqld_get_param pid-file`
# ps_alive becomes 1 when the pid file exists and ps finds that process.
if [ -f "$pidfile" ] && ps `cat $pidfile` >/dev/null 2>&1; then ps_alive=1; fi
if [ "$1" = "check_alive" -a $ping_alive = 1 ] ||
[ "$1" = "check_dead" -a $ping_alive = 0 -a $ps_alive = 0 ]; then
return 0 # EXIT_SUCCESS
else
if [ "$2" = "warn" ]; then
echo -e "$ps_alive processes alive and '$MYADMIN ping' resulted in\n$ping_output\n" | $ERR_LOGGER -p daemon.debug
fi
return 1 # EXIT_FAILURE
fi
}
#
# main()
#
# Dispatch on the first argument; with no argument the default expands to
# a literal pair of quotes, which falls through to the usage branch.
case "${1:-''}" in
  'start')
    sanity_checks;
    # Start daemon
    log_daemon_msg "Starting MySQL database server" "mysqld"
    if mysqld_status check_alive nowarn; then
      log_progress_msg "already running"
      log_end_msg 0
    else
      /usr/bin/mysqld_safe > /dev/null 2>&1 &
      # 6s was reported in #352070 to be too few when using ndbcluster
      for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14; do
        sleep 1
        if mysqld_status check_alive nowarn ; then break; fi
        log_progress_msg "."
      done
      if mysqld_status check_alive warn; then
        log_end_msg 0
        # Now start mysqlcheck or whatever the admin wants.
        output=$(/etc/mysql/debian-start)
        [ -n "$output" ] && log_action_msg "$output"
      else
        log_end_msg 1
        log_failure_msg "Please take a look at the syslog"
      fi
    fi

    # Some warnings
    if $MYADMIN variables | egrep -q have_bdb.*YES; then
      echo "BerkeleyDB is obsolete, see /usr/share/doc/mysql-server-5.0/README.Debian.gz" | $ERR_LOGGER -p daemon.info
    fi
    if [ -f /etc/mysql/debian-log-rotate.conf ]; then
      # The logger variable was cut off to "$ERR_L" in the pasted script;
      # under "set -u" that unset name aborts the start action. Restored
      # to the same logger call as the warning above.
      echo "/etc/mysql/debian-log-rotate.conf is obsolete, see /usr/share/doc/mysql-server-5.0/NEWS.Debian.gz" | $ERR_LOGGER -p daemon.info
    fi
    ;;

  'stop')
    # * As a passwordless mysqladmin (e.g. via ~/.my.cnf) must be possible
    # at least for cron, we can rely on it here, too. (although we have
    # to specify it explicit as e.g. sudo environments points to the normal
    # users home and not /root)
    log_daemon_msg "Stopping MySQL database server" "mysqld"
    if ! mysqld_status check_dead nowarn; then
      # Errors are tolerated around the shutdown call so its status can be
      # inspected instead of aborting the script.
      set +e
      shutdown_out=`$MYADMIN shutdown 2>&1`; r=$?
      set -e
      if [ "$r" -ne 0 ]; then
        log_end_msg 1
        [ "$VERBOSE" != "no" ] && log_failure_msg "Error: $shutdown_out"
        log_daemon_msg "Killing MySQL database server by signal" "mysqld"
        # Escalate: SIGTERM first, SIGKILL only if the server is still
        # there after ten seconds.
        killall -15 mysqld
        server_down=
        for i in 1 2 3 4 5 6 7 8 9 10; do
          sleep 1
          if mysqld_status check_dead nowarn; then server_down=1; break; fi
        done
        if test -z "$server_down"; then killall -9 mysqld; fi
      fi
    fi

    if ! mysqld_status check_dead warn; then
      log_end_msg 1
      log_failure_msg "Please stop MySQL manually and read /usr/share/doc/mysql-server-5.0/README.Debian.gz!"
      exit -1
    else
      log_end_msg 0
    fi
    ;;

  'restart')
    # A failing stop must not prevent the subsequent start.
    set +e; $SELF stop; set -e
    $SELF start
    ;;

  'reload'|'force-reload')
    log_daemon_msg "Reloading MySQL database server" "mysqld"
    $MYADMIN reload
    log_end_msg 0
    ;;

  'status')
    if mysqld_status check_alive nowarn; then
      log_action_msg "$($MYADMIN version)"
    else
      log_action_msg "MySQL is stopped."
      exit 3
    fi
    ;;

  *)
    echo "Usage: $SELF start|stop|restart|reload|force-reload|status"
    exit 1
    ;;
esac
I found many hints, but I would like this resolved to a certain degree of reliability for production servers.
Edit: It seems to be exactly this unsolved bug.
Maybe it is this bug from the MySQL site.
This seems related or identical.
Some people talk of a race condition with two instances of mysqld_safe. Others suggest commenting out the sanity check in the startup script.
I would try to figure out what is causing the CPU issue, rather than investigate how to re-write the startup script. The startup script is fairly standard and should work well in a production environment.