Ansible nagios-server role

From Notes_Wiki

Home > CentOS > CentOS 6.x > System administration tools > ansible > Ansible roles > Ansible nagios-server role

A nagios-server role for Ansible, which configures a Nagios server that can monitor both public services and internal host details (through NRPE), can be created using the following steps:

Create roles/nagios-server folder

mkdir -p roles/nagios-server

Create roles/nagios-server/{files,handlers,tasks,templates} folders

mkdir -p roles/nagios-server/{files,handlers,tasks,templates}

Change working directory to roles/nagios-server folder

cd roles/nagios-server

Create files/commands.cfg file with following contents:

# notify-host-by-email
# Builds the host alert text with printf and pipes it to /bin/mail,
# addressed to the contact's e-mail address ($CONTACTEMAIL$).
define command{
        command_name    notify-host-by-email
        command_line    /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\n\nDate/Time: $LONGDATETIME$\n" | /bin/mail -s "** $NOTIFICATIONTYPE$ Host Alert: $HOSTNAME$ is $HOSTSTATE$ **" $CONTACTEMAIL$
}

# notify-service-by-email
# Same mechanism as the host notification, but the message carries the
# service description, service state and the service check output.
define command{
        command_name    notify-service-by-email
        command_line    /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$\n" | /bin/mail -s "** $NOTIFICATIONTYPE$ Service Alert: $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ **" $CONTACTEMAIL$
}

# check_nrpe
# Asks the NRPE daemon on the monitored host ($HOSTADDRESS$) to run the
# remote command whose name is passed as $ARG1$.
define command{
        command_name    check_nrpe
        command_line    $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}


# check-host-alive
# Host liveness test based on ping. Five ICMP echo packets are sent ('-p 5').
# Warning threshold:  3000 ms average round trip or 80% packet loss.
# Critical threshold: 5000 ms average round trip or 100% packet loss.
define command{
        command_name    check-host-alive
        command_line    $USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5
}

# ------------------------------------------------------------------------------
# Local checks: these run a plugin on the Nagios server itself.
# In each command the warning threshold is passed with -w and the critical
# threshold with -c; the values come from the service's check_command arguments.
# ------------------------------------------------------------------------------

# check_local_disk: $ARG1$ = warning, $ARG2$ = critical, $ARG3$ = value for -p
define command{
        command_name    check_local_disk
        command_line    $USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$
}

# check_local_load: $ARG1$ = warning, $ARG2$ = critical
define command{
        command_name    check_local_load
        command_line    $USER1$/check_load -w $ARG1$ -c $ARG2$
}

# check_local_procs: $ARG1$ = warning, $ARG2$ = critical, $ARG3$ = value for -s
define command{
        command_name    check_local_procs
        command_line    $USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$
}

# check_local_users: $ARG1$ = warning, $ARG2$ = critical
define command{
        command_name    check_local_users
        command_line    $USER1$/check_users -w $ARG1$ -c $ARG2$
}

# check_local_swap: $ARG1$ = warning, $ARG2$ = critical
define command{
        command_name    check_local_swap
        command_line    $USER1$/check_swap -w $ARG1$ -c $ARG2$
}

# check_local_mrtgtraf: $ARG1$..$ARG5$ feed -F, -a, -w, -c and -e respectively
define command{
        command_name    check_local_mrtgtraf
        command_line    $USER1$/check_mrtgtraf -F $ARG1$ -a $ARG2$ -w $ARG3$ -c $ARG4$ -e $ARG5$
}


################################################################################
# Network service checks.
# These commands target $HOSTADDRESS$ (check_dhcp takes only $ARG1$), so they
# can be used for services on both the local and remote hosts.
# Where present, a trailing $ARG1$ / $ARG2$ passes extra plugin options through.
################################################################################

# check_ftp
define command{
        command_name    check_ftp
        command_line    $USER1$/check_ftp -H $HOSTADDRESS$ $ARG1$
}

# check_hpjd
define command{
        command_name    check_hpjd
        command_line    $USER1$/check_hpjd -H $HOSTADDRESS$ $ARG1$
}

# check_snmp
define command{
        command_name    check_snmp
        command_line    $USER1$/check_snmp -H $HOSTADDRESS$ $ARG1$
}

# check_http (the address is passed with -I rather than -H)
define command{
        command_name    check_http
        command_line    $USER1$/check_http -I $HOSTADDRESS$ $ARG1$
}

# check_ssh (options in $ARG1$ come before the host address)
define command{
        command_name    check_ssh
        command_line    $USER1$/check_ssh $ARG1$ $HOSTADDRESS$
}

# check_dhcp (no host address is passed; everything comes from $ARG1$)
define command{
        command_name    check_dhcp
        command_line    $USER1$/check_dhcp $ARG1$
}

# check_ping: $ARG1$ = warning threshold, $ARG2$ = critical threshold, five packets
define command{
        command_name    check_ping
        command_line    $USER1$/check_ping -H $HOSTADDRESS$ -w $ARG1$ -c $ARG2$ -p 5
}

# check_pop
define command{
        command_name    check_pop
        command_line    $USER1$/check_pop -H $HOSTADDRESS$ $ARG1$
}

# check_imap
define command{
        command_name    check_imap
        command_line    $USER1$/check_imap -H $HOSTADDRESS$ $ARG1$
}

# check_smtp
define command{
        command_name    check_smtp
        command_line    $USER1$/check_smtp -H $HOSTADDRESS$ $ARG1$
}

# check_tcp: $ARG1$ = port, $ARG2$ = extra options
define command{
        command_name    check_tcp
        command_line    $USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$ $ARG2$
}

# check_udp: $ARG1$ = port, $ARG2$ = extra options
define command{
        command_name    check_udp
        command_line    $USER1$/check_udp -H $HOSTADDRESS$ -p $ARG1$ $ARG2$
}

# check_nt: always connects to port 12489; $ARG1$ = value for -v, $ARG2$ = extra options
define command{
        command_name    check_nt
        command_line    $USER1$/check_nt -H $HOSTADDRESS$ -p 12489 -v $ARG1$ $ARG2$
}


# process-host-perfdata
# Appends one tab-separated line per host check to
# /var/log/nagios/host-perfdata.out.
define command{
        command_name    process-host-perfdata
        command_line    /usr/bin/printf "%b" "$LASTHOSTCHECK$\t$HOSTNAME$\t$HOSTSTATE$\t$HOSTATTEMPT$\t$HOSTSTATETYPE$\t$HOSTEXECUTIONTIME$\t$HOSTOUTPUT$\t$HOSTPERFDATA$\n" >> /var/log/nagios/host-perfdata.out
}

# process-service-perfdata
# Appends one tab-separated line per service check to
# /var/log/nagios/service-perfdata.out.
define command{
        command_name    process-service-perfdata
        command_line    /usr/bin/printf "%b" "$LASTSERVICECHECK$\t$HOSTNAME$\t$SERVICEDESC$\t$SERVICESTATE$\t$SERVICEATTEMPT$\t$SERVICESTATETYPE$\t$SERVICEEXECUTIONTIME$\t$SERVICELATENCY$\t$SERVICEOUTPUT$\t$SERVICEPERFDATA$\n" >> /var/log/nagios/service-perfdata.out
}

Create files/index.html file with following contents:

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
	<!-- Landing page for the web root: forwards visitors to the relative URL "nagios". -->
	<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
	<!-- Redirect immediately (0 second delay). -->
	<meta http-equiv="Refresh" content="0; URL=nagios" />
	<!-- XHTML 1.0 requires a title element inside head. -->
	<title>Redirecting to Nagios</title>
</head>
<body>
	<!-- Fallback link for clients that do not follow the meta refresh. -->
	<p>Redirecting to <a href="nagios">Nagios</a>.</p>
</body>
</html>

Create an empty (zero-byte) files/localhost.cfg file

Create handlers/main.yaml with following contents

---
# Handler for the nagios-server role.
# Tasks trigger it with "notify: restart nagios"; it restarts the nagios
# service so that changed configuration is loaded.
- name: restart nagios
  service:
    name: nagios
    state: restarted

Create tasks/main.yaml with following contents:

---
# Tasks for the nagios-server role:
#   1. install the Nagios server, plugins and NRPE packages,
#   2. deploy contacts, commands and the per-client object file,
#   3. validate the resulting configuration,
#   4. start and enable the services and install the web-root redirect page.
#
# Every task that changes a file read by Nagios notifies the "restart nagios"
# handler so the running daemon picks the change up.

# Installed in its own task, before the Nagios packages are requested
# (presumably because those packages come from the EPEL repository - confirm).
- name: Install epel-release
  yum: name={{item}} state=present
  with_items:
    - epel-release

- name: Install the necessary packages
  yum: name={{item}} state=present
  with_items:
    - nagios
    - nagios-devel
    - nagios-lcgdm
    - nagios-plugins-all
    - nagios-plugins-fts
    - nagios-plugins-lcgdm
    - pnp4nagios
    - nagios-plugins-nrpe
    - nrpe

# Ownership and mode are set on the task that writes each file, with a single
# non-executable mode per file, so repeated runs report no change.
- name: Configure nagios to send alerts over email
  template: src=contacts.j2 dest='/etc/nagios/objects/contacts.cfg' owner=root group=nagios mode=0644
  notify:
    - restart nagios

- name: Create server directory for client information
  file: path='/etc/nagios/servers' state=directory mode=0755 owner=root group=nagios

# Makes Nagios read every object file placed in /etc/nagios/servers.
- name: To maintain the clients through nagios
  lineinfile: dest='/etc/nagios/nagios.cfg' insertafter='cfg_dir=/etc/nagios/conf.d' line='cfg_dir=/etc/nagios/servers'
  notify:
    - restart nagios

- name: Add service information to client machine
  template: src=client_info.j2 dest='/etc/nagios/servers/all.cfg' owner=root group=nagios mode=0644
  notify:
    - restart nagios

# The role ships an empty localhost.cfg, which removes the object definitions
# contained in the existing file.
- name: Replace existing localhost.cfg file with empty file
  copy: src=localhost.cfg dest="/etc/nagios/objects/localhost.cfg" owner=root group=nagios mode=0644
  notify:
    - restart nagios

- name: Configure various custom command in commands.cfg
  copy: src=commands.cfg dest=/etc/nagios/objects/commands.cfg owner=root group=nagios mode=0664
  notify:
    - restart nagios

# Read-only syntax check: it stops the play on an invalid configuration
# (before the restart handler runs) and never counts as a change.
- name: Verify the setup of nagios
  command: nagios -v /etc/nagios/nagios.cfg
  changed_when: false

- name: Start the nagios service
  service: name={{item}} state=started enabled=yes
  with_items:
    - nagios
    - nrpe
    - httpd

- name: Setup automatic redirect to /nagios
  copy: src=index.html dest=/var/www/html/index.html owner=root group=root mode=0444


Create templates/client_info.j2 with following contents:

################################################### remote service check using nagios ##################################################
{# Renders one host object per entry of nagios_client_list, followed by one
   service object for every name in that entry's "service" list.
   Keys read from each entry: hostname, ip, service, contactgroup.
   Service names without a matching block below produce no output. #}
{% for client in nagios_client_list %}
# Host definition for a monitored client
define host{
        use                     linux-server            ; Name of host template to use
                                                        ; This host definition will inherit all variables that are defined
                                                        ; in (or inherited by) the linux-server host template definition.
        host_name               {{client.hostname}}
        address                 {{client.ip}}
}

{% for service in client.service %}

{# ---- checks run from the Nagios server against the client's network services ---- #}

{% if service == "ping" %}
# Ping the monitored host
define service{
        use                             local-service         ; Name of service template to use
        host_name                       {{client.hostname}}
        service_description             {{service}}
        check_command                   check_ping!100.0,20%!500.0,60%
{# Route ping alerts to the client's contact group like every other service;
   the guard keeps ping-only clients without a contactgroup working. #}
{% if client.contactgroup is defined %}
        contact_groups                  {{client.contactgroup}}
{% endif %}
}
{% endif %}

{% if service == "ssh" %}
# Check SSH on the monitored host
define service{
        use                             local-service         ; Name of service template to use
        host_name                       {{client.hostname}}
        service_description             {{service}}
        check_command                   check_ssh
        notifications_enabled           1
        contact_groups                  {{client.contactgroup}}
}
{% endif %}

{% if service == "http" %}
# Check HTTP on the monitored host
define service{
        use                             local-service         ; Name of service template to use
        host_name                       {{client.hostname}}
        service_description             {{service}}
        check_command                   check_http
        notifications_enabled           1
        contact_groups                  {{client.contactgroup}}
}
{% endif %}


{% if service == "https" %}
# Check HTTPS on the monitored host (SSL connection to TCP port 443)
define service{
        use                             generic-service
        host_name                       {{client.hostname}}
        service_description             {{service}}
        check_command                   check_tcp!443!-S
        notifications_enabled           1
        contact_groups                  {{client.contactgroup}}
}
{% endif %}

{% if service == "ftp" %}
# Check FTP on the monitored host
define service{
        use                             generic-service         ; Inherit default values from a template
        host_name                       {{client.hostname}}
        service_description             {{service}}
        check_command                   check_ftp
        notifications_enabled           1
        contact_groups                  {{client.contactgroup}}
}
{% endif %}


{% if service == "smtp" %}
# Check SMTP on the monitored host
define service{
        use                             generic-service         ; Inherit default values from a template
        host_name                       {{client.hostname}}
        service_description             {{service}}
        check_command                   check_smtp
        notifications_enabled           1
        contact_groups                  {{client.contactgroup}}
}
{% endif %}


{% if service == "imap" %}
# Check IMAP on the monitored host
define service{
        use                             generic-service         ; Inherit default values from a template
        host_name                       {{client.hostname}}
        service_description             {{service}}
        check_command                   check_imap
        notifications_enabled           1
        contact_groups                  {{client.contactgroup}}
}
{% endif %}

{% if service == "imaps" %}
# Check IMAPS on the monitored host (SSL connection to TCP port 993)
define service{
        use                             generic-service
        host_name                       {{client.hostname}}
        service_description             {{service}}
        check_command                   check_tcp!993!-S
        notifications_enabled           1
        contact_groups                  {{client.contactgroup}}
}
{% endif %}

{% if service == "pop3" %}
# Check POP3 on the monitored host
define service{
        use                             generic-service         ; Inherit default values from a template
        host_name                       {{client.hostname}}
        service_description             {{service}}
        check_command                   check_pop
        notifications_enabled           1
        contact_groups                  {{client.contactgroup}}
}
{% endif %}


{% if service == "pop3s" %}
# Check POP3S on the monitored host (SSL connection to TCP port 995)
define service{
        use                             generic-service
        host_name                       {{client.hostname}}
        service_description             {{service}}
        check_command                   check_tcp!995!-S
        notifications_enabled           1
        contact_groups                  {{client.contactgroup}}
}
{% endif %}

{# ---- internal checks: executed on the client through NRPE ----
   The name after "check_nrpe!" is passed to check_nrpe -c, so a command of
   that name has to be configured in the client's NRPE daemon.
   This divider is a template comment so that it is not written to the output
   once per service. #}

{% if service == "users" %}
# Check the number of logged-in users on the monitored host (via NRPE)
define service{
        use                             generic-service
        host_name                       {{client.hostname}}
        service_description             {{service}}
        check_command                   check_nrpe!check_users
        notifications_enabled           1
        contact_groups                  {{client.contactgroup}}
}
{% endif %}

{% if service == "load" %}
# Check the CPU load on the monitored host (via NRPE)
define service{
        use                             generic-service
        host_name                       {{client.hostname}}
        service_description             {{service}}
        check_command                   check_nrpe!check_load
        notifications_enabled           1
        contact_groups                  {{client.contactgroup}}
}
{% endif %}

{% if service == "processes" %}
# Check the total number of processes on the monitored host (via NRPE)
define service{
        use                             generic-service
        host_name                       {{client.hostname}}
        service_description             {{service}}
        check_command                   check_nrpe!check_total_procs
        notifications_enabled           1
        contact_groups                  {{client.contactgroup}}
}
{% endif %}

{% if service == "disk" %}
# Check disk usage on the monitored host (via NRPE)
define service{
        use                             generic-service
        host_name                       {{client.hostname}}
        service_description             {{service}}
        check_command                   check_nrpe!check_disk
        notifications_enabled           1
        contact_groups                  {{client.contactgroup}}
}
{% endif %}

{% if service == "swap" %}
# Check swap usage on the monitored host (via NRPE)
define service{
        use                             generic-service
        host_name                       {{client.hostname}}
        service_description             {{service}}
        check_command                   check_nrpe!check_swap
        notifications_enabled           1
        contact_groups                  {{client.contactgroup}}
}
{% endif %}

{% if service == "zombie" %}
# Check for zombie processes on the monitored host (via NRPE)
define service{
        use                             generic-service
        host_name                       {{client.hostname}}
        service_description             {{service}}
        check_command                   check_nrpe!check_zombie_procs
        notifications_enabled           1
        contact_groups                  {{client.contactgroup}}
}
{% endif %}

{% endfor %}

{% endfor %}

Create templates/contacts.j2 with following contents:

{# Renders the Nagios contacts file.
   nagios_contacts:      list of entries with keys name, alias, email.
   nagios_contactgroups: list of entries with keys name, alias, members
                         (members is a list of contact names). #}
{% for contact1 in nagios_contacts %}
define contact{
        contact_name                    {{contact1.name}}             ; Short name of user
        use                             generic-contact         ; Inherit default values from the generic-contact template (not defined in this file)
        alias                           {{contact1.alias}}            ; Full name of user
        email                           {{contact1.email}}
}
{% endfor %}



{% for contactgroup1 in nagios_contactgroups %}
define contactgroup{
        contactgroup_name       {{contactgroup1.name}}
        alias                   {{contactgroup1.alias}}
{# join produces a clean comma-separated list without stray whitespace #}
        members                 {{contactgroup1.members | join(',')}}
}
{% endfor %}

Finally following variables need to be defined either in the host file implementing nagios-server role, or in common-vars or in vars/main.yaml of nagios-server role itself:

# People who receive alerts; rendered into contact objects by templates/contacts.j2.
nagios_contacts:
 - { name: nagiosadmin, alias: "Nagios administrator", email: logs@example.com }
 - { name: saurabh, alias: "Saurabh", email: saurabh@example.com }

# Contact groups; every member must be the name of an entry in nagios_contacts.
nagios_contactgroups:
    #Do not remove or rename this group.  Change alias or members as necessary.
 - { name: admins, alias: "Logs admin list",  members: [ nagiosadmin ] }
 - { name: engineers, alias: "Saurabh and Nagios admin",  members: [ nagiosadmin, saurabh ] }

# Monitored hosts. "contactgroup" must be the name of a group from
# nagios_contactgroups (not the name of an individual contact), because it is
# written to the contact_groups directive of the generated services.
# "service" lists the checks to generate for the host.
nagios_client_list:
 - { hostname: server1.sbarjatiya.com , ip: "10.4.20.201", contactgroup: admins, service: [ "ping", "ssh", "users", "disk", "load", "processes", "zombie", "swap" ] }
 - { hostname: server2.sbarjatiya.com , ip: "10.4.20.171", contactgroup: engineers, service: [ "ping", "ssh", "users", "disk", "load", "processes", "zombie", "swap" ] }

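Other supported service options are: http, https, ftp, smtp, imap, imaps, pop3, pop3s. Service names that templates/client_info.j2 does not handle (for example smtps) are silently ignored.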


Home > CentOS > CentOS 6.x > System administration tools > ansible > Ansible roles > Ansible nagios-server role