Ansible nagios-server role
<yambe:breadcrumb>Ansible_roles|Ansible roles</yambe:breadcrumb>
ansible nagios-server role
A nagios-server role for Ansible, which configures a Nagios server that can monitor both public services and internal host details (via NRPE), can be created using the following steps:
Create roles/nagios-server folder
mkdir -p roles/nagios-server
Create roles/nagios-server/{files,handlers,tasks,templates} folders
mkdir -p roles/nagios-server/{files,handlers,tasks,templates}
Change working directory to roles/nagios-server folder
cd roles/nagios-server
Create files/commands.cfg file with following contents:
################################################################################
# Notification commands: mail a formatted alert to the contact's address.
################################################################################

# notify-host-by-email: host state change alert
define command{
    command_name    notify-host-by-email
    command_line    /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\n\nDate/Time: $LONGDATETIME$\n" | /bin/mail -s "** $NOTIFICATIONTYPE$ Host Alert: $HOSTNAME$ is $HOSTSTATE$ **" $CONTACTEMAIL$
}

# notify-service-by-email: service state change alert
define command{
    command_name    notify-service-by-email
    command_line    /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$\n" | /bin/mail -s "** $NOTIFICATIONTYPE$ Service Alert: $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ **" $CONTACTEMAIL$
}

################################################################################
# NRPE: run the remote command named by $ARG1$ on the monitored host.
################################################################################

# check_nrpe
define command{
    command_name    check_nrpe
    command_line    $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}

################################################################################
# Host liveness and local resource checks.
################################################################################

# check-host-alive: ping the host with five ICMP echo packets (-p 5).
# Warning at 3000 ms round-trip average or 80% packet loss;
# critical at 5000 ms round-trip average or 100% packet loss.
define command{
    command_name    check-host-alive
    command_line    $USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5
}

# check_local_disk: warning/critical thresholds and the path to check
define command{
    command_name    check_local_disk
    command_line    $USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$
}

# check_local_load: warning/critical load thresholds
define command{
    command_name    check_local_load
    command_line    $USER1$/check_load -w $ARG1$ -c $ARG2$
}

# check_local_procs: warning/critical process counts, filtered by state ($ARG3$)
define command{
    command_name    check_local_procs
    command_line    $USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$
}

# check_local_users: warning/critical logged-in user counts
define command{
    command_name    check_local_users
    command_line    $USER1$/check_users -w $ARG1$ -c $ARG2$
}

# check_local_swap: warning/critical swap thresholds
define command{
    command_name    check_local_swap
    command_line    $USER1$/check_swap -w $ARG1$ -c $ARG2$
}

# check_local_mrtgtraf: traffic check against an MRTG log file
define command{
    command_name    check_local_mrtgtraf
    command_line    $USER1$/check_mrtgtraf -F $ARG1$ -a $ARG2$ -w $ARG3$ -c $ARG4$ -e $ARG5$
}

################################################################################
# Network service checks, usable against both local and remote hosts.
# Unless noted otherwise, $ARG1$ carries optional extra plugin arguments.
################################################################################

# check_ftp
define command{
    command_name    check_ftp
    command_line    $USER1$/check_ftp -H $HOSTADDRESS$ $ARG1$
}

# check_hpjd
define command{
    command_name    check_hpjd
    command_line    $USER1$/check_hpjd -H $HOSTADDRESS$ $ARG1$
}

# check_snmp
define command{
    command_name    check_snmp
    command_line    $USER1$/check_snmp -H $HOSTADDRESS$ $ARG1$
}

# check_http (addresses the host by IP via -I)
define command{
    command_name    check_http
    command_line    $USER1$/check_http -I $HOSTADDRESS$ $ARG1$
}

# check_ssh (extra arguments precede the host address)
define command{
    command_name    check_ssh
    command_line    $USER1$/check_ssh $ARG1$ $HOSTADDRESS$
}

# check_dhcp (no host address is passed; only $ARG1$)
define command{
    command_name    check_dhcp
    command_line    $USER1$/check_dhcp $ARG1$
}

# check_ping: $ARG1$ = warning threshold, $ARG2$ = critical threshold, five packets
define command{
    command_name    check_ping
    command_line    $USER1$/check_ping -H $HOSTADDRESS$ -w $ARG1$ -c $ARG2$ -p 5
}

# check_pop
define command{
    command_name    check_pop
    command_line    $USER1$/check_pop -H $HOSTADDRESS$ $ARG1$
}

# check_imap
define command{
    command_name    check_imap
    command_line    $USER1$/check_imap -H $HOSTADDRESS$ $ARG1$
}

# check_smtp
define command{
    command_name    check_smtp
    command_line    $USER1$/check_smtp -H $HOSTADDRESS$ $ARG1$
}

# check_tcp: $ARG1$ = port, $ARG2$ = extra plugin arguments
define command{
    command_name    check_tcp
    command_line    $USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$ $ARG2$
}

# check_udp: $ARG1$ = port, $ARG2$ = extra plugin arguments
define command{
    command_name    check_udp
    command_line    $USER1$/check_udp -H $HOSTADDRESS$ -p $ARG1$ $ARG2$
}

# check_nt: fixed port 12489; $ARG1$ = variable to query, $ARG2$ = extra arguments
define command{
    command_name    check_nt
    command_line    $USER1$/check_nt -H $HOSTADDRESS$ -p 12489 -v $ARG1$ $ARG2$
}

################################################################################
# Performance data: append one tab-separated record per check to a log file.
################################################################################

# process-host-perfdata
define command{
    command_name    process-host-perfdata
    command_line    /usr/bin/printf "%b" "$LASTHOSTCHECK$\t$HOSTNAME$\t$HOSTSTATE$\t$HOSTATTEMPT$\t$HOSTSTATETYPE$\t$HOSTEXECUTIONTIME$\t$HOSTOUTPUT$\t$HOSTPERFDATA$\n" >> /var/log/nagios/host-perfdata.out
}

# process-service-perfdata
define command{
    command_name    process-service-perfdata
    command_line    /usr/bin/printf "%b" "$LASTSERVICECHECK$\t$HOSTNAME$\t$SERVICEDESC$\t$SERVICESTATE$\t$SERVICEATTEMPT$\t$SERVICESTATETYPE$\t$SERVICEEXECUTIONTIME$\t$SERVICELATENCY$\t$SERVICEOUTPUT$\t$SERVICEPERFDATA$\n" >> /var/log/nagios/service-perfdata.out
}
Create files/index.html file with following contents:
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
  <head>
    <meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
    <!-- Immediately (0 seconds) redirect the browser to the relative URL "nagios" -->
    <meta http-equiv="Refresh" content="0; URL=nagios" />
  </head>
  <body>
  </body>
</html>
Create empty files/localhost.cfg file with no content
Create handlers/main.yaml with following contents
---
# Handler triggered by tasks that "notify: restart nagios".
- name: restart nagios
  service:
    name: nagios
    state: restarted
Create tasks/main.yaml with following contents:
---
# Tasks for the nagios-server role:
#   1. install Nagios, its plugins and NRPE from EPEL,
#   2. render contacts and per-client host/service definitions from templates,
#   3. install the custom commands.cfg and an empty localhost.cfg,
#   4. validate the configuration, then start and enable the services.
# Every task that changes a Nagios config file notifies "restart nagios", so
# the running daemon picks the change up at the end of the play.

- name: Install epel-release
  yum:
    name: "{{ item }}"
    state: present
  with_items:
    - epel-release

- name: Install the necessary packages
  yum:
    name: "{{ item }}"
    state: present
  with_items:
    - nagios
    - nagios-devel
    - nagios-lcgdm
    - nagios-plugins-all
    - nagios-plugins-fts
    - nagios-plugins-lcgdm
    - pnp4nagios
    - nagios-plugins-nrpe
    - nrpe

- name: Configure nagios to send alerts over email
  template:
    src: contacts.j2
    dest: /etc/nagios/objects/contacts.cfg
  notify:
    - restart nagios

- name: Create server directory for client information
  file:
    path: /etc/nagios/servers
    state: directory
    mode: "0755"
    owner: root
    group: nagios

# insertafter is a regular expression; if it does not match, lineinfile
# appends the line at the end of the file.
- name: To maintain the clients through nagios
  lineinfile:
    dest: /etc/nagios/nagios.cfg
    insertafter: 'cfg_dir=/etc/nagios/conf.d'
    line: 'cfg_dir=/etc/nagios/servers'
  notify:
    - restart nagios

- name: Add service information to client machine
  template:
    src: client_info.j2
    dest: /etc/nagios/servers/all.cfg
  notify:
    - restart nagios

- name: Replace existing localhost.cfg file with empty file
  copy:
    src: localhost.cfg
    dest: /etc/nagios/objects/localhost.cfg
    owner: root
    group: nagios
    mode: "0644"
  notify:
    - restart nagios

- name: Configure various custom command in commands.cfg
  copy:
    src: commands.cfg
    dest: /etc/nagios/objects/commands.cfg
    mode: "0664"
  notify:
    - restart nagios

# Mode 0664 matches the mode used when copying commands.cfg above. Using a
# different mode here (previously 0755) makes the two tasks fight over the
# file and report "changed" on every run; config files also need no
# execute bit.
- name: Set proper permissions on client files
  file:
    path: "{{ item }}"
    mode: "0664"
    owner: root
    group: nagios
  with_items:
    - /etc/nagios/objects/contacts.cfg
    - /etc/nagios/servers/all.cfg
    - /etc/nagios/objects/commands.cfg

# Read-only sanity check: a non-zero exit aborts the play before the restart
# handler runs, so a broken configuration is never loaded.
- name: Verify the setup of nagios
  command: nagios -v /etc/nagios/nagios.cfg
  changed_when: false

- name: Start the nagios service
  service:
    name: "{{ item }}"
    state: started
    enabled: yes
  with_items:
    - nagios
    - nrpe
    - httpd

- name: Setup automatic redirect to /nagios
  copy:
    src: index.html
    dest: /var/www/html/index.html
    owner: root
    group: root
    mode: "0444"
Create templates/client_info.j2 with following contents:
{# Renders one Nagios host definition per entry of nagios_client_list, plus one
   service definition per name listed in that entry's "service" list.
   Each entry is expected to provide: hostname, ip, contactgroup, service.
   Jinja tags are kept at column 0 so no stray indentation is emitted. #}
###################################################remote service check using nagios##################################################
{% for client in nagios_client_list %}

# Define a host entry for this monitored client
define host{
        use                     linux-server    ; Name of host template to use
                                                ; This host definition will inherit all variables that are defined
                                                ; in (or inherited by) the linux-server host template definition.
        host_name               {{client.hostname}}
        address                 {{client.ip}}
}

{% for service in client.service %}
{# ---- Checks run from the Nagios server against the client's network services ---- #}
{% if service == "ping" %}
# Define a service to "ping" the client
# NOTE(review): no contact_groups is set here, so contacts presumably come
# from the local-service template - confirm.
define service{
        use                     local-service   ; Name of service template to use
        host_name               {{client.hostname}}
        service_description     {{service}}
        check_command           check_ping!100.0,20%!500.0,60%
}
{% endif %}
{% if service == "ssh" %}
# Define a service to check SSH on the client
define service{
        use                     local-service   ; Name of service template to use
        host_name               {{client.hostname}}
        service_description     {{service}}
        check_command           check_ssh
        notifications_enabled   1
        contact_groups          {{client.contactgroup}}
}
{% endif %}
{% if service == "http" %}
# Define a service to check HTTP on the client
define service{
        use                     local-service   ; Name of service template to use
        host_name               {{client.hostname}}
        service_description     {{service}}
        check_command           check_http
        notifications_enabled   1
        contact_groups          {{client.contactgroup}}
}
{% endif %}
{% if service == "https" %}
# Define a service to check HTTPS on the client (TCP port 443 with -S)
define service{
        use                     generic-service ; Inherit default values from a template
        host_name               {{client.hostname}}
        service_description     {{service}}
        check_command           check_tcp!443!-S
        notifications_enabled   1
        contact_groups          {{client.contactgroup}}
}
{% endif %}
{% if service == "ftp" %}
# Define a service to check FTP on the client
define service{
        use                     generic-service ; Inherit default values from a template
        host_name               {{client.hostname}}
        service_description     {{service}}
        check_command           check_ftp
        notifications_enabled   1
        contact_groups          {{client.contactgroup}}
}
{% endif %}
{% if service == "smtp" %}
# Define a service to check SMTP on the client
define service{
        use                     generic-service ; Inherit default values from a template
        host_name               {{client.hostname}}
        service_description     {{service}}
        check_command           check_smtp
        notifications_enabled   1
        contact_groups          {{client.contactgroup}}
}
{% endif %}
{% if service == "imap" %}
# Define a service to check IMAP on the client
define service{
        use                     generic-service ; Inherit default values from a template
        host_name               {{client.hostname}}
        service_description     {{service}}
        check_command           check_imap
        notifications_enabled   1
        contact_groups          {{client.contactgroup}}
}
{% endif %}
{% if service == "imaps" %}
# Define a service to check IMAPS on the client (TCP port 993 with -S)
define service{
        use                     generic-service ; Inherit default values from a template
        host_name               {{client.hostname}}
        service_description     {{service}}
        check_command           check_tcp!993!-S
        notifications_enabled   1
        contact_groups          {{client.contactgroup}}
}
{% endif %}
{% if service == "pop3" %}
# Define a service to check POP3 on the client
define service{
        use                     generic-service ; Inherit default values from a template
        host_name               {{client.hostname}}
        service_description     {{service}}
        check_command           check_pop
        notifications_enabled   1
        contact_groups          {{client.contactgroup}}
}
{% endif %}
{% if service == "pop3s" %}
# Define a service to check POP3S on the client (TCP port 995 with -S)
define service{
        use                     generic-service ; Inherit default values from a template
        host_name               {{client.hostname}}
        service_description     {{service}}
        check_command           check_tcp!995!-S
        notifications_enabled   1
        contact_groups          {{client.contactgroup}}
}
{% endif %}
{# ---- Internal checks executed on the client through NRPE ----
   (a Jinja comment, so this banner is not repeated in the output once per service) #}
{% if service == "users" %}
# Define a service to check the number of logged-in users on the client (via NRPE)
define service{
        use                     generic-service ; Inherit default values from a template
        host_name               {{client.hostname}}
        service_description     {{service}}
        check_command           check_nrpe!check_users
        notifications_enabled   1
        contact_groups          {{client.contactgroup}}
}
{% endif %}
{% if service == "load" %}
# Define a service to check CPU load on the client (via NRPE)
define service{
        use                     generic-service ; Inherit default values from a template
        host_name               {{client.hostname}}
        service_description     {{service}}
        check_command           check_nrpe!check_load
        notifications_enabled   1
        contact_groups          {{client.contactgroup}}
}
{% endif %}
{% if service == "processes" %}
# Define a service to check the total number of processes on the client (via NRPE)
define service{
        use                     generic-service ; Inherit default values from a template
        host_name               {{client.hostname}}
        service_description     {{service}}
        check_command           check_nrpe!check_total_procs
        notifications_enabled   1
        contact_groups          {{client.contactgroup}}
}
{% endif %}
{% if service == "disk" %}
# Define a service to check disk usage on the client (via NRPE)
define service{
        use                     generic-service ; Inherit default values from a template
        host_name               {{client.hostname}}
        service_description     {{service}}
        check_command           check_nrpe!check_disk
        notifications_enabled   1
        contact_groups          {{client.contactgroup}}
}
{% endif %}
{% if service == "swap" %}
# Define a service to check swap usage on the client (via NRPE)
define service{
        use                     generic-service ; Inherit default values from a template
        host_name               {{client.hostname}}
        service_description     {{service}}
        check_command           check_nrpe!check_swap
        notifications_enabled   1
        contact_groups          {{client.contactgroup}}
}
{% endif %}
{% if service == "zombie" %}
# Define a service to check for zombie processes on the client (via NRPE)
define service{
        use                     generic-service ; Inherit default values from a template
        host_name               {{client.hostname}}
        service_description     {{service}}
        check_command           check_nrpe!check_zombie_procs
        notifications_enabled   1
        contact_groups          {{client.contactgroup}}
}
{% endif %}
{% endfor %}
{% endfor %}
Create templates/contacts.j2 with following contents:
{# Renders one Nagios contact per entry of nagios_contacts (name, alias, email)
   and one contactgroup per entry of nagios_contactgroups (name, alias, members). #}
{% for contact1 in nagios_contacts %}
define contact{
        contact_name            {{contact1.name}}       ; Short name of user
        use                     generic-contact         ; Inherit defaults from the generic-contact template (not defined in this file; presumably the stock templates.cfg)
        alias                   {{contact1.alias}}      ; Full name of user
        email                   {{contact1.email}}
}
{% endfor %}
{% for contactgroup1 in nagios_contactgroups %}
define contactgroup{
        contactgroup_name       {{contactgroup1.name}}
        alias                   {{contactgroup1.alias}}
        members                 {{ contactgroup1.members | join(',') }}
}
{% endfor %}
Finally, the following variables need to be defined in the host file implementing the nagios-server role, in common-vars, or in vars/main.yaml of the nagios-server role itself:
# Contacts that can receive alerts; rendered by templates/contacts.j2.
nagios_contacts:
  - { name: nagiosadmin, alias: "Nagios administrator", email: logs@sbarjatiya.com }
  - { name: saurabh, alias: "Saurabh", email: saurabh@sbarjatiya.com }

# Contact groups; every name in "members" must be a contact defined above.
nagios_contactgroups:
  # Do not remove or rename this group. Change alias or members as necessary.
  - { name: admins, alias: "Logs admin list", members: [ nagiosadmin ] }
  - { name: engineers, alias: "Saurabh and Nagios admin", members: [ nagiosadmin, saurabh ] }

# Monitored clients; rendered by templates/client_info.j2.
# "contactgroup" must be the name of a group from nagios_contactgroups (not an
# individual contact), because it is written to the services' contact_groups.
nagios_client_list:
  - { hostname: server1.sbarjatiya.com, ip: "10.4.20.201", contactgroup: admins, service: [ "ping", "ssh", "users", "disk", "load", "processes", "zombie", "swap" ] }
  - { hostname: server2.sbarjatiya.com, ip: "10.4.20.171", contactgroup: engineers, service: [ "ping", "ssh", "users", "disk", "load", "processes", "zombie", "swap" ] }
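Other service options supported by templates/client_info.j2 are: http, https, ftp, smtp, imap, imaps, pop3, pop3s. (smtps is not handled by the template shown above; add a matching block, for example a check_tcp on port 465 with -S, if it is needed.)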
<yambe:breadcrumb>Ansible_roles|Ansible roles</yambe:breadcrumb>