Setup Monitoring: Prometheus, Node/Nginx/Postfix Exporter, UFW rules

This commit is contained in:
smoon 2025-12-04 14:37:57 +00:00
commit 6ae9867143
25 changed files with 644 additions and 0 deletions

24
.gitignore vendored Normal file
View File

@ -0,0 +1,24 @@
# Ansible temporary files
*.retry
*.pyc
*.log

# SSH keys (NEVER push these!)
id_rsa
id_rsa.pub
id_ed25519
id_ed25519.pub
*.pem
*.key

# Ansible Vault (only if you keep unencrypted secrets here - encrypted vault files are fine)
# secrets.yml

# Inventory (if you don't want real IPs/passwords pushed - usually fine for a homelab, but be careful)
# inventory/hosts.ini <-- Decide for yourself. If it contains passwords: do NOT push it.
# Better: keep passwords only in ansible-vault, or out of the file entirely.

# Backup files
*.bak
*.swp

15
ansible.cfg Normal file
View File

@ -0,0 +1,15 @@
[defaults]
# Where the servers live (inventory file)
inventory = inventory/hosts.ini
# Where the roles live
roles_path = roles
# Disable host key checking -> avoids the "Are you sure .. fingerprint" prompt; acceptable for a homelab
host_key_checking = False
# Do not create *.retry files
retry_files_enabled = False
# Render output as YAML instead of JSON
stdout_callback = yaml

39
inventory/hosts.ini Normal file
View File

@ -0,0 +1,39 @@
#########################################################################
# Debian LXC containers
[lxc_debian]
dns ansible_host=192.168.178.11
#wireguard ansible_host=192.168.178.12
#web1 ansible_host=192.168.178.13
web-conectare ansible_host=192.168.178.14 enable_wireguard_routing=True
vaultwarden ansible_host=192.168.178.15 enable_wireguard_routing=True
guacamole ansible_host=192.168.178.16 enable_wireguard_routing=True
#vm-kiosk1 ansible_host=192.168.178.17
#vm-kiosk2 ansible_host=192.168.178.18
#monitoring ansible_host=192.168.178.19 enable_wireguard_routing=True
gitea ansible_host=192.168.178.20 enable_wireguard_routing=True
ansible ansible_host=192.168.178.21 enable_wireguard_routing=True
#test2 ansible_host=192.168.178.48 enable_wireguard_routing=True
#test1 ansible_host=192.168.178.49

[lxc_debian:vars]
ansible_user=root
ansible_become=no
# +WireGuard route config
wg_gateway=192.168.178.12
wg_subnet=10.0.0.0/24
# +WireGuard routing disabled by default
# NOTE(review): INI inventory values are strings, so "False" here is truthy
# in a bare Jinja `when:` - consumers must filter with | bool (debian_base does).
enable_wireguard_routing=False
#########################################################################
# VPS hosts reachable through the WireGuard tunnel
[vps_servers]
gwVPS ansible_host=10.0.0.2 nginx_enabled=True
prodVPS ansible_host=10.0.0.3 postfix_enabled=True

[vps_servers:vars]
ansible_user=ansible
ansible_become=yes
ansible_port=2222
enable_wireguard_routing=False

# Prometheus monitoring server
[monitoring]
monitorSrv ansible_host=192.168.178.22 enable_wireguard_routing=True

26
inventory/hosts.ini.old Normal file
View File

@ -0,0 +1,26 @@
# GROUP: all Debian LXC containers
[lxc_debian]
# +Format: alias_name ansible_host=IP-address
pihole ansible_host=192.168.178.11 enable_wireguard_routing=true
#wireguardGW ansible_host=192.168.178.12
web1 ansible_host=192.168.178.13 enable_wireguard_routing=true
web-conectare ansible_host=192.168.178.14 enable_wireguard_routing=true
vaultwarden ansible_host=192.168.178.15 enable_wireguard_routing=true
guacamole ansible_host=192.168.178.16 enable_wireguard_routing=true
checkmk ansible_host=192.168.178.19 enable_wireguard_routing=true
gitea ansible_host=192.168.178.20 enable_wireguard_routing=true
ansible_ctl ansible_host=127.0.0.1 ansible_connection=local
test1 ansible_host=192.168.178.49
# +Pi-hole example: host variables win over group variables
#pihole ansible_host=192.168.178.11 enable_wireguard_routing=true

# VARIABLES for the [lxc_debian] group
[lxc_debian:vars]
# +always log in as root
ansible_user=root
# +WireGuard route config
wg_gateway=192.168.178.12
wg_subnet=10.0.0.0/24
# +WireGuard routing disabled by default
enable_wireguard_routing=false

View File

@ -0,0 +1,7 @@
---
# Playbook: remove every trace of the old Checkmk monitoring setup.
- name: Cleanup Checkmk artifacts
  hosts: all # run on ALL servers (homelab + VPS)
  become: yes
  roles:
    - cleanup_checkmk

View File

@ -0,0 +1,29 @@
---
# Playbook: roll out the Prometheus monitoring stack.
# Order matters: exporters are installed first, then the Prometheus role is
# run (again) so its generated scrape config picks up every exporter target.
# NOTE(review): none of these plays set become; lxc_debian logs in as root
# and vps_servers sets ansible_become=yes, but confirm the [monitoring]
# host has sufficient privileges for the prometheus role.

- name: Install Node Exporter
  hosts: all
  roles:
    - node_exporter

- name: Install Prometheus Server
  hosts: monitoring
  roles:
    - prometheus

- name: Setup Exporters on VPS
  hosts: vps_servers
  tasks:
    # Inventory flags come from an INI file, i.e. as *strings* ("True"/"False").
    # BUGFIX: filter through | bool so a literal "False" is not truthy.
    - name: Install Postfix Exporter
      include_role:
        name: postfix_exporter
      when: postfix_enabled | default(False) | bool

    - name: Install Nginx Exporter
      include_role:
        name: nginx_exporter
      when: nginx_enabled | default(False) | bool

- name: Update Prometheus Config
  hosts: monitoring
  roles:
    - prometheus

9
playbooks/site.yml Normal file
View File

@ -0,0 +1,9 @@
---
# Playbook: apply the baseline configuration to all Debian LXC containers.
- name: Setup Base Config for LXCs from Debian Base # typo fix: was "von Debain"
  # which hosts to target? -> the Debian LXC group
  hosts: lxc_debian
  # become root? yes
  become: yes
  # which roles to run
  roles:
    - debian_base

View File

@ -0,0 +1,7 @@
---
# Handler: restart xinetd after its Checkmk service config changed.
- name: Restart xinetd
  systemd:
    name: xinetd
    state: restarted
    daemon_reload: yes
    enabled: yes

View File

@ -0,0 +1,54 @@
---
# Role tasks: install the Checkmk agent on a Debian host/LXC and expose it
# on TCP 6556 via xinetd as a legacy-mode wrapper, because the agent's own
# controller daemon is disabled here (LXC compatibility).

# assumes checkmk_server_ip is defined elsewhere (group_vars/extra vars) - TODO confirm
- name: Download Checkmk Agent from Monitoring Server
  get_url:
    url: "http://{{ checkmk_server_ip }}/cmk/check_mk/agents/check-mk-agent_2.4.0p3-1_all.deb"
    dest: /tmp/check-mk-agent.deb
    mode: '0644'

- name: Install Checkmk Agent
  apt:
    deb: /tmp/check-mk-agent.deb
    state: present

- name: Install xinetd (Legacy Mode Wrapper)
  apt:
    name: xinetd
    state: present

# The controller daemon does not work in these containers; stop and
# disable it so xinetd can serve the agent instead.
- name: Disable Checkmk Systemd Daemon (LXC compatibility fix)
  systemd:
    name: cmk-agent-ctl-daemon
    state: stopped
    enabled: no
  ignore_errors: true

- name: Create xinetd config for Checkmk
  copy:
    dest: /etc/xinetd.d/check-mk-agent
    content: |
      service check-mk-agent
      {
          type = UNLISTED
          port = 6556
          socket_type = stream
          protocol = tcp
          wait = no
          user = root
          server = /usr/bin/check_mk_agent
          log_on_success =
          disable = no
      }
    mode: '0644'
  notify: Restart xinetd

# NOTE(review): this enables the packaged systemd socket on the same port
# (6556) that the xinetd service above binds - one of the two listeners
# will likely fail. Confirm whether both are really intended.
- name: Ensure Checkmk Socket is enabled and active
  systemd:
    name: check-mk-agent.socket
    enabled: yes
    state: started

- name: Ensure xinetd is started and enabled
  service:
    name: xinetd
    state: started
    enabled: yes

View File

@ -0,0 +1,55 @@
---
# Role tasks: fully remove the legacy Checkmk agent setup - systemd socket,
# xinetd wrapper, packages, config directories and the fail2ban spool cron.

- name: Stop Checkmk Agent service (systemd)
  systemd:
    name: check-mk-agent.socket
    state: stopped
    enabled: no
  ignore_errors: yes # in case it is already gone or was never there

- name: Stop xinetd service
  systemd:
    name: xinetd
    state: stopped
    enabled: no
  ignore_errors: yes

- name: Purge check-mk-agent package
  apt:
    name: check-mk-agent
    state: absent
    purge: yes

- name: Purge xinetd package
  apt:
    name: xinetd
    state: absent
    purge: yes

- name: Remove Checkmk directories and configs
  file:
    path: "{{ item }}"
    state: absent
  loop:
    - /etc/check_mk
    - /var/lib/check_mk_agent
    - /usr/lib/check_mk_agent
    - /etc/xinetd.d/check_mk
    - /etc/xinetd.d/check-mk-agent
    - /usr/local/bin/fail2ban_spool.sh # our manually installed script
    - /usr/lib/check_mk_agent/local/fail2ban_check # our local check

- name: Remove Fail2Ban spool cronjob
  cron:
    name: "Checkmk Fail2Ban Spool"
    state: absent

# Note: the cron module only removes jobs carrying its name/comment marker.
# Since the job was added manually as a raw crontab line, Ansible usually
# cannot find it that way - so we additionally strip the raw line:
- name: Remove raw cronjob line for fail2ban
  lineinfile:
    path: /var/spool/cron/crontabs/root
    regexp: 'fail2ban_spool\.sh'
    state: absent
  ignore_errors: yes # in case the file does not exist

View File

@ -0,0 +1,21 @@
---
# Handlers: apply/remove the WireGuard route immediately, without a reboot.
# Triggered by the /etc/network/interfaces edits in the debian_base role.

- name: Set route live
  command: ip route add {{ wg_subnet }} via {{ wg_gateway }} dev eth0
  register: route_add_result
  # "ip route add" exits non-zero ("File exists", rc 2) when the route is
  # already present - that is not a failure for our purposes.
  # BUGFIX: was "route_add_result.sterr" (typo) - the failed_when expression
  # itself errored out instead of inspecting stderr.
  failed_when:
    - route_add_result.rc != 0
    - "'File exists' not in route_add_result.stderr"
  changed_when: route_add_result.rc == 0

- name: Remove route live
  command: ip route del {{ wg_subnet }} via {{ wg_gateway }} dev eth0
  register: route_del_result
  # "ip route del" exits non-zero (rc 2) when the route is already gone.
  failed_when:
    - route_del_result.rc != 0
    - "'No such process' not in route_del_result.stderr"
    - "'No such device' not in route_del_result.stderr"

View File

@ -0,0 +1,61 @@
---
# Role tasks: baseline configuration for Debian LXC containers - apt cache,
# locales, standard tooling, and an optional static route into the
# WireGuard subnet via the WireGuard gateway container.
# (Fix: added the missing "---" YAML document start marker.)

- name: Update apt cache
  apt:
    update_cache: yes
    cache_valid_time: 3600

- name: Ensure locales package is installed
  apt:
    name: locales
    state: present
    update_cache: yes

- name: Generate locales (en_US and de_DE)
  locale_gen:
    name: "{{ item }}"
    state: present
  loop:
    - en_US.UTF-8
    - de_DE.UTF-8

- name: Set default system locale to en_US.UTF-8
  command: update-locale LANG=en_US.UTF-8
  changed_when: false

- name: Install standard packages
  apt:
    name:
      - vim
      - curl
      - wget
      - htop
      - net-tools
      - git
      - bash-completion
      - iproute2
    state: present

- name: Set vim as default editor
  command: update-alternatives --set editor /usr/bin/vim.basic
  ignore_errors: true
  changed_when: false

## WireGuard options
# The route is persisted in /etc/network/interfaces; the notified handlers
# apply/remove it live so no reboot is needed.
- name: Ensure WireGuard route is PRESENT in /etc/network/interfaces
  lineinfile:
    path: /etc/network/interfaces
    regexp: '^up ip route add {{ wg_subnet }} via {{ wg_gateway }}'
    line: 'up ip route add {{ wg_subnet }} via {{ wg_gateway }} dev eth0'
    state: present
  when: enable_wireguard_routing | default(false) | bool
  notify: Set route live

- name: Ensure WireGuard route is ABSENT in /etc/network/interfaces
  lineinfile:
    path: /etc/network/interfaces
    regexp: '^up ip route add {{ wg_subnet }} via {{ wg_gateway }}'
    state: absent
  when: not (enable_wireguard_routing | default(false) | bool)
  notify: Remove route live

View File

@ -0,0 +1,6 @@
---
# Handler: restart the nginx exporter after (re)installation.
- name: Restart nginx_exporter
  service:
    name: prometheus-nginx-exporter
    state: restarted

View File

@ -0,0 +1,14 @@
---
# Role tasks: install the Debian-packaged nginx exporter and make sure its
# service is running. (Fix: added the missing "---" document start marker.)

- name: Install prometheus-nginx-exporter via apt
  apt:
    name: prometheus-nginx-exporter
    state: present
  notify: Restart nginx_exporter

- name: Ensure nginx_exporter service is started
  service:
    name: prometheus-nginx-exporter
    state: started
    enabled: yes

# Note: the default arguments in /etc/default/prometheus-nginx-exporter often
# need adjusting so it scrapes http://127.0.0.1:8080/stub_status.

View File

@ -0,0 +1,6 @@
---
# Handler: reload systemd units and restart node_exporter after the binary
# or unit file changed.
- name: Restart node_exporter
  systemd:
    name: node_exporter
    state: restarted
    daemon_reload: yes

View File

@ -0,0 +1,55 @@
---
# Role tasks: install Prometheus node_exporter v1.7.0 from the upstream
# release tarball and run it as a dedicated system user via systemd.

- name: Create node_exporter user
  user:
    name: node_exporter
    shell: /bin/false
    system: yes

# Downloads from GitHub occasionally fail with get_url in these containers,
# so fall back to curl if the first attempt errors out.
- name: Download node_exporter (Robust Method)
  block:
    - name: Try downloading with get_url
      get_url:
        url: "https://github.com/prometheus/node_exporter/releases/download/v1.7.0/node_exporter-1.7.0.linux-amd64.tar.gz"
        dest: "/tmp/node_exporter.tar.gz"
  rescue:
    - name: Install curl for fallback
      apt:
        name: curl
        state: present

    - name: Fallback downloading with curl
      command: >
        curl -L -o /tmp/node_exporter.tar.gz https://github.com/prometheus/node_exporter/releases/download/v1.7.0/node_exporter-1.7.0.linux-amd64.tar.gz
      args:
        creates: /tmp/node_exporter.tar.gz

- name: Unarchive node_exporter
  unarchive:
    src: "/tmp/node_exporter.tar.gz"
    dest: "/tmp"
    remote_src: yes

- name: Install node_exporter binary
  copy:
    src: "/tmp/node_exporter-1.7.0.linux-amd64/node_exporter"
    dest: "/usr/local/bin/node_exporter"
    mode: '0755'
    remote_src: yes
  notify: Restart node_exporter

- name: Create systemd service file
  template:
    src: node_exporter.service.j2
    dest: /etc/systemd/system/node_exporter.service
  notify: Restart node_exporter

# BUGFIX: daemon_reload is needed here - handlers only flush at the end of
# the play, so on a fresh install systemd would not yet know the new unit
# when this task tries to start it.
- name: Enable and start node_exporter
  systemd:
    name: node_exporter
    state: started
    enabled: yes
    daemon_reload: yes

View File

@ -0,0 +1,13 @@
# systemd unit for the node_exporter binary installed under /usr/local/bin
# by the node_exporter role; runs as the dedicated node_exporter user.
[Unit]
Description=Node Exporter
Wants=network-online.target
After=network-online.target

[Service]
User=node_exporter
Group=node_exporter
Type=simple
ExecStart=/usr/local/bin/node_exporter

[Install]
WantedBy=multi-user.target

View File

@ -0,0 +1,7 @@
---
# Handler: reload systemd units and restart the postfix exporter after
# (re)installation.
- name: Restart postfix_exporter
  systemd:
    name: prometheus-postfix-exporter
    state: restarted
    daemon_reload: yes

View File

@ -0,0 +1,16 @@
---
# Role tasks: install the Debian-packaged postfix exporter and make sure
# its service is running.

- name: Install prometheus-postfix-exporter
  apt:
    name: prometheus-postfix-exporter
    state: present
  notify: Restart postfix_exporter

# The package often auto-starts the service as "prometheus-postfix-exporter";
# make sure it is running and enabled either way.
- name: Ensure service is started and enabled
  systemd:
    name: prometheus-postfix-exporter
    state: started
    enabled: yes

View File

@ -0,0 +1,7 @@
---
# Handler: reload systemd units and restart Prometheus after the binary,
# config, or unit file changed.
- name: Restart prometheus
  systemd:
    name: prometheus
    state: restarted
    daemon_reload: yes

View File

@ -0,0 +1,67 @@
---
# Role tasks: install Prometheus v2.45.0 from the upstream release tarball,
# render its inventory-driven scrape config, and run it via systemd as a
# dedicated system user.

- name: Create prometheus user
  user:
    name: prometheus
    shell: /bin/false
    system: yes

- name: Create directories
  file:
    path: "{{ item }}"
    state: directory
    owner: prometheus
    group: prometheus
    mode: '0755'
  loop:
    - /etc/prometheus
    - /var/lib/prometheus

- name: Download Prometheus
  get_url:
    url: "https://github.com/prometheus/prometheus/releases/download/v2.45.0/prometheus-2.45.0.linux-amd64.tar.gz"
    dest: "/tmp/prometheus.tar.gz"

- name: Unarchive Prometheus
  unarchive:
    src: "/tmp/prometheus.tar.gz"
    dest: "/tmp"
    remote_src: yes

- name: Install binaries
  copy:
    src: "/tmp/prometheus-2.45.0.linux-amd64/{{ item }}"
    dest: "/usr/local/bin/{{ item }}"
    mode: '0755'
    remote_src: yes
  loop:
    - prometheus
    - promtool
  notify: Restart prometheus

- name: Copy console libraries
  copy:
    src: "/tmp/prometheus-2.45.0.linux-amd64/{{ item }}/"
    dest: "/etc/prometheus/{{ item }}/"
    remote_src: yes
  loop:
    - consoles
    - console_libraries

- name: Configure Prometheus (Auto-Discovery)
  template:
    src: prometheus.yml.j2
    dest: /etc/prometheus/prometheus.yml
  notify: Restart prometheus

- name: Create systemd service
  template:
    src: prometheus.service.j2
    dest: /etc/systemd/system/prometheus.service
  notify: Restart prometheus

# BUGFIX: daemon_reload is needed here - handlers only flush at the end of
# the play, so on a fresh install systemd would not yet know the new unit
# when this task tries to start it.
- name: Start Prometheus
  systemd:
    name: prometheus
    state: started
    enabled: yes
    daemon_reload: yes

View File

@ -0,0 +1,21 @@
# systemd unit for the Prometheus server: binary under /usr/local/bin,
# config/consoles under /etc/prometheus, TSDB under /var/lib/prometheus.
[Unit]
Description=Prometheus
Wants=network-online.target
After=network-online.target

[Service]
User=prometheus
Group=prometheus
Type=simple
ExecStart=/usr/local/bin/prometheus \
    --config.file /etc/prometheus/prometheus.yml \
    --storage.tsdb.path /var/lib/prometheus/ \
    --web.console.templates=/etc/prometheus/consoles \
    --web.console.libraries=/etc/prometheus/console_libraries
# Important for LXC:
NoNewPrivileges=yes
PrivateTmp=false

[Install]
WantedBy=multi-user.target

View File

@ -0,0 +1,19 @@
{# Prometheus scrape configuration, rendered from the Ansible inventory.
   Adds a node_exporter target (port 9100) for every inventory host that
   defines an ansible_host address. -#}
global:
  scrape_interval: 15s

scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  - job_name: 'node_exporter'
    static_configs:
      - targets:
# Hier loopen wir durch ALLE Hosts im Inventory
{% for host in groups['all'] %}
# Wir nehmen nur Hosts, die eine IP haben (manche Gruppen sind leer)
{% if hostvars[host]['ansible_host'] is defined %}
          - '{{ hostvars[host]["ansible_host"] }}:9100'
{% endif %}
{% endfor %}

View File

@ -0,0 +1,38 @@
{# Prometheus scrape configuration, rendered from the Ansible inventory.
   - node_exporter: every inventory host that defines ansible_host (port 9100)
   - postfix/nginx exporters: VPS hosts flagged via postfix_enabled /
     nginx_enabled inventory variables (ports 9154 / 9113).
   BUGFIX: the node_exporter loop now guards on `ansible_host is defined`,
   matching the other template variant - an unguarded access fails the whole
   template render for any host without ansible_host. -#}
global:
  scrape_interval: 15s
  evaluation_interval: 15s

scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets:
          - 'localhost:9090'

  - job_name: 'node_exporter'
    static_configs:
      - targets:
{% for host in groups['all'] %}
{% if hostvars[host].ansible_host is defined %}
          - '{{ hostvars[host].ansible_host }}:9100'
{% endif %}
{% endfor %}

  - job_name: 'postfix_exporter'
    static_configs:
      - targets:
{% for host in groups['vps_servers'] %}
{% if hostvars[host]['postfix_enabled'] is defined and hostvars[host]['postfix_enabled'] %}
          - '{{ hostvars[host].ansible_host }}:9154'
{% endif %}
{% endfor %}

  - job_name: 'nginx_exporter'
    static_configs:
      - targets:
{% for host in groups['vps_servers'] %}
{% if hostvars[host]['nginx_enabled'] is defined and hostvars[host]['nginx_enabled'] %}
          - '{{ hostvars[host].ansible_host }}:9113'
{% endif %}
{% endfor %}

View File

@ -0,0 +1,28 @@
global:
scrape_interval: 15s
scrape_configs:
- job_name: 'prometheus'
static_configs:
- targets: ['localhost:9090']
- job_name: 'node_exporter'
static_configs:
- targets:
# Hier loopen wir durch ALLE Hosts im Inventory
{% for host in groups['all'] %}
# Wir nehmen nur Hosts, die eine IP haben (manche Gruppen sind leer)
{% if hostvars[host]['ansible_host'] is defined %}
- '{{ hostvars[host]["ansible_host"] }}:9100'
{% endif %}
{% endfor %}
- job_name: 'postfix_exporter'
static_configs:
- targets:
{% for host in groups['all'] %}
{% if hostvars[host]['postfix_enabled'] is defined and hostvars[host]['postfix_enabled'] %}
- '{{ hostvars[host]["ansible_host"] }}:9154'
{% endif %}
{% endfor %}