initial commit
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
commit
75891c3271
129 changed files with 8046 additions and 0 deletions
93
roles/monitoring/templates/compose.yml.j2
Normal file
93
roles/monitoring/templates/compose.yml.j2
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
# Monitoring stack services (Prometheus, Alloy, Grafana, Loki).
# This file is a Jinja2 template rendered by Ansible before Docker Compose
# reads it. Jinja block tags are kept at column 0 so the tag line contributes
# no leading whitespace to the rendered YAML.
# Templated values that end up as YAML scalars are quoted so that an unusual
# value (empty, boolean-looking, containing "#" or ": ") cannot change the
# document structure or the scalar type.
services:
  prometheus:
    image: prom/prometheus:{{ prometheus_version }}
    container_name: prometheus
    restart: unless-stopped
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--storage.tsdb.retention.time={{ prometheus_retention_days }}d'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
      # Lets other components push samples to /api/v1/write.
      - '--web.enable-remote-write-receiver'
    volumes:
      - {{ monitoring_data_path }}/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - {{ monitoring_data_path }}/prometheus/data:/prometheus
    networks:
      - monitoring
      - caddy
{% if prometheus_expose_port | default(true) %}
    ports:
      # prometheus_port selects the HOST port only; the container side is 9090.
      - "{{ prometheus_port }}:9090"
{% endif %}

  alloy:
    image: grafana/alloy:{{ alloy_version }}
    container_name: alloy
    restart: unless-stopped
    privileged: true
    command:
      - 'run'
      - '--server.http.listen-addr=0.0.0.0:{{ alloy_port }}'
      - '--storage.path=/var/lib/alloy/data'
      - '/etc/alloy/config.alloy'
    volumes:
      - {{ monitoring_data_path }}/alloy/config.alloy:/etc/alloy/config.alloy:ro
      # Host filesystem views, all read-only.
      - /:/host/root:ro
      - /sys:/host/sys:ro
      - /proc:/host/proc:ro
      - /var/log:/var/log:ro
      - /run/log/journal:/run/log/journal:ro
      - /etc/machine-id:/etc/machine-id:ro
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - /sys/fs/cgroup:/sys/fs/cgroup:ro
      - /var/lib/docker:/var/lib/docker:ro
    environment:
      # Quoted: Compose rejects boolean env values, and an all-digit or
      # boolean-looking hostname would otherwise be retyped by the YAML parser.
      HOSTNAME: "{{ ansible_facts['hostname'] }}"
    networks:
      - monitoring
{% if alloy_expose_port | default(true) %}
    ports:
      - "{{ alloy_port }}:{{ alloy_port }}"
{% endif %}

  grafana:
    image: grafana/grafana:{{ grafana_version }}
    container_name: grafana
    restart: unless-stopped
    volumes:
      - {{ monitoring_data_path }}/grafana/data:/var/lib/grafana
      - {{ monitoring_data_path }}/grafana/provisioning:/etc/grafana/provisioning
    environment:
      # to_json emits a correctly escaped double-quoted scalar, so passwords
      # containing quotes, "#", ": " or leading YAML indicators survive intact.
      # "$" is doubled because Compose interpolates single-dollar references.
      GF_SECURITY_ADMIN_PASSWORD: {{ grafana_admin_password | replace('$', '$$') | to_json }}
      GF_USERS_ALLOW_SIGN_UP: "false"
      GF_SERVER_ROOT_URL: "{{ grafana_root_url | default('http://localhost:3000') }}"
      GF_SERVER_SERVE_FROM_SUB_PATH: "false"
    networks:
      - monitoring
      - caddy
{# Unlike the other *_expose_port flags, grafana_expose_port holds the host port number itself; unset or empty means "do not publish". #}
{% if grafana_expose_port | default(false) %}
    ports:
      - "{{ grafana_expose_port }}:3000"
{% endif %}

  loki:
    image: grafana/loki:{{ loki_version }}
    container_name: loki
    restart: unless-stopped
    command: -config.file=/etc/loki/local-config.yaml
    volumes:
      - {{ monitoring_data_path }}/loki/loki-config.yaml:/etc/loki/local-config.yaml:ro
      - {{ monitoring_data_path }}/loki/data:/loki
    networks:
      - monitoring
{% if loki_expose_port | default(true) %}
    ports:
      # loki_port selects the HOST port only; the container side is 3100.
      - "{{ loki_port | default(3100) }}:3100"
{% endif %}
|
||||
|
||||
# Both networks are declared external: Compose attaches to them and does not
# create them, so they must already exist when the stack starts.
networks:
  monitoring:
    external: true
  caddy:
    external: true
|
||||
163
roles/monitoring/templates/config.alloy.j2
Normal file
163
roles/monitoring/templates/config.alloy.j2
Normal file
|
|
@ -0,0 +1,163 @@
|
|||
// Metrics sources. Each scrape component below hands its samples to
// prometheus.remote_write.metrics.

// Host-level metrics, read through the host paths configured here.
prometheus.exporter.unix "node" {
  procfs_path = "/host/proc"
  sysfs_path  = "/host/sys"
  rootfs_path = "/host/root"

  set_collectors = [
    "cpu",
    "loadavg",
    "meminfo",
    "diskstats",
    "filesystem",
    "netdev",
    "textfile",
  ]

  // Directory the textfile collector reads from.
  textfile {
    directory = "/host/root/var/lib/node_exporter/textfile_collector"
  }
}

prometheus.scrape "node_exporter" {
  forward_to = [prometheus.remote_write.metrics.receiver]
  targets    = prometheus.exporter.unix.node.targets
}

// Scrapes localhost on alloy_port.
prometheus.scrape "alloy" {
  forward_to = [prometheus.remote_write.metrics.receiver]
  targets    = [
    {__address__ = "localhost:{{ alloy_port }}"},
  ]
}

// Container metrics via the Docker socket; docker_only is enabled.
prometheus.exporter.cadvisor "docker" {
  docker_only = true
  docker_host = "unix:///var/run/docker.sock"
}

prometheus.scrape "cadvisor" {
  forward_to = [prometheus.remote_write.metrics.receiver]
  targets    = prometheus.exporter.cadvisor.docker.targets
}
|
||||
|
||||
// Pushes every scraped sample to the Prometheus container's remote-write
// endpoint.
//
// The port is the container-internal one (9090). prometheus_port is not used
// here on purpose: the compose template uses that variable as the published
// HOST port ("prometheus_port:9090"), which is irrelevant for traffic between
// containers and would break this URL whenever it differs from 9090.
prometheus.remote_write "metrics" {
  endpoint {
    url = "http://prometheus:9090/api/v1/write"
  }
}
|
||||
|
||||
// Journal pipeline: source -> relabel rules -> processing stages -> loki.write.
// The original author notes the journal carries both system logs and Docker
// container logs.
loki.source.journal "journal" {
  labels = {
    job = "journal",
  }
  relabel_rules = loki.relabel.journal.rules
  forward_to    = [loki.process.journal.receiver]
}

// Used only as a rule container (referenced through .rules above), hence the
// empty forward_to.
loki.relabel "journal" {
  forward_to = []

  // unit <- systemd unit name (e.g. ssh.service, docker.service)
  rule {
    target_label  = "unit"
    source_labels = ["__journal__systemd_unit"]
  }

  // container <- Docker container name
  rule {
    target_label  = "container"
    source_labels = ["__journal_container_name"]
  }

  // priority <- syslog priority
  // (0=emerg, 1=alert, 2=crit, 3=err, 4=warn, 5=notice, 6=info, 7=debug)
  rule {
    target_label  = "priority"
    source_labels = ["__journal_priority"]
  }

  // syslog_identifier <- program name
  rule {
    target_label  = "syslog_identifier"
    source_labels = ["__journal_syslog_identifier"]
  }

  // Entries from the tuwunel container get job="tuwunel".
  rule {
    source_labels = ["__journal_container_name"]
    regex         = "tuwunel"
    replacement   = "tuwunel"
    target_label  = "job"
  }
}

loki.process "journal" {
  forward_to = [loki.write.logs.receiver]

  // Capture a level keyword from common layouts:
  // level=info, "level":"info", [INFO], ...
  stage.regex {
    expression = "(?i)(level=|\"level\":\\s*\"|\\[)(?P<extracted_level>debug|info|warn|warning|error|err|fatal|panic|critical|crit|notice)(\\]|\"|\\s|$)"
  }

  // level = the captured keyword when there is one, otherwise the priority
  // value. No translation between keywords and numbers happens here.
  stage.template {
    source   = "level"
    template = "{% raw %}{{ if .extracted_level }}{{ .extracted_level }}{{ else }}{{ .priority }}{{ end }}{% endraw %}"
  }

  // Publish level as a label.
  stage.labels {
    values = {
      level = "",
    }
  }
}
|
||||
|
||||
// Ships all processed log entries to the Loki container's push API.
//
// The port is the container-internal one (3100, matching http_listen_port in
// the Loki config template). loki_port is not used here on purpose: the
// compose template uses that variable as the published HOST port
// ("loki_port:3100") and gives it a default there, whereas this file would
// fail to render if the variable were undefined.
loki.write "logs" {
  endpoint {
    url = "http://loki:3100/loki/api/v1/push"
  }
}
|
||||
|
||||
// Fail2ban pipeline. Per the original author, ban/unban details are written to
// the log file rather than to journald, so the file is tailed directly.
local.file_match "fail2ban" {
  path_targets = [
    {"__path__" = "/host/root/var/log/fail2ban.log"},
  ]
}

loki.source.file "fail2ban" {
  forward_to = [loki.process.fail2ban.receiver]
  targets    = local.file_match.fail2ban.targets
}

loki.process "fail2ban" {
  forward_to = [loki.write.logs.receiver]

  // Fixed labels for every line from this file.
  stage.static_labels {
    values = {
      unit = "fail2ban.service",
      job  = "fail2ban",
    }
  }

  // Capture the whitespace-delimited level keyword (case-insensitive).
  stage.regex {
    expression = "(?i)\\s(?P<extracted_level>notice|warning|error|info)\\s"
  }

  // level label <- captured keyword
  stage.labels {
    values = {
      level = "extracted_level",
    }
  }
}
|
||||
|
||||
// Rspamd pipeline. Per the original author, rspamd logs to a file inside the
// mailserver rather than to stdout, so the file is tailed directly.
local.file_match "rspamd" {
  path_targets = [
    {"__path__" = "/host/root/srv/mail/maillogs/rspamd.log"},
  ]
}

loki.source.file "rspamd" {
  forward_to = [loki.process.rspamd.receiver]
  targets    = local.file_match.rspamd.targets
}

loki.process "rspamd" {
  forward_to = [loki.write.logs.receiver]

  // Fixed labels for every line from this file.
  stage.static_labels {
    values = {
      job       = "rspamd",
      container = "mailserver",
    }
  }
}
|
||||
13
roles/monitoring/templates/dashboards.yml.j2
Normal file
13
roles/monitoring/templates/dashboards.yml.j2
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
# Grafana dashboard provisioning: a single file-based provider.
apiVersion: 1

providers:
  - name: "Default"
    type: file
    orgId: 1
    folder: ""
    allowUiUpdates: true
    disableDeletion: false
    updateIntervalSeconds: 10
    options:
      # Directory holding the dashboard files; with foldersFromFilesStructure
      # enabled, folders are taken from the directory layout below this path.
      path: /etc/grafana/provisioning/dashboards
      foldersFromFilesStructure: true
|
||||
17
roles/monitoring/templates/datasources.yml.j2
Normal file
17
roles/monitoring/templates/datasources.yml.j2
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
# Grafana datasource provisioning.
#
# Both URLs address the containers over the shared Docker network, so they use
# the container-internal ports (Prometheus 9090, Loki 3100). The
# prometheus_port / loki_port variables are deliberately not used: the compose
# template treats them as published HOST ports, and loki_port has no default
# outside that template.
apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    url: http://prometheus:9090
    isDefault: true
    editable: false
    jsonData:
      timeInterval: 15s

  - name: Loki
    type: loki
    access: proxy
    url: http://loki:3100
    editable: false
|
||||
43
roles/monitoring/templates/loki-config.yaml.j2
Normal file
43
roles/monitoring/templates/loki-config.yaml.j2
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
# Loki configuration with filesystem storage and an in-memory ring.
auth_enabled: false

server:
  grpc_listen_port: 9096
  http_listen_port: 3100

common:
  path_prefix: /loki
  replication_factor: 1
  ring:
    instance_addr: 127.0.0.1
    kvstore:
      store: inmemory
  storage:
    filesystem:
      chunks_directory: /loki/chunks
      rules_directory: /loki/rules

# Embedded results cache for range queries.
query_range:
  results_cache:
    cache:
      embedded_cache:
        enabled: true
        max_size_mb: 100

schema_config:
  configs:
    - from: 2020-10-24
      schema: v13
      store: tsdb
      object_store: filesystem
      index:
        prefix: index_
        period: 24h
|
||||
|
||||
# Retention and ingestion limits.
# retention_period falls back to prometheus_retention_days when
# loki_retention_days is not set. It is only enforced when the compactor below
# has retention enabled.
limits_config:
  retention_period: {{ loki_retention_days | default(prometheus_retention_days) }}d
  reject_old_samples: true
  reject_old_samples_max_age: 168h

compactor:
  working_directory: /loki/compactor
  compaction_interval: 10m
  # Without this, retention_period above is ignored and logs are kept forever.
  retention_enabled: true
  # Loki 3.x refuses to start with retention enabled unless a delete-request
  # store is configured.
  # NOTE(review): this key does not exist in Loki 2.x - confirm loki_version is 3.x.
  delete_request_store: filesystem
|
||||
20
roles/monitoring/templates/prometheus.yml.j2
Normal file
20
roles/monitoring/templates/prometheus.yml.j2
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
# Prometheus server configuration: global intervals and static scrape jobs.
global:
  scrape_interval: 15s
  evaluation_interval: 15s

scrape_configs:
  # Self-scrape. Inside the container Prometheus listens on 9090;
  # prometheus_port is the published HOST port in the compose template and
  # would point at nothing here whenever it differs from 9090.
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  - job_name: 'alloy'
    static_configs:
      - targets: ['alloy:{{ alloy_port }}']

  - job_name: 'caddy'
    static_configs:
      - targets: ['caddy:{{ caddy_metrics_port }}']

  # NOTE(review): assumes the forgejo container is reachable from one of the
  # networks Prometheus is attached to - confirm.
  - job_name: 'forgejo'
    static_configs:
      - targets: ['forgejo:{{ forgejo_port }}']
|
||||
Loading…
Add table
Add a link
Reference in a new issue