initial commit

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Matthias Johnson 2026-02-27 15:09:25 -07:00
commit 75891c3271
129 changed files with 8046 additions and 0 deletions

View file

@ -0,0 +1,93 @@
# Monitoring stack (Prometheus, Alloy, Grafana, Loki) rendered from this
# Jinja2 template. Templated scalars are quoted so the rendered YAML stays
# valid whatever characters the variables contain.
services:
  prometheus:
    image: "prom/prometheus:{{ prometheus_version }}"
    container_name: prometheus
    restart: unless-stopped
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--storage.tsdb.retention.time={{ prometheus_retention_days }}d'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
      # Accept samples pushed to the remote-write endpoint.
      - '--web.enable-remote-write-receiver'
    volumes:
      - "{{ monitoring_data_path }}/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro"
      - "{{ monitoring_data_path }}/prometheus/data:/prometheus"
    networks:
      - monitoring
      - caddy
    # prometheus_port is only the host-side port; the container side is 9090.
{% if prometheus_expose_port | default(true) %}
    ports:
      - "{{ prometheus_port }}:9090"
{% endif %}

  alloy:
    image: "grafana/alloy:{{ alloy_version }}"
    container_name: alloy
    restart: unless-stopped
    # NOTE(review): privileged mode plus the host mounts below give this
    # container broad (read-only) access to the host; presumably required by
    # the node/cAdvisor exporters -- confirm it cannot be narrowed.
    privileged: true
    command:
      - run
      - "--server.http.listen-addr=0.0.0.0:{{ alloy_port }}"
      - "--storage.path=/var/lib/alloy/data"
      - /etc/alloy/config.alloy
    volumes:
      - "{{ monitoring_data_path }}/alloy/config.alloy:/etc/alloy/config.alloy:ro"
      - /:/host/root:ro
      - /sys:/host/sys:ro
      - /proc:/host/proc:ro
      - /var/log:/var/log:ro
      - /run/log/journal:/run/log/journal:ro
      - /etc/machine-id:/etc/machine-id:ro
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - /sys/fs/cgroup:/sys/fs/cgroup:ro
      - /var/lib/docker:/var/lib/docker:ro
    environment:
      HOSTNAME: "{{ ansible_facts['hostname'] }}"
    networks:
      - monitoring
    # Alloy listens on alloy_port inside the container, so both sides match.
{% if alloy_expose_port | default(true) %}
    ports:
      - "{{ alloy_port }}:{{ alloy_port }}"
{% endif %}

  grafana:
    image: "grafana/grafana:{{ grafana_version }}"
    container_name: grafana
    restart: unless-stopped
    volumes:
      - "{{ monitoring_data_path }}/grafana/data:/var/lib/grafana"
      - "{{ monitoring_data_path }}/grafana/provisioning:/etc/grafana/provisioning"
    environment:
      # Rendered as a JSON string (valid double-quoted YAML) so quotes,
      # backslashes, '#' or ': ' in the password cannot break or truncate it.
      GF_SECURITY_ADMIN_PASSWORD: {{ grafana_admin_password | string | to_json }}
      GF_USERS_ALLOW_SIGN_UP: "false"
      GF_SERVER_ROOT_URL: "{{ grafana_root_url | default('http://localhost:3000') }}"
      GF_SERVER_SERVE_FROM_SUB_PATH: "false"
    networks:
      - monitoring
      - caddy
    # Unlike the other *_expose_port switches, grafana_expose_port carries the
    # host port number itself and is unset (falsy) by default, so Grafana is
    # not published unless a port is given.
{% if grafana_expose_port | default(false) %}
    ports:
      - "{{ grafana_expose_port }}:3000"
{% endif %}

  loki:
    image: "grafana/loki:{{ loki_version }}"
    container_name: loki
    restart: unless-stopped
    command: "-config.file=/etc/loki/local-config.yaml"
    volumes:
      - "{{ monitoring_data_path }}/loki/loki-config.yaml:/etc/loki/local-config.yaml:ro"
      - "{{ monitoring_data_path }}/loki/data:/loki"
    networks:
      - monitoring
    # loki_port is only the host-side port; the container side is 3100.
{% if loki_expose_port | default(true) %}
    ports:
      - "{{ loki_port | default(3100) }}:3100"
{% endif %}
# Both networks are declared external: this file joins them, it does not
# create them.
networks:
  caddy:
    external: true
  monitoring:
    external: true

View file

@ -0,0 +1,163 @@
// Prometheus metrics collection

// Host metrics exporter. The filesystem roots are read through the /host/...
// paths, and the collector list is set explicitly.
prometheus.exporter.unix "node" {
  procfs_path = "/host/proc"
  sysfs_path  = "/host/sys"
  rootfs_path = "/host/root"

  set_collectors = [
    "cpu",
    "loadavg",
    "meminfo",
    "diskstats",
    "filesystem",
    "netdev",
    "textfile",
  ]

  // Directory handed to the textfile collector.
  textfile {
    directory = "/host/root/var/lib/node_exporter/textfile_collector"
  }
}
// Scrape the node exporter's targets and pass the samples to remote write.
prometheus.scrape "node_exporter" {
  forward_to = [prometheus.remote_write.metrics.receiver]
  targets    = prometheus.exporter.unix.node.targets
}
// Scrape Alloy's own HTTP endpoint on localhost.
prometheus.scrape "alloy" {
  forward_to = [prometheus.remote_write.metrics.receiver]
  targets    = [
    {__address__ = "localhost:{{ alloy_port }}"},
  ]
}
// Container metrics exporter, connected to Docker through its unix socket.
prometheus.exporter.cadvisor "docker" {
  docker_only = true
  docker_host = "unix:///var/run/docker.sock"
}
// Scrape the cAdvisor exporter's targets and pass the samples to remote write.
prometheus.scrape "cadvisor" {
  forward_to = [prometheus.remote_write.metrics.receiver]
  targets    = prometheus.exporter.cadvisor.docker.targets
}
// Push every scraped sample to Prometheus' remote-write receiver.
// The address is resolved on the shared Docker network, so it must use the
// port Prometheus listens on inside its container (9090). prometheus_port is
// only the host-published side of the Compose port mapping and would break
// this URL whenever it differs from 9090.
prometheus.remote_write "metrics" {
  endpoint {
    url = "http://prometheus:9090/api/v1/write"
  }
}
// Journal log collection (includes both system logs and Docker containers)

// Read journal entries, apply the relabel rules exported by
// loki.relabel.journal, and tag everything with job="journal".
loki.source.journal "journal" {
  labels        = {job = "journal"}
  relabel_rules = loki.relabel.journal.rules
  forward_to    = [loki.process.journal.receiver]
}
// Rule container only: nothing is forwarded from here (forward_to is empty);
// the exported rules are referenced by loki.source.journal.
loki.relabel "journal" {
  forward_to = []

  // unit <- systemd unit (e.g., ssh.service, docker.service)
  rule {
    target_label  = "unit"
    source_labels = ["__journal__systemd_unit"]
  }

  // container <- container name for Docker containers
  rule {
    target_label  = "container"
    source_labels = ["__journal_container_name"]
  }

  // priority <- syslog priority
  // (0=emerg, 1=alert, 2=crit, 3=err, 4=warn, 5=notice, 6=info, 7=debug)
  rule {
    target_label  = "priority"
    source_labels = ["__journal_priority"]
  }

  // syslog_identifier <- syslog identifier (program name)
  rule {
    target_label  = "syslog_identifier"
    source_labels = ["__journal_syslog_identifier"]
  }

  // Entries from the tuwunel container get their own job label.
  rule {
    source_labels = ["__journal_container_name"]
    regex         = "tuwunel"
    replacement   = "tuwunel"
    target_label  = "job"
  }
}
// Post-process journal entries before they are written to Loki.
loki.process "journal" {
  forward_to = [loki.write.logs.receiver]

  // Extract a textual log level from common formats:
  // level=info, "level":"info", [INFO], etc.
  stage.regex {
    expression = "(?i)(level=|\"level\":\\s*\"|\\[)(?P<extracted_level>debug|info|warn|warning|error|err|fatal|panic|critical|crit|notice)(\\]|\"|\\s|$)"
  }

  // Build the `level` value. When the regex matched, use the extracted level,
  // lower-cased because the match is case-insensitive (otherwise INFO, Info
  // and info would become three different label values). When it did not
  // match, fall back to the entry's `priority` value. No word-to-number
  // mapping is performed here.
  // The raw/endraw markers stop Jinja from expanding the Go template.
  stage.template {
    source   = "level"
    template = "{% raw %}{{ if .extracted_level }}{{ ToLower .extracted_level }}{{ else }}{{ .priority }}{{ end }}{% endraw %}"
  }

  // Promote `level` to a label (empty string: take the value of the same name).
  stage.labels {
    values = {
      level = "",
    }
  }
}
// Ship processed log entries to Loki's push API.
// The address is resolved on the shared Docker network, so it must use the
// port Loki listens on inside its container (http_listen_port 3100 in the
// Loki config). loki_port is only the host-published side of the Compose
// port mapping, and it has no default here, so leaving it undefined would
// also make this template fail to render.
loki.write "logs" {
  endpoint {
    url = "http://loki:3100/loki/api/v1/push"
  }
}
// Fail2ban log file collection (ban/unban details go to file, not journald)

// Target list containing the single fail2ban log path.
local.file_match "fail2ban" {
  path_targets = [
    {"__path__" = "/host/root/var/log/fail2ban.log"},
  ]
}
// Read the matched fail2ban file and hand its lines to the fail2ban pipeline.
loki.source.file "fail2ban" {
  forward_to = [loki.process.fail2ban.receiver]
  targets    = local.file_match.fail2ban.targets
}
// Label fail2ban lines and derive a `level` label from the line text.
loki.process "fail2ban" {
  forward_to = [loki.write.logs.receiver]

  // Fixed labels attached to every fail2ban line.
  stage.static_labels {
    values = {
      job  = "fail2ban",
      unit = "fail2ban.service",
    }
  }

  // Capture a whitespace-delimited level word (case-insensitive).
  stage.regex {
    expression = "(?i)\\s(?P<extracted_level>notice|warning|error|info)\\s"
  }

  // Expose the captured value as the `level` label.
  stage.labels {
    values = {
      level = "extracted_level",
    }
  }
}
// Rspamd log file collection (logs to file inside mailserver, not stdout)

// Target list containing the single rspamd log path.
local.file_match "rspamd" {
  path_targets = [
    {"__path__" = "/host/root/srv/mail/maillogs/rspamd.log"},
  ]
}
// Read the matched rspamd file and hand its lines to the rspamd pipeline.
loki.source.file "rspamd" {
  forward_to = [loki.process.rspamd.receiver]
  targets    = local.file_match.rspamd.targets
}
// Attach fixed labels to rspamd lines, then write them to Loki.
loki.process "rspamd" {
  forward_to = [loki.write.logs.receiver]

  stage.static_labels {
    values = {
      job       = "rspamd",
      container = "mailserver",
    }
  }
}

View file

@ -0,0 +1,13 @@
# Grafana dashboard provisioning: one file-based provider.
apiVersion: 1

providers:
  - name: "Default"
    type: file
    orgId: 1
    # Left empty; folders are derived from the directory layout instead
    # (foldersFromFilesStructure below).
    folder: ""
    disableDeletion: false
    allowUiUpdates: true
    updateIntervalSeconds: 10
    options:
      path: /etc/grafana/provisioning/dashboards
      foldersFromFilesStructure: true

View file

@ -0,0 +1,17 @@
# Grafana datasource provisioning.
# With `access: proxy` the Grafana server itself calls these URLs, i.e. from
# inside the Docker network. They therefore use the ports the services listen
# on inside their containers (Prometheus 9090, Loki 3100), not the
# host-published prometheus_port / loki_port from the Compose port mappings.
apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    url: "http://prometheus:9090"
    isDefault: true
    editable: false
    jsonData:
      timeInterval: 15s

  - name: Loki
    type: loki
    access: proxy
    url: "http://loki:3100"
    editable: false

View file

@ -0,0 +1,43 @@
# Single-node Loki configuration with filesystem storage under /loki.
auth_enabled: false

server:
  http_listen_port: 3100
  grpc_listen_port: 9096

common:
  path_prefix: /loki
  storage:
    filesystem:
      chunks_directory: /loki/chunks
      rules_directory: /loki/rules
  replication_factor: 1
  ring:
    instance_addr: 127.0.0.1
    kvstore:
      store: inmemory

query_range:
  results_cache:
    cache:
      embedded_cache:
        enabled: true
        max_size_mb: 100

schema_config:
  configs:
    - from: 2020-10-24
      store: tsdb
      object_store: filesystem
      schema: v13
      index:
        prefix: index_
        period: 24h

limits_config:
  # Falls back to the Prometheus retention when no Loki-specific value is set.
  # Only enforced because the compactor below has retention enabled.
  retention_period: "{{ loki_retention_days | default(prometheus_retention_days) }}d"
  reject_old_samples: true
  reject_old_samples_max_age: 168h

compactor:
  working_directory: /loki/compactor
  compaction_interval: 10m
  # Without retention_enabled the compactor only compacts indexes and
  # retention_period above is never applied, so old logs are kept forever.
  retention_enabled: true
  # Required by Loki 3.x whenever retention is enabled.
  # NOTE(review): assumes loki_version is 3.x (tsdb + schema v13); confirm.
  delete_request_store: filesystem

View file

@ -0,0 +1,20 @@
# Prometheus scrape configuration.
global:
  scrape_interval: 15s
  evaluation_interval: 15s

scrape_configs:
  # Self-scrape. This runs inside the Prometheus container, where the server
  # listens on 9090; prometheus_port is only the host-published port of the
  # Compose mapping and is not reachable on localhost from in here when it
  # differs from 9090.
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  # Alloy listens on alloy_port inside its container as well.
  - job_name: 'alloy'
    static_configs:
      - targets: ['alloy:{{ alloy_port }}']

  - job_name: 'caddy'
    static_configs:
      - targets: ['caddy:{{ caddy_metrics_port }}']

  # NOTE(review): confirm forgejo_port is the port Forgejo listens on inside
  # its container, not a host-published port.
  - job_name: 'forgejo'
    static_configs:
      - targets: ['forgejo:{{ forgejo_port }}']