From 75891c3271e3678baaa7effe9af15ad9d5bdd274 Mon Sep 17 00:00:00 2001 From: Matthias Johnson Date: Fri, 27 Feb 2026 15:09:25 -0700 Subject: [PATCH] initial commit Co-Authored-By: Claude Sonnet 4.6 --- .envrc | 22 + .gitignore | 37 + CLAUDE.md | 126 +++ README.md | 414 ++++++++ ansible.cfg | 12 + inventory/group_vars/all/config.yml.setup | 164 ++++ inventory/group_vars/all/dns.yml.setup | 128 +++ inventory/group_vars/all/vault.yml.setup | 55 ++ inventory/hosts.yml.example | 21 + playbooks/bootstrap.yml | 87 ++ playbooks/caddy.yml | 7 + playbooks/diun.yml | 8 + playbooks/dns.yml | 23 + playbooks/docker.yml | 8 + playbooks/fail2ban.yml | 8 + playbooks/forgejo.yml | 8 + playbooks/goaccess.yml | 8 + playbooks/mail.yml | 8 + playbooks/monitoring.yml | 8 + playbooks/nebula.yml | 8 + playbooks/provision.yml | 12 + playbooks/radicale.yml | 8 + playbooks/restic.yml | 8 + playbooks/site.yml | 46 + playbooks/tuwunel.yml | 8 + requirements.txt | 3 + requirements.yml | 4 + roles/caddy/defaults/main.yml | 14 + roles/caddy/handlers/main.yml | 8 + roles/caddy/tasks/main.yml | 104 ++ roles/caddy/templates/Caddyfile.j2 | 124 +++ roles/caddy/templates/compose.yml.j2 | 67 ++ roles/common/tasks/main.yml | 11 + roles/diun/defaults/main.yml | 6 + roles/diun/handlers/main.yml | 7 + roles/diun/tasks/main.yml | 29 + roles/diun/templates/compose.yml.j2 | 21 + roles/diun/templates/diun.yml.j2 | 30 + roles/dns/defaults/main.yml | 2 + roles/dns/tasks/main.yml | 25 + roles/docker/handlers/main.yml | 6 + roles/docker/tasks/main.yml | 76 ++ roles/docker_network/tasks/main.yml | 39 + roles/fail2ban/files/filter.d/caddy-auth.conf | 3 + .../files/filter.d/caddy-scanners.conf | 3 + .../files/filter.d/docker-mailserver.conf | 8 + .../fail2ban/files/filter.d/forgejo-auth.conf | 4 + roles/fail2ban/files/jail.d/caddy.conf | 16 + roles/fail2ban/files/jail.d/forgejo.conf | 8 + roles/fail2ban/files/jail.d/mailserver.conf | 9 + roles/fail2ban/handlers/main.yml | 5 + roles/fail2ban/tasks/main.yml | 36 + 
roles/fail2ban/templates/jail.local.j2 | 8 + roles/forgejo/defaults/main.yml | 26 + roles/forgejo/handlers/main.yml | 6 + roles/forgejo/tasks/main.yml | 128 +++ roles/forgejo/templates/app.ini.j2 | 71 ++ roles/forgejo/templates/compose.yml.j2 | 49 + roles/forgejo/templates/runner-config.yml.j2 | 14 + roles/goaccess/defaults/main.yml | 3 + roles/goaccess/handlers/main.yml | 4 + roles/goaccess/tasks/main.yml | 91 ++ .../templates/goaccess-report.service.j2 | 7 + .../goaccess/templates/goaccess-report.sh.j2 | 52 + .../templates/goaccess-report.timer.j2 | 9 + .../templates/goaccess-sync.service.j2 | 7 + roles/goaccess/templates/goaccess-sync.sh.j2 | 7 + .../goaccess/templates/goaccess-sync.timer.j2 | 9 + roles/goaccess/templates/goaccess.conf.j2 | 9 + roles/mail/defaults/main.yml | 6 + roles/mail/handlers/main.yml | 21 + roles/mail/tasks/aliases.yml | 19 + roles/mail/tasks/main.yml | 139 +++ roles/mail/tasks/rainloop.yml | 21 + roles/mail/tasks/users.yml | 26 + roles/mail/templates/compose.yml.j2 | 60 ++ roles/mail/templates/mailserver.env.j2 | 33 + roles/mail/templates/worker-controller.inc.j2 | 2 + roles/monitoring/defaults/main.yml | 21 + roles/monitoring/files/caddy-dashboard.json | 667 +++++++++++++ roles/monitoring/files/forgejo-dashboard.json | 703 +++++++++++++ .../files/mailserver-dashboard.json | 642 ++++++++++++ .../files/node-exporter-dashboard.json | 364 +++++++ .../files/service-overview-dashboard.json | 922 ++++++++++++++++++ .../files/system-metrics-dashboard.json | 553 +++++++++++ roles/monitoring/handlers/main.yml | 30 + roles/monitoring/tasks/main.yml | 127 +++ roles/monitoring/templates/compose.yml.j2 | 93 ++ roles/monitoring/templates/config.alloy.j2 | 163 ++++ roles/monitoring/templates/dashboards.yml.j2 | 13 + roles/monitoring/templates/datasources.yml.j2 | 17 + .../monitoring/templates/loki-config.yaml.j2 | 43 + roles/monitoring/templates/prometheus.yml.j2 | 20 + roles/nebula/defaults/main.yml | 3 + roles/nebula/handlers/main.yml | 4 + 
roles/nebula/tasks/main.yml | 69 ++ roles/nebula/templates/config.yml.j2 | 40 + roles/nebula/templates/nebula.service.j2 | 13 + roles/provision/defaults/main.yml | 4 + roles/provision/tasks/hetzner.yml | 40 + roles/provision/tasks/main.yml | 3 + roles/radicale/defaults/main.yml | 2 + roles/radicale/handlers/main.yml | 7 + roles/radicale/tasks/main.yml | 81 ++ roles/radicale/templates/compose.yml.j2 | 17 + roles/radicale/templates/config.j2 | 10 + roles/restic/defaults/main.yml | 29 + roles/restic/files/restic_backup | 1 + roles/restic/handlers/main.yml | 4 + roles/restic/tasks/backend.yml | 19 + roles/restic/tasks/backend_sftp.yml | 30 + roles/restic/tasks/backup.yml | 34 + roles/restic/tasks/config.yml | 24 + roles/restic/tasks/install.yml | 6 + roles/restic/tasks/main.yml | 15 + .../restic/templates/restic-backup.service.j2 | 9 + roles/restic/templates/restic-backup.sh.j2 | 59 ++ roles/restic/templates/restic-backup.timer.j2 | 10 + .../restic/templates/restic-prune.service.j2 | 9 + roles/restic/templates/restic-prune.sh.j2 | 56 ++ roles/restic/templates/restic-prune.timer.j2 | 10 + roles/restic/templates/restic-ssh-config.j2 | 4 + roles/restic/templates/restic.env.j2 | 7 + roles/tuwunel/defaults/main.yml | 7 + roles/tuwunel/handlers/main.yml | 6 + roles/tuwunel/tasks/main.yml | 30 + roles/tuwunel/templates/compose.yml.j2 | 16 + roles/tuwunel/templates/tuwunel.toml.j2 | 10 + setup.sh | 243 +++++ 129 files changed, 8046 insertions(+) create mode 100644 .envrc create mode 100644 .gitignore create mode 100644 CLAUDE.md create mode 100644 README.md create mode 100644 ansible.cfg create mode 100644 inventory/group_vars/all/config.yml.setup create mode 100644 inventory/group_vars/all/dns.yml.setup create mode 100644 inventory/group_vars/all/vault.yml.setup create mode 100644 inventory/hosts.yml.example create mode 100644 playbooks/bootstrap.yml create mode 100644 playbooks/caddy.yml create mode 100644 playbooks/diun.yml create mode 100644 playbooks/dns.yml create mode 
100644 playbooks/docker.yml create mode 100644 playbooks/fail2ban.yml create mode 100644 playbooks/forgejo.yml create mode 100644 playbooks/goaccess.yml create mode 100644 playbooks/mail.yml create mode 100644 playbooks/monitoring.yml create mode 100644 playbooks/nebula.yml create mode 100644 playbooks/provision.yml create mode 100644 playbooks/radicale.yml create mode 100644 playbooks/restic.yml create mode 100644 playbooks/site.yml create mode 100644 playbooks/tuwunel.yml create mode 100644 requirements.txt create mode 100644 requirements.yml create mode 100644 roles/caddy/defaults/main.yml create mode 100644 roles/caddy/handlers/main.yml create mode 100644 roles/caddy/tasks/main.yml create mode 100644 roles/caddy/templates/Caddyfile.j2 create mode 100644 roles/caddy/templates/compose.yml.j2 create mode 100644 roles/common/tasks/main.yml create mode 100644 roles/diun/defaults/main.yml create mode 100644 roles/diun/handlers/main.yml create mode 100644 roles/diun/tasks/main.yml create mode 100644 roles/diun/templates/compose.yml.j2 create mode 100644 roles/diun/templates/diun.yml.j2 create mode 100644 roles/dns/defaults/main.yml create mode 100644 roles/dns/tasks/main.yml create mode 100644 roles/docker/handlers/main.yml create mode 100644 roles/docker/tasks/main.yml create mode 100644 roles/docker_network/tasks/main.yml create mode 100644 roles/fail2ban/files/filter.d/caddy-auth.conf create mode 100644 roles/fail2ban/files/filter.d/caddy-scanners.conf create mode 100644 roles/fail2ban/files/filter.d/docker-mailserver.conf create mode 100644 roles/fail2ban/files/filter.d/forgejo-auth.conf create mode 100644 roles/fail2ban/files/jail.d/caddy.conf create mode 100644 roles/fail2ban/files/jail.d/forgejo.conf create mode 100644 roles/fail2ban/files/jail.d/mailserver.conf create mode 100644 roles/fail2ban/handlers/main.yml create mode 100644 roles/fail2ban/tasks/main.yml create mode 100644 roles/fail2ban/templates/jail.local.j2 create mode 100644 
roles/forgejo/defaults/main.yml create mode 100644 roles/forgejo/handlers/main.yml create mode 100644 roles/forgejo/tasks/main.yml create mode 100644 roles/forgejo/templates/app.ini.j2 create mode 100644 roles/forgejo/templates/compose.yml.j2 create mode 100644 roles/forgejo/templates/runner-config.yml.j2 create mode 100644 roles/goaccess/defaults/main.yml create mode 100644 roles/goaccess/handlers/main.yml create mode 100644 roles/goaccess/tasks/main.yml create mode 100644 roles/goaccess/templates/goaccess-report.service.j2 create mode 100644 roles/goaccess/templates/goaccess-report.sh.j2 create mode 100644 roles/goaccess/templates/goaccess-report.timer.j2 create mode 100644 roles/goaccess/templates/goaccess-sync.service.j2 create mode 100644 roles/goaccess/templates/goaccess-sync.sh.j2 create mode 100644 roles/goaccess/templates/goaccess-sync.timer.j2 create mode 100644 roles/goaccess/templates/goaccess.conf.j2 create mode 100644 roles/mail/defaults/main.yml create mode 100644 roles/mail/handlers/main.yml create mode 100644 roles/mail/tasks/aliases.yml create mode 100644 roles/mail/tasks/main.yml create mode 100644 roles/mail/tasks/rainloop.yml create mode 100644 roles/mail/tasks/users.yml create mode 100644 roles/mail/templates/compose.yml.j2 create mode 100644 roles/mail/templates/mailserver.env.j2 create mode 100644 roles/mail/templates/worker-controller.inc.j2 create mode 100644 roles/monitoring/defaults/main.yml create mode 100644 roles/monitoring/files/caddy-dashboard.json create mode 100644 roles/monitoring/files/forgejo-dashboard.json create mode 100644 roles/monitoring/files/mailserver-dashboard.json create mode 100644 roles/monitoring/files/node-exporter-dashboard.json create mode 100644 roles/monitoring/files/service-overview-dashboard.json create mode 100644 roles/monitoring/files/system-metrics-dashboard.json create mode 100644 roles/monitoring/handlers/main.yml create mode 100644 roles/monitoring/tasks/main.yml create mode 100644 
roles/monitoring/templates/compose.yml.j2 create mode 100644 roles/monitoring/templates/config.alloy.j2 create mode 100644 roles/monitoring/templates/dashboards.yml.j2 create mode 100644 roles/monitoring/templates/datasources.yml.j2 create mode 100644 roles/monitoring/templates/loki-config.yaml.j2 create mode 100644 roles/monitoring/templates/prometheus.yml.j2 create mode 100644 roles/nebula/defaults/main.yml create mode 100644 roles/nebula/handlers/main.yml create mode 100644 roles/nebula/tasks/main.yml create mode 100644 roles/nebula/templates/config.yml.j2 create mode 100644 roles/nebula/templates/nebula.service.j2 create mode 100644 roles/provision/defaults/main.yml create mode 100644 roles/provision/tasks/hetzner.yml create mode 100644 roles/provision/tasks/main.yml create mode 100644 roles/radicale/defaults/main.yml create mode 100644 roles/radicale/handlers/main.yml create mode 100644 roles/radicale/tasks/main.yml create mode 100644 roles/radicale/templates/compose.yml.j2 create mode 100644 roles/radicale/templates/config.j2 create mode 100644 roles/restic/defaults/main.yml create mode 120000 roles/restic/files/restic_backup create mode 100644 roles/restic/handlers/main.yml create mode 100644 roles/restic/tasks/backend.yml create mode 100644 roles/restic/tasks/backend_sftp.yml create mode 100644 roles/restic/tasks/backup.yml create mode 100644 roles/restic/tasks/config.yml create mode 100644 roles/restic/tasks/install.yml create mode 100644 roles/restic/tasks/main.yml create mode 100644 roles/restic/templates/restic-backup.service.j2 create mode 100644 roles/restic/templates/restic-backup.sh.j2 create mode 100644 roles/restic/templates/restic-backup.timer.j2 create mode 100644 roles/restic/templates/restic-prune.service.j2 create mode 100644 roles/restic/templates/restic-prune.sh.j2 create mode 100644 roles/restic/templates/restic-prune.timer.j2 create mode 100644 roles/restic/templates/restic-ssh-config.j2 create mode 100644 
roles/restic/templates/restic.env.j2 create mode 100644 roles/tuwunel/defaults/main.yml create mode 100644 roles/tuwunel/handlers/main.yml create mode 100644 roles/tuwunel/tasks/main.yml create mode 100644 roles/tuwunel/templates/compose.yml.j2 create mode 100644 roles/tuwunel/templates/tuwunel.toml.j2 create mode 100755 setup.sh diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..8bf73c2 --- /dev/null +++ b/.envrc @@ -0,0 +1,22 @@ +# Stack selection — set LINDERHOF_STACK before sourcing, or create a .stack file +if [[ -z "${LINDERHOF_STACK:-}" ]]; then + if [[ -f "$PWD/.stack" ]]; then + LINDERHOF_STACK="$(cat "$PWD/.stack")" + fi +fi + +if [[ -z "${LINDERHOF_STACK:-}" ]]; then + echo "linderhof: LINDERHOF_STACK is not set" >&2 + echo " set it in your environment, or run: echo > .stack" >&2 +else + export LINDERHOF_STACK + export LINDERHOF_DIR="${XDG_CONFIG_HOME:-$HOME/.config}/linderhof/$LINDERHOF_STACK" + export ANSIBLE_INVENTORY="$LINDERHOF_DIR/hosts.yml" + export ANSIBLE_VAULT_PASSWORD_FILE="$LINDERHOF_DIR/vault-pass" + + # Per-stack overrides: DOCKER_HOST, etc. — written by setup.sh + if [[ -f "$LINDERHOF_DIR/stack.env" ]]; then + # shellcheck source=/dev/null + source "$LINDERHOF_DIR/stack.env" + fi +fi diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..18112ae --- /dev/null +++ b/.gitignore @@ -0,0 +1,37 @@ +# Stack selection file +.stack + +# Generated config (lives in $XDG_CONFIG_HOME/linderhof//) +inventory/hosts.yml +inventory/group_vars/all/config.yml +inventory/group_vars/all/vault.yml +inventory/group_vars/all/dns.yml +inventory/group_vars/all/overrides.yml + +# Vault password file +.vault-pass +*vault-pass* + +# Ansible retry files +*.retry + +# Python +__pycache__/ +*.py[cod] +.venv/ +venv/ + +# Editor +*.swp +*.swo +*~ +.idea/ +.vscode/ + +# OS +.DS_Store +Thumbs.db + +# Temporary/backup files +*._s +*... 
diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..7c53f20 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,126 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +Linderhof is an Ansible-based self-hosting infrastructure stack that deploys email, web server, git hosting, Matrix homeserver, monitoring, and backup services using Docker Compose on Ubuntu servers. + +## Common Commands + +```bash +# Select a stack (one-time per clone) +echo <stack> > .stack && direnv allow +# or: export LINDERHOF_STACK=<stack> + +# Run all playbooks +ansible-playbook playbooks/site.yml + +# Run a specific playbook +ansible-playbook playbooks/mail.yml + +# Run specific tags only +ansible-playbook playbooks/site.yml --tags mail,monitoring + +# Edit encrypted secrets +ansible-vault edit $LINDERHOF_DIR/group_vars/all/vault.yml + +# Encrypt/decrypt vault +ansible-vault encrypt $LINDERHOF_DIR/group_vars/all/vault.yml +ansible-vault decrypt $LINDERHOF_DIR/group_vars/all/vault.yml +``` + +Note: Inventory and vault password are set via `ANSIBLE_INVENTORY` and `ANSIBLE_VAULT_PASSWORD_FILE` in `.envrc`, driven by `LINDERHOF_STACK`. No extra flags needed once the stack is selected. + +## Architecture + +**Deployment Pattern:** Each service is deployed to `/srv/<service>/` on the target host with a `compose.yml` and environment files. + +**Standalone Playbooks** (not in `site.yml`): +- `provision.yml` - Provision a cloud VM (Hetzner). Usage: `ansible-playbook playbooks/provision.yml` +- `dns.yml` - Manage DNS zones/records via Hetzner DNS API + +**Playbook Execution Order** (via `site.yml`): +1. bootstrap.yml - SSH, sudo, users, base packages (manual only) +2. docker.yml - Docker engine installation +3. docker_network.yml - Pre-create all Docker networks (must run before any service) +4. nebula.yml - Overlay network (Nebula) +5. caddy.yml - Web server / reverse proxy +6. 
mail.yml - Email (docker-mailserver + rainloop) +7. forgejo.yml - Git server +8. tuwunel.yml - Matrix homeserver (Tuwunel) +9. monitoring.yml - Prometheus, Grafana, Loki, Alloy +10. goaccess.yml - Web analytics +11. diun.yml - Docker image update notifications +12. restic.yml - Encrypted backups +13. fail2ban.yml - Intrusion prevention + +**Role Structure:** Each role in `roles/` contains: +- `tasks/main.yml` - Core provisioning tasks +- `templates/` - Jinja2 templates (compose.yml.j2, config files) +- `handlers/main.yml` - Service restart handlers +- `files/` - Static configuration files + +**Configuration** (lives outside the repo in `$XDG_CONFIG_HOME/linderhof/<stack>/`): +- `$LINDERHOF_DIR/hosts.yml` - Host connection info only +- `$LINDERHOF_DIR/group_vars/all/config.yml` - All public configuration +- `$LINDERHOF_DIR/group_vars/all/vault.yml` - All secrets (encrypted) +- `$LINDERHOF_DIR/group_vars/all/dns.yml` - DNS zone definitions +- `$LINDERHOF_DIR/group_vars/all/overrides.yml` - Per-stack variable overrides (optional) +- `$LINDERHOF_DIR/stack.env` - Per-stack shell vars (DOCKER_HOST, etc.) 
+- `$LINDERHOF_DIR/vault-pass` - Vault encryption key (chmod 600) + +**Template files** (in the repo, used by `setup.sh`): +- `inventory/group_vars/all/config.yml.setup` - Config template +- `inventory/group_vars/all/vault.yml.setup` - Vault template +- `inventory/group_vars/all/dns.yml.setup` - DNS zones template + +**Overriding variables** without editing config.yml — create `overrides.yml`: +```yaml +# Example: override mail hostname during migration +mail_hostname: mail2.example.com +# Example: add extra static sites to Caddy +caddy_sites: + - example.com + - example2.com +``` + +**Service Toggles:** Set `enable_<service>: false` in config.yml to disable: +- `enable_mail` +- `enable_forgejo` +- `enable_tuwunel` +- `enable_monitoring` +- `enable_restic` +- `enable_fail2ban` +- `enable_nebula` +- `enable_diun` + +**Docker Networks:** All networks are pre-created by the `docker_network` role before any service deploys. Services declare all networks as `external: true` in their `compose.yml.j2` — no service creates its own network. Networks are created conditionally based on `enable_*` flags: + +| Network | Created when | +|---------|-------------| +| `caddy` | always | +| `mail` | `enable_mail` | +| `webmail` | `enable_mail` | +| `git` | `enable_forgejo` | +| `monitoring` | `enable_monitoring` | +| `tuwunel` | `enable_tuwunel` | +| `radicale` | `enable_radicale` | + +Caddy's `compose.yml.j2` also conditionally declares network references using the same `enable_*` flags so it never references a network that wasn't created. + +**Adding a new service:** create the network in `docker_network/tasks/main.yml` with the appropriate `when:` condition, declare it `external: true` in the service compose template, and add it to caddy's compose template if caddy needs to reach it. 
+ +## Available Tags + +- `bootstrap` - Initial server setup (use `--tags bootstrap`) +- `docker` - Docker installation +- `mail` - Mail server +- `forgejo` - Git server +- `tuwunel` - Matrix homeserver +- `monitoring` - Monitoring stack +- `restic` - Backup configuration +- `fail2ban` - Intrusion prevention +- `nebula` - Overlay network +- `diun` - Docker image update notifications +- `config` - Configuration-only updates diff --git a/README.md b/README.md new file mode 100644 index 0000000..c25809d --- /dev/null +++ b/README.md @@ -0,0 +1,414 @@ +# Linderhof + +> *Linderhof* — the smallest and most intimate of Ludwig II's Bavarian palaces, the only one he lived to see completed; built entirely to his own vision as a private retreat. ([Wikipedia](https://en.wikipedia.org/wiki/Linderhof_Palace)) + +a self-hosting stack based on ansible and docker compose that comes with + +- email + - [docker-mailserver](https://github.com/docker-mailserver/docker-mailserver) + - [rainloop](https://www.rainloop.net/) +- web server + - [caddy](https://caddyserver.com/) +- git server + - [forgejo](https://forgejo.org/) +- matrix homeserver + - [tuwunel](https://github.com/matrix-construct/tuwunel) +- monitoring + - [alloy](https://github.com/grafana/alloy) + - [grafana](https://grafana.com/) + - [prometheus](https://prometheus.io/) + - [loki](https://github.com/grafana/loki) +- web analytics + - [goaccess](https://goaccess.io/) +- backups + - [restic](https://github.com/restic/restic) +- overlay network + - [nebula](https://github.com/slackhq/nebula) +- docker image update notifications + - [diun](https://github.com/crazy-max/diun) +- intrusion prevention + - [fail2ban](https://github.com/fail2ban/fail2ban) + + +other features include: +- runs on opensource +- no databases / no external services + + +## setup + +### prerequisites + +- [direnv](https://direnv.net/) (optional, loads `.envrc` automatically) +- a [Hetzner Cloud](https://console.hetzner.cloud/) account with an API token 
(Read & Write) +- a [Hetzner Storage Box](https://www.hetzner.com/storage/storage-box/) (for restic backups, optional) + +install python dependencies and ansible collections: + +```bash +pip install -r requirements.txt +ansible-galaxy collection install -r requirements.yml +``` + +### quickstart + +```bash +./setup.sh +``` + +the setup script walks you through everything interactively: stack name, SSH key, vault password, server details, domain, and secrets. it writes all generated config outside the repo to `$XDG_CONFIG_HOME/linderhof//` and won't overwrite existing files. + +after setup, activate the stack and review the generated config: + +```bash +direnv allow # reads .stack file — or: export LINDERHOF_STACK= + +vi $LINDERHOF_DIR/group_vars/all/config.yml +ansible-vault edit $LINDERHOF_DIR/group_vars/all/vault.yml +``` + +then provision and deploy: + +```bash +ansible-playbook playbooks/provision.yml +ansible-playbook playbooks/dns.yml +ansible-playbook playbooks/site.yml +``` + +### multiple stacks + +each stack is an independent deployment with its own inventory, vault, and secrets. to create a second stack: + +```bash +./setup.sh # enter a different stack name when prompted +echo other-stack > .stack && direnv allow +``` + +switch between stacks by changing `LINDERHOF_STACK` or updating `.stack`: + +```bash +echo home > .stack && direnv allow +echo work > .stack && direnv allow +``` + +stack config lives at `$XDG_CONFIG_HOME/linderhof//`: + +``` +/ + hosts.yml # server connection info + vault-pass # vault encryption key (chmod 600) + stack.env # per-stack shell vars (DOCKER_HOST, etc.) + group_vars/ + all/ + config.yml # public ansible settings + vault.yml # encrypted secrets + dns.yml # DNS zone definitions + overrides.yml # optional: variable overrides +``` + +### overriding variables + +to override any variable without editing `config.yml`, create `overrides.yml` in the stack's `group_vars/all/`. 
ansible loads all files there automatically, so any key here wins over `config.yml`: + +```bash +vi $LINDERHOF_DIR/group_vars/all/overrides.yml +``` + +```yaml +# override mail hostname (e.g. during migration) +mail_hostname: mail2.example.com + +# add extra static sites to caddy +caddy_sites: + - example.com + - example2.com + +# add extra mail-hosted domains +mail_domains: + - example.com + - example2.com + - example3.com +``` + +## upstream documentation + +- [docker-mailserver](https://docker-mailserver.github.io/docker-mailserver/latest/) +- [rainloop](https://www.rainloop.net/docs/configuration/) +- [caddy](https://caddyserver.com/docs/) +- [forgejo](https://forgejo.org/docs/latest/) +- [tuwunel](https://github.com/matrix-construct/tuwunel) +- [alloy (Grafana Alloy)](https://grafana.com/docs/alloy/latest/) +- [grafana](https://grafana.com/docs/grafana/latest/) +- [prometheus](https://prometheus.io/docs/) +- [loki](https://grafana.com/docs/loki/latest/) +- [goaccess](https://goaccess.io/man) +- [restic](https://restic.readthedocs.io/) +- [nebula](https://nebula.defined.net/docs/) +- [diun](https://crazymax.dev/diun/) +- [fail2ban](https://fail2ban.readthedocs.io/) + +## secrets + +sensitive data like passwords and DKIM keys is stored in `$LINDERHOF_DIR/group_vars/all/vault.yml` and encrypted with ansible-vault. see the [setup](#setup) section for what goes in there. + +after first mail deployment, retrieve and add the DKIM public key: + +```bash +docker exec mailserver cat /tmp/docker-mailserver/rspamd/dkim//mail.pub +ansible-vault edit $LINDERHOF_DIR/group_vars/all/vault.yml +# add: dkim_keys: +# example.com: "v=DKIM1; k=rsa; p=..." +``` + +then uncomment the `mail._domainkey` record in `dns.yml` and re-run `ansible-playbook playbooks/dns.yml`. 
+ +```bash +# edit secrets (decrypts in place, opens editor, re-encrypts on save) +ansible-vault edit $LINDERHOF_DIR/group_vars/all/vault.yml + +# decrypt for manual editing +ansible-vault decrypt $LINDERHOF_DIR/group_vars/all/vault.yml + +# re-encrypt after editing +ansible-vault encrypt $LINDERHOF_DIR/group_vars/all/vault.yml +``` + +## provisioning + +provision a new cloud VM (currently supports Hetzner): + +```bash +# provision with defaults (server_name and cloud_provider from config.yml) +ansible-playbook playbooks/provision.yml + +# override server name or type +ansible-playbook playbooks/provision.yml -e server_name=aspen -e hcloud_server_type=cpx21 +``` + +this registers your SSH key, creates the server, waits for SSH, and updates `$LINDERHOF_DIR/hosts.yml` with the new IP. after provisioning, update DNS and run the stack: + +```bash +ansible-playbook playbooks/dns.yml +ansible-playbook playbooks/site.yml --tags bootstrap +ansible-playbook playbooks/site.yml +``` + +## ansible playbooks + +Run everything: + +```bash +ansible-playbook playbooks/site.yml +``` + +Run playbooks individually for initial setup (in this order): + +```bash +# 1. Bootstrap the server (users, packages, ssh, etc.) +ansible-playbook playbooks/bootstrap.yml + +# 2. Install docker +ansible-playbook playbooks/docker.yml + +# 3. Set up nebula overlay network +ansible-playbook playbooks/nebula.yml + +# 4. Set up the web server +ansible-playbook playbooks/caddy.yml + +# 5. Set up the mail server +ansible-playbook playbooks/mail.yml + +# 6. Set up forgejo (git server) +ansible-playbook playbooks/forgejo.yml + +# 7. Set up tuwunel (matrix homeserver) +ansible-playbook playbooks/tuwunel.yml + +# 8. Set up monitoring (prometheus, grafana, loki, alloy) +ansible-playbook playbooks/monitoring.yml + +# 9. Set up goaccess (web analytics) +ansible-playbook playbooks/goaccess.yml + +# 10. Set up diun (docker image update notifier) +ansible-playbook playbooks/diun.yml + +# 11. 
Set up restic backups +ansible-playbook playbooks/restic.yml + +# 12. Set up fail2ban +ansible-playbook playbooks/fail2ban.yml +``` + +Run only specific tags: + +```bash +ansible-playbook playbooks/site.yml --tags mail,monitoring +``` + +## common operations + +Services are deployed to `/srv/`. Each has a `compose.yml` and can be managed with docker compose. + +### running docker compose commands + +```bash +# Always cd to the service directory first +cd /srv/mail && docker compose logs -f +cd /srv/caddy && docker compose restart +cd /srv/forgejo && docker compose ps +cd /srv/tuwunel && docker compose up -d +cd /srv/monitoring && docker compose up -d +``` + +### reloading caddy + +```bash +# Reload caddy configuration without downtime +cd /srv/caddy && docker compose exec caddy caddy reload --config /etc/caddy/Caddyfile +``` + +### managing email users + +```bash +# List all email accounts +docker exec mailserver setup email list + +# Add a new email account +docker exec mailserver setup email add user@domain.com password + +# Delete an email account +docker exec mailserver setup email del user@domain.com + +# Update password +docker exec mailserver setup email update user@domain.com newpassword +``` + +To add users via ansible, add them to `mail_users` in the vault and run: +```bash +ansible-playbook --tags users playbooks/mail.yml +``` + +### managing email aliases + +```bash +# List aliases +docker exec mailserver setup alias list + +# Add an alias +docker exec mailserver setup alias add alias@domain.com target@domain.com + +# Delete an alias +docker exec mailserver setup alias del alias@domain.com +``` + +### managing forgejo + +```bash +# Access the forgejo CLI +docker exec -it forgejo forgejo + +# List users +docker exec forgejo forgejo admin user list + +# Create a new user +docker exec forgejo forgejo admin user create --username myuser --password mypassword --email user@domain.com + +# Reset a user's password +docker exec forgejo forgejo admin user 
change-password --username myuser --password newpassword + +# Delete a user +docker exec forgejo forgejo admin user delete --username myuser +``` + +### managing tuwunel (matrix) + +```bash +# View tuwunel logs +cd /srv/tuwunel && docker compose logs -f + +# Restart tuwunel +cd /srv/tuwunel && docker compose restart + +# Check federation status +curl https://chat.example.com/_matrix/federation/v1/version + +# Check well-known delegation +curl https://example.com/.well-known/matrix/server +curl https://example.com/.well-known/matrix/client +``` + +### monitoring stack + +```bash +# Reload prometheus configuration +docker exec prometheus kill -HUP 1 + +# Restart alloy to pick up config changes +cd /srv/monitoring && docker compose restart alloy + +# Check prometheus targets +curl -s localhost:9090/api/v1/targets | jq '.data.activeTargets[] | {job: .labels.job, health: .health}' + +# Check alloy status +curl -s localhost:12345/-/ready +``` + +### viewing logs + +```bash +cd /srv/mail && docker compose logs -f mailserver +cd /srv/caddy && docker compose logs -f caddy +cd /srv/forgejo && docker compose logs -f forgejo +cd /srv/tuwunel && docker compose logs -f tuwunel +cd /srv/monitoring && docker compose logs -f grafana +cd /srv/monitoring && docker compose logs -f prometheus +cd /srv/monitoring && docker compose logs -f loki +cd /srv/monitoring && docker compose logs -f alloy +``` + +### managing nebula + +Nebula runs directly on the host (not in Docker). The CA key and certificates are stored in `/etc/nebula/`. 
+ +```bash +# Sign a client certificate +ssh server +cd /etc/nebula +nebula-cert sign -name "laptop" -ip "192.168.100.2/24" +# Copy laptop.crt, laptop.key, and ca.crt to client device +``` + +On the client, install Nebula and create a config with `am_lighthouse: false` and a `static_host_map` pointing to the server's public IP: + +```yaml +static_host_map: + "192.168.100.1": ["YOUR_SERVER_PUBLIC_IP:4242"] + +lighthouse: + am_lighthouse: false + hosts: + - "192.168.100.1" +``` + +### dns management + +DNS records are managed via the Hetzner DNS API: + +```bash +ansible-playbook playbooks/dns.yml +``` + +### backups + +```bash +# Check backup status +docker exec restic restic snapshots + +# Run a manual backup +docker exec restic restic backup /data + +# Restore from backup +docker exec restic restic restore latest --target /restore +``` diff --git a/ansible.cfg b/ansible.cfg new file mode 100644 index 0000000..d209ff8 --- /dev/null +++ b/ansible.cfg @@ -0,0 +1,12 @@ +[defaults] +roles_path = roles +host_key_checking = False +interpreter_python = auto_silent + +# Inventory and vault password are set via ANSIBLE_INVENTORY and +# ANSIBLE_VAULT_PASSWORD_FILE in .envrc, driven by LINDERHOF_STACK. +# Run: echo > .stack then direnv allow + +[privilege_escalation] +become = True +become_method = sudo diff --git a/inventory/group_vars/all/config.yml.setup b/inventory/group_vars/all/config.yml.setup new file mode 100644 index 0000000..566a259 --- /dev/null +++ b/inventory/group_vars/all/config.yml.setup @@ -0,0 +1,164 @@ +--- +# ============================================================ +# Linderhof Configuration +# ============================================================ +# Generated by setup.sh — edit freely to match your needs. +# Secrets are stored separately in vault.yml. +# Tunable defaults live in each role's defaults/main.yml. 
+# +# To override any variable for this stack without editing this file, +# create $LINDERHOF_DIR/group_vars/all/overrides.yml, e.g.: +# mail_hostname: mail2.$domain +# caddy_sites: +# - $domain +# - example2.com +# ============================================================ + +# ============================================================ +# Services — set to false to disable +# ============================================================ +enable_mail: true +enable_forgejo: true +enable_monitoring: true +enable_restic: true +enable_fail2ban: true +enable_tuwunel: true +enable_nebula: true +enable_diun: true +enable_goaccess: true + +# ============================================================ +# System +# ============================================================ +domain: $domain +server_name: $server_name +server_ip: $server_ip +admin_user: $admin_user +admin_ssh_key: "{{ lookup('file', '$ssh_key_pub') }}" +timezone: UTC + +# ============================================================ +# Image versions (update when Diun notifies of new releases) +# ============================================================ +caddy_version: "2" +mailserver_version: "latest" +rainloop_version: "latest" +forgejo_version: "11" +prometheus_version: "latest" +alloy_version: "latest" +grafana_version: "latest" +loki_version: "latest" +diun_version: "latest" +tuwunel_version: "latest" +radicale_version: "latest" +nebula_version: "1.9.5" + +# ============================================================ +# Caddy (web server / reverse proxy) +# ============================================================ +# Static sites served as file servers — each gets /srv/caddy/sites// +# Override in overrides.yml to add more domains. 
+caddy_sites: + - $domain + +# Service subdomains — override individually in overrides.yml +webmail_domain: webmail.$domain +rspamd_domain: rspamd.$domain +grafana_domain: watch.$domain +goaccess_domain: stats.$domain +radicale_domain: cal.$domain + +# Service ports — defined here so caddy can reference them when run standalone +rainloop_port: 8888 +rspamd_port: 11334 +forgejo_port: 3000 +grafana_port: 3000 +tuwunel_port: 6167 +radicale_port: 5232 +caddy_metrics_port: 9000 + +# ============================================================ +# Mail (docker-mailserver + rainloop) +# ============================================================ +# Override mail_hostname in overrides.yml if migrating (e.g. mail2.$domain) +mail_hostname: mail.$domain + +mail_domains: + - $domain +# Add more domains this mail server should handle: +# mail_domains: +# - $domain +# - example2.com + +mail_users: + - address: $admin_user@$domain + password: "{{ mail_passwords['$admin_user@$domain'] }}" + - address: git@$domain + password: "{{ mail_passwords['git@$domain'] }}" + - address: notifications@$domain + password: "{{ mail_passwords['notifications@$domain'] }}" + +mail_aliases: + - from: root@$domain + to: $admin_user@$domain + - from: dmarc@$domain + to: $admin_user@$domain + - from: postmaster@$domain + to: $admin_user@$domain + - from: hostmaster@$domain + to: $admin_user@$domain + - from: webmaster@$domain + to: $admin_user@$domain + - from: abuse@$domain + to: $admin_user@$domain + +# ============================================================ +# Forgejo (git hosting) +# ============================================================ +forgejo_domain: code.$domain + +# ============================================================ +# Monitoring +# ============================================================ +grafana_root_url: "https://{{ grafana_domain }}" + +# ============================================================ +# Restic (encrypted backups) +# 
============================================================ +restic_backend_type: "sftp" +# restic_host: "uXXXXXX.your-storagebox.de" +# restic_user: uXXXXXX +# restic_ssh_port: 23 +# restic_remote_path: "backups/$server_name" +# restic_ssh_key: "/root/.ssh/island_restic_backup" + +# ============================================================ +# GoAccess (web analytics) +# ============================================================ +goaccess_sites: + - $domain + - code.$domain + - watch.$domain + - webmail.$domain + - rspamd.$domain +goaccess_user: admin + +# ============================================================ +# Diun (Docker Image Update Notifier) +# ============================================================ +diun_notify_email: true +diun_email_user: notifications@$domain +## diun_email_password: defined in vault.yml +diun_email_to: $admin_user@$domain + +# ============================================================ +# Tuwunel (Matrix homeserver) +# ============================================================ +tuwunel_server_name: $domain +tuwunel_domain: chat.$domain + +# ============================================================ +# Nebula (overlay network) +# ============================================================ +nebula_subnet: "192.168.100.0/24" +nebula_lighthouse_ip: "192.168.100.1" diff --git a/inventory/group_vars/all/dns.yml.setup b/inventory/group_vars/all/dns.yml.setup new file mode 100644 index 0000000..43f0a9f --- /dev/null +++ b/inventory/group_vars/all/dns.yml.setup @@ -0,0 +1,128 @@ +--- +# ============================================================ +# Linderhof DNS Zones +# ============================================================ +# Generated by setup.sh — edit to match your DNS needs. +# This file is loaded automatically by Ansible as part of group_vars. 
+# +# After first mail deployment, retrieve DKIM keys with: +# docker exec mailserver cat /tmp/docker-mailserver/rspamd/dkim/$domain/mail.pub +# Add them to vault.yml and uncomment the mail._domainkey records below. +# ============================================================ + +dns_zones: + - zone: $domain + records: + # Root domain + - name: "@" + type: A + records: + - value: $server_ip + + - name: "@" + type: MX + records: + - value: "10 {{ mail_hostname }}." + + - name: "@" + type: TXT + records: + - value: "{{ 'v=spf1 mx -all' | hetzner.hcloud.txt_record }}" + + # Server A record + - name: $server_name + type: A + records: + - value: $server_ip + + - name: www + type: A + records: + - value: $server_ip + + # Mail subdomain A record (for the mail hostname itself) + - name: "{{ mail_hostname.split('.')[0] }}" + type: A + records: + - value: $server_ip + + # Service CNAMEs + - name: webmail + type: CNAME + records: + - value: $server_name.$domain. + + - name: code + type: CNAME + records: + - value: $server_name.$domain. + + - name: watch + type: CNAME + records: + - value: $server_name.$domain. + + - name: rspamd + type: CNAME + records: + - value: $server_name.$domain. + + - name: stats + type: CNAME + records: + - value: $server_name.$domain. + + - name: chat + type: CNAME + records: + - value: $server_name.$domain. + + - name: cal + type: CNAME + records: + - value: $server_name.$domain. 
+ + # DMARC + - name: _dmarc + type: TXT + records: + - value: "{{ 'v=DMARC1; p=none; rua=mailto:dmarc@$domain' | hetzner.hcloud.txt_record }}" + + # DKIM — uncomment after first mail deployment and add key to vault.yml + # - name: mail._domainkey + # type: TXT + # records: + # - value: "{{ dkim_keys['$domain'] | hetzner.hcloud.txt_record }}" + +# Extra domains (additional mail-hosted domains) — add as needed: +# - zone: example2.com +# records: +# - name: "@" +# type: A +# records: +# - value: $server_ip +# +# - name: "@" +# type: MX +# records: +# - value: "10 {{ mail_hostname }}." +# +# - name: "@" +# type: TXT +# records: +# - value: "{{ 'v=spf1 mx -all' | hetzner.hcloud.txt_record }}" +# +# - name: www +# type: CNAME +# records: +# - value: example2.com. +# +# - name: _dmarc +# type: TXT +# records: +# - value: "{{ 'v=DMARC1; p=none; rua=mailto:dmarc@example2.com' | hetzner.hcloud.txt_record }}" +# +# # - name: mail._domainkey +# # type: TXT +# # records: +# # - value: "{{ dkim_keys['example2.com'] | hetzner.hcloud.txt_record }}" diff --git a/inventory/group_vars/all/vault.yml.setup b/inventory/group_vars/all/vault.yml.setup new file mode 100644 index 0000000..49e3784 --- /dev/null +++ b/inventory/group_vars/all/vault.yml.setup @@ -0,0 +1,55 @@ +--- +# ============================================================ +# Linderhof Secrets +# ============================================================ +# Generated by setup.sh +# Edit with: ansible-vault edit $LINDERHOF_DIR/group_vars/all/vault.yml +# ============================================================ + +# hetzner +hcloud_token: "$hcloud_token" + +# mail +# passwords generated with: openssl rand -base64 32 +mail_passwords: + $admin_user@$domain: "$admin_mail_password" + git@$domain: "$git_mail_password" + notifications@$domain: "$notifications_mail_password" +rspamd_web_password: "$rspamd_web_password" +rainloop_admin_password: "$rainloop_admin_password" + +# forgejo +# keys generated with: openssl rand 
-hex 32 +forgejo_secret_key: "$forgejo_secret_key" +forgejo_internal_token: "$forgejo_internal_token" +forgejo_jwt_secret: "$forgejo_jwt_secret" +forgejo_smtp_password: "$notifications_mail_password" + +# monitoring +# password generated with: openssl rand -base64 32 +grafana_admin_password: "$grafana_admin_password" + +# tuwunel +# token generated with: openssl rand -base64 32 +tuwunel_registration_token: "$tuwunel_registration_token" + +# goaccess +# password generated with: openssl rand -base64 32 +goaccess_password: "$goaccess_password" + +# diun (uses the notifications mail account) +diun_email_password: "$notifications_mail_password" + +# restic +# password generated with: openssl rand -base64 32 +restic_password: "$restic_password" + +# fail2ban (optional — IPs/CIDRs to whitelist) +# fail2ban_ignoreip: "your-home-ip/32" + +# DKIM public keys — one entry per domain +# Retrieve after first mail deployment: +# docker exec mailserver cat /tmp/docker-mailserver/rspamd/dkim/$domain/mail.pub +# Format: "v=DKIM1; k=rsa; p=" +dkim_keys: + $domain: "" diff --git a/inventory/hosts.yml.example b/inventory/hosts.yml.example new file mode 100644 index 0000000..1c30387 --- /dev/null +++ b/inventory/hosts.yml.example @@ -0,0 +1,21 @@ +--- +# ============================================================ +# Linderhof Inventory +# ============================================================ +# Copy this file to hosts.yml (gitignored) and fill in your values. +# For single-host deployments, just update the connection info. 
+# All configuration is in group_vars/all/config.yml +# +# For multi-host, add hosts and override variables per-host: +# hostname: +# ansible_host: 1.2.3.4 +# mail_hostname: mail.example.com # override +# ============================================================ + +all: + hosts: + my-server: + ansible_host: 1.2.3.4 + ansible_user: deploy + ansible_become: true + ansible_become_method: sudo diff --git a/playbooks/bootstrap.yml b/playbooks/bootstrap.yml new file mode 100644 index 0000000..75559fe --- /dev/null +++ b/playbooks/bootstrap.yml @@ -0,0 +1,87 @@ +--- +- name: Bootstrap Ubuntu server + hosts: all + become: true + + pre_tasks: + - name: Ensure apt cache is up to date + apt: + update_cache: true + cache_valid_time: 3600 + + tasks: + - name: Set timezone + timezone: + name: "{{ timezone }}" + + - name: Create admin user + user: + name: "{{ admin_user }}" + groups: sudo + shell: "{{ admin_shell | default('/bin/bash') }}" + append: true + create_home: true + + - name: Authorize SSH key for admin user + authorized_key: + user: "{{ admin_user }}" + key: "{{ admin_ssh_key }}" + + - name: Disable root SSH login + lineinfile: + path: /etc/ssh/sshd_config + regexp: '^PermitRootLogin' + line: 'PermitRootLogin no' + notify: restart ssh + + - name: Disable password authentication + lineinfile: + path: /etc/ssh/sshd_config + regexp: '^PasswordAuthentication' + line: 'PasswordAuthentication no' + notify: restart ssh + + - name: Install base packages + apt: + name: + - ca-certificates + - curl + - git + - tmux + - neovim + - ripgrep + - fd-find + - zsh + - ufw + - fail2ban + - rclone + - bat + - lsb-release + - rsync + state: present + + - name: Allow SSH + ufw: + rule: allow + port: 22 + proto: tcp + + - name: Enable UFW + ufw: + state: enabled + policy: deny + + - name: Enable fail2ban + systemd: + name: fail2ban + enabled: true + state: started + + handlers: + - name: restart ssh + service: + name: ssh + state: restarted + + roles: + - role: docker diff --git a/playbooks/caddy.yml 
b/playbooks/caddy.yml new file mode 100644 index 0000000..92d4c38 --- /dev/null +++ b/playbooks/caddy.yml @@ -0,0 +1,7 @@ +--- +- name: Deploy Caddy + hosts: all + become: true + + roles: + - caddy diff --git a/playbooks/diun.yml b/playbooks/diun.yml new file mode 100644 index 0000000..8c7317e --- /dev/null +++ b/playbooks/diun.yml @@ -0,0 +1,8 @@ +--- +- name: Deploy Diun (Docker Image Update Notifier) + hosts: all + become: true + + roles: + - role: diun + when: enable_diun | default(true) diff --git a/playbooks/dns.yml b/playbooks/dns.yml new file mode 100644 index 0000000..e464379 --- /dev/null +++ b/playbooks/dns.yml @@ -0,0 +1,23 @@ +--- +# Manage DNS zones on Hetzner Cloud +# +# Zone definitions live in $LINDERHOF_DIR/group_vars/all/dns.yml +# (generated from inventory/group_vars/all/dns.yml.setup by setup.sh). +# +# To add DKIM keys after first mail deployment: +# docker exec mailserver cat /tmp/docker-mailserver/rspamd/dkim//mail.pub +# Then add to vault.yml: +# ansible-vault edit $LINDERHOF_DIR/group_vars/all/vault.yml +# dkim_keys: +# example.com: "v=DKIM1; k=rsa; p=..." +# And uncomment the mail._domainkey record in dns.yml. 
+# +# Usage: ansible-playbook playbooks/dns.yml +- name: Manage DNS zones on Hetzner Cloud + hosts: localhost + connection: local + gather_facts: false + + roles: + - role: dns + tags: dns diff --git a/playbooks/docker.yml b/playbooks/docker.yml new file mode 100644 index 0000000..4494567 --- /dev/null +++ b/playbooks/docker.yml @@ -0,0 +1,8 @@ +--- +- name: Install Docker and prepare filesystem + hosts: all + become: true + + roles: + - docker + - docker_network diff --git a/playbooks/fail2ban.yml b/playbooks/fail2ban.yml new file mode 100644 index 0000000..7936548 --- /dev/null +++ b/playbooks/fail2ban.yml @@ -0,0 +1,8 @@ +--- +- name: Setup fail2ban + hosts: all + become: true + + roles: + - role: fail2ban + when: enable_fail2ban | default(true) diff --git a/playbooks/forgejo.yml b/playbooks/forgejo.yml new file mode 100644 index 0000000..ae9dc98 --- /dev/null +++ b/playbooks/forgejo.yml @@ -0,0 +1,8 @@ +--- +- name: Deploy Forgejo Git Server + hosts: all + become: true + + roles: + - role: forgejo + when: enable_forgejo | default(true) diff --git a/playbooks/goaccess.yml b/playbooks/goaccess.yml new file mode 100644 index 0000000..9167dd2 --- /dev/null +++ b/playbooks/goaccess.yml @@ -0,0 +1,8 @@ +--- +- name: Configure GoAccess Analytics + hosts: all + become: true + + roles: + - role: goaccess + when: enable_goaccess | default(true) diff --git a/playbooks/mail.yml b/playbooks/mail.yml new file mode 100644 index 0000000..af93cdd --- /dev/null +++ b/playbooks/mail.yml @@ -0,0 +1,8 @@ +--- +- name: Mail server setup + hosts: all + become: true + + roles: + - role: mail + when: enable_mail | default(true) diff --git a/playbooks/monitoring.yml b/playbooks/monitoring.yml new file mode 100644 index 0000000..9829d2c --- /dev/null +++ b/playbooks/monitoring.yml @@ -0,0 +1,8 @@ +--- +- name: Deploy monitoring stack + hosts: all + become: true + + roles: + - role: monitoring + when: enable_monitoring | default(true) diff --git a/playbooks/nebula.yml 
b/playbooks/nebula.yml new file mode 100644 index 0000000..bba2b48 --- /dev/null +++ b/playbooks/nebula.yml @@ -0,0 +1,8 @@ +--- +- name: Setup Nebula overlay network + hosts: all + become: true + + roles: + - role: nebula + when: enable_nebula | default(false) diff --git a/playbooks/provision.yml b/playbooks/provision.yml new file mode 100644 index 0000000..b944fb4 --- /dev/null +++ b/playbooks/provision.yml @@ -0,0 +1,12 @@ +--- +# Provision a cloud VM +# Usage: ansible-playbook playbooks/provision.yml +# Override defaults: -e server_name=aspen -e hcloud_server_type=cpx21 +- name: Provision cloud server + hosts: localhost + connection: local + gather_facts: false + + roles: + - role: provision + tags: provision diff --git a/playbooks/radicale.yml b/playbooks/radicale.yml new file mode 100644 index 0000000..533fc63 --- /dev/null +++ b/playbooks/radicale.yml @@ -0,0 +1,8 @@ +--- +- name: Deploy Radicale (CalDAV/CardDAV) + hosts: all + become: true + + roles: + - role: radicale + when: enable_radicale | default(true) diff --git a/playbooks/restic.yml b/playbooks/restic.yml new file mode 100644 index 0000000..785c0a2 --- /dev/null +++ b/playbooks/restic.yml @@ -0,0 +1,8 @@ +--- +- name: Configure Restic Backups + hosts: all + become: true + + roles: + - role: restic + when: enable_restic | default(true) diff --git a/playbooks/site.yml b/playbooks/site.yml new file mode 100644 index 0000000..d726b36 --- /dev/null +++ b/playbooks/site.yml @@ -0,0 +1,46 @@ +--- +# Master playbook - runs all components in order +# Usage: ansible-playbook playbooks/site.yml +# +# To run specific components, use tags: +# ansible-playbook playbooks/site.yml --tags mail +# ansible-playbook playbooks/site.yml --tags monitoring,restic + +- import_playbook: bootstrap.yml + tags: [bootstrap, never] # only runs when explicitly tagged + +- import_playbook: docker.yml + tags: [docker] + +- import_playbook: nebula.yml + tags: [nebula] + +- import_playbook: caddy.yml + tags: [caddy] + +- 
import_playbook: mail.yml + tags: [mail] + +- import_playbook: forgejo.yml + tags: [forgejo] + +- import_playbook: monitoring.yml + tags: [monitoring] + +- import_playbook: tuwunel.yml + tags: [tuwunel] + +- import_playbook: radicale.yml + tags: [radicale] + +- import_playbook: diun.yml + tags: [diun] + +- import_playbook: restic.yml + tags: [restic] + +- import_playbook: fail2ban.yml + tags: [fail2ban] + +- import_playbook: goaccess.yml + tags: [goaccess] diff --git a/playbooks/tuwunel.yml b/playbooks/tuwunel.yml new file mode 100644 index 0000000..ca84fae --- /dev/null +++ b/playbooks/tuwunel.yml @@ -0,0 +1,8 @@ +--- +- name: Deploy Tuwunel Matrix Server + hosts: all + become: true + + roles: + - role: tuwunel + when: enable_tuwunel | default(true) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..20b2c01 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +ansible +passlib +bcrypt diff --git a/requirements.yml b/requirements.yml new file mode 100644 index 0000000..ca82f8a --- /dev/null +++ b/requirements.yml @@ -0,0 +1,4 @@ +--- +collections: + - name: hetzner.hcloud + version: ">=6.0.0" diff --git a/roles/caddy/defaults/main.yml b/roles/caddy/defaults/main.yml new file mode 100644 index 0000000..33450f3 --- /dev/null +++ b/roles/caddy/defaults/main.yml @@ -0,0 +1,14 @@ +--- +# Caddy metrics port (scraped by Prometheus) +caddy_metrics_port: 9000 + +# Static sites served by Caddy — override in config.yml +# Each entry gets /srv/caddy/sites// and www → apex redirect +caddy_sites: + - "{{ domain }}" + +# Service domain defaults — override individually in config.yml or overrides.yml +webmail_domain: "webmail.{{ domain }}" +rspamd_domain: "rspamd.{{ domain }}" +goaccess_domain: "stats.{{ domain }}" +grafana_domain: "watch.{{ domain }}" diff --git a/roles/caddy/handlers/main.yml b/roles/caddy/handlers/main.yml new file mode 100644 index 0000000..92735e6 --- /dev/null +++ b/roles/caddy/handlers/main.yml @@ -0,0 +1,8 @@ +--- +- name: 
Restart Caddy + # full restart (not caddy reload) avoids stale bind-mount inodes when Caddyfile changes on host + community.docker.docker_compose_v2: + project_src: /srv/caddy + state: present + recreate: always + build: never + diff --git a/roles/caddy/tasks/main.yml b/roles/caddy/tasks/main.yml new file mode 100644 index 0000000..db38c66 --- /dev/null +++ b/roles/caddy/tasks/main.yml @@ -0,0 +1,104 @@ +- name: Allow HTTP traffic + ufw: + rule: allow + port: 80 + proto: tcp + +- name: Allow HTTPS traffic + ufw: + rule: allow + port: 443 + proto: tcp + +- name: Allow HTTPS/QUIC (HTTP/3) traffic + ufw: + rule: allow + port: 443 + proto: udp + +- name: Create Caddy directories + file: + path: "/srv/caddy/{{ item }}" + state: directory + owner: root + group: docker + mode: "0755" + loop: + - "" + - data + - config + - sites + +- name: Create site roots + file: + path: "/srv/caddy/sites/{{ item }}" + state: directory + owner: root + group: docker + mode: "0775" # also allow members of the docker group to write + loop: "{{ caddy_sites }}" + +- name: Install Caddyfile + template: + src: Caddyfile.j2 + dest: /srv/caddy/Caddyfile + owner: root + group: docker + mode: "0644" + notify: Restart Caddy + tags: config + +- name: Check for cached goaccess hash + ansible.builtin.stat: + path: /srv/caddy/.goaccess_hash + register: _goaccess_hash_stat + when: enable_goaccess | default(true) + +- name: Read goaccess hash from cache + ansible.builtin.slurp: + src: /srv/caddy/.goaccess_hash + register: _goaccess_hash_file + when: enable_goaccess | default(true) and _goaccess_hash_stat.stat.exists + +- name: Set goaccess hash fact from cache + ansible.builtin.set_fact: + caddy_goaccess_hash_stdout: "{{ _goaccess_hash_file.content | b64decode | trim }}" + when: enable_goaccess | default(true) and _goaccess_hash_stat.stat.exists + +- name: Generate goaccess password hash + ansible.builtin.command: + argv: + - docker + - run + - --rm + - "caddy:{{ caddy_version }}" + - caddy + - hash-password + - --plaintext 
+ - "{{ goaccess_password }}" + register: _goaccess_hash_result + changed_when: false + no_log: true + when: enable_goaccess | default(true) and not _goaccess_hash_stat.stat.exists + +- name: Cache goaccess hash + ansible.builtin.copy: + content: "{{ _goaccess_hash_result.stdout }}" + dest: /srv/caddy/.goaccess_hash + mode: "0600" + when: enable_goaccess | default(true) and not _goaccess_hash_stat.stat.exists + +- name: Set goaccess hash fact from generation + ansible.builtin.set_fact: + caddy_goaccess_hash_stdout: "{{ _goaccess_hash_result.stdout }}" + when: enable_goaccess | default(true) and not _goaccess_hash_stat.stat.exists + +- name: Deploy Caddy compose.yml + template: + src: compose.yml.j2 + dest: /srv/caddy/compose.yml + owner: root + group: docker + mode: "0644" + notify: Restart Caddy + tags: config diff --git a/roles/caddy/templates/Caddyfile.j2 b/roles/caddy/templates/Caddyfile.j2 new file mode 100644 index 0000000..ad7f861 --- /dev/null +++ b/roles/caddy/templates/Caddyfile.j2 @@ -0,0 +1,124 @@ +{ + email {{ admin_user }}@{{ domain }} + log { + output stdout + } + metrics { + per_host + } +} + +(access_log) { + log +} + +:{{ caddy_metrics_port }} { + metrics +} + +{% for site in caddy_sites %} +# Redirect www → apex +www.{{ site }} { + import access_log + redir https://{{ site }}{uri} permanent +} + +{{ site }} { + import access_log + root * /srv/sites/{{ site }} + encode zstd gzip + file_server + + header { + Strict-Transport-Security "max-age=31536000; includeSubDomains; preload" + X-Content-Type-Options "nosniff" + X-Frame-Options "DENY" + Referrer-Policy "strict-origin-when-cross-origin" + } +{% if site == domain and enable_tuwunel | default(false) %} + + handle /.well-known/matrix/server { + header Content-Type application/json + respond `{"m.server": "{{ tuwunel_domain }}:443"}` + } + + handle /.well-known/matrix/client { + header Content-Type application/json + header Access-Control-Allow-Origin * + respond `{"m.homeserver": {"base_url": 
"https://{{ tuwunel_domain }}"}}` + } +{% endif %} +} + +{% endfor %} +{% if enable_mail | default(false) %} +{{ webmail_domain }} { + import access_log + reverse_proxy rainloop:{{ rainloop_port }} +} + +{{ rspamd_domain }} { + import access_log + reverse_proxy mailserver:{{ rspamd_port }} +} + +{% endif %} +{% if enable_forgejo | default(false) %} +{{ forgejo_domain }} { + import access_log + reverse_proxy forgejo:{{ forgejo_port }} + + header { + Strict-Transport-Security "max-age=31536000; includeSubDomains; preload" + X-Content-Type-Options "nosniff" + X-Frame-Options "SAMEORIGIN" + Referrer-Policy "strict-origin-when-cross-origin" + } +} + +{% endif %} +{% if enable_monitoring | default(false) %} +{{ grafana_domain }} { + import access_log + reverse_proxy grafana:{{ grafana_port }} { + header_up Host {host} + header_up X-Real-IP {remote_host} + } +} + +{% endif %} +{% if enable_tuwunel | default(false) %} +{{ tuwunel_domain }} { + import access_log + reverse_proxy tuwunel:{{ tuwunel_port }} + + header { + Strict-Transport-Security "max-age=31536000; includeSubDomains; preload" + X-Content-Type-Options "nosniff" + X-Frame-Options "DENY" + Referrer-Policy "strict-origin-when-cross-origin" + } +} + +{% endif %} +{% if enable_radicale | default(false) %} +{{ radicale_domain }} { + import access_log + + redir /.well-known/caldav / permanent + redir /.well-known/carddav / permanent + + reverse_proxy radicale:{{ radicale_port }} +} + +{% endif %} +{% if enable_goaccess | default(false) %} +{{ goaccess_domain }} { + import access_log + root * /srv/goaccess/reports + file_server browse + basic_auth { + {$GOACCESS_USER} {$GOACCESS_HASH} + } +} +{% endif %} diff --git a/roles/caddy/templates/compose.yml.j2 b/roles/caddy/templates/compose.yml.j2 new file mode 100644 index 0000000..9f37ab6 --- /dev/null +++ b/roles/caddy/templates/compose.yml.j2 @@ -0,0 +1,67 @@ +services: + caddy: + image: caddy:{{ caddy_version }} + container_name: caddy + restart: unless-stopped + 
ports: + - "80:80" + - "443:443" + - "443:443/udp" + healthcheck: + test: ["CMD-SHELL", "wget -q -O /dev/null http://localhost:{{ caddy_metrics_port }}/metrics || exit 1"] + interval: 30s + timeout: 5s + retries: 3 + start_period: 30s + volumes: + - /srv/caddy/Caddyfile:/etc/caddy/Caddyfile:ro + - /srv/caddy/data:/data + - /srv/caddy/config:/config + - /srv/caddy/sites:/srv/sites:ro + - /srv/goaccess/reports:/srv/goaccess/reports:ro + environment: +{% if enable_goaccess | default(true) %} + GOACCESS_USER: "{{ goaccess_user }}" + GOACCESS_HASH: "{{ caddy_goaccess_hash_stdout | replace('$', '$$') }}" +{% endif %} + networks: + - caddy +{% if enable_mail | default(true) %} + - webmail +{% endif %} +{% if enable_forgejo | default(true) %} + - git +{% endif %} +{% if enable_monitoring | default(true) %} + - monitoring +{% endif %} +{% if enable_tuwunel | default(true) %} + - tuwunel +{% endif %} +{% if enable_radicale | default(false) %} + - radicale +{% endif %} + +networks: + caddy: + external: true +{% if enable_mail | default(true) %} + webmail: + external: true +{% endif %} +{% if enable_forgejo | default(true) %} + git: + external: true +{% endif %} +{% if enable_monitoring | default(true) %} + monitoring: + external: true +{% endif %} +{% if enable_tuwunel | default(true) %} + tuwunel: + external: true +{% endif %} +{% if enable_radicale | default(false) %} + radicale: + external: true +{% endif %} diff --git a/roles/common/tasks/main.yml b/roles/common/tasks/main.yml new file mode 100644 index 0000000..f096c14 --- /dev/null +++ b/roles/common/tasks/main.yml @@ -0,0 +1,11 @@ +- name: Ensure base packages are installed + apt: + name: + - ca-certificates + - curl + - gnupg + - lsb-release + - rsync + state: present + update_cache: true + diff --git a/roles/diun/defaults/main.yml b/roles/diun/defaults/main.yml new file mode 100644 index 0000000..70cadce --- /dev/null +++ b/roles/diun/defaults/main.yml @@ -0,0 +1,6 @@ +--- +# Check schedule (cron format) 
+diun_schedule: "0 */6 * * *" + +# Notification toggles +diun_notify_matrix: false diff --git a/roles/diun/handlers/main.yml b/roles/diun/handlers/main.yml new file mode 100644 index 0000000..44917d5 --- /dev/null +++ b/roles/diun/handlers/main.yml @@ -0,0 +1,7 @@ +--- +- name: restart diun + community.docker.docker_compose_v2: + project_src: /srv/diun + state: present + recreate: always + build: never diff --git a/roles/diun/tasks/main.yml b/roles/diun/tasks/main.yml new file mode 100644 index 0000000..e66edd5 --- /dev/null +++ b/roles/diun/tasks/main.yml @@ -0,0 +1,29 @@ +--- +- name: Create diun directories + file: + path: "{{ item }}" + state: directory + mode: '0755' + loop: + - /srv/diun + - /srv/diun/data + +- name: Create diun configuration + template: + src: diun.yml.j2 + dest: /srv/diun/data/diun.yml + mode: '0644' + notify: restart diun + +- name: Create compose file + template: + src: compose.yml.j2 + dest: /srv/diun/compose.yml + mode: '0644' + notify: restart diun + +- name: Deploy diun + community.docker.docker_compose_v2: + project_src: /srv/diun + state: present + build: never diff --git a/roles/diun/templates/compose.yml.j2 b/roles/diun/templates/compose.yml.j2 new file mode 100644 index 0000000..3d9489b --- /dev/null +++ b/roles/diun/templates/compose.yml.j2 @@ -0,0 +1,21 @@ +services: + diun: + image: crazymax/diun:{{ diun_version }} + container_name: diun + restart: unless-stopped + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro + - /srv/diun/data:/data + environment: + CONFIG: /data/diun.yml + TZ: {{ timezone | default('UTC') }} + LOG_LEVEL: info + LOG_JSON: "true" + labels: + diun.enable: "false" + networks: + - monitoring + +networks: + monitoring: + external: true diff --git a/roles/diun/templates/diun.yml.j2 b/roles/diun/templates/diun.yml.j2 new file mode 100644 index 0000000..be49c5f --- /dev/null +++ b/roles/diun/templates/diun.yml.j2 @@ -0,0 +1,30 @@ +watch: + schedule: "{{ diun_schedule | default('0 */6 * * *') }}" + 
firstCheckNotif: false + +providers: + docker: + watchByDefault: true + watchStopped: false + +{% if diun_notify_email | default(false) or diun_notify_matrix | default(false) %} +notif: +{% if diun_notify_email | default(false) %} + mail: + host: {{ mail_hostname }} + port: 587 + ssl: false + insecureSkipVerify: false + username: {{ diun_email_user }} + password: {{ diun_email_password }} + from: {{ diun_email_user }} + to: {{ diun_email_to }} +{% endif %} +{% if diun_notify_matrix | default(false) %} + matrix: + homeserverURL: https://{{ tuwunel_domain }} + user: {{ diun_matrix_user }} + password: {{ diun_matrix_password }} + roomID: {{ diun_matrix_room_id }} +{% endif %} +{% endif %} diff --git a/roles/dns/defaults/main.yml b/roles/dns/defaults/main.yml new file mode 100644 index 0000000..0db6919 --- /dev/null +++ b/roles/dns/defaults/main.yml @@ -0,0 +1,2 @@ +--- +dns_zones: [] diff --git a/roles/dns/tasks/main.yml b/roles/dns/tasks/main.yml new file mode 100644 index 0000000..93886ca --- /dev/null +++ b/roles/dns/tasks/main.yml @@ -0,0 +1,25 @@ +--- +- name: Ensure DNS zone exists + hetzner.hcloud.zone: + name: "{{ item.zone }}" + mode: primary + api_token: "{{ hcloud_token }}" + state: present + loop: "{{ dns_zones }}" + loop_control: + label: "{{ item.zone }}" + tags: dns + +- name: Manage DNS records + hetzner.hcloud.zone_rrset: + zone: "{{ item.0.zone }}" + name: "{{ item.1.name }}" + type: "{{ item.1.type }}" + ttl: "{{ item.1.ttl | default(300) }}" + records: "{{ item.1.records }}" + api_token: "{{ hcloud_token }}" + state: present + loop: "{{ dns_zones | subelements('records') }}" + loop_control: + label: "{{ item.0.zone }} {{ item.1.name }} {{ item.1.type }}" + tags: dns diff --git a/roles/docker/handlers/main.yml b/roles/docker/handlers/main.yml new file mode 100644 index 0000000..bd7dbb7 --- /dev/null +++ b/roles/docker/handlers/main.yml @@ -0,0 +1,6 @@ +--- +- name: restart docker + systemd: + name: docker + state: restarted + diff --git 
a/roles/docker/tasks/main.yml b/roles/docker/tasks/main.yml new file mode 100644 index 0000000..eb04fb1 --- /dev/null +++ b/roles/docker/tasks/main.yml @@ -0,0 +1,76 @@ +--- +- name: Install prerequisite packages + apt: + name: + - ca-certificates + - curl + - gnupg + - lsb-release + state: present + update_cache: true + +- name: Add Docker GPG key + apt_key: + url: https://download.docker.com/linux/ubuntu/gpg + state: present + +- name: Add Docker apt repository + apt_repository: + repo: > + deb [arch=amd64] + https://download.docker.com/linux/ubuntu + {{ ansible_facts['distribution_release'] }} stable + state: present + filename: docker + +- name: Install Docker engine and compose plugin + apt: + name: + - docker-ce + - docker-ce-cli + - containerd.io + - docker-buildx-plugin + - docker-compose-plugin + state: present + update_cache: true + +- name: Enable and start Docker + systemd: + name: docker + enabled: true + state: started + +- name: Ensure Docker config directory + file: + path: /etc/docker + state: directory + mode: '0755' + +- name: Configure Docker to use journald + copy: + dest: /etc/docker/daemon.json + content: | + { + "log-driver": "journald" + } + mode: '0644' + notify: restart docker + +- name: Add admin user to docker group + user: + name: "{{ admin_user }}" + groups: docker + append: true + +# ---- Filesystem layout ---- + +- name: Create base directory + file: + path: "{{ item }}" + state: directory + owner: root + group: root + mode: "0755" + loop: + - /srv + diff --git a/roles/docker_network/tasks/main.yml b/roles/docker_network/tasks/main.yml new file mode 100644 index 0000000..d1fe0eb --- /dev/null +++ b/roles/docker_network/tasks/main.yml @@ -0,0 +1,39 @@ +--- +# caddy is always deployed +- name: Ensure caddy Docker network exists + community.docker.docker_network: + name: caddy + state: present + +- name: Ensure mail Docker networks exist + community.docker.docker_network: + name: "{{ item }}" + state: present + loop: + - mail + - 
webmail + when: enable_mail | default(true) + +- name: Ensure monitoring Docker network exists + community.docker.docker_network: + name: monitoring + state: present + when: enable_monitoring | default(true) + +- name: Ensure git Docker network exists + community.docker.docker_network: + name: git + state: present + when: enable_forgejo | default(true) + +- name: Ensure tuwunel Docker network exists + community.docker.docker_network: + name: tuwunel + state: present + when: enable_tuwunel | default(true) + +- name: Ensure radicale Docker network exists + community.docker.docker_network: + name: radicale + state: present + when: enable_radicale | default(false) diff --git a/roles/fail2ban/files/filter.d/caddy-auth.conf b/roles/fail2ban/files/filter.d/caddy-auth.conf new file mode 100644 index 0000000..50058b4 --- /dev/null +++ b/roles/fail2ban/files/filter.d/caddy-auth.conf @@ -0,0 +1,3 @@ +[Definition] +# matches 401 responses using remote_ip extracted from X-Real-IP by Caddy in JSON access logs +failregex = .*"remote_ip":"<HOST>".*"status":401.* diff --git a/roles/fail2ban/files/filter.d/caddy-scanners.conf b/roles/fail2ban/files/filter.d/caddy-scanners.conf new file mode 100644 index 0000000..ab58f6e --- /dev/null +++ b/roles/fail2ban/files/filter.d/caddy-scanners.conf @@ -0,0 +1,3 @@ +[Definition] +# admin.php is intentionally specific; broader /admin would match legitimate paths (e.g. /admin-api) +failregex = .*"remote_ip":"<HOST>".*"uri":"\/(wp-admin|wp-login|phpmyadmin|xmlrpc|\.env|\.git|cgi-bin|admin\.php|setup\.php|eval-stdin).*".* diff --git a/roles/fail2ban/files/filter.d/docker-mailserver.conf b/roles/fail2ban/files/filter.d/docker-mailserver.conf new file mode 100644 index 0000000..a62d042 --- /dev/null +++ b/roles/fail2ban/files/filter.d/docker-mailserver.conf @@ -0,0 +1,8 @@ +[Definition] +# Postfix SASL auth failures: warning: unknown[IP]: SASL ... 
authentication failed +failregex = ^.*warning: .*\[<HOST>\]: SASL .* authentication failed.*$ +# Dovecot auth failures: auth failed ... rip=IP + ^.*dovecot: (?:imap|pop3)-login: .*\(auth failed.*rip=<HOST>,.*$ + +ignoreregex = + diff --git a/roles/fail2ban/files/filter.d/forgejo-auth.conf b/roles/fail2ban/files/filter.d/forgejo-auth.conf new file mode 100644 index 0000000..450200c --- /dev/null +++ b/roles/fail2ban/files/filter.d/forgejo-auth.conf @@ -0,0 +1,4 @@ +[Definition] +# Matches both web login failures and SSH auth failures +failregex = ^.*Failed authentication attempt from <HOST>(:\d+)?$ + ^.*Failed login for user '[^']*' from <HOST>$ diff --git a/roles/fail2ban/files/jail.d/caddy.conf b/roles/fail2ban/files/jail.d/caddy.conf new file mode 100644 index 0000000..412d730 --- /dev/null +++ b/roles/fail2ban/files/jail.d/caddy.conf @@ -0,0 +1,16 @@ +[caddy-scanners] +enabled = true +journalmatch = CONTAINER_NAME=caddy +filter = caddy-scanners +maxretry = 3 +findtime = 10m +bantime = 24h + +# high maxretry/short bantime: Grafana auth can be slow; strict limits cause false positives +[caddy-auth] +enabled = true +journalmatch = CONTAINER_NAME=caddy +filter = caddy-auth +maxretry = 40 +findtime = 10m +bantime = 1h diff --git a/roles/fail2ban/files/jail.d/forgejo.conf b/roles/fail2ban/files/jail.d/forgejo.conf new file mode 100644 index 0000000..fc30ba3 --- /dev/null +++ b/roles/fail2ban/files/jail.d/forgejo.conf @@ -0,0 +1,8 @@ +[forgejo] +enabled = true +backend = systemd +journalmatch = CONTAINER_NAME=forgejo +filter = forgejo-auth +maxretry = 5 +findtime = 10m +bantime = 24h diff --git a/roles/fail2ban/files/jail.d/mailserver.conf b/roles/fail2ban/files/jail.d/mailserver.conf new file mode 100644 index 0000000..c9fd88e --- /dev/null +++ b/roles/fail2ban/files/jail.d/mailserver.conf @@ -0,0 +1,9 @@ +[mailserver] +enabled = true +backend = systemd +journalmatch = CONTAINER_NAME=mailserver +filter = docker-mailserver +maxretry = 5 +findtime = 10m +bantime = 24h + diff --git
a/roles/fail2ban/handlers/main.yml b/roles/fail2ban/handlers/main.yml new file mode 100644 index 0000000..d2a1f09 --- /dev/null +++ b/roles/fail2ban/handlers/main.yml @@ -0,0 +1,5 @@ +- name: Reload fail2ban + service: + name: fail2ban + state: reloaded + diff --git a/roles/fail2ban/tasks/main.yml b/roles/fail2ban/tasks/main.yml new file mode 100644 index 0000000..eee981c --- /dev/null +++ b/roles/fail2ban/tasks/main.yml @@ -0,0 +1,36 @@ +- name: Ensure fail2ban directories exist + file: + path: "/etc/fail2ban/{{ item }}" + state: directory + mode: '0755' + loop: + - "" + - jail.d + - filter.d + +- name: Remove obsolete grafana fail2ban configs + file: + path: "/etc/fail2ban/{{ item }}" + state: absent + loop: + - jail.d/grafana.conf + - filter.d/grafana-auth.conf + notify: Reload fail2ban + +- name: Deploy fail2ban jail.local + template: + src: jail.local.j2 + dest: /etc/fail2ban/jail.local + mode: '0644' + notify: Reload fail2ban + +- name: Copy fail2ban jail and filter configs + copy: + src: "{{ item }}" + dest: "/etc/fail2ban/{{ item | regex_replace('^.*/files/', '') }}" + mode: '0644' + with_fileglob: + - "{{ role_path }}/files/jail.d/*" + - "{{ role_path }}/files/filter.d/*" + notify: Reload fail2ban + diff --git a/roles/fail2ban/templates/jail.local.j2 b/roles/fail2ban/templates/jail.local.j2 new file mode 100644 index 0000000..d4c42b8 --- /dev/null +++ b/roles/fail2ban/templates/jail.local.j2 @@ -0,0 +1,8 @@ +[DEFAULT] +backend = systemd +bantime = 24h +findtime = 10m +maxretry = 5 +{% if fail2ban_ignoreip | default([]) | length > 0 %} +ignoreip = 127.0.0.1/8 ::1 {{ fail2ban_ignoreip | default([]) | join(' ') }} +{% endif %} diff --git a/roles/forgejo/defaults/main.yml b/roles/forgejo/defaults/main.yml new file mode 100644 index 0000000..3eeaad0 --- /dev/null +++ b/roles/forgejo/defaults/main.yml @@ -0,0 +1,26 @@ +--- +# Display name shown in the UI, emails, and page title +forgejo_app_name: "Forgejo" + +# Ports (internal to docker network) +forgejo_port: 
3000 +forgejo_ssh_port: 2222 + +# Registration and access +forgejo_disable_registration: true +forgejo_require_signin: false + +# Timezone for the Forgejo UI — defaults to the system timezone +forgejo_timezone: "{{ timezone | default('UTC') }}" + +# Email notifications (set to true and configure smtp vars to enable) +forgejo_mailer_enabled: false +# forgejo_smtp_host: mail.example.com +# forgejo_smtp_port: 587 +# forgejo_smtp_user: notifications@example.com +# forgejo_mailer_from: "Forgejo " +# forgejo_smtp_password: defined in vault.yml + +# Actions runner +forgejo_runner_name: default-runner +forgejo_runner_labels: "docker:docker://node:20-bookworm,ubuntu-latest:docker://ubuntu:latest,ubuntu-22.04:docker://ubuntu:22.04" diff --git a/roles/forgejo/handlers/main.yml b/roles/forgejo/handlers/main.yml new file mode 100644 index 0000000..0a55e6e --- /dev/null +++ b/roles/forgejo/handlers/main.yml @@ -0,0 +1,6 @@ +--- +- name: Restart forgejo + community.docker.docker_compose_v2: + project_src: /srv/forgejo + state: restarted + build: never diff --git a/roles/forgejo/tasks/main.yml b/roles/forgejo/tasks/main.yml new file mode 100644 index 0000000..38d9177 --- /dev/null +++ b/roles/forgejo/tasks/main.yml @@ -0,0 +1,137 @@ +--- +- name: Allow Forgejo SSH traffic + ufw: + rule: allow + port: "{{ forgejo_ssh_port }}" + proto: tcp + +- name: Create Forgejo directories + ansible.builtin.file: + path: "{{ item }}" + state: directory + mode: '0755' + loop: + - /srv/forgejo + +- name: Create Forgejo data directory + ansible.builtin.file: + path: /srv/forgejo/data + state: directory + mode: '0755' + +# stat+chown: avoids UID/GID lookup warnings for container-internal UIDs not present on host +- name: Stat Forgejo data directory + ansible.builtin.stat: + path: /srv/forgejo/data + register: forgejo_data_stat + +- name: Set Forgejo data directory ownership + ansible.builtin.command: chown 1000:1000 /srv/forgejo/data + when: forgejo_data_stat.stat.uid != 1000 or
forgejo_data_stat.stat.gid != 1000 + +- name: Create runner data directory + ansible.builtin.file: + path: /srv/forgejo/runner + state: directory + mode: '0755' + when: enable_forgejo_runner | default(true) + +# stat+chown: avoids UID/GID lookup warnings for container-internal UIDs not present on host +- name: Stat runner data directory + ansible.builtin.stat: + path: /srv/forgejo/runner + register: forgejo_runner_stat + when: enable_forgejo_runner | default(true) + +- name: Set runner data directory ownership + ansible.builtin.command: chown 1000:1000 /srv/forgejo/runner + when: (enable_forgejo_runner | default(true)) and (forgejo_runner_stat.stat.uid != 1000 or forgejo_runner_stat.stat.gid != 1000) + +- name: Deploy Forgejo docker-compose file + ansible.builtin.template: + src: compose.yml.j2 + dest: /srv/forgejo/compose.yml + mode: '0644' + notify: Restart forgejo + +# template does not create parent directories; gitea/conf is absent until the container has run once +- name: Create Forgejo config directory + ansible.builtin.command: + cmd: install -d -o 1000 -g 1000 -m 0755 /srv/forgejo/data/gitea /srv/forgejo/data/gitea/conf + creates: /srv/forgejo/data/gitea/conf + +- name: Deploy Forgejo app.ini configuration + ansible.builtin.template: + src: app.ini.j2 + dest: /srv/forgejo/data/gitea/conf/app.ini + mode: '0644' + notify: Restart forgejo + +- name: Start Forgejo server + community.docker.docker_compose_v2: + project_src: /srv/forgejo + services: + - forgejo + state: present + build: never + register: forgejo_output + +# explicit until: older ansible-core releases ignore retries when until is absent +- name: Wait for Forgejo to be ready + ansible.builtin.uri: + url: "http://localhost:{{ forgejo_port }}" + status_code: 200 + register: forgejo_ready + until: forgejo_ready is succeeded + retries: 30 + delay: 2 + when: forgejo_output.changed + +# Runner registration (one-time) +- name: Check if runner is already registered + ansible.builtin.stat: + path: /srv/forgejo/runner/.runner + register: runner_file + when: enable_forgejo_runner | default(true) + +- name: Generate runner registration token + community.docker.docker_container_exec: + container: forgejo + command: forgejo forgejo-cli actions generate-runner-token + user: git + register: runner_token + when: + - enable_forgejo_runner | default(true) + - not runner_file.stat.exists + +- name: Deploy runner config + ansible.builtin.template: + src:
runner-config.yml.j2 + dest: /srv/forgejo/runner/config.yml + mode: '0644' + when: enable_forgejo_runner | default(true) + notify: Restart forgejo + +- name: Register Forgejo runner + ansible.builtin.command: + cmd: >- + docker run --rm + --network git + -v /srv/forgejo/runner:/data + code.forgejo.org/forgejo/runner:{{ forgejo_runner_version }} + forgejo-runner register --no-interactive + --instance http://forgejo:3000 + --token {{ runner_token.stdout | trim }} + --name {{ forgejo_runner_name }} + --labels {{ forgejo_runner_labels }} + when: + - enable_forgejo_runner | default(true) + - not runner_file.stat.exists + notify: Restart forgejo + +- name: Start all Forgejo services + community.docker.docker_compose_v2: + project_src: /srv/forgejo + state: present + build: never + when: enable_forgejo_runner | default(true) diff --git a/roles/forgejo/templates/app.ini.j2 b/roles/forgejo/templates/app.ini.j2 new file mode 100644 index 0000000..915ab61 --- /dev/null +++ b/roles/forgejo/templates/app.ini.j2 @@ -0,0 +1,71 @@ +APP_NAME = {{ forgejo_app_name }} +RUN_MODE = prod +WORK_PATH = /data/gitea + +[server] +DOMAIN = {{ forgejo_domain }} +ROOT_URL = https://{{ forgejo_domain }}/ +HTTP_PORT = 3000 +SSH_DOMAIN = {{ forgejo_domain }} +SSH_PORT = {{ forgejo_ssh_port }} +START_SSH_SERVER = true + +[database] +DB_TYPE = sqlite3 +PATH = /data/gitea/gitea.db + +[repository] +ROOT = /data/git/repositories + +[log] +MODE = console +LEVEL = Info + +[security] +INSTALL_LOCK = true +SECRET_KEY = {{ forgejo_secret_key }} +INTERNAL_TOKEN = {{ forgejo_internal_token }} + +[service] +DISABLE_REGISTRATION = {{ forgejo_disable_registration }} +REQUIRE_SIGNIN_VIEW = {{ forgejo_require_signin }} +DEFAULT_KEEP_EMAIL_PRIVATE = true + +[mailer] +ENABLED = {{ forgejo_mailer_enabled }} +{% if forgejo_mailer_enabled %} +FROM = {{ forgejo_mailer_from }} +PROTOCOL = smtp +SMTP_ADDR = {{ forgejo_smtp_host }} +SMTP_PORT = {{ forgejo_smtp_port }} +USER = {{ forgejo_smtp_user }} +PASSWD = {{ 
forgejo_smtp_password }} +{% endif %} + +[session] +PROVIDER = file + +[picture] +DISABLE_GRAVATAR = false +ENABLE_FEDERATED_AVATAR = true + +[openid] +ENABLE_OPENID_SIGNIN = false +ENABLE_OPENID_SIGNUP = false + +[oauth2] +JWT_SECRET = {{ forgejo_jwt_secret }} + +[attachment] +ENABLED = true +MAX_SIZE = 50 + +[time] +DEFAULT_UI_LOCATION = {{ forgejo_timezone }} + +[metrics] +ENABLED = true + +[actions] +ENABLED = true +DEFAULT_ACTIONS_URL = https://code.forgejo.org diff --git a/roles/forgejo/templates/compose.yml.j2 b/roles/forgejo/templates/compose.yml.j2 new file mode 100644 index 0000000..5af5104 --- /dev/null +++ b/roles/forgejo/templates/compose.yml.j2 @@ -0,0 +1,49 @@ +services: + forgejo: + image: codeberg.org/forgejo/forgejo:{{ forgejo_version }} + container_name: forgejo + restart: unless-stopped + environment: + - USER_UID=1000 + - USER_GID=1000 + volumes: + - /srv/forgejo/data:/data + - /etc/timezone:/etc/timezone:ro + - /etc/localtime:/etc/localtime:ro + ports: + - "{{ forgejo_port }}:3000" + - "{{ forgejo_ssh_port }}:2222" + healthcheck: + test: ["CMD-SHELL", "wget -q -O /dev/null http://localhost:3000/api/v1/version || exit 1"] + interval: 30s + timeout: 5s + retries: 3 + start_period: 60s + networks: + - git + - monitoring + +{% if enable_forgejo_runner | default(true) %} + runner: + image: code.forgejo.org/forgejo/runner:{{ forgejo_runner_version }} + container_name: forgejo-runner + restart: unless-stopped + user: "0:0" + depends_on: + forgejo: + condition: service_healthy + volumes: + - /srv/forgejo/runner:/data + - /var/run/docker.sock:/var/run/docker.sock + environment: + DOCKER_HOST: unix:///var/run/docker.sock + command: forgejo-runner daemon --config /data/config.yml + networks: + - git +{% endif %} + +networks: + git: + external: true + monitoring: + external: true diff --git a/roles/forgejo/templates/runner-config.yml.j2 b/roles/forgejo/templates/runner-config.yml.j2 new file mode 100644 index 0000000..8af786b --- /dev/null +++ 
b/roles/forgejo/templates/runner-config.yml.j2 @@ -0,0 +1,14 @@ +log: + level: info + +runner: + file: .runner + capacity: 1 + timeout: 3h + +container: + # job containers must be on this network to resolve the forgejo hostname for git operations + network: "git" + privileged: false + valid_volumes: + - '**' diff --git a/roles/goaccess/defaults/main.yml b/roles/goaccess/defaults/main.yml new file mode 100644 index 0000000..044f1b3 --- /dev/null +++ b/roles/goaccess/defaults/main.yml @@ -0,0 +1,3 @@ +--- +# Time to sync access logs and regenerate reports (daily) +goaccess_sync_time: "05:00:00" diff --git a/roles/goaccess/handlers/main.yml b/roles/goaccess/handlers/main.yml new file mode 100644 index 0000000..c28484f --- /dev/null +++ b/roles/goaccess/handlers/main.yml @@ -0,0 +1,4 @@ +--- +- name: Reload systemd + ansible.builtin.systemd: + daemon_reload: true diff --git a/roles/goaccess/tasks/main.yml b/roles/goaccess/tasks/main.yml new file mode 100644 index 0000000..5470b44 --- /dev/null +++ b/roles/goaccess/tasks/main.yml @@ -0,0 +1,91 @@ +--- +- name: Install GoAccess and jq + ansible.builtin.apt: + name: + - goaccess + - jq + state: present + +- name: Create GoAccess directories + ansible.builtin.file: + path: "{{ item }}" + state: directory + owner: root + group: root + mode: "0755" + loop: + - /srv/goaccess + - /srv/goaccess/data + - /srv/goaccess/reports + +- name: Deploy GoAccess config + ansible.builtin.template: + src: goaccess.conf.j2 + dest: /srv/goaccess/goaccess.conf + owner: root + group: root + mode: "0644" + +- name: Deploy report generation script + ansible.builtin.template: + src: goaccess-report.sh.j2 + dest: /usr/local/bin/goaccess-report + owner: root + group: root + mode: "0755" + +- name: Deploy report generation systemd service + ansible.builtin.template: + src: goaccess-report.service.j2 + dest: /etc/systemd/system/goaccess-report.service + owner: root + group: root + mode: "0644" + notify: Reload systemd + +- name: Deploy report 
generation systemd timer + ansible.builtin.template: + src: goaccess-report.timer.j2 + dest: /etc/systemd/system/goaccess-report.timer + owner: root + group: root + mode: "0644" + notify: Reload systemd + +- name: Deploy sync script + ansible.builtin.template: + src: goaccess-sync.sh.j2 + dest: /usr/local/bin/goaccess-sync + owner: root + group: root + mode: "0755" + +- name: Deploy sync systemd service + ansible.builtin.template: + src: goaccess-sync.service.j2 + dest: /etc/systemd/system/goaccess-sync.service + owner: root + group: root + mode: "0644" + notify: Reload systemd + +- name: Deploy sync systemd timer + ansible.builtin.template: + src: goaccess-sync.timer.j2 + dest: /etc/systemd/system/goaccess-sync.timer + owner: root + group: root + mode: "0644" + notify: Reload systemd + +- name: Flush handlers to reload systemd + ansible.builtin.meta: flush_handlers + +- name: Enable and start GoAccess timers + ansible.builtin.systemd: + name: "{{ item }}" + enabled: true + state: started + loop: + - goaccess-report.timer + - goaccess-sync.timer diff --git a/roles/goaccess/templates/goaccess-report.service.j2 b/roles/goaccess/templates/goaccess-report.service.j2 new file mode 100644 index 0000000..da4c1b3 --- /dev/null +++ b/roles/goaccess/templates/goaccess-report.service.j2 @@ -0,0 +1,7 @@ +[Unit] +Description=GoAccess Report Generation +After=network-online.target + +[Service] +Type=oneshot +ExecStart=/usr/local/bin/goaccess-report diff --git a/roles/goaccess/templates/goaccess-report.sh.j2 b/roles/goaccess/templates/goaccess-report.sh.j2 new file mode 100644 index 0000000..b23e64c --- /dev/null +++ b/roles/goaccess/templates/goaccess-report.sh.j2 @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +set -euo pipefail + +REPORTS_DIR="/srv/goaccess/reports" +DATA_DIR="/srv/goaccess/data" +CONF="/srv/goaccess/goaccess.conf" + +SITES=( +{% for site in goaccess_sites %} + "{{ site }}" +{% endfor %} +) + +# Fetch logs once from journald +LOGS=$(journalctl CONTAINER_NAME=caddy 
--since "2 hours ago" --output=cat 2>/dev/null || true) + +# Skip if no logs +if [[ -z "$LOGS" ]]; then + echo "No Caddy logs found, skipping." + exit 0 +fi + +# Generate per-site reports +for site in "${SITES[@]}"; do + db_path="${DATA_DIR}/${site}" + mkdir -p "$db_path" + + echo "$LOGS" \ + | jq -c "select(.request.host == \"${site}\")" 2>/dev/null \ + | goaccess \ + --log-format=CADDY \ + --persist \ + --restore \ + --db-path="$db_path" \ + -o "${REPORTS_DIR}/${site}.html" \ + - || echo "Warning: GoAccess failed for ${site}" +done + +# Generate combined "all sites" report +all_db="${DATA_DIR}/all" +mkdir -p "$all_db" + +echo "$LOGS" \ + | goaccess \ + --log-format=CADDY \ + --persist \ + --restore \ + --db-path="$all_db" \ + -o "${REPORTS_DIR}/index.html" \ + - || echo "Warning: GoAccess failed for combined report" + +echo "Reports generated at $(date -Iseconds)" diff --git a/roles/goaccess/templates/goaccess-report.timer.j2 b/roles/goaccess/templates/goaccess-report.timer.j2 new file mode 100644 index 0000000..b6cad8f --- /dev/null +++ b/roles/goaccess/templates/goaccess-report.timer.j2 @@ -0,0 +1,9 @@ +[Unit] +Description=GoAccess Report Generation + +[Timer] +OnCalendar=*-*-* *:00:00 +Persistent=true + +[Install] +WantedBy=timers.target diff --git a/roles/goaccess/templates/goaccess-sync.service.j2 b/roles/goaccess/templates/goaccess-sync.service.j2 new file mode 100644 index 0000000..d38857a --- /dev/null +++ b/roles/goaccess/templates/goaccess-sync.service.j2 @@ -0,0 +1,7 @@ +[Unit] +Description=GoAccess Report Sync to Storage Box +After=network-online.target + +[Service] +Type=oneshot +ExecStart=/usr/local/bin/goaccess-sync diff --git a/roles/goaccess/templates/goaccess-sync.sh.j2 b/roles/goaccess/templates/goaccess-sync.sh.j2 new file mode 100644 index 0000000..5193a80 --- /dev/null +++ b/roles/goaccess/templates/goaccess-sync.sh.j2 @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +set -euo pipefail + +rsync -az --delete \ + -e "ssh -i {{ restic_ssh_key }} -p {{ 
restic_ssh_port }} -o StrictHostKeyChecking=accept-new -o BatchMode=yes" \ + /srv/goaccess/reports/ \ + {{ restic_user }}@{{ restic_host }}:analytics/ diff --git a/roles/goaccess/templates/goaccess-sync.timer.j2 b/roles/goaccess/templates/goaccess-sync.timer.j2 new file mode 100644 index 0000000..0c07d19 --- /dev/null +++ b/roles/goaccess/templates/goaccess-sync.timer.j2 @@ -0,0 +1,9 @@ +[Unit] +Description=GoAccess Report Sync to Storage Box + +[Timer] +OnCalendar=*-*-* {{ goaccess_sync_time }} +Persistent=true + +[Install] +WantedBy=timers.target diff --git a/roles/goaccess/templates/goaccess.conf.j2 b/roles/goaccess/templates/goaccess.conf.j2 new file mode 100644 index 0000000..8dc6446 --- /dev/null +++ b/roles/goaccess/templates/goaccess.conf.j2 @@ -0,0 +1,9 @@ +log-format CADDY +date-format %s +time-format %s + +# Persist parsed data to disk +db-path /srv/goaccess/data + +# HTML report settings +html-report-title GoAccess Analytics diff --git a/roles/mail/defaults/main.yml b/roles/mail/defaults/main.yml new file mode 100644 index 0000000..36ab622 --- /dev/null +++ b/roles/mail/defaults/main.yml @@ -0,0 +1,6 @@ +--- +# Rainloop webmail port (internal to docker network) +rainloop_port: 8888 + +# Rspamd web UI port (internal to docker network) +rspamd_port: 11334 diff --git a/roles/mail/handlers/main.yml b/roles/mail/handlers/main.yml new file mode 100644 index 0000000..73ef878 --- /dev/null +++ b/roles/mail/handlers/main.yml @@ -0,0 +1,21 @@ +- name: Restart mailserver + community.docker.docker_compose_v2: + project_src: /srv/mail + services: + - mailserver + state: restarted + build: never + +- name: Restart Rainloop + community.docker.docker_compose_v2: + project_src: /srv/mail + services: + - rainloop + state: restarted + build: never + +- name: Restart mail stack + community.docker.docker_compose_v2: + project_src: /srv/mail + state: restarted + build: never diff --git a/roles/mail/tasks/aliases.yml b/roles/mail/tasks/aliases.yml new file mode 100644 index
0000000..a8fcaf7 --- /dev/null +++ b/roles/mail/tasks/aliases.yml @@ -0,0 +1,19 @@ +# read-only docker exec always reports changed; changed_when: false suppresses spurious output +- name: List existing mail aliases + command: docker exec mailserver setup alias list + register: mail_alias_list + changed_when: false + tags: + - users + +- name: Create mail aliases if missing + command: > + docker exec mailserver + setup alias add {{ item.from }} + {{ item.to if item.to is string else item.to | join(',') }} + loop: "{{ mail_aliases }}" + when: item.from not in mail_alias_list.stdout + tags: + - users + + diff --git a/roles/mail/tasks/main.yml b/roles/mail/tasks/main.yml new file mode 100644 index 0000000..ead7274 --- /dev/null +++ b/roles/mail/tasks/main.yml @@ -0,0 +1,139 @@ +- name: Allow SMTP traffic + ufw: + rule: allow + port: 25 + proto: tcp + +- name: Allow mail submission traffic + ufw: + rule: allow + port: 587 + proto: tcp + +- name: Allow IMAP over TLS traffic + ufw: + rule: allow + port: 993 + proto: tcp + +- name: Create docker-mailserver directory + file: + path: "/srv/mail" + state: directory + owner: root + group: docker # to allow access to the compose file + mode: '0755' + +- name: Create docker-mailserver directories + file: + path: "/srv/mail/{{ item }}" + state: directory + owner: root + group: docker + mode: '0750' + loop: + - env + - config/rspamd/override.d + +- name: Create maillogs directory + file: + path: /srv/mail/maillogs + state: directory + mode: '0755' # container startup script needs to traverse and chown subdirs + +# stat+chown: avoids UID/GID lookup warnings for container-internal UIDs not present on host +- name: Stat maillogs directory + stat: + path: /srv/mail/maillogs + register: maillogs_stat + +- name: Set maillogs directory ownership + command: chown 113:0 /srv/mail/maillogs + when: maillogs_stat.stat.uid != 113 or maillogs_stat.stat.gid != 0 + +- name: Create mailstate directory + file: + path: /srv/mail/mailstate + state: 
directory + owner: root + group: root + mode: '0755' # container startup script needs to traverse and chown subdirs + +- name: Create maildata directory + file: + path: /srv/mail/maildata + state: directory + mode: '0751' # container startup script needs to traverse and chown subdirs + +- name: Create config directory + file: + path: /srv/mail/config + state: directory + mode: '0751' # container startup script needs to traverse and chown subdirs + +- name: Create rainloop data directory + file: + path: /srv/mail/rainloop/data + state: directory + mode: '0755' + +# stat+chown: avoids UID/GID lookup warnings for container-internal UIDs not present on host +- name: Stat rainloop data directory + stat: + path: /srv/mail/rainloop/data + register: rainloop_data_stat + +- name: Set rainloop data directory ownership + command: chown 991:991 /srv/mail/rainloop/data + when: rainloop_data_stat.stat.uid != 991 or rainloop_data_stat.stat.gid != 991 + +- name: Ensure certbot is installed + apt: + name: certbot + state: present + +- name: Obtain a Let's Encrypt certificate for {{ mail_hostname }} + command: > + certbot certonly --standalone + -d {{ mail_hostname }} + --non-interactive --agree-tos -m postmaster@{{ domain }} + args: + creates: /etc/letsencrypt/live/{{ mail_hostname }}/fullchain.pem + tags: config + +- name: Deploy mail compose file + template: + src: compose.yml.j2 + dest: /srv/mail/compose.yml + notify: Restart mail stack + tags: config + +- name: Deploy mailserver environment file + template: + src: mailserver.env.j2 + dest: /srv/mail/env/mailserver.env + mode: '0640' + owner: root + group: docker + notify: Restart mailserver + tags: config + +- name: Deploy rspamd web UI config + template: + src: worker-controller.inc.j2 + dest: /srv/mail/config/rspamd/override.d/worker-controller.inc + mode: '0644' + notify: Restart mailserver + tags: config + +- name: Start mailserver + community.docker.docker_compose_v2: + project_src: /srv/mail + state: present + build: 
never + tags: config + +- import_tasks: users.yml +- import_tasks: aliases.yml +# webmail interface +- import_tasks: rainloop.yml diff --git a/roles/mail/tasks/rainloop.yml b/roles/mail/tasks/rainloop.yml new file mode 100644 index 0000000..9891c8a --- /dev/null +++ b/roles/mail/tasks/rainloop.yml @@ -0,0 +1,21 @@ +- name: Ensure Rainloop allowed domains are set + ini_file: + path: /srv/mail/rainloop/data/_data_/_default_/configs/application.ini + section: security + option: AllowedDomains + value: "{{ mail_domains | join(',') }}" + backup: yes + notify: + - Restart Rainloop + +- name: Set proper mode of Rainloop data directory + file: + path: /srv/mail/rainloop + state: directory + recurse: yes + mode: u+rwX,g+rX + +# chown -R always exits 0; changed_when: false suppresses spurious "changed" in playbook output +- name: Set proper ownership of Rainloop data directory + command: chown -R 991:991 /srv/mail/rainloop + changed_when: false diff --git a/roles/mail/tasks/users.yml b/roles/mail/tasks/users.yml new file mode 100644 index 0000000..945392d --- /dev/null +++ b/roles/mail/tasks/users.yml @@ -0,0 +1,26 @@ +# read-only docker exec always reports changed; changed_when: false suppresses spurious output +- name: Check if mail user exists + command: docker exec mailserver setup email list + register: mail_user_list + changed_when: false + tags: + - users + +- name: Create mail users if missing + ansible.builtin.command: + argv: + - docker + - exec + - mailserver + - setup + - email + - add + - "{{ item.address }}" + - "{{ item.password }}" + loop: "{{ mail_users }}" + when: item.address not in mail_user_list.stdout + no_log: true + ignore_errors: yes + tags: + - users + diff --git a/roles/mail/templates/compose.yml.j2 b/roles/mail/templates/compose.yml.j2 new file mode 100644 index 0000000..73a1c2d --- /dev/null +++ b/roles/mail/templates/compose.yml.j2 @@ -0,0 +1,60 @@ +services: + + mailserver: + image: docker.io/mailserver/docker-mailserver:{{ mailserver_version 
}} + container_name: mailserver + hostname: {{ mail_hostname.split('.')[0] }} + domainname: {{ domain }} + env_file: env/mailserver.env + ports: + - "25:25" + - "587:587" + - "993:993" + healthcheck: + test: ["CMD-SHELL", "supervisorctl status | grep -E 'postfix|dovecot' | grep -q RUNNING"] + interval: 30s + timeout: 10s + retries: 5 + start_period: 120s + volumes: + - /srv/mail/config:/tmp/docker-mailserver + - /srv/mail/maildata:/var/mail + - /srv/mail/mailstate:/var/mail-state + - /srv/mail/maillogs:/var/log/mail + - /etc/localtime:/etc/localtime:ro + - /etc/letsencrypt:/etc/letsencrypt:ro + restart: unless-stopped + cap_add: + - NET_ADMIN + networks: + - mail + - webmail + + rainloop: + image: hardware/rainloop:{{ rainloop_version }} + container_name: rainloop + restart: unless-stopped + depends_on: + mailserver: + condition: service_healthy + healthcheck: + test: ["CMD-SHELL", "wget -q --spider http://localhost:{{ rainloop_port }}/ || exit 1"] + interval: 10s + timeout: 5s + retries: 3 + start_period: 30s + environment: + - RAINLOOP_ADMIN=admin + - RAINLOOP_ADMIN_PASSWORD={{ rainloop_admin_password }} + volumes: + - /srv/mail/rainloop/data:/rainloop/data + ports: [] # no host ports, only accessible via Docker network + networks: + - mail + - webmail + +networks: + mail: + external: true + webmail: + external: true diff --git a/roles/mail/templates/mailserver.env.j2 b/roles/mail/templates/mailserver.env.j2 new file mode 100644 index 0000000..1524e8c --- /dev/null +++ b/roles/mail/templates/mailserver.env.j2 @@ -0,0 +1,33 @@ +DMS_DEBUG=0 +# PERMIT_DOCKER=network + +# Hostname + primary domain (split from mail_hostname variable) +HOSTNAME={{ mail_hostname.split('.')[0] }} +DOMAINNAME={{ domain }} + +# Let's Encrypt +ENABLE_TLS=1 +SSL_TYPE=letsencrypt +LETSENCRYPT_DOMAIN={{ mail_hostname }} +LETSENCRYPT_EMAIL={{ admin_user }}@{{ domain }} + +# Override hostname (FQDN) and domains handled by this server +OVERRIDE_HOSTNAME={{ mail_hostname }} +OVERRIDE_DOMAIN={{ 
mail_domains | join(',') }} + +# Other docker-mailserver options +POSTMASTER_ADDRESS=postmaster@{{ domain }} +ONE_DIR=1 +ENABLE_OPENDKIM=0 +# rspamd handles DMARC and SPF natively; enabling these would duplicate validation +ENABLE_OPENDMARC=0 +ENABLE_POLICYD_SPF=0 +ENABLE_AMAVIS=0 +ENABLE_CLAMAV=0 +ENABLE_FAIL2BAN=0 +ENABLE_SPAMASSASSIN=0 +ENABLE_RSPAMD=1 + +RSPAMD_LEARN=1 + +POSTFIX_MESSAGE_SIZE_LIMIT=26214400 diff --git a/roles/mail/templates/worker-controller.inc.j2 b/roles/mail/templates/worker-controller.inc.j2 new file mode 100644 index 0000000..19ae754 --- /dev/null +++ b/roles/mail/templates/worker-controller.inc.j2 @@ -0,0 +1,2 @@ +# Rspamd web UI password +password = "{{ rspamd_web_password }}"; diff --git a/roles/monitoring/defaults/main.yml b/roles/monitoring/defaults/main.yml new file mode 100644 index 0000000..1b79317 --- /dev/null +++ b/roles/monitoring/defaults/main.yml @@ -0,0 +1,21 @@ +--- +# Data storage path on host +monitoring_data_path: /srv/monitoring + +# Grafana +grafana_port: 3000 # internal container port +grafana_expose_port: [] # host port to expose (empty = not exposed outside docker) + +# Prometheus +prometheus_port: 9090 +prometheus_expose_port: false +prometheus_retention_days: 15 + +# Loki (log aggregation) +loki_port: 3100 +loki_expose_port: false +loki_retention_days: 7 + +# Alloy (metrics/logs collector) +alloy_port: 12345 +alloy_expose_port: false diff --git a/roles/monitoring/files/caddy-dashboard.json b/roles/monitoring/files/caddy-dashboard.json new file mode 100644 index 0000000..f284fc1 --- /dev/null +++ b/roles/monitoring/files/caddy-dashboard.json @@ -0,0 +1,667 @@ +{ + "annotations": { + "list": [] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + 
"axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "calcs": ["mean", "lastNotNull", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "expr": "sum by (host) (rate(caddy_http_requests_total[5m]))", + "refId": "A", + "legendFormat": "{{host}}" + } + ], + "title": "Request Rate", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 
2, + "options": { + "legend": { + "calcs": ["mean", "lastNotNull", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "expr": "histogram_quantile(0.95, sum by (le, host) (rate(caddy_http_request_duration_seconds_bucket[5m])))", + "refId": "A", + "legendFormat": "{{host}} - p95" + }, + { + "expr": "histogram_quantile(0.50, sum by (le, host) (rate(caddy_http_request_duration_seconds_bucket[5m])))", + "refId": "B", + "legendFormat": "{{host}} - p50" + } + ], + "title": "Request Duration", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + 
"gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 3, + "options": { + "legend": { + "calcs": ["mean", "lastNotNull"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "expr": "sum by (host) (rate(caddy_http_response_duration_seconds_count{code=~\"2..\"}[5m]))", + "refId": "A", + "legendFormat": "{{host}} - 2xx" + }, + { + "expr": "sum by (host) (rate(caddy_http_response_duration_seconds_count{code=~\"4..\"}[5m]))", + "refId": "B", + "legendFormat": "{{host}} - 4xx" + }, + { + "expr": "sum by (host) (rate(caddy_http_response_duration_seconds_count{code=~\"5..\"}[5m]))", + "refId": "C", + "legendFormat": "{{host}} - 5xx" + } + ], + "title": "Response Status Codes", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "binBps" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": ".*Sent.*" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 4, + "options": { + "legend": { + "calcs": ["mean", "lastNotNull"], + "displayMode": "table", + "placement": 
"bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "expr": "sum by (host) (rate(caddy_http_response_size_bytes_sum[5m]))", + "refId": "A", + "legendFormat": "{{host}} - Sent" + }, + { + "expr": "sum by (host) (rate(caddy_http_request_size_bytes_sum[5m]))", + "refId": "B", + "legendFormat": "{{host}} - Received" + } + ], + "title": "Bandwidth", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 16 + }, + "id": 5, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "values": false, + "calcs": ["lastNotNull"], + "fields": "" + }, + "textMode": "auto" + }, + "targets": [ + { + "expr": "sum(rate(caddy_http_requests_total[5m]))", + "refId": "A" + } + ], + "title": "Total Requests/sec", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 0.5 + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 16 + }, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "values": false, + "calcs": ["lastNotNull"], + "fields": "" + }, + "textMode": "auto" + }, + "targets": [ + { + "expr": "sum(rate(caddy_http_response_duration_seconds_count{code=~\"5..\"}[5m])) / sum(rate(caddy_http_requests_total[5m])) OR on() 
vector(0)", + "refId": "A" + } + ], + "title": "Error Rate (5xx)", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 16 + }, + "id": 7, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "values": false, + "calcs": ["lastNotNull"], + "fields": "" + }, + "textMode": "auto" + }, + "targets": [ + { + "expr": "histogram_quantile(0.95, sum(rate(caddy_http_request_duration_seconds_bucket[5m])) by (le))", + "refId": "A" + } + ], + "title": "p95 Latency (Overall)", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 16 + }, + "id": 8, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "values": false, + "calcs": ["lastNotNull"], + "fields": "" + }, + "textMode": "auto" + }, + "targets": [ + { + "expr": "sum(caddy_http_requests_in_flight)", + "refId": "A" + } + ], + "title": "Requests In Flight", + "type": "stat" + }, + { + "datasource": "Loki", + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 9, + "options": { + "showTime": true, + "showLabels": false, + "showCommonLabels": false, + "wrapLogMessage": false, + "prettifyLogMessage": false, + "enableLogDetails": true, + "dedupStrategy": "none", + 
"sortOrder": "Descending" + }, + "targets": [ + { + "expr": "{container=\"caddy\"}", + "refId": "A" + } + ], + "title": "Caddy Logs", + "type": "logs" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "tags": ["caddy", "web"], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Caddy Web Server", + "uid": "caddy-metrics", + "version": 1 +} diff --git a/roles/monitoring/files/forgejo-dashboard.json b/roles/monitoring/files/forgejo-dashboard.json new file mode 100644 index 0000000..bb9cc87 --- /dev/null +++ b/roles/monitoring/files/forgejo-dashboard.json @@ -0,0 +1,703 @@ +{ + "annotations": { + "list": [] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "calcs": ["mean", "lastNotNull"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "expr": "gitea_repositories", + "refId": "A", + 
"legendFormat": "Repositories" + }, + { + "expr": "gitea_users", + "refId": "B", + "legendFormat": "Users" + } + ], + "title": "Repositories & Users Over Time", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": ["mean", "lastNotNull"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "expr": "gitea_issues_open", + "refId": "A", + "legendFormat": "Open Issues" + }, + { + "expr": "gitea_issues_closed", + "refId": "B", + "legendFormat": "Closed Issues" + } + ], + "title": "Issues Over Time", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "lineInterpolation": "smooth", + "lineWidth": 
2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 3, + "options": { + "legend": { + "calcs": ["mean", "lastNotNull"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "expr": "gitea_stars", + "refId": "A", + "legendFormat": "Stars" + }, + { + "expr": "gitea_watches", + "refId": "B", + "legendFormat": "Watchers" + }, + { + "expr": "gitea_follows", + "refId": "C", + "legendFormat": "Follows" + } + ], + "title": "Engagement Metrics", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 4, + "options": { + "legend": { + "calcs": ["mean", "lastNotNull"], + "displayMode": "table", + "placement": "bottom", 
+ "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "expr": "gitea_releases", + "refId": "A", + "legendFormat": "Releases" + }, + { + "expr": "gitea_milestones", + "refId": "B", + "legendFormat": "Milestones" + }, + { + "expr": "gitea_projects", + "refId": "C", + "legendFormat": "Projects" + } + ], + "title": "Project Metrics", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 16 + }, + "id": 5, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "values": false, + "calcs": ["lastNotNull"], + "fields": "" + }, + "textMode": "auto" + }, + "targets": [ + { + "expr": "gitea_repositories", + "refId": "A" + } + ], + "title": "Repositories", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 16 + }, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "values": false, + "calcs": ["lastNotNull"], + "fields": "" + }, + "textMode": "auto" + }, + "targets": [ + { + "expr": "gitea_users", + "refId": "A" + } + ], + "title": "Users", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": 
"green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 16 + }, + "id": 7, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "values": false, + "calcs": ["lastNotNull"], + "fields": "" + }, + "textMode": "auto" + }, + "targets": [ + { + "expr": "gitea_issues", + "refId": "A" + } + ], + "title": "Total Issues", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 5 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 12, + "y": 16 + }, + "id": 8, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "values": false, + "calcs": ["lastNotNull"], + "fields": "" + }, + "textMode": "auto" + }, + "targets": [ + { + "expr": "gitea_issues_open", + "refId": "A" + } + ], + "title": "Open Issues", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 16 + }, + "id": 9, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "values": false, + "calcs": ["lastNotNull"], + "fields": "" + }, + "textMode": "auto" + }, + "targets": [ + { + "expr": "gitea_organizations", + "refId": "A" + } + ], + "title": "Organizations", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { 
+ "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 20, + "y": 16 + }, + "id": 10, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "values": false, + "calcs": ["lastNotNull"], + "fields": "" + }, + "textMode": "auto" + }, + "targets": [ + { + "expr": "gitea_publickeys", + "refId": "A" + } + ], + "title": "SSH Keys", + "type": "stat" + }, + { + "datasource": "Loki", + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 11, + "options": { + "showTime": true, + "showLabels": false, + "showCommonLabels": false, + "wrapLogMessage": false, + "prettifyLogMessage": false, + "enableLogDetails": true, + "dedupStrategy": "none", + "sortOrder": "Descending" + }, + "targets": [ + { + "expr": "{container=\"forgejo\"}", + "refId": "A" + } + ], + "title": "Forgejo Logs", + "type": "logs" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "tags": ["forgejo", "git"], + "templating": { + "list": [] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Forgejo", + "uid": "forgejo-metrics", + "version": 1 +} diff --git a/roles/monitoring/files/mailserver-dashboard.json b/roles/monitoring/files/mailserver-dashboard.json new file mode 100644 index 0000000..33c0e85 --- /dev/null +++ b/roles/monitoring/files/mailserver-dashboard.json @@ -0,0 +1,642 @@ +{ + "annotations": { + "list": [] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "datasource": "Loki", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": 
"auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "calcs": ["mean", "lastNotNull"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "expr": "sum(count_over_time({container=\"mailserver\"} |~ \"status=sent\" [5m])) or vector(0)", + "refId": "A", + "legendFormat": "Sent" + }, + { + "expr": "sum(count_over_time({container=\"mailserver\"} |~ \"status=deferred\" [5m])) or vector(0)", + "refId": "B", + "legendFormat": "Deferred" + }, + { + "expr": "sum(count_over_time({container=\"mailserver\"} |~ \"status=bounced\" [5m])) or vector(0)", + "refId": "C", + "legendFormat": "Bounced" + } + ], + "title": "Mail Delivery Status (5m)", + "type": "timeseries" + }, + { + "datasource": "Loki", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": 
false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": ["mean", "lastNotNull"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "expr": "sum(count_over_time({container=\"mailserver\"} |~ \"dovecot.*IMAP.*Login\" [5m])) or vector(0)", + "refId": "A", + "legendFormat": "IMAP Logins" + }, + { + "expr": "sum(count_over_time({container=\"mailserver\"} |~ \"dovecot.*auth.*unknown user\" [5m])) or vector(0)", + "refId": "B", + "legendFormat": "Failed Auth" + }, + { + "expr": "sum(count_over_time({container=\"mailserver\"} |~ \"postfix/submission.*auth=1\" [5m])) or vector(0)", + "refId": "C", + "legendFormat": "SMTP Auth Success" + } + ], + "title": "Authentication Activity (5m)", + "type": "timeseries" + }, + { + "datasource": "Loki", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + 
"overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 3, + "options": { + "legend": { + "calcs": ["mean", "lastNotNull"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "expr": "sum(count_over_time({container=\"mailserver\"} |~ \"postfix/smtpd.*connect from\" [5m])) or vector(0)", + "refId": "A", + "legendFormat": "Incoming Connections" + }, + { + "expr": "sum(count_over_time({container=\"mailserver\"} |~ \"postscreen.*PREGREET\" [5m])) or vector(0)", + "refId": "B", + "legendFormat": "Blocked (PREGREET)" + }, + { + "expr": "sum(count_over_time({container=\"mailserver\"} |~ \"postfix.*reject\" [5m])) or vector(0)", + "refId": "C", + "legendFormat": "Rejected" + } + ], + "title": "SMTP Connections & Filtering (5m)", + "type": "timeseries" + }, + { + "datasource": "Loki", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 4, + "options": { + "legend": { + "calcs": ["mean", "lastNotNull"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + 
"sort": "none" + } + }, + "targets": [ + { + "expr": "sum(count_over_time({container=\"mailserver\"} |~ \"Connection timed out\" [5m])) or vector(0)", + "refId": "A", + "legendFormat": "Connection Timeouts" + }, + { + "expr": "sum(count_over_time({container=\"mailserver\"} |~ \"Network is unreachable\" [5m])) or vector(0)", + "refId": "B", + "legendFormat": "Network Unreachable" + } + ], + "title": "Delivery Errors (5m)", + "type": "timeseries" + }, + { + "datasource": "Loki", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 16 + }, + "id": 5, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "values": false, + "calcs": ["sum"], + "fields": "" + }, + "textMode": "auto" + }, + "targets": [ + { + "expr": "sum(count_over_time({container=\"mailserver\"} |~ \"status=sent\" [$__range])) or vector(0)", + "refId": "A", + "instant": true + } + ], + "title": "Total Sent (Current Range)", + "type": "stat" + }, + { + "datasource": "Loki", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 10 + }, + { + "color": "red", + "value": 50 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 16 + }, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "values": false, + "calcs": ["sum"], + "fields": "" + }, + "textMode": "auto" + }, + "targets": [ + { + "expr": 
"sum(count_over_time({container=\"mailserver\"} |~ \"status=deferred\" [$__range])) or vector(0)", + "refId": "A", + "instant": true + } + ], + "title": "Total Deferred (Current Range)", + "type": "stat" + }, + { + "datasource": "Loki", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 5 + }, + { + "color": "red", + "value": 20 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 16 + }, + "id": 7, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "values": false, + "calcs": ["sum"], + "fields": "" + }, + "textMode": "auto" + }, + "targets": [ + { + "expr": "sum(count_over_time({container=\"mailserver\"} |~ \"unknown user\" [$__range])) or vector(0)", + "refId": "A", + "instant": true + } + ], + "title": "Failed Auth Attempts (Current Range)", + "type": "stat" + }, + { + "datasource": "Loki", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 10 + }, + { + "color": "red", + "value": 50 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 16 + }, + "id": 8, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "values": false, + "calcs": ["sum"], + "fields": "" + }, + "textMode": "auto" + }, + "targets": [ + { + "expr": "sum(count_over_time({container=\"mailserver\"} |~ \"PREGREET\" [$__range])) or vector(0)", + "refId": "A", + "instant": true + } + ], + "title": "Spam Blocked (Current Range)", + "type": 
"stat" + }, + { + "datasource": "Loki", + "gridPos": { + "h": 12, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 9, + "options": { + "showTime": true, + "showLabels": false, + "showCommonLabels": false, + "wrapLogMessage": false, + "prettifyLogMessage": false, + "enableLogDetails": true, + "dedupStrategy": "none", + "sortOrder": "Descending" + }, + "targets": [ + { + "expr": "{container=\"mailserver\"}", + "refId": "A" + } + ], + "title": "Mailserver Logs", + "type": "logs" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "tags": ["mail", "postfix", "dovecot"], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Mail Server", + "uid": "mailserver-logs", + "version": 1 +} diff --git a/roles/monitoring/files/node-exporter-dashboard.json b/roles/monitoring/files/node-exporter-dashboard.json new file mode 100644 index 0000000..6663bd9 --- /dev/null +++ b/roles/monitoring/files/node-exporter-dashboard.json @@ -0,0 +1,364 @@ +{ + "annotations": { + "list": [] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, 
+ "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "expr": "100 - (avg by (instance) (irate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)", + "refId": "A", + "legendFormat": "CPU Usage" + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "expr": "100 * (1 - ((node_memory_MemAvailable_bytes or node_memory_Buffers_bytes + node_memory_Cached_bytes + node_memory_MemFree_bytes) / node_memory_MemTotal_bytes))", + "refId": "A", + "legendFormat": "Memory Usage" + } + ], + "title": "Memory Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": 
"prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "expr": "100 - ((node_filesystem_avail_bytes{mountpoint=\"/\",fstype!=\"rootfs\"} * 100) / node_filesystem_size_bytes{mountpoint=\"/\",fstype!=\"rootfs\"})", + "refId": "A", + "legendFormat": "Disk Usage /" + } + ], + "title": "Disk Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": 
{ + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "expr": "irate(node_network_receive_bytes_total{device!=\"lo\"}[5m])", + "refId": "A", + "legendFormat": "{{device}} - Receive" + }, + { + "expr": "irate(node_network_transmit_bytes_total{device!=\"lo\"}[5m])", + "refId": "B", + "legendFormat": "{{device}} - Transmit" + } + ], + "title": "Network Traffic", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "tags": ["node-exporter"], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Node Exporter System Metrics", + "uid": "node-exporter", + "version": 1 +} diff --git a/roles/monitoring/files/service-overview-dashboard.json b/roles/monitoring/files/service-overview-dashboard.json new file mode 100644 index 0000000..d4d992c --- /dev/null +++ b/roles/monitoring/files/service-overview-dashboard.json @@ -0,0 +1,903 @@ +{ + "annotations": { + "list": [] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 17, + "panels": [], + "title": "Logs (use Level and Search filters above)", + "type": "row" + }, + { + "datasource": "Loki", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + 
"barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1, + "scaleDistribution": { + "type": "linear" + }, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null} + ] + } + }, + "overrides": [ + {"matcher": {"id": "byName", "options": "fatal"}, "properties": [{"id": "color", "value": {"fixedColor": "dark-red", "mode": "fixed"}}]}, + {"matcher": {"id": "byName", "options": "panic"}, "properties": [{"id": "color", "value": {"fixedColor": "dark-red", "mode": "fixed"}}]}, + {"matcher": {"id": "byName", "options": "critical"}, "properties": [{"id": "color", "value": {"fixedColor": "red", "mode": "fixed"}}]}, + {"matcher": {"id": "byName", "options": "crit"}, "properties": [{"id": "color", "value": {"fixedColor": "red", "mode": "fixed"}}]}, + {"matcher": {"id": "byName", "options": "error"}, "properties": [{"id": "color", "value": {"fixedColor": "orange", "mode": "fixed"}}]}, + {"matcher": {"id": "byName", "options": "err"}, "properties": [{"id": "color", "value": {"fixedColor": "orange", "mode": "fixed"}}]}, + {"matcher": {"id": "byName", "options": "warn"}, "properties": [{"id": "color", "value": {"fixedColor": "yellow", "mode": "fixed"}}]}, + {"matcher": {"id": "byName", "options": "warning"}, "properties": [{"id": "color", "value": {"fixedColor": "yellow", "mode": "fixed"}}]}, + {"matcher": {"id": "byName", "options": "notice"}, "properties": [{"id": "color", "value": {"fixedColor": "blue", "mode": "fixed"}}]}, + {"matcher": {"id": "byName", "options": "info"}, "properties": [{"id": "color", "value": {"fixedColor": "green", "mode": "fixed"}}]}, + {"matcher": {"id": "byName", "options": "debug"}, "properties": [{"id": "color", "value": {"fixedColor": "purple", "mode": "fixed"}}]} + ] + }, + "gridPos": { + "h": 8, + "w": 
8, + "x": 0, + "y": 1 + }, + "id": 16, + "options": { + "legend": { + "calcs": ["sum"], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "expr": "sum by (level) (count_over_time({level=~\"${level:regex}\"} |~ \"(?i)$search\" [$__interval]))", + "legendFormat": "{{level}}", + "refId": "A" + } + ], + "title": "Log Volume by Level", + "type": "timeseries" + }, + { + "datasource": "Loki", + "gridPos": { + "h": 8, + "w": 16, + "x": 8, + "y": 1 + }, + "id": 10, + "options": { + "showTime": true, + "showLabels": true, + "showCommonLabels": false, + "wrapLogMessage": false, + "prettifyLogMessage": false, + "enableLogDetails": true, + "dedupStrategy": "none", + "sortOrder": "Descending", + "noDataMessage": " " + }, + "targets": [ + { + "expr": "{level=~\"${level:regex}\"} |~ \"(?i)$search\"", + "refId": "A" + } + ], + "title": "Logs", + "type": "logs" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "0": {"color": "red", "text": "DOWN"}, + "1": {"color": "green", "text": "UP"} + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "red", "value": null}, + {"color": "green", "value": 1} + ] + } + } + }, + "gridPos": {"h": 4, "w": 4, "x": 0, "y": 9}, + "id": 1, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": {"values": false, "calcs": ["lastNotNull"], "fields": ""}, + "textMode": "value" + }, + "targets": [{"expr": "up{job=\"caddy\"}", "refId": "A"}], + "title": "Caddy", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "0": {"color": "red", "text": "DOWN"}, + "1": {"color": "green", "text": "UP"} + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "red", 
"value": null}, + {"color": "green", "value": 1} + ] + } + } + }, + "gridPos": {"h": 4, "w": 4, "x": 4, "y": 9}, + "id": 2, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": {"values": false, "calcs": ["lastNotNull"], "fields": ""}, + "textMode": "value" + }, + "targets": [{"expr": "up{job=\"forgejo\"}", "refId": "A"}], + "title": "Forgejo", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "0": {"color": "red", "text": "DOWN"}, + "1": {"color": "green", "text": "UP"} + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "red", "value": null}, + {"color": "green", "value": 1} + ] + } + } + }, + "gridPos": {"h": 4, "w": 4, "x": 8, "y": 9}, + "id": 20, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": {"values": false, "calcs": ["lastNotNull"], "fields": ""}, + "textMode": "value" + }, + "targets": [{"expr": "count(container_last_seen{name=\"tuwunel\"}) or vector(0)", "refId": "A"}], + "title": "Tuwunel", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "1": {"color": "green", "text": "UP"} + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null} + ] + } + } + }, + "gridPos": {"h": 4, "w": 3, "x": 12, "y": 9}, + "id": 3, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": {"values": false, 
"calcs": ["lastNotNull"], "fields": ""}, + "textMode": "value" + }, + "targets": [{"expr": "1", "refId": "A"}], + "title": "Mailserver", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "0": {"color": "red", "text": "DOWN"}, + "1": {"color": "green", "text": "UP"} + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "red", "value": null}, + {"color": "green", "value": 1} + ] + } + } + }, + "gridPos": {"h": 4, "w": 3, "x": 15, "y": 9}, + "id": 4, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": {"values": false, "calcs": ["lastNotNull"], "fields": ""}, + "textMode": "value" + }, + "targets": [{"expr": "up{job=\"prometheus\"}", "refId": "A"}], + "title": "Prometheus", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "0": {"color": "red", "text": "DOWN"}, + "1": {"color": "green", "text": "UP"} + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "red", "value": null}, + {"color": "green", "value": 1} + ] + } + } + }, + "gridPos": {"h": 4, "w": 3, "x": 18, "y": 9}, + "id": 5, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": {"values": false, "calcs": ["lastNotNull"], "fields": ""}, + "textMode": "value" + }, + "targets": [{"expr": "up{job=\"alloy\"}", "refId": "A"}], + "title": "Alloy", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "1": {"color": "green", "text": "UP"} + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null} + ] + } + } + }, + "gridPos": {"h": 4, "w": 3, "x": 21, "y": 9}, + "id": 6, + 
"options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": {"values": false, "calcs": ["lastNotNull"], "fields": ""}, + "textMode": "value" + }, + "targets": [{"expr": "1", "refId": "A"}], + "title": "Loki", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "0": {"color": "red", "text": "FAILED"}, + "1": {"color": "green", "text": "OK"} + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "red", "value": null}, + {"color": "yellow", "value": 0.5}, + {"color": "green", "value": 1} + ] + } + } + }, + "gridPos": {"h": 4, "w": 4, "x": 0, "y": 13}, + "id": 11, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": {"values": false, "calcs": ["lastNotNull"], "fields": ""}, + "text": {"valueSize": 32}, + "textMode": "value" + }, + "targets": [{"expr": "restic_backup_success", "refId": "A"}], + "title": "Last Backup", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null}, + {"color": "yellow", "value": 86400}, + {"color": "red", "value": 172800} + ] + }, + "unit": "dtdurations" + } + }, + "gridPos": {"h": 4, "w": 4, "x": 4, "y": 13}, + "id": 12, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": {"values": false, "calcs": ["lastNotNull"], "fields": ""}, + "text": {"valueSize": 24}, + "textMode": "value" + }, + "targets": [{"expr": "time() - restic_backup_timestamp_seconds", "refId": "A"}], + "title": "Time Since Last Backup", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "0": {"color": "red", 
"text": "FAILED"}, + "1": {"color": "green", "text": "OK"} + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "red", "value": null}, + {"color": "yellow", "value": 0.5}, + {"color": "green", "value": 1} + ] + } + } + }, + "gridPos": {"h": 4, "w": 4, "x": 8, "y": 13}, + "id": 13, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": {"values": false, "calcs": ["lastNotNull"], "fields": ""}, + "text": {"valueSize": 32}, + "textMode": "value" + }, + "targets": [{"expr": "restic_prune_success", "refId": "A"}], + "title": "Last Prune", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null}, + {"color": "yellow", "value": 604800}, + {"color": "red", "value": 1209600} + ] + }, + "unit": "dtdurations" + } + }, + "gridPos": {"h": 4, "w": 4, "x": 12, "y": 13}, + "id": 14, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": {"values": false, "calcs": ["lastNotNull"], "fields": ""}, + "text": {"valueSize": 24}, + "textMode": "value" + }, + "targets": [{"expr": "time() - restic_prune_timestamp_seconds", "refId": "A"}], + "title": "Time Since Last Prune", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null} + ] + }, + "unit": "short" + } + }, + "gridPos": {"h": 4, "w": 4, "x": 16, "y": 13}, + "id": 15, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": {"values": false, "calcs": ["lastNotNull"], "fields": ""}, + "text": {"valueSize": 32}, + "textMode": "value" + }, + "targets": [ + { + "expr": 
"count(container_last_seen{name=~\".+\"})", + "refId": "A" + } + ], + "title": "Active Containers", + "type": "stat" + }, + { + "datasource": "Loki", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null}, + {"color": "yellow", "value": 50}, + {"color": "red", "value": 200} + ] + }, + "unit": "short" + } + }, + "gridPos": {"h": 4, "w": 4, "x": 20, "y": 13}, + "id": 18, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": {"values": false, "calcs": ["sum"], "fields": ""}, + "text": {"valueSize": 32}, + "textMode": "auto" + }, + "targets": [ + { + "expr": "sum(count_over_time({job=\"rspamd\"} |~ \"\\\\(reject\\\\)|\\\\(greylist\\\\)\" [$__range]))", + "refId": "A", + "instant": true + } + ], + "title": "Spam Blocked", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": {"tooltip": false, "viz": false, "legend": false}, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": {"type": "linear"}, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "none"}, + "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null}, + {"color": "yellow", "value": 60}, + {"color": "red", "value": 80} + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 8, "x": 0, "y": 17}, + "id": 7, + "options": { + "legend": { + "calcs": ["mean", "lastNotNull", "max"], + "displayMode": "table", 
+ "placement": "bottom", + "showLegend": true + }, + "tooltip": {"mode": "multi", "sort": "none"} + }, + "targets": [ + { + "expr": "100 - (avg(irate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)", + "refId": "A", + "legendFormat": "CPU Usage" + } + ], + "title": "System CPU", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": {"tooltip": false, "viz": false, "legend": false}, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": {"type": "linear"}, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "none"}, + "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null}, + {"color": "yellow", "value": 70}, + {"color": "red", "value": 85} + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 8, "x": 8, "y": 17}, + "id": 8, + "options": { + "legend": { + "calcs": ["mean", "lastNotNull", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": {"mode": "multi", "sort": "none"} + }, + "targets": [ + { + "expr": "100 * (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes))", + "refId": "A", + "legendFormat": "Memory Usage" + } + ], + "title": "System Memory", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + 
"hideFrom": {"tooltip": false, "viz": false, "legend": false}, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": {"type": "linear"}, + "showPoints": "never", + "spanNulls": false, + "stacking": {"group": "A", "mode": "none"}, + "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null}, + {"color": "yellow", "value": 70}, + {"color": "red", "value": 85} + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 8, "x": 16, "y": 17}, + "id": 9, + "options": { + "legend": { + "calcs": ["mean", "lastNotNull", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": {"mode": "multi", "sort": "none"} + }, + "targets": [ + { + "expr": "100 - ((node_filesystem_avail_bytes{mountpoint=\"/\"} * 100) / node_filesystem_size_bytes{mountpoint=\"/\"})", + "refId": "A", + "legendFormat": "Disk Usage" + } + ], + "title": "System Disk", + "type": "timeseries" + }, + { + "datasource": "Loki", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": {"legend": false, "tooltip": false, "viz": false}, + "lineWidth": 1, + "scaleDistribution": {"type": "linear"}, + "stacking": {"group": "A", "mode": "normal"} + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [{"color": "green", "value": null}] + } + }, + "overrides": [ + {"matcher": {"id": "byName", "options": "Rejected"}, "properties": [{"id": "color", "value": {"fixedColor": "red", "mode": "fixed"}}]}, + {"matcher": {"id": "byName", "options": "Greylisted"}, "properties": [{"id": "color", "value": {"fixedColor": "orange", "mode": "fixed"}}]}, + {"matcher": 
{"id": "byName", "options": "Spam Header"}, "properties": [{"id": "color", "value": {"fixedColor": "yellow", "mode": "fixed"}}]}, + {"matcher": {"id": "byName", "options": "Clean"}, "properties": [{"id": "color", "value": {"fixedColor": "green", "mode": "fixed"}}]} + ] + }, + "gridPos": {"h": 6, "w": 24, "x": 0, "y": 25}, + "id": 19, + "options": { + "legend": { + "calcs": ["sum"], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": {"mode": "multi", "sort": "none"} + }, + "targets": [ + { + "expr": "sum(count_over_time({job=\"rspamd\"} |~ \"\\\\(reject\\\\)\" [$__interval]))", + "legendFormat": "Rejected", + "refId": "A" + }, + { + "expr": "sum(count_over_time({job=\"rspamd\"} |~ \"\\\\(greylist\\\\)\" [$__interval]))", + "legendFormat": "Greylisted", + "refId": "B" + }, + { + "expr": "sum(count_over_time({job=\"rspamd\"} |~ \"\\\\(add header\\\\)\" [$__interval]))", + "legendFormat": "Spam Header", + "refId": "C" + }, + { + "expr": "sum(count_over_time({job=\"rspamd\"} |~ \"\\\\(no action\\\\)\" [$__interval]))", + "legendFormat": "Clean", + "refId": "D" + } + ], + "title": "Rspamd Actions", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "tags": ["overview"], + "templating": { + "list": [ + { + "allValue": ".*", + "current": { + "selected": true, + "text": ["error", "err", "fatal", "panic", "critical", "crit"], + "value": ["error", "err", "fatal", "panic", "critical", "crit"] + }, + "description": "Log levels to display (extracted from log content)", + "hide": 0, + "includeAll": true, + "label": "Level", + "multi": true, + "name": "level", + "options": [ + {"selected": true, "text": "fatal", "value": "fatal"}, + {"selected": true, "text": "panic", "value": "panic"}, + {"selected": true, "text": "critical", "value": "critical"}, + {"selected": true, "text": "crit", "value": "crit"}, + {"selected": true, "text": "error", "value": "error"}, + {"selected": true, "text": "err", "value": "err"}, + 
{"selected": false, "text": "warn", "value": "warn"}, + {"selected": false, "text": "warning", "value": "warning"}, + {"selected": false, "text": "notice", "value": "notice"}, + {"selected": false, "text": "info", "value": "info"}, + {"selected": false, "text": "debug", "value": "debug"} + ], + "query": "fatal, panic, critical, crit, error, err, warn, warning, notice, info, debug", + "type": "custom" + }, + { + "current": { + "selected": false, + "text": "", + "value": "" + }, + "description": "Filter logs by text (regex supported)", + "hide": 0, + "label": "Search", + "name": "search", + "options": [], + "query": "", + "type": "textbox" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Service Overview", + "uid": "service-overview", + "version": 1 +} diff --git a/roles/monitoring/files/system-metrics-dashboard.json b/roles/monitoring/files/system-metrics-dashboard.json new file mode 100644 index 0000000..fff7a63 --- /dev/null +++ b/roles/monitoring/files/system-metrics-dashboard.json @@ -0,0 +1,553 @@ +{ + "annotations": { + "list": [] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { 
+ "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 60 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "calcs": ["mean", "lastNotNull", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "expr": "100 - (avg(irate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)", + "refId": "A", + "legendFormat": "CPU Usage" + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 60 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": ["mean", "lastNotNull", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "expr": "100 * (1 - (node_memory_MemAvailable_bytes / 
node_memory_MemTotal_bytes))", + "refId": "A", + "legendFormat": "Memory Usage" + } + ], + "title": "Memory Usage", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 70 + }, + { + "color": "red", + "value": 85 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 3, + "options": { + "legend": { + "calcs": ["mean", "lastNotNull", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "expr": "100 - ((node_filesystem_avail_bytes{mountpoint=\"/\"} * 100) / node_filesystem_size_bytes{mountpoint=\"/\"})", + "refId": "A", + "legendFormat": "Root Disk Usage" + } + ], + "title": "Disk Usage", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, 
+ "legend": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "Bps" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": ".*Receive.*" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 4, + "options": { + "legend": { + "calcs": ["mean", "lastNotNull"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "expr": "irate(node_network_receive_bytes_total{device!=\"lo\"}[5m])", + "refId": "A", + "legendFormat": "{{device}} Receive" + }, + { + "expr": "irate(node_network_transmit_bytes_total{device!=\"lo\"}[5m])", + "refId": "B", + "legendFormat": "{{device}} Transmit" + } + ], + "title": "Network Traffic", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "red", + "value": 2 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 0, + "y": 16 + }, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "values": false, + "calcs": ["lastNotNull"], + "fields": "" + }, + "textMode": "auto" + }, + "targets": [ + { + "expr": "node_load1", + "refId": "A" + } + ], + "title": 
"Load Average (1m)", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 8, + "y": 16 + }, + "id": 7, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "values": false, + "calcs": ["lastNotNull"], + "fields": "" + }, + "textMode": "auto" + }, + "targets": [ + { + "expr": "node_filesystem_avail_bytes{mountpoint=\"/\"}", + "refId": "A" + } + ], + "title": "Disk Space Available", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 16, + "y": 16 + }, + "id": 8, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "values": false, + "calcs": ["lastNotNull"], + "fields": "" + }, + "textMode": "auto" + }, + "targets": [ + { + "expr": "node_memory_MemAvailable_bytes", + "refId": "A" + } + ], + "title": "Memory Available", + "type": "stat" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "tags": ["system"], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "System Metrics", + "uid": "system-metrics", + "version": 1 +} diff --git a/roles/monitoring/handlers/main.yml b/roles/monitoring/handlers/main.yml new file mode 100644 index 0000000..ff58261 --- /dev/null +++ b/roles/monitoring/handlers/main.yml @@ -0,0 +1,30 @@ 
+--- +- name: restart prometheus + community.docker.docker_container: + name: prometheus + state: started + restart: true + +- name: restart alloy + community.docker.docker_container: + name: alloy + state: started + restart: true + +- name: restart loki + community.docker.docker_container: + name: loki + state: started + restart: true + +- name: restart grafana + community.docker.docker_container: + name: grafana + state: started + restart: true + +- name: reload monitoring stack + community.docker.docker_compose_v2: + project_src: "{{ monitoring_data_path }}" + state: present + build: never diff --git a/roles/monitoring/tasks/main.yml b/roles/monitoring/tasks/main.yml new file mode 100644 index 0000000..29f7458 --- /dev/null +++ b/roles/monitoring/tasks/main.yml @@ -0,0 +1,130 @@ +--- +- name: Create monitoring directories + file: + path: "{{ item }}" + state: directory + mode: '0755' + loop: + - "{{ monitoring_data_path }}" + - "{{ monitoring_data_path }}/prometheus" + - "{{ monitoring_data_path }}/alloy" + - "{{ monitoring_data_path }}/grafana" + - "{{ monitoring_data_path }}/grafana/provisioning" + - "{{ monitoring_data_path }}/grafana/provisioning/datasources" + - "{{ monitoring_data_path }}/grafana/provisioning/dashboards" + - "{{ monitoring_data_path }}/loki" + +- name: Create textfile collector directory for node_exporter metrics + file: + path: /var/lib/node_exporter/textfile_collector + state: directory + mode: '0755' + owner: root + group: root + +# Use the file module rather than a 'creates:'-guarded shell so mode and ownership are enforced on every run, not only at creation. +- name: Create container data directories with proper ownership + ansible.builtin.file: + path: "{{ item.path }}" + state: directory + mode: '0755' + owner: "{{ item.uid }}" + group: "{{ item.gid }}" + loop: + - { path: "{{ monitoring_data_path }}/prometheus/data", uid: 65534, gid: 65534 } + - { path: "{{ monitoring_data_path }}/grafana/data", uid: 472, gid: 472 } + - { path: "{{ monitoring_data_path }}/loki/data", uid: 10001, gid: 10001 } + loop_control: + label: "{{ 
item.path }}" + +- name: Create Prometheus configuration + template: + src: prometheus.yml.j2 + dest: "{{ monitoring_data_path }}/prometheus/prometheus.yml" + mode: '0644' + notify: restart prometheus + +- name: Create Alloy configuration + template: + src: config.alloy.j2 + dest: "{{ monitoring_data_path }}/alloy/config.alloy" + mode: '0644' + force: true + notify: restart alloy + +- name: Create Loki configuration + template: + src: loki-config.yaml.j2 + dest: "{{ monitoring_data_path }}/loki/loki-config.yaml" + mode: '0644' + force: true + notify: restart loki + +- name: Create Grafana datasource configuration + template: + src: datasources.yml.j2 + dest: "{{ monitoring_data_path }}/grafana/provisioning/datasources/datasources.yml" + mode: '0644' + notify: restart grafana + +- name: Create Grafana dashboard provisioning config + template: + src: dashboards.yml.j2 + dest: "{{ monitoring_data_path }}/grafana/provisioning/dashboards/dashboards.yml" + mode: '0644' + notify: restart grafana + +- name: Copy Node Exporter dashboard + copy: + src: node-exporter-dashboard.json + dest: "{{ monitoring_data_path }}/grafana/provisioning/dashboards/node-exporter.json" + mode: '0644' + +- name: Copy System Metrics dashboard + copy: + src: system-metrics-dashboard.json + dest: "{{ monitoring_data_path }}/grafana/provisioning/dashboards/system-metrics.json" + mode: '0644' + +- name: Copy Caddy dashboard + copy: + src: caddy-dashboard.json + dest: "{{ monitoring_data_path }}/grafana/provisioning/dashboards/caddy.json" + mode: '0644' + +- name: Copy Mailserver dashboard + copy: + src: mailserver-dashboard.json + dest: "{{ monitoring_data_path }}/grafana/provisioning/dashboards/mailserver.json" + mode: '0644' + +- name: Copy Forgejo dashboard + copy: + src: forgejo-dashboard.json + dest: "{{ monitoring_data_path }}/grafana/provisioning/dashboards/forgejo.json" + mode: '0644' + +- name: Copy Service Overview dashboard + copy: + src: service-overview-dashboard.json + dest: "{{ monitoring_data_path 
}}/grafana/provisioning/dashboards/service-overview.json" + mode: '0644' + +- name: Create compose file + template: + src: compose.yml.j2 + dest: "{{ monitoring_data_path }}/compose.yml" + mode: '0644' + register: compose_file + notify: reload monitoring stack + +- name: Deploy monitoring stack + community.docker.docker_compose_v2: + project_src: "{{ monitoring_data_path }}" + state: present + build: never + register: compose_output + +- name: Show deployment status + debug: + msg: "Monitoring stack deployed. Grafana available at {{ grafana_root_url }}" diff --git a/roles/monitoring/templates/compose.yml.j2 b/roles/monitoring/templates/compose.yml.j2 new file mode 100644 index 0000000..5965af4 --- /dev/null +++ b/roles/monitoring/templates/compose.yml.j2 @@ -0,0 +1,93 @@ +services: + prometheus: + image: prom/prometheus:{{ prometheus_version }} + container_name: prometheus + restart: unless-stopped + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--storage.tsdb.retention.time={{ prometheus_retention_days }}d' + - '--web.console.libraries=/usr/share/prometheus/console_libraries' + - '--web.console.templates=/usr/share/prometheus/consoles' + - '--web.enable-remote-write-receiver' + volumes: + - {{ monitoring_data_path }}/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro + - {{ monitoring_data_path }}/prometheus/data:/prometheus + networks: + - monitoring + - caddy +{% if prometheus_expose_port | default(true) %} + ports: + - "{{ prometheus_port }}:9090" +{% endif %} + + alloy: + image: grafana/alloy:{{ alloy_version }} + container_name: alloy + restart: unless-stopped + privileged: true + command: + - run + - --server.http.listen-addr=0.0.0.0:{{ alloy_port }} + - --storage.path=/var/lib/alloy/data + - /etc/alloy/config.alloy + volumes: + - {{ monitoring_data_path }}/alloy/config.alloy:/etc/alloy/config.alloy:ro + - /:/host/root:ro + - /sys:/host/sys:ro + - /proc:/host/proc:ro + - /var/log:/var/log:ro + 
- /run/log/journal:/run/log/journal:ro + - /etc/machine-id:/etc/machine-id:ro + - /var/run/docker.sock:/var/run/docker.sock:ro + - /sys/fs/cgroup:/sys/fs/cgroup:ro + - /var/lib/docker:/var/lib/docker:ro + environment: + HOSTNAME: {{ ansible_facts["hostname"] }} + networks: + - monitoring +{% if alloy_expose_port | default(true) %} + ports: + - "{{ alloy_port }}:{{ alloy_port }}" +{% endif %} + + grafana: + image: grafana/grafana:{{ grafana_version }} + container_name: grafana + restart: unless-stopped + volumes: + - {{ monitoring_data_path }}/grafana/data:/var/lib/grafana + - {{ monitoring_data_path }}/grafana/provisioning:/etc/grafana/provisioning + environment: + GF_SECURITY_ADMIN_PASSWORD: {{ grafana_admin_password | replace('$', '$$') | to_json }} # quoted for YAML, and dollar signs doubled so Compose does not interpolate them + GF_USERS_ALLOW_SIGN_UP: "false" + GF_SERVER_ROOT_URL: {{ grafana_root_url | default('http://localhost:3000') }} + GF_SERVER_SERVE_FROM_SUB_PATH: "false" + networks: + - monitoring + - caddy +{% if grafana_expose_port | default([]) %} + ports: + - "{{ grafana_expose_port }}:3000" +{% endif %} + + loki: + image: grafana/loki:{{ loki_version }} + container_name: loki + restart: unless-stopped + command: -config.file=/etc/loki/local-config.yaml + volumes: + - {{ monitoring_data_path }}/loki/loki-config.yaml:/etc/loki/local-config.yaml:ro + - {{ monitoring_data_path }}/loki/data:/loki + networks: + - monitoring +{% if loki_expose_port | default(true) %} + ports: + - "{{ loki_port | default(3100) }}:3100" +{% endif %} + +networks: + monitoring: + external: true + caddy: + external: true diff --git a/roles/monitoring/templates/config.alloy.j2 b/roles/monitoring/templates/config.alloy.j2 new file mode 100644 index 0000000..41effa6 --- /dev/null +++ b/roles/monitoring/templates/config.alloy.j2 @@ -0,0 +1,163 @@ +// Prometheus metrics collection +prometheus.exporter.unix "node" { + rootfs_path = "/host/root" + sysfs_path = "/host/sys" + procfs_path = "/host/proc" + textfile { + directory = "/host/root/var/lib/node_exporter/textfile_collector" + } +
set_collectors = ["cpu", "loadavg", "meminfo", "diskstats", "filesystem", "netdev", "textfile"] +} + +prometheus.scrape "node_exporter" { + targets = prometheus.exporter.unix.node.targets + forward_to = [prometheus.remote_write.metrics.receiver] +} + +prometheus.scrape "alloy" { + targets = [{ + __address__ = "localhost:{{ alloy_port }}", + }] + forward_to = [prometheus.remote_write.metrics.receiver] +} + +prometheus.exporter.cadvisor "docker" { + docker_host = "unix:///var/run/docker.sock" + docker_only = true +} + +prometheus.scrape "cadvisor" { + targets = prometheus.exporter.cadvisor.docker.targets + forward_to = [prometheus.remote_write.metrics.receiver] +} + +prometheus.remote_write "metrics" { + endpoint { + url = "http://prometheus:9090/api/v1/write" // container port on the docker network; the configurable prometheus port is only the host-side mapping + } +} + +// Journal log collection (includes both system logs and Docker containers) +loki.source.journal "journal" { + forward_to = [loki.process.journal.receiver] + relabel_rules = loki.relabel.journal.rules + labels = { + job = "journal", + } +} + +loki.relabel "journal" { + forward_to = [] + + // Systemd unit (e.g., ssh.service, docker.service) + rule { + source_labels = ["__journal__systemd_unit"] + target_label = "unit" + } + + // Container name for Docker containers + rule { + source_labels = ["__journal_container_name"] + target_label = "container" + } + + // Syslog priority (0=emerg, 1=alert, 2=crit, 3=err, 4=warn, 5=notice, 6=info, 7=debug) + rule { + source_labels = ["__journal_priority"] + target_label = "priority" + } + + // Syslog identifier (program name) + rule { + source_labels = ["__journal_syslog_identifier"] + target_label = "syslog_identifier" + } + + // Tag tuwunel container with its own job label + rule { + source_labels = ["__journal_container_name"] + regex = "tuwunel" + target_label = "job" + replacement = "tuwunel" + } +} + +loki.process "journal" { + forward_to = [loki.write.logs.receiver] + + // Extract log level from common formats: level=info, "level":"info",
[INFO], etc. + stage.regex { + expression = "(?i)(level=|\"level\":\\s*\"|\\[)(?P<extracted_level>debug|info|warn|warning|error|err|fatal|panic|critical|crit|notice)(\\]|\"|\\s|$)" + } + + // Map extracted level to numeric priority for consistent filtering + stage.template { + source = "level" + template = "{% raw %}{{ if .extracted_level }}{{ .extracted_level }}{{ else }}{{ .priority }}{{ end }}{% endraw %}" + } + + stage.labels { + values = { + level = "", + } + } +} + +loki.write "logs" { + endpoint { + url = "http://loki:3100/loki/api/v1/push" // container port (http_listen_port in loki-config.yaml); the configurable loki port is only the host-side mapping + } +} + +// Fail2ban log file collection (ban/unban details go to file, not journald) +local.file_match "fail2ban" { + path_targets = [{"__path__" = "/host/root/var/log/fail2ban.log"}] +} + +loki.source.file "fail2ban" { + targets = local.file_match.fail2ban.targets + forward_to = [loki.process.fail2ban.receiver] +} + +loki.process "fail2ban" { + forward_to = [loki.write.logs.receiver] + + stage.static_labels { + values = { + job = "fail2ban", + unit = "fail2ban.service", + } + } + + stage.regex { + expression = "(?i)\\s(?P<extracted_level>notice|warning|error|info)\\s" + } + + stage.labels { + values = { + level = "extracted_level", + } + } +} + +// Rspamd log file collection (logs to file inside mailserver, not stdout) +local.file_match "rspamd" { + path_targets = [{"__path__" = "/host/root/srv/mail/maillogs/rspamd.log"}] +} + +loki.source.file "rspamd" { + targets = local.file_match.rspamd.targets + forward_to = [loki.process.rspamd.receiver] +} + +loki.process "rspamd" { + forward_to = [loki.write.logs.receiver] + + stage.static_labels { + values = { + container = "mailserver", + job = "rspamd", + } + } +} diff --git a/roles/monitoring/templates/dashboards.yml.j2 b/roles/monitoring/templates/dashboards.yml.j2 new file mode 100644 index 0000000..9141a45 --- /dev/null +++ b/roles/monitoring/templates/dashboards.yml.j2 @@ -0,0 +1,13 @@ +apiVersion: 1 + +providers: + - name: 'Default' + orgId: 1 + folder: '' + type: file + disableDeletion:
false + updateIntervalSeconds: 10 + allowUiUpdates: true + options: + path: /etc/grafana/provisioning/dashboards + foldersFromFilesStructure: true diff --git a/roles/monitoring/templates/datasources.yml.j2 b/roles/monitoring/templates/datasources.yml.j2 new file mode 100644 index 0000000..732ff0e --- /dev/null +++ b/roles/monitoring/templates/datasources.yml.j2 @@ -0,0 +1,17 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 # container port; the configurable prometheus port is only the host-side mapping + isDefault: true + editable: false + jsonData: + timeInterval: 15s + + - name: Loki + type: loki + access: proxy + url: http://loki:3100 # container port; the configurable loki port is only the host-side mapping + editable: false diff --git a/roles/monitoring/templates/loki-config.yaml.j2 b/roles/monitoring/templates/loki-config.yaml.j2 new file mode 100644 index 0000000..a0456e2 --- /dev/null +++ b/roles/monitoring/templates/loki-config.yaml.j2 @@ -0,0 +1,43 @@ +auth_enabled: false + +server: + http_listen_port: 3100 + grpc_listen_port: 9096 + +common: + path_prefix: /loki + storage: + filesystem: + chunks_directory: /loki/chunks + rules_directory: /loki/rules + replication_factor: 1 + ring: + instance_addr: 127.0.0.1 + kvstore: + store: inmemory + +query_range: + results_cache: + cache: + embedded_cache: + enabled: true + max_size_mb: 100 + +schema_config: + configs: + - from: 2020-10-24 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +limits_config: + retention_period: {{ loki_retention_days | default(prometheus_retention_days) }}d + reject_old_samples: true + reject_old_samples_max_age: 168h + +compactor: + working_directory: /loki/compactor + compaction_interval: 10m diff --git a/roles/monitoring/templates/prometheus.yml.j2 b/roles/monitoring/templates/prometheus.yml.j2 new file mode 100644 index 0000000..941c653 --- /dev/null +++ b/roles/monitoring/templates/prometheus.yml.j2 @@ -0,0 +1,20 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s +
+scrape_configs: + - job_name: 'prometheus' + static_configs: + - targets: ['localhost:9090'] # self-scrape runs inside the container, where Prometheus listens on 9090 + + - job_name: 'alloy' + static_configs: + - targets: ['alloy:{{ alloy_port }}'] + + - job_name: 'caddy' + static_configs: + - targets: ['caddy:{{ caddy_metrics_port }}'] + + - job_name: 'forgejo' + static_configs: + - targets: ['forgejo:{{ forgejo_port }}'] diff --git a/roles/nebula/defaults/main.yml b/roles/nebula/defaults/main.yml new file mode 100644 index 0000000..c974278 --- /dev/null +++ b/roles/nebula/defaults/main.yml @@ -0,0 +1,3 @@ +--- +# UDP port for nebula tunnel traffic +nebula_port: 4242 diff --git a/roles/nebula/handlers/main.yml b/roles/nebula/handlers/main.yml new file mode 100644 index 0000000..d4edd09 --- /dev/null +++ b/roles/nebula/handlers/main.yml @@ -0,0 +1,4 @@ +- name: Restart nebula + service: + name: nebula + state: restarted diff --git a/roles/nebula/tasks/main.yml b/roles/nebula/tasks/main.yml new file mode 100644 index 0000000..cf7f27f --- /dev/null +++ b/roles/nebula/tasks/main.yml @@ -0,0 +1,69 @@ +- name: Allow Nebula UDP traffic + ufw: + rule: allow + port: "{{ nebula_port }}" + proto: udp + +- name: Download Nebula release + unarchive: + src: "https://github.com/slackhq/nebula/releases/download/v{{ nebula_version }}/nebula-linux-amd64.tar.gz" + dest: /usr/local/bin/ + remote_src: true + creates: /usr/local/bin/nebula + include: + - nebula + - nebula-cert + +- name: Create Nebula config directory + file: + path: /etc/nebula + state: directory + owner: root + group: root + mode: "0700" + +- name: Generate Nebula CA + command: > + nebula-cert ca + -name "linderhof" + -out-crt /etc/nebula/ca.crt + -out-key /etc/nebula/ca.key + args: + creates: /etc/nebula/ca.key + +- name: Generate host certificate + command: > + nebula-cert sign + -ca-crt /etc/nebula/ca.crt + -ca-key /etc/nebula/ca.key + -name "lighthouse" + -ip "{{ nebula_lighthouse_ip }}/{{ nebula_subnet.split('/')[1] }}" + -out-crt /etc/nebula/host.crt + -out-key
/etc/nebula/host.key + args: + creates: /etc/nebula/host.key + +- name: Deploy Nebula config + template: + src: config.yml.j2 + dest: /etc/nebula/config.yml + owner: root + group: root + mode: "0600" + notify: Restart nebula + +- name: Deploy Nebula systemd unit + template: + src: nebula.service.j2 + dest: /etc/systemd/system/nebula.service + owner: root + group: root + mode: "0644" + notify: Restart nebula + +- name: Enable and start Nebula + systemd: + name: nebula + enabled: true + state: started + daemon_reload: true diff --git a/roles/nebula/templates/config.yml.j2 b/roles/nebula/templates/config.yml.j2 new file mode 100644 index 0000000..78c9da6 --- /dev/null +++ b/roles/nebula/templates/config.yml.j2 @@ -0,0 +1,40 @@ +pki: + ca: /etc/nebula/ca.crt + cert: /etc/nebula/host.crt + key: /etc/nebula/host.key + +static_host_map: {} + +lighthouse: + am_lighthouse: true + interval: 60 + +listen: + host: 0.0.0.0 + port: {{ nebula_port }} + +punchy: + punch: true + +tun: + dev: nebula1 + drop_local_broadcast: false + drop_multicast: false + +logging: + level: info + format: text + +firewall: + conntrack: + tcp_timeout: 12m + udp_timeout: 3m + default_timeout: 10m + outbound: + - port: any + proto: any + host: any + inbound: + - port: any + proto: any + host: any diff --git a/roles/nebula/templates/nebula.service.j2 b/roles/nebula/templates/nebula.service.j2 new file mode 100644 index 0000000..a094a65 --- /dev/null +++ b/roles/nebula/templates/nebula.service.j2 @@ -0,0 +1,13 @@ +[Unit] +Description=Nebula Overlay Network +Wants=network-online.target +After=network-online.target + +[Service] +Type=simple +ExecStart=/usr/local/bin/nebula -config /etc/nebula/config.yml +Restart=on-failure +RestartSec=5 + +[Install] +WantedBy=multi-user.target diff --git a/roles/provision/defaults/main.yml b/roles/provision/defaults/main.yml new file mode 100644 index 0000000..48ff453 --- /dev/null +++ b/roles/provision/defaults/main.yml @@ -0,0 +1,4 @@ +--- +hcloud_server_type: cx22 
+hcloud_image: ubuntu-24.04 +hcloud_location: fsn1 diff --git a/roles/provision/tasks/hetzner.yml b/roles/provision/tasks/hetzner.yml new file mode 100644 index 0000000..52f1dd9 --- /dev/null +++ b/roles/provision/tasks/hetzner.yml @@ -0,0 +1,40 @@ +--- +- name: Register SSH key with Hetzner + hetzner.hcloud.ssh_key: + name: "{{ admin_user }}" + public_key: "{{ admin_ssh_key }}" + api_token: "{{ hcloud_token }}" + state: present + +- name: Create server + hetzner.hcloud.server: + name: "{{ server_name }}" + server_type: "{{ hcloud_server_type }}" + image: "{{ hcloud_image }}" + location: "{{ hcloud_location }}" + ssh_keys: + - "{{ admin_user }}" + api_token: "{{ hcloud_token }}" + state: present + register: server_result + +- name: Set server IP fact + ansible.builtin.set_fact: + server_ip: "{{ server_result.hcloud_server.ipv4_address }}" + +- name: Wait for SSH to become available + ansible.builtin.wait_for: + host: "{{ server_ip }}" + port: 22 + timeout: 300 + +- name: Update inventory with new IP + ansible.builtin.lineinfile: + path: "{{ inventory_dir }}/hosts.yml" + regexp: '^\s+ansible_host:' + line: " ansible_host: {{ server_ip }}" + delegate_to: localhost + +- name: Print server IP + ansible.builtin.debug: + msg: "Server '{{ server_name }}' provisioned at {{ server_ip }}" diff --git a/roles/provision/tasks/main.yml b/roles/provision/tasks/main.yml new file mode 100644 index 0000000..6728d3a --- /dev/null +++ b/roles/provision/tasks/main.yml @@ -0,0 +1,3 @@ +--- +- name: Include provider tasks + ansible.builtin.include_tasks: "{{ cloud_provider }}.yml" diff --git a/roles/radicale/defaults/main.yml b/roles/radicale/defaults/main.yml new file mode 100644 index 0000000..e31655a --- /dev/null +++ b/roles/radicale/defaults/main.yml @@ -0,0 +1,2 @@ +radicale_version: "latest" +radicale_port: 5232 diff --git a/roles/radicale/handlers/main.yml b/roles/radicale/handlers/main.yml new file mode 100644 index 0000000..d6588be --- /dev/null +++ 
b/roles/radicale/handlers/main.yml @@ -0,0 +1,7 @@ +--- +- name: restart radicale + community.docker.docker_compose_v2: + project_src: /srv/radicale + state: present + recreate: always + build: never diff --git a/roles/radicale/tasks/main.yml b/roles/radicale/tasks/main.yml new file mode 100644 index 0000000..6aac3c1 --- /dev/null +++ b/roles/radicale/tasks/main.yml @@ -0,0 +1,81 @@ +--- +- name: Create radicale directories + file: + path: "{{ item }}" + state: directory + mode: '0755' + loop: + - /srv/radicale + - /srv/radicale/data + - /srv/radicale/config + +- name: Create radicale configuration + template: + src: config.j2 + dest: /srv/radicale/config/config + mode: '0644' + notify: restart radicale + +- name: Check for cached radicale hash + ansible.builtin.stat: + path: /srv/radicale/config/.radicale_hash + register: _radicale_hash_stat + +- name: Read radicale hash from cache + ansible.builtin.slurp: + src: /srv/radicale/config/.radicale_hash + register: _radicale_hash_file + when: _radicale_hash_stat.stat.exists + +- name: Set radicale hash fact from cache + ansible.builtin.set_fact: + _radicale_hash: "{{ _radicale_hash_file.content | b64decode | trim }}" + when: _radicale_hash_stat.stat.exists + +- name: Generate radicale password hash + command: + argv: + - docker + - run + - --rm + - caddy:2 + - caddy + - hash-password + - --plaintext + - "{{ radicale_password }}" + register: _radicale_hash_result + changed_when: false + no_log: true + when: not _radicale_hash_stat.stat.exists + +- name: Cache radicale hash + ansible.builtin.copy: + content: "{{ _radicale_hash_result.stdout }}" + dest: /srv/radicale/config/.radicale_hash + mode: "0600" + when: not _radicale_hash_stat.stat.exists + +- name: Set radicale hash fact from generation + ansible.builtin.set_fact: + _radicale_hash: "{{ _radicale_hash_result.stdout }}" + when: not _radicale_hash_stat.stat.exists + +- name: Create radicale users file + copy: + content: "{{ admin_user }}:{{ _radicale_hash }}" + 
dest: /srv/radicale/config/users + mode: '0644' + notify: restart radicale + +- name: Create compose file + template: + src: compose.yml.j2 + dest: /srv/radicale/compose.yml + mode: '0644' + notify: restart radicale + +- name: Deploy radicale + community.docker.docker_compose_v2: + project_src: /srv/radicale + state: present + build: never diff --git a/roles/radicale/templates/compose.yml.j2 b/roles/radicale/templates/compose.yml.j2 new file mode 100644 index 0000000..fbb0c58 --- /dev/null +++ b/roles/radicale/templates/compose.yml.j2 @@ -0,0 +1,17 @@ +services: + radicale: + image: tomsquest/docker-radicale:{{ radicale_version }} + container_name: radicale + restart: unless-stopped + volumes: + - /srv/radicale/data:/data + - /srv/radicale/config:/config:ro + networks: + - radicale + - monitoring + +networks: + radicale: + external: true + monitoring: + external: true diff --git a/roles/radicale/templates/config.j2 b/roles/radicale/templates/config.j2 new file mode 100644 index 0000000..872f323 --- /dev/null +++ b/roles/radicale/templates/config.j2 @@ -0,0 +1,10 @@ +[server] +hosts = 0.0.0.0:{{ radicale_port }} + +[auth] +type = htpasswd +htpasswd_filename = /config/users +htpasswd_encryption = bcrypt + +[storage] +filesystem_folder = /data/collections diff --git a/roles/restic/defaults/main.yml b/roles/restic/defaults/main.yml new file mode 100644 index 0000000..0f92b5e --- /dev/null +++ b/roles/restic/defaults/main.yml @@ -0,0 +1,29 @@ +restic_backend_type: "sftp" +restic_password: "" +# restic_repo: set explicitly when restic_backend_type is not 'sftp' + +restic_backup_paths: >- + {{ + ['/etc/letsencrypt', '/srv/caddy'] + + (['/etc/nebula'] if (enable_nebula | default(false)) else []) + + (['/srv/forgejo'] if (enable_forgejo | default(false)) else []) + + (['/srv/goaccess'] if (enable_goaccess | default(false)) else []) + + (['/srv/mail'] if (enable_mail | default(false)) else []) + + (['/srv/monitoring'] if (enable_monitoring | default(false)) else []) + + 
(['/srv/tuwunel'] if (enable_tuwunel | default(false)) else []) + + (['/srv/radicale'] if (enable_radicale | default(false)) else []) + + (['/srv/diun'] if (enable_diun | default(false)) else []) + }} + +restic_exclude_patterns: + - "**/tmp" + - "**/cache" + - "**/*.gz" + +restic_backup_time: "02:00:00" +restic_prune_time: "04:00:00" + +restic_retention: + daily: 7 + weekly: 4 + monthly: 6 diff --git a/roles/restic/files/restic_backup b/roles/restic/files/restic_backup new file mode 120000 index 0000000..3859134 --- /dev/null +++ b/roles/restic/files/restic_backup @@ -0,0 +1 @@ +/home/matthias/.ssh/island_restic_backup \ No newline at end of file diff --git a/roles/restic/handlers/main.yml b/roles/restic/handlers/main.yml new file mode 100644 index 0000000..7f4d6d1 --- /dev/null +++ b/roles/restic/handlers/main.yml @@ -0,0 +1,4 @@ +- name: Reload systemd + systemd: + daemon_reload: true + diff --git a/roles/restic/tasks/backend.yml b/roles/restic/tasks/backend.yml new file mode 100644 index 0000000..d392f9f --- /dev/null +++ b/roles/restic/tasks/backend.yml @@ -0,0 +1,19 @@ +--- +- name: Ensure restic local repo directory exists + file: + path: "{{ restic_repo }}" + state: directory + owner: root + group: root + mode: "0700" + when: restic_repo is defined and restic_repo.startswith('/') # only local path + +- name: Ensure restic repo is initialized # script must stay POSIX sh: the shell module runs /bin/sh, not bash + ansible.builtin.shell: | + set -eu + . /etc/restic/restic.env + restic snapshots > /dev/null 2>&1 || restic init + touch /etc/restic/.initialized + args: + creates: /etc/restic/.initialized + diff --git a/roles/restic/tasks/backend_sftp.yml b/roles/restic/tasks/backend_sftp.yml new file mode 100644 index 0000000..9cfac45 --- /dev/null +++ b/roles/restic/tasks/backend_sftp.yml @@ -0,0 +1,30 @@ +--- +- name: Deploy Restic SSH key + ansible.builtin.copy: + src: restic_backup # local path in your playbook repo + dest: "{{ restic_ssh_key }}" # e.g.
/root/.ssh/restic_backup + owner: root + group: root + mode: '0600' + +- name: Ensure restic repo directory exists on Storage Box # accept-new trusts the host key on first contact but refuses a changed key + ansible.builtin.shell: | + ssh -i {{ restic_ssh_key }} -o BatchMode=yes -o StrictHostKeyChecking=accept-new -p {{ restic_ssh_port }} {{ restic_user }}@{{ restic_host }} \ + "mkdir -p {{ restic_remote_path }} && chmod 700 {{ restic_remote_path }}" < /dev/null + changed_when: false + +- name: Write the ssh config for the root user + # TODO: this replaces roots config and should be much smarter, safe for me currently + template: + src: restic-ssh-config.j2 + dest: /root/.ssh/config + mode: "0644" + +- name: Initialize restic repo on Storage Box (if needed) # script must stay POSIX sh: the shell module runs /bin/sh, not bash + ansible.builtin.shell: | + . /etc/restic/restic.env + restic snapshots > /dev/null 2>&1 || restic init + touch /etc/restic/.initialized + args: + creates: /etc/restic/.initialized + diff --git a/roles/restic/tasks/backup.yml b/roles/restic/tasks/backup.yml new file mode 100644 index 0000000..a110402 --- /dev/null +++ b/roles/restic/tasks/backup.yml @@ -0,0 +1,34 @@ +- name: Install restic backup service + template: + src: restic-backup.service.j2 + dest: /etc/systemd/system/restic-backup.service + +- name: Install restic backup timer + template: + src: restic-backup.timer.j2 + dest: /etc/systemd/system/restic-backup.timer + +- name: Enable and start restic backup timer + systemd: + name: restic-backup.timer + enabled: true + state: started + daemon_reload: true + +- name: Install restic prune service + template: + src: restic-prune.service.j2 + dest: /etc/systemd/system/restic-prune.service + +- name: Install restic prune timer + template: + src: restic-prune.timer.j2 + dest: /etc/systemd/system/restic-prune.timer + +- name: Enable and start restic prune timer + systemd: + name: restic-prune.timer + enabled: true + state: started + daemon_reload: true + diff --git a/roles/restic/tasks/config.yml b/roles/restic/tasks/config.yml new file mode 100644 index 0000000..a15dd35 --- /dev/null +++
b/roles/restic/tasks/config.yml @@ -0,0 +1,24 @@ +- name: Create restic config directory + file: + path: /etc/restic + state: directory + mode: "0700" + +- name: Write restic environment file + template: + src: restic.env.j2 + dest: /etc/restic/restic.env + mode: "0600" + +- name: Write restic backup script + template: + src: restic-backup.sh.j2 + dest: /usr/local/bin/restic-backup + mode: "0750" + +- name: Write restic prune script + template: + src: restic-prune.sh.j2 + dest: /usr/local/bin/restic-prune + mode: "0750" + diff --git a/roles/restic/tasks/install.yml b/roles/restic/tasks/install.yml new file mode 100644 index 0000000..2cf7be0 --- /dev/null +++ b/roles/restic/tasks/install.yml @@ -0,0 +1,6 @@ +- name: Install restic + apt: + name: restic + state: present + update_cache: true + diff --git a/roles/restic/tasks/main.yml b/roles/restic/tasks/main.yml new file mode 100644 index 0000000..c18abe3 --- /dev/null +++ b/roles/restic/tasks/main.yml @@ -0,0 +1,15 @@ +--- +- name: Install restic binary + include_tasks: install.yml + +- name: Configure restic environment + include_tasks: config.yml + +- name: Prepare backup repository + include_tasks: "{{ backend_file }}" + vars: + backend_file: "{{ 'backend_sftp.yml' if restic_backend_type == 'sftp' else 'backend.yml' }}" + +- name: Create systemd backup timer and service + include_tasks: backup.yml + diff --git a/roles/restic/templates/restic-backup.service.j2 b/roles/restic/templates/restic-backup.service.j2 new file mode 100644 index 0000000..d68a3d8 --- /dev/null +++ b/roles/restic/templates/restic-backup.service.j2 @@ -0,0 +1,9 @@ +[Unit] +Description=Restic Backup +Wants=network-online.target +After=network-online.target + +[Service] +Type=oneshot +ExecStart=/usr/local/bin/restic-backup + diff --git a/roles/restic/templates/restic-backup.sh.j2 b/roles/restic/templates/restic-backup.sh.j2 new file mode 100644 index 0000000..194d137 --- /dev/null +++ b/roles/restic/templates/restic-backup.sh.j2 @@ -0,0 +1,59 @@ 
+#!/usr/bin/env bash +set -euo pipefail + +source /etc/restic/restic.env + +# Metrics file for node_exporter +METRICS_DIR="/var/lib/node_exporter/textfile_collector" +METRICS_FILE="${METRICS_DIR}/restic_backup.prom" +mkdir -p "${METRICS_DIR}" + +# Temporary file for atomic writes +TEMP_FILE=$(mktemp) + +# Start backup +START_TIME=$(date +%s) + +if restic backup \ +{% for path in restic_backup_paths %} + {{ path }} \ +{% endfor %} +{% for pattern in restic_exclude_patterns %} + --exclude '{{ pattern }}' \ +{% endfor %} + --host {{ ansible_facts["hostname"] }}; then + + # Backup succeeded + STATUS=1 + echo "# HELP restic_backup_success Whether the last backup succeeded (1=success, 0=failure)" > "${TEMP_FILE}" + echo "# TYPE restic_backup_success gauge" >> "${TEMP_FILE}" + echo "restic_backup_success ${STATUS}" >> "${TEMP_FILE}" + + echo "# HELP restic_backup_timestamp_seconds Timestamp of last backup completion" >> "${TEMP_FILE}" + echo "# TYPE restic_backup_timestamp_seconds gauge" >> "${TEMP_FILE}" + echo "restic_backup_timestamp_seconds $(date +%s)" >> "${TEMP_FILE}" + + echo "# HELP restic_backup_duration_seconds Duration of last backup in seconds" >> "${TEMP_FILE}" + echo "# TYPE restic_backup_duration_seconds gauge" >> "${TEMP_FILE}" + echo "restic_backup_duration_seconds $(($(date +%s) - START_TIME))" >> "${TEMP_FILE}" + + # Move temp file to final location atomically + mv "${TEMP_FILE}" "${METRICS_FILE}" + + exit 0 +else + # Backup failed + STATUS=0 + echo "# HELP restic_backup_success Whether the last backup succeeded (1=success, 0=failure)" > "${TEMP_FILE}" + echo "# TYPE restic_backup_success gauge" >> "${TEMP_FILE}" + echo "restic_backup_success ${STATUS}" >> "${TEMP_FILE}" + + echo "# HELP restic_backup_timestamp_seconds Timestamp of last backup attempt" >> "${TEMP_FILE}" + echo "# TYPE restic_backup_timestamp_seconds gauge" >> "${TEMP_FILE}" + echo "restic_backup_timestamp_seconds $(date +%s)" >> "${TEMP_FILE}" + + # Move temp file to final location 
atomically + mv "${TEMP_FILE}" "${METRICS_FILE}" + + exit 1 +fi diff --git a/roles/restic/templates/restic-backup.timer.j2 b/roles/restic/templates/restic-backup.timer.j2 new file mode 100644 index 0000000..88ce120 --- /dev/null +++ b/roles/restic/templates/restic-backup.timer.j2 @@ -0,0 +1,10 @@ +[Unit] +Description=Daily Restic Backup + +[Timer] +OnCalendar=*-*-* {{ restic_backup_time }} +Persistent=true + +[Install] +WantedBy=timers.target + diff --git a/roles/restic/templates/restic-prune.service.j2 b/roles/restic/templates/restic-prune.service.j2 new file mode 100644 index 0000000..f6a1bff --- /dev/null +++ b/roles/restic/templates/restic-prune.service.j2 @@ -0,0 +1,9 @@ +[Unit] +Description=Restic Prune +After=network-online.target +Wants=network-online.target + +[Service] +Type=oneshot +ExecStart=/usr/local/bin/restic-prune + diff --git a/roles/restic/templates/restic-prune.sh.j2 b/roles/restic/templates/restic-prune.sh.j2 new file mode 100644 index 0000000..61d0b56 --- /dev/null +++ b/roles/restic/templates/restic-prune.sh.j2 @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +set -euo pipefail + +source /etc/restic/restic.env + +# Metrics file for node_exporter +METRICS_DIR="/var/lib/node_exporter/textfile_collector" +METRICS_FILE="${METRICS_DIR}/restic_prune.prom" +mkdir -p "${METRICS_DIR}" + +# Temporary file for atomic writes +TEMP_FILE=$(mktemp) + +# Start prune +START_TIME=$(date +%s) + +if restic forget \ + --keep-daily {{ restic_retention.daily }} \ + --keep-weekly {{ restic_retention.weekly }} \ + --keep-monthly {{ restic_retention.monthly }} \ + --prune; then + + # Prune succeeded + STATUS=1 + echo "# HELP restic_prune_success Whether the last prune succeeded (1=success, 0=failure)" > "${TEMP_FILE}" + echo "# TYPE restic_prune_success gauge" >> "${TEMP_FILE}" + echo "restic_prune_success ${STATUS}" >> "${TEMP_FILE}" + + echo "# HELP restic_prune_timestamp_seconds Timestamp of last prune completion" >> "${TEMP_FILE}" + echo "# TYPE 
restic_prune_timestamp_seconds gauge" >> "${TEMP_FILE}" + echo "restic_prune_timestamp_seconds $(date +%s)" >> "${TEMP_FILE}" + + echo "# HELP restic_prune_duration_seconds Duration of last prune in seconds" >> "${TEMP_FILE}" + echo "# TYPE restic_prune_duration_seconds gauge" >> "${TEMP_FILE}" + echo "restic_prune_duration_seconds $(($(date +%s) - START_TIME))" >> "${TEMP_FILE}" + + # Move temp file to final location atomically + mv "${TEMP_FILE}" "${METRICS_FILE}" + + exit 0 +else + # Prune failed + STATUS=0 + echo "# HELP restic_prune_success Whether the last prune succeeded (1=success, 0=failure)" > "${TEMP_FILE}" + echo "# TYPE restic_prune_success gauge" >> "${TEMP_FILE}" + echo "restic_prune_success ${STATUS}" >> "${TEMP_FILE}" + + echo "# HELP restic_prune_timestamp_seconds Timestamp of last prune attempt" >> "${TEMP_FILE}" + echo "# TYPE restic_prune_timestamp_seconds gauge" >> "${TEMP_FILE}" + echo "restic_prune_timestamp_seconds $(date +%s)" >> "${TEMP_FILE}" + + # Move temp file to final location atomically + mv "${TEMP_FILE}" "${METRICS_FILE}" + + exit 1 +fi diff --git a/roles/restic/templates/restic-prune.timer.j2 b/roles/restic/templates/restic-prune.timer.j2 new file mode 100644 index 0000000..dea73a6 --- /dev/null +++ b/roles/restic/templates/restic-prune.timer.j2 @@ -0,0 +1,10 @@ +[Unit] +Description=Daily Restic Prune + +[Timer] +OnCalendar=*-*-* {{ restic_prune_time }} +Persistent=true + +[Install] +WantedBy=timers.target + diff --git a/roles/restic/templates/restic-ssh-config.j2 b/roles/restic/templates/restic-ssh-config.j2 new file mode 100644 index 0000000..3b764f9 --- /dev/null +++ b/roles/restic/templates/restic-ssh-config.j2 @@ -0,0 +1,4 @@ +Host {{ restic_host }} + IdentityFile {{ restic_ssh_key }} + User {{ restic_user }} + Port {{ restic_ssh_port }} diff --git a/roles/restic/templates/restic.env.j2 b/roles/restic/templates/restic.env.j2 new file mode 100644 index 0000000..ad06716 --- /dev/null +++ b/roles/restic/templates/restic.env.j2 
@@ -0,0 +1,9 @@
+# restic environment in shell syntax; every assignment is exported.
+# The password is passed through the quote filter so that $, ", \ or spaces in it stay literal.
+{% if restic_backend_type == 'sftp' %}
+export RESTIC_REPOSITORY="sftp:{{ restic_user }}@{{ restic_host }}:{{ restic_remote_path }}"
+{% else %}
+export RESTIC_REPOSITORY="{{ restic_repo }}"
+{% endif %}
+export RESTIC_PASSWORD={{ restic_password | quote }}
+export RESTIC_CACHE_DIR=/var/cache/restic
diff --git a/roles/tuwunel/defaults/main.yml b/roles/tuwunel/defaults/main.yml
new file mode 100644
index 0000000..91a404f
--- /dev/null
+++ b/roles/tuwunel/defaults/main.yml
@@ -0,0 +1,7 @@
+---
+# Port (internal to docker network)
+tuwunel_port: 6167
+
+# Trusted Matrix servers for federation
+tuwunel_trusted_servers:
+  - matrix.org
diff --git a/roles/tuwunel/handlers/main.yml b/roles/tuwunel/handlers/main.yml
new file mode 100644
index 0000000..d6730c8
--- /dev/null
+++ b/roles/tuwunel/handlers/main.yml
@@ -0,0 +1,6 @@
+---
+- name: Restart tuwunel
+  community.docker.docker_compose_v2:
+    project_src: /srv/tuwunel
+    state: restarted
+    build: never
diff --git a/roles/tuwunel/tasks/main.yml b/roles/tuwunel/tasks/main.yml
new file mode 100644
index 0000000..19831fa
--- /dev/null
+++ b/roles/tuwunel/tasks/main.yml
@@ -0,0 +1,32 @@
+---
+- name: Create Tuwunel directories
+  ansible.builtin.file:
+    path: "{{ item }}"
+    state: directory
+    mode: '0755'
+  loop:
+    - /srv/tuwunel
+    - /srv/tuwunel/data
+
+# NOTE(review): tuwunel.toml contains the registration token but is deployed world-readable.
+# Tighten the mode once the user the container runs as has been confirmed.
+- name: Deploy Tuwunel configuration
+  ansible.builtin.template:
+    src: tuwunel.toml.j2
+    dest: /srv/tuwunel/tuwunel.toml
+    mode: '0644'
+  notify: Restart tuwunel
+
+- name: Deploy Tuwunel docker-compose file
+  ansible.builtin.template:
+    src: compose.yml.j2
+    dest: /srv/tuwunel/compose.yml
+    mode: '0644'
+  notify: Restart tuwunel
+
+- name: Start Tuwunel service
+  community.docker.docker_compose_v2:
+    project_src: /srv/tuwunel
+    state: present
+    build: never
+  register: tuwunel_output
diff --git a/roles/tuwunel/templates/compose.yml.j2 b/roles/tuwunel/templates/compose.yml.j2
new file mode 100644
index 0000000..27a4aa7
--- /dev/null
+++
b/roles/tuwunel/templates/compose.yml.j2
@@ -0,0 +1,16 @@
+services:
+  tuwunel:
+    image: ghcr.io/matrix-construct/tuwunel:{{ tuwunel_version }}
+    container_name: tuwunel
+    restart: unless-stopped
+    environment:
+      TUWUNEL_CONFIG: /etc/tuwunel.toml
+    volumes:
+      - /srv/tuwunel/data:/var/lib/tuwunel
+      - /srv/tuwunel/tuwunel.toml:/etc/tuwunel.toml:ro
+    networks:
+      - tuwunel
+
+networks:
+  tuwunel:
+    external: true
diff --git a/roles/tuwunel/templates/tuwunel.toml.j2 b/roles/tuwunel/templates/tuwunel.toml.j2
new file mode 100644
index 0000000..8438f24
--- /dev/null
+++ b/roles/tuwunel/templates/tuwunel.toml.j2
@@ -0,0 +1,10 @@
+[global]
+server_name = {{ tuwunel_server_name | to_json }}
+database_path = "/var/lib/tuwunel"
+port = {{ tuwunel_port }}
+address = "0.0.0.0"
+allow_registration = true
+registration_token = {{ tuwunel_registration_token | to_json }}
+allow_federation = true
+trusted_servers = {{ tuwunel_trusted_servers | to_json }}
+max_request_size = 20000000
diff --git a/setup.sh b/setup.sh
new file mode 100755
index 0000000..c24d106
--- /dev/null
+++ b/setup.sh
@@ -0,0 +1,243 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+TEMPLATES="$SCRIPT_DIR/inventory/group_vars/all"
+
+# ── helpers ──────────────────────────────────────────────────
+info() { printf '\033[1;34m::\033[0m %s\n' "$*"; }
+ok() { printf '\033[1;32m::\033[0m %s\n' "$*"; }
+warn() { printf '\033[1;33m::\033[0m %s\n' "$*"; }
+die() { printf '\033[1;31merror:\033[0m %s\n' "$*" >&2; exit 1; }
+
+prompt() {  # prompt VAR MSG DEFAULT: read one line into the variable named VAR; empty input yields DEFAULT
+    local var="$1" msg="$2" default="$3" input
+    printf '%s [%s]: ' "$msg" "$default"
+    read -r input
+    printf -v "$var" '%s' "${input:-$default}"  # no eval: DEFAULT can be user-typed text (e.g. the stack name)
+}
+
+prompt_secret() {  # prompt_secret VAR MSG: like prompt, but without echoing the input and without a default
+    local var="$1" msg="$2" input
+    printf '%s: ' "$msg"
+    read -rs input
+    echo  # read -s leaves the cursor on the prompt line
+    printf -v "$var" '%s' "$input"
+}
+
+# ── 1. check prerequisites ──────────────────────────────────
+info "checking prerequisites..."
+missing=()
+for cmd in ansible ansible-galaxy ssh-keygen openssl envsubst; do
+    command -v "$cmd" &>/dev/null || missing+=("$cmd")
+done
+if (( ${#missing[@]} )); then
+    die "missing required commands: ${missing[*]}"
+fi
+ok "all prerequisites found"
+
+# ── 2. install ansible collections ──────────────────────────
+info "installing ansible collections..."
+ansible-galaxy collection install -r "$SCRIPT_DIR/requirements.yml"
+ok "collections installed"
+
+# ── 3. stack name ────────────────────────────────────────────
+echo
+info "stack setup"
+prompt stack_name "Stack name" "home"
+
+LINDERHOF_CONFIG_DIR="${XDG_CONFIG_HOME:-$HOME/.config}/linderhof"
+STACK_DIR="$LINDERHOF_CONFIG_DIR/$stack_name"
+GROUP_VARS="$STACK_DIR/group_vars/all"
+CONFIG="$GROUP_VARS/config.yml"
+VAULT="$GROUP_VARS/vault.yml"
+HOSTS="$STACK_DIR/hosts.yml"
+STACK_ENV="$STACK_DIR/stack.env"
+VAULT_PASS_FILE="$STACK_DIR/vault-pass"
+
+info "stack directory: $STACK_DIR"
+# Files and directories created from here on are private to the current user.
+# That covers the vault password file and vault.yml, which is written in
+# plaintext before it is encrypted.
+umask 077
+mkdir -p "$GROUP_VARS"
+
+# ── 4. SSH key ───────────────────────────────────────────────
+prompt ssh_key_path "SSH key path" "$HOME/.ssh/id_ed25519"
+# Typed input is not tilde-expanded by the shell, so resolve a leading "~/" by hand.
+if [[ "$ssh_key_path" == "~/"* ]]; then
+    ssh_key_path="$HOME/${ssh_key_path:2}"
+fi
+
+if [[ -f "$ssh_key_path" ]]; then
+    ok "SSH key already exists at $ssh_key_path"
+else
+    printf 'No key at %s. Generate one? [Y/n]: ' "$ssh_key_path"
+    read -r yn
+    # Glob match instead of ${yn,,}: that expansion needs bash >= 4 and aborts on bash 3.2.
+    # Any answer starting with n/N declines; everything else (including empty) accepts.
+    if [[ "$yn" != [Nn]* ]]; then
+        ssh-keygen -t ed25519 -f "$ssh_key_path"
+        ok "SSH key generated"
+    else
+        warn "skipping SSH key generation"
+    fi
+fi
+
+# ── 5. vault password ───────────────────────────────────────
+if [[ -f "$VAULT_PASS_FILE" ]]; then
+    ok "vault password file already exists at $VAULT_PASS_FILE"
+else
+    info "generating vault password..."
+    openssl rand -base64 32 > "$VAULT_PASS_FILE"
+    # Already owner-only because of the umask above; kept as an explicit safeguard.
+    chmod 600 "$VAULT_PASS_FILE"
+    ok "vault password saved to $VAULT_PASS_FILE"
+fi
+
+export ANSIBLE_VAULT_PASSWORD_FILE="$VAULT_PASS_FILE"
+
+# ── 6.
server settings ───────────────────────────────────────
+echo
+info "configure your server"
+prompt admin_user "Admin username" "$USER"
+prompt server_name "Server hostname" "$stack_name"
+prompt server_ip "Server IP (or TBD)" "0.0.0.0"
+prompt domain "Domain" "example.com"
+prompt_secret hcloud_token "Hetzner API token (leave blank to skip)"
+
+# The token is optional; only remind the user when it was left blank.
+[[ -n "$hcloud_token" ]] || warn "no Hetzner token provided — add it to vault.yml manually if needed"
+
+# Exported so the envsubst calls further down can see the answers.
+export admin_user server_name server_ip domain hcloud_token
+export ssh_key_pub="${ssh_key_path}.pub"
+
+echo
+info "using domain: $domain"
+for service_host in "mail: mail" "forgejo: code" "grafana: watch" "tuwunel: chat" "webmail: webmail" "rspamd: rspamd"; do
+    info " ${service_host}.$domain"
+done
+
+# ── 7. generate secrets ─────────────────────────────────────
+info "generating secrets..."
+base64_secrets=(
+    admin_mail_password notifications_mail_password git_mail_password
+    grafana_admin_password rspamd_web_password goaccess_password rainloop_admin_password
+    tuwunel_registration_token restic_password
+)
+hex_secrets=(forgejo_secret_key forgejo_internal_token forgejo_jwt_secret)
+export "${base64_secrets[@]}" "${hex_secrets[@]}"
+
+# Plain assignments from command substitution keep "set -e" effective if openssl fails.
+for secret_name in "${base64_secrets[@]}"; do
+    secret_value=$(openssl rand -base64 32)
+    printf -v "$secret_name" '%s' "$secret_value"
+done
+for secret_name in "${hex_secrets[@]}"; do
+    secret_value=$(openssl rand -hex 32)
+    printf -v "$secret_name" '%s' "$secret_value"
+done
+ok "secrets generated"
+
+# ── 8. write hosts.yml ───────────────────────────────────────
+if [[ -f "$HOSTS" ]]; then
+    warn "hosts.yml already exists — skipping (not overwriting)"
+else
+    info "writing hosts.yml..."
+ cat > "$HOSTS" < "$STACK_ENV" < "$CONFIG" + ok "config.yml created" +fi + +# ── 10b. write dns.yml ──────────────────────────────────────── +DNS_CONFIG="$GROUP_VARS/dns.yml" +if [[ -f "$DNS_CONFIG" ]]; then + warn "dns.yml already exists — skipping (not overwriting)" +else + info "writing dns.yml..." + envsubst '$domain $server_ip $server_name' \ + < "$TEMPLATES/dns.yml.setup" > "$DNS_CONFIG" + ok "dns.yml created (uncomment DKIM records after first mail deployment)" +fi + +# ── 11. write vault.yml ─────────────────────────────────────── +if [[ -f "$VAULT" ]]; then + warn "vault.yml already exists — skipping (not overwriting)" +else + info "writing vault.yml..." + envsubst < "$TEMPLATES/vault.yml.setup" > "$VAULT" + ansible-vault encrypt "$VAULT" + ok "vault.yml created and encrypted" +fi + +# ── 12. write .stack file ───────────────────────────────────── +if [[ -f "$SCRIPT_DIR/.stack" ]]; then + warn ".stack already exists — skipping (not overwriting)" +else + printf '%s\n' "$stack_name" > "$SCRIPT_DIR/.stack" + ok ".stack file written ($stack_name)" +fi + +# ── 13. summary ─────────────────────────────────────────────── +echo +echo "============================================================" +ok "linderhof setup complete! (stack: $stack_name)" +echo "============================================================" +echo +echo " stack dir: $STACK_DIR" +echo " vault password: $VAULT_PASS_FILE" +echo " SSH key: $ssh_key_path" +echo " inventory: $HOSTS" +echo " config: $CONFIG" +echo " dns zones: $DNS_CONFIG" +echo " vault: $VAULT" +echo +echo "to override any variable (e.g. mail_hostname during migration):" +echo " vi $GROUP_VARS/overrides.yml" +echo +echo "activate the stack (if not already done):" +echo " direnv allow # reads .stack file automatically" +echo " # or: export LINDERHOF_STACK=$stack_name" +echo +echo "config.yml is plain text — edit it directly:" +echo " vi $CONFIG" +echo +echo "vault.yml is encrypted. 
to view or edit it:"
+# Remaining summary text, one argument per output line ("" prints a blank line).
+printf '%s\n' \
+    " ansible-vault view $VAULT" \
+    " ansible-vault edit $VAULT" \
+    "" \
+    "Next steps:" \
+    " 1. Review $CONFIG" \
+    " 2. Review $VAULT (ansible-vault edit)" \
+    " 3. Review $DNS_CONFIG" \
+    " 4. Provision a server: ansible-playbook playbooks/provision.yml" \
+    " 5. Update DNS: ansible-playbook playbooks/dns.yml" \
+    " 6. Deploy: ansible-playbook playbooks/site.yml" \
+    " 7. After mail deploys, retrieve DKIM keys and add to vault.yml:" \
+    " docker exec mailserver cat /tmp/docker-mailserver/rspamd/dkim/$domain/mail.pub"